# statscollect/collect.zpool.iostat.py

import subprocess
import csv
import os
import time
import re
import argparse

# Configuration parameters
MAX_FILE_SIZE = 1 * 1024 * 1024 * 1024  # 1 GiB in bytes; size threshold checked by rotate_file_if_needed
INTERVAL = 5  # Seconds slept after each sample within a collection cycle
COUNT = 1  # Number of samples in each cycle
UPDATE_INTERVAL = 60  # Seconds slept after each CSV update before the next cycle
OUTPUT_FILE = "/root/zpool_iostat.csv"  # CSV file the collected samples are written to


# CSV persistence helpers: update_csv_file, rotate_file_if_needed, load_existing_data
def update_csv_file(output_file, data):
    """
    Writes data to a CSV file and rotates the file if necessary.

    The rows are first written to a temporary sibling file (output_file + ".tmp")
    which then replaces output_file in a single step, so a failure part-way
    through the write leaves the previous contents in place instead of a
    truncated file. The header is the union of the keys of all rows, in
    first-seen order; a key missing from a row is written as an empty field.

    Any exception is reported on stdout and swallowed.

    Parameters:
    output_file (str): Path to the CSV file where data will be written.
    data (list): The rows to be written to the file, as a list of dictionaries.
    """
    temp_file = output_file + ".tmp"
    try:
        print(f"Attempting to write data to {output_file}...")
        # Rows loaded from an existing file and freshly parsed rows are not
        # guaranteed to share the same columns, so build the header from all
        # of them rather than from the first row only.
        fieldnames = list(dict.fromkeys(key for row in data for key in row))

        with open(temp_file, "w", newline="") as csv_file:
            csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames, restval="")
            csv_writer.writeheader()
            csv_writer.writerows(data)

        # Only swap the new file in once it has been written completely.
        os.replace(temp_file, output_file)

        print(f"Data successfully written to {output_file}")
        rotate_file_if_needed(output_file)
    except Exception as e:
        print(f"Error writing to file: {e}")
        # Best-effort cleanup of a half-written temporary file; it is already
        # gone if the failure happened after the replace.
        try:
            os.remove(temp_file)
        except OSError:
            pass


def rotate_file_if_needed(file_path, max_size=None):
    """
    Rotates the file if it exceeds a specified size. The existing file is renamed to '.old'.

    A previous '.old' file is overwritten by the rotation. OS-level errors
    (including a missing file) are reported on stdout and swallowed.

    Parameters:
    file_path (str): Path to the file that should be rotated.
    max_size (int): Size in bytes above which the file is rotated. Defaults to
        MAX_FILE_SIZE when omitted.
    """
    if max_size is None:
        max_size = MAX_FILE_SIZE

    try:
        if os.path.getsize(file_path) <= max_size:
            return

        old_file = file_path + ".old"
        if os.path.exists(old_file):
            print(f"Removing old file: {old_file}")
        print(f"Rotating file: {file_path} to {old_file}")
        # os.replace overwrites an existing destination in one step, so there
        # is never a moment where neither the current nor the '.old' file exists.
        os.replace(file_path, old_file)
    except OSError as e:
        print(f"Error during file rotation: {e}")


def load_existing_data(file_path):
    """
    Loads existing data from a CSV file.

    Every value is returned as a string, exactly as stored in the file. If
    reading fails part-way through, the rows read up to that point are returned.

    Parameters:
    file_path (str): Path to the CSV file.

    Returns:
    list: The rows loaded from the file as dictionaries keyed by the header
        row, or an empty list if the file doesn't exist or cannot be opened.
    """
    data = []
    try:
        # newline="" leaves line-ending handling to the csv module, so quoted
        # fields containing embedded newlines are read back unchanged.
        with open(file_path, "r", newline="") as file:
            print(f"Loading existing data from {file_path}.")
            data.extend(csv.DictReader(file))
    except FileNotFoundError:
        print(f"No existing file found at {file_path}. Starting fresh.")
    except Exception as e:
        print(f"Error reading CSV file: {e}. Starting with empty data.")
    return data


def remove_k(value_with_unit):
    """
    Parses an IOPS figure, dropping a trailing 'K' suffix when there is one.

    NOTE(review): the suffix is only stripped, not scaled, so "1.5K" yields
    1.5 -- confirm this is what consumers of the CSV expect.

    Parameters:
    value_with_unit (str): The IOPS value, optionally ending in 'K'.

    Returns:
    float: The numeric part of the string. A ValueError is raised if that part
        is not a valid number.
    """
    has_suffix = value_with_unit.endswith("K")
    numeric_part = value_with_unit[:-1] if has_suffix else value_with_unit
    return float(numeric_part)


def convert_to_mib(value_with_unit):
    """
    Converts a size string ending in a unit letter (K, M, G, T) to MiB.

    The unit letter is matched case-insensitively and the result is rounded to
    one decimal place.

    Parameters:
    value_with_unit (str): The string containing the value and unit.

    Returns:
    float: The value in MiB. 0.0 is returned for an empty string, a string
        whose last character is not a letter, a non-numeric value, or an
        unrecognized unit (the last case also prints a warning).
    """
    # Nothing to convert without a trailing unit letter.
    if not value_with_unit:
        return 0.0
    suffix = value_with_unit[-1]
    if not suffix.isalpha():
        return 0.0

    try:
        magnitude = float(value_with_unit[:-1])
    except ValueError:
        return 0.0

    # One scaling rule per supported unit, each producing MiB.
    to_mib = {
        "K": lambda amount: amount / 1024.0,
        "M": lambda amount: amount,
        "G": lambda amount: amount * 1024.0,
        "T": lambda amount: amount * 1024.0 * 1024.0,
    }

    unit = suffix.upper()
    scale = to_mib.get(unit)
    if scale is None:
        print(f"Warning: Unrecognized unit '{unit}' in value '{value_with_unit}'.")
        return 0.0
    return round(scale(magnitude), 1)


def convert_time_to_ms(value_with_unit):
    """
    Converts a time value with a unit (ns, us, ms, s) to milliseconds (ms), rounded to three decimal places.
    Returns 0 if the input is '-', indicating no data.

    Surrounding whitespace is ignored and the unit is matched
    case-insensitively. A value without a unit is taken to be in milliseconds.

    Parameters:
    value_with_unit (str): The string containing the value and unit.

    Returns:
    float: The value converted to milliseconds (ms), rounded to three decimal
        places; 0 if the input is '-'; 0.0 (with a warning printed) if the
        input is malformed.
    """
    text = value_with_unit.strip().lower()

    if text == "-":
        # Handle no data case
        return 0

    try:
        # The two-letter units must be tested before the bare "s" suffix,
        # because "ns", "us" and "ms" all end in "s" as well.
        if text.endswith("ns"):
            milliseconds = float(text[:-2]) / 1e6
        elif text.endswith("us"):
            milliseconds = float(text[:-2]) / 1000.0
        elif text.endswith("ms"):
            milliseconds = float(text[:-2])
        elif text.endswith("s"):
            milliseconds = float(text[:-1]) * 1000.0
        else:
            # If no known unit, assume it's in milliseconds
            milliseconds = float(text)
    except ValueError:
        print(f"Warning: Unrecognized or malformed time value '{text}'.")
        return 0.0

    # The same precision for every unit, so sub-millisecond latencies are not
    # flattened to 0.0.
    return round(milliseconds, 3)


def parse_iostat_output(output):
    """
    Parses the output of the `zpool iostat` command to include pool, vdev, disk level data,
    and does not specifically skip the 'logs' line.

    Parsing starts after the header line containing "capacity", "operations"
    and "bandwidth". Separator lines, the "pool alloc ..." column-header line,
    rows with fewer than 7 fields, and rows whose two operations columns are
    not plain numbers (optionally suffixed with 'K') are skipped. Latency
    columns are read from field index 7 onward; a column that is absent from
    the row is recorded as 0.

    Parameters:
    output (str): The output string from the `zpool iostat` command.

    Returns:
    list: A list of dictionaries, each representing an entity (pool, vdev, or disk) and its metrics.
    """
    # Latency columns in the order they are expected, starting at field index 7.
    wait_columns = (
        "total_wait_read_ms",
        "total_wait_write_ms",
        "disk_wait_read_ms",
        "disk_wait_write_ms",
        "syncq_wait_read_ms",
        "syncq_wait_write_ms",
        "asyncq_wait_read_ms",
        "asyncq_wait_write_ms",
        "scrub_wait_ms",
        "trim_wait_ms",
        "rebuild_wait_ms",
    )

    parsed_data = []
    data_started = False
    # The trailing "Z" labels the timestamp as UTC, so format UTC time rather
    # than local time. Computed once so all rows of one report share it.
    timestamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    for line in output.strip().split("\n"):
        if "capacity" in line and "operations" in line and "bandwidth" in line:
            # This line indicates the end of the headers and the start of relevant data.
            data_started = True
            continue
        if not data_started:
            continue

        fields = line.split()

        # Skip separator lines and the column-header line. The header is
        # matched on its first two columns so that a pool or vdev whose name
        # merely starts with "pool" is not discarded.
        if "-----" in line or fields[:2] == ["pool", "alloc"]:
            continue
        if len(fields) < 7:
            continue

        # Only the two operations columns go through remove_k, which raises on
        # anything but a number with an optional 'K'. The bandwidth columns
        # are not checked here: convert_to_mib accepts M/G/T suffixes.
        if any(
            not field.replace(".", "", 1).isdigit() and not field.endswith("K")
            for field in fields[3:5]
        ):
            continue

        try:
            data_entry = {
                "time": timestamp,
                "name": fields[0],
                "alloc": fields[1],
                "free": fields[2],
                "ops_read": remove_k(fields[3]),
                "ops_write": remove_k(fields[4]),
                "bandwidth_read": convert_to_mib(fields[5]),
                "bandwidth_write": convert_to_mib(fields[6]),
            }
            for index, column in enumerate(wait_columns, start=7):
                data_entry[column] = (
                    convert_time_to_ms(fields[index]) if len(fields) > index else 0
                )
            parsed_data.append(data_entry)
        except Exception as e:
            print(f"Error processing line: '{line}' - {e}")

    return parsed_data


def run_zpool_iostat(interval, count, output_file, update_interval, TIMETORUN):
    """
    Main function to run the zpool iostat command at regular intervals, parse its output, and update the CSV file.

    Each cycle loads the existing CSV, collects up to `count` samples, rewrites
    the CSV with the old and new rows, and then sleeps for `update_interval`
    seconds. Collection within a cycle stops early if the command fails.

    Parameters:
    interval (int): Seconds to sleep after each sample within a cycle.
    count (int): Number of samples to collect in each cycle.
    output_file (str): Path to the output CSV file.
    update_interval (int): Interval in seconds between updates to the CSV file.
    TIMETORUN (int): Number of cycles to run. It is decremented once per cycle
        and the loop ends when it reaches 0, so 0 runs nothing and a negative
        value never terminates on its own.
    """
    # Passed as an argument list so the command runs directly, without a shell.
    command = ["zpool", "iostat", "-vvyl", "15", "1"]

    while TIMETORUN != 0:
        all_data = load_existing_data(output_file)

        for sample_number in range(1, count + 1):
            try:
                print("Running zpool iostat command...")
                result = subprocess.run(command, capture_output=True, text=True)

                if result.returncode != 0:
                    print(f"Error running zpool iostat: {result.stderr}")
                    break

                parsed_data = parse_iostat_output(result.stdout)
                if parsed_data:
                    all_data.extend(parsed_data)
                else:
                    print("Error in parsing data. Skipping this sample.")
                print("zpool iostat command executed successfully.")
            except Exception as e:
                # Covers a missing zpool binary as well as unexpected failures
                # while handling the output; the cycle is abandoned either way.
                print(f"Subprocess error occurred: {e}")
                break

            print(f"Completed sample {sample_number}. Waiting for the next sample...")
            time.sleep(interval)

        update_csv_file(output_file, all_data)
        print(
            f"Completed data collection cycle. Waiting for the next update in {update_interval} seconds."
        )
        time.sleep(update_interval)
        TIMETORUN = TIMETORUN - 1


if __name__ == "__main__":
    # Command-line entry point: --minutes is required and is handed to
    # run_zpool_iostat as its TIMETORUN argument.
    cli = argparse.ArgumentParser(description="Collect for X minutes.")
    cli.add_argument(
        "--minutes",
        required=True,
        type=int,
        help="an integer for collected minutes",
    )
    TIMETORUN = cli.parse_args().minutes

    print("Script started.")
    try:
        run_zpool_iostat(
            interval=INTERVAL,
            count=COUNT,
            output_file=OUTPUT_FILE,
            update_interval=UPDATE_INTERVAL,
            TIMETORUN=TIMETORUN,
        )
    except Exception as e:
        # Top-level boundary: report the failure and still print the closing line.
        print(f"An error occurred: {e}")
    print("Script ended.")