jirib/graph.py

## graph.py
#!/usr/bin/env python3
# /// script
# requires-python = ">3.12"
# dependencies = [
#     "matplotlib",
#     "numpy",
# ]
# ///

import argparse
import csv
import sys
from pathlib import Path
from statistics import mean, stdev

import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator


# Black-and-white line/marker styles. plot_single() picks one per series by
# index (wrapping around), so series stay distinguishable without colour.
LINE_STYLES = [
    dict(linestyle=line, marker=mark)
    for line, mark in (
        ("-", None),
        ("--", None),
        (":", None),
        ("None", "x"),
        ("None", "o"),
    )
]


def die(msg):
    """Write ``error: <msg>`` to stderr and terminate with exit status 1."""
    sys.stderr.write(f"error: {msg}\n")
    raise SystemExit(1)


def zscore(values):
    """
    Standardise a numeric series to zero mean and unit sample deviation.

    Args:
      values: sequence of numbers; needs at least two values that are not
        all equal.

    Returns:
      list with (v - mean) / stdev for every v, where stdev is the sample
      standard deviation computed by statistics.stdev().

    Exits via die() when the series has fewer than two values or is constant.
    """
    # statistics.stdev() raises StatisticsError below two data points; report
    # that the same way as every other input problem instead of a traceback.
    if len(values) < 2:
        die("cannot normalise series with fewer than two values")

    mu = mean(values)
    sigma = stdev(values)
    if sigma == 0:
        die("cannot normalise constant series")
    return [(v - mu) / sigma for v in values]


def read_csv_columns(path):
    """
    Load a headed CSV file as one X column plus one or more numeric Y columns.

    The first row is the header. Blank rows are skipped. The first column is
    kept as strings; every other column named in the header is converted with
    float(). Cells beyond the header width are ignored.

    Returns:
      x_label, x_values, [(y_label, y_values), ...]

    Exits via die() when the file cannot be read, the header has fewer than
    two columns, a row is shorter than the header, or a Y cell is not numeric.
    """
    try:
        with open(path, newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)
            if not header or len(header) < 2:
                die(f"{path}: CSV must have at least two columns")

            # Keep each row's line number so errors can point at it.
            rows = [(reader.line_num, row) for row in reader if row]
    except OSError as exc:
        die(f"{path}: cannot read file: {exc.strerror or exc}")

    # A short row would otherwise surface as a bare IndexError below.
    width = len(header)
    for line_no, row in rows:
        if len(row) < width:
            die(
                f"{path}: line {line_no}: expected {width} columns, "
                f"got {len(row)}"
            )

    x_label = header[0]
    x = [row[0] for _, row in rows]

    ys = []
    for idx, label in enumerate(header[1:], start=1):
        try:
            ys.append((label, [float(row[idx]) for _, row in rows]))
        except ValueError:
            die(f"{path}: non-numeric value in column '{label}'")

    return x_label, x, ys


def ensure_png(path):
    """
    Return *path* as a Path, appending ".png" only when it has no suffix.

    A path that already carries any suffix is returned unchanged.
    """
    target = Path(path)
    if not target.suffix:
        target = target.with_suffix(".png")
    return target


def plot_single(x, y_series, x_label, output, normalised=False):
    """
    Draw every series of *y_series* on one black-and-white figure and save it.

    x is the shared X data, y_series a list of (label, values) pairs, x_label
    the X-axis caption and output the image path. normalised only selects the
    Y-axis caption.
    """
    fig, ax = plt.subplots(figsize=(6.5, 4))

    n_styles = len(LINE_STYLES)
    for position, (name, values) in enumerate(y_series):
        # Styles wrap around once there are more series than LINE_STYLES.
        look = LINE_STYLES[position % n_styles]
        ax.plot(
            x,
            values,
            color="black",
            linewidth=2,
            label=name,
            markersize=6,
            markeredgewidth=1.5,
            **look,
        )

    if normalised:
        y_caption = "Normalised value (z-score)"
    else:
        y_caption = "Value"

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_caption)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.legend(frameon=False)
    ax.grid(True, linestyle=":", alpha=0.4)

    fig.tight_layout()
    fig.savefig(output, dpi=300, facecolor="white", bbox_inches="tight")
    plt.close(fig)


def plot_separate(x, series, x_label, output_prefix):
    """
    Save one black-and-white figure per series.

    Args:
      x: shared X data.
      series: list of (label, values) pairs; one image is written per pair,
        with the label used as the Y-axis caption.
      x_label: X-axis caption.
      output_prefix: output path; ensure_png() adds ".png" when it has no
        suffix.

    Each image is named "<stem>_<label><suffix>" next to output_prefix. Path
    separators inside a label are replaced by "_" so that a header such as
    "km/h" still yields a valid file name. Series sharing a label are written
    to the same file, so the later one overwrites the earlier one.
    """
    base = ensure_png(output_prefix)

    for label, y in series:
        # Path.with_name() raises ValueError for names containing a separator.
        safe_label = label.replace("/", "_").replace("\\", "_")
        out = base.with_name(f"{base.stem}_{safe_label}{base.suffix}")

        fig, ax = plt.subplots(figsize=(6.5, 4))

        ax.plot(
            x,
            y,
            color="black",
            linewidth=2,
            marker="o",
            markersize=4,
        )

        ax.set_xlabel(x_label)
        ax.set_ylabel(label)
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.grid(True, linestyle=":", alpha=0.4)

        fig.tight_layout()
        fig.savefig(out, dpi=300, facecolor="white", bbox_inches="tight")
        plt.close(fig)


def _build_parser():
    """Create the argparse parser describing the script's command line."""
    parser = argparse.ArgumentParser(
        description="Normal and correlation graphing (black & white)"
    )
    parser.add_argument(
        "csv",
        nargs="+",
        help="Input CSV file(s)",
    )
    parser.add_argument(
        "--correlate",
        action="store_true",
        help="Correlate multiple CSV files (normalised overlay)",
    )
    parser.add_argument(
        "--separate",
        action="store_true",
        help="Generate separate graphs per series",
    )
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="Output PNG (or prefix when --separate is used)",
    )
    return parser


def main(argv=None):
    """
    Parse the command line, load the CSV file(s) and write the requested plots.

    Args:
      argv: argument list to parse; None (the default) lets argparse read
        sys.argv[1:].

    Modes:
      one CSV                  all columns overlaid in one image
      one CSV + --separate     one image per column
      many CSVs + --separate   one image per CSV, "<prefix>_<csv stem>.png"
      many CSVs + --correlate  z-scored overlay, one single-column CSV each

    All CSV files must share the same X column name and X values. Invalid
    flag combinations and bad input terminate the process through die().
    """
    args = _build_parser().parse_args(argv)
    n_files = len(args.csv)

    # -----------------------------
    # VALIDATE INTENT
    # -----------------------------
    if n_files == 1:
        if args.correlate:
            die("--correlate requires multiple CSV files")
    elif args.correlate and args.separate:
        die("--correlate and --separate are mutually exclusive")
    elif not (args.correlate or args.separate):
        die(
            "multiple CSV files provided but neither --correlate nor "
            "--separate specified (ambiguous intent)"
        )

    # -----------------------------
    # READ DATA
    # -----------------------------
    datasets = [read_csv_columns(p) for p in args.csv]

    # Enforce shared X label and values
    x_label_ref, x_ref, _ = datasets[0]

    for x_label, x, _ in datasets[1:]:
        if x_label != x_label_ref:
            die(f"X column name mismatch: '{x_label}' vs '{x_label_ref}'")
        if x != x_ref:
            die("X values mismatch between CSV files")

    # -----------------------------
    # CASE A: SINGLE CSV
    # -----------------------------
    if n_files == 1:
        y_series = datasets[0][2]

        if args.separate:
            if len(y_series) == 1:
                die("--separate makes no sense with a single data column")
            plot_separate(x_ref, y_series, x_label_ref, args.output)
        else:
            plot_single(
                x_ref,
                y_series,
                x_label_ref,
                ensure_png(args.output),
                normalised=False,
            )
        return

    # -----------------------------
    # CASE B: MULTIPLE CSV FILES
    # -----------------------------
    if args.separate:
        # One graph per CSV (raw). Accept "out.png" as a prefix too: drop the
        # extension so the files are named "out_<stem>.png" rather than
        # "out.png_<stem>.png", in line with plot_separate().
        prefix = args.output
        if prefix.lower().endswith(".png"):
            prefix = prefix[:-len(".png")]

        for path, (_, x, y_series) in zip(args.csv, datasets):
            plot_single(
                x,
                y_series,
                x_label_ref,
                Path(f"{prefix}_{Path(path).stem}.png"),
                normalised=False,
            )
        return

    if args.correlate:
        # Correlation overlay (normalised); each series is labelled with the
        # stem of the CSV file it came from.
        combined = []
        for (_, _, y_series), path in zip(datasets, args.csv):
            if len(y_series) != 1:
                die(
                    f"{path}: correlation mode requires exactly one value column per CSV"
                )
            combined.append((Path(path).stem, zscore(y_series[0][1])))

        plot_single(
            x_ref,
            combined,
            x_label_ref,
            ensure_png(args.output),
            normalised=True,
        )
        return

    # Not reachable after the validation above; kept as a safety net.
    die("internal error")


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()
	#!/usr/bin/env python3
	# /// script
	# requires-python = ">3.12"
	# dependencies = [
	# "matplotlib",
	# "numpy",
	# ]
	# ///

	import argparse
	import csv
	import sys
	from pathlib import Path
	from statistics import mean, stdev

	import matplotlib.pyplot as plt
	from matplotlib.ticker import MaxNLocator


	# Black-and-white line/marker styles. plot_single() picks one per series by
	# index (wrapping around), so series stay distinguishable without colour.
	LINE_STYLES = [
	    dict(linestyle=line, marker=mark)
	    for line, mark in (
	        ("-", None),
	        ("--", None),
	        (":", None),
	        ("None", "x"),
	        ("None", "o"),
	    )
	]


	def die(msg):
	    """Write ``error: <msg>`` to stderr and terminate with exit status 1."""
	    print(f"error: {msg}", file=sys.stderr)
	    sys.exit(1)


	def zscore(values):
	    """
	    Standardise a numeric series to zero mean and unit sample deviation.

	    Args:
	      values: sequence of numbers; needs at least two values that are not
	        all equal.

	    Returns:
	      list with (v - mean) / stdev for every v, where stdev is the sample
	      standard deviation computed by statistics.stdev().

	    Exits via die() when the series has fewer than two values or is constant.
	    """
	    # statistics.stdev() raises StatisticsError below two data points; report
	    # that the same way as every other input problem instead of a traceback.
	    if len(values) < 2:
	        die("cannot normalise series with fewer than two values")

	    mu = mean(values)
	    sigma = stdev(values)
	    if sigma == 0:
	        die("cannot normalise constant series")
	    return [(v - mu) / sigma for v in values]


	def read_csv_columns(path):
	    """
	    Load a headed CSV file as one X column plus one or more numeric Y columns.

	    The first row is the header. Blank rows are skipped. The first column is
	    kept as strings; every other column named in the header is converted with
	    float(). Cells beyond the header width are ignored.

	    Returns:
	      x_label, x_values, [(y_label, y_values), ...]

	    Exits via die() when the file cannot be read, the header has fewer than
	    two columns, a row is shorter than the header, or a Y cell is not numeric.
	    """
	    try:
	        with open(path, newline="") as f:
	            reader = csv.reader(f)
	            header = next(reader, None)
	            if not header or len(header) < 2:
	                die(f"{path}: CSV must have at least two columns")

	            # Keep each row's line number so errors can point at it.
	            rows = [(reader.line_num, row) for row in reader if row]
	    except OSError as exc:
	        die(f"{path}: cannot read file: {exc.strerror or exc}")

	    # A short row would otherwise surface as a bare IndexError below.
	    width = len(header)
	    for line_no, row in rows:
	        if len(row) < width:
	            die(
	                f"{path}: line {line_no}: expected {width} columns, "
	                f"got {len(row)}"
	            )

	    x_label = header[0]
	    x = [row[0] for _, row in rows]

	    ys = []
	    for idx, label in enumerate(header[1:], start=1):
	        try:
	            ys.append((label, [float(row[idx]) for _, row in rows]))
	        except ValueError:
	            die(f"{path}: non-numeric value in column '{label}'")

	    return x_label, x, ys


	def ensure_png(path):
	    """
	    Return *path* as a Path, appending ".png" only when it has no suffix.

	    A path that already carries any suffix is returned unchanged.
	    """
	    p = Path(path)
	    return p if p.suffix else p.with_suffix(".png")


	def plot_single(x, y_series, x_label, output, normalised=False):
	    """
	    Draw every series of *y_series* on one black-and-white figure and save it.

	    x is the shared X data, y_series a list of (label, values) pairs, x_label
	    the X-axis caption and output the image path. normalised only selects the
	    Y-axis caption.
	    """
	    fig, ax = plt.subplots(figsize=(6.5, 4))

	    for idx, (label, y) in enumerate(y_series):
	        # Styles wrap around once there are more series than LINE_STYLES.
	        style = LINE_STYLES[idx % len(LINE_STYLES)]
	        ax.plot(
	            x,
	            y,
	            color="black",
	            linewidth=2,
	            label=label,
	            linestyle=style["linestyle"],
	            marker=style["marker"],
	            markersize=6,
	            markeredgewidth=1.5,
	        )

	    ax.set_xlabel(x_label)
	    ax.set_ylabel("Normalised value (z-score)" if normalised else "Value")
	    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
	    ax.legend(frameon=False)
	    ax.grid(True, linestyle=":", alpha=0.4)

	    fig.tight_layout()
	    fig.savefig(output, dpi=300, facecolor="white", bbox_inches="tight")
	    plt.close(fig)


	def plot_separate(x, series, x_label, output_prefix):
	    """
	    Save one black-and-white figure per series.

	    Args:
	      x: shared X data.
	      series: list of (label, values) pairs; one image is written per pair,
	        with the label used as the Y-axis caption.
	      x_label: X-axis caption.
	      output_prefix: output path; ensure_png() adds ".png" when it has no
	        suffix.

	    Each image is named "<stem>_<label><suffix>" next to output_prefix. Path
	    separators inside a label are replaced by "_" so that a header such as
	    "km/h" still yields a valid file name. Series sharing a label are written
	    to the same file, so the later one overwrites the earlier one.
	    """
	    base = ensure_png(output_prefix)

	    for label, y in series:
	        # Path.with_name() raises ValueError for names containing a separator.
	        safe_label = label.replace("/", "_").replace("\\", "_")
	        out = base.with_name(f"{base.stem}_{safe_label}{base.suffix}")

	        fig, ax = plt.subplots(figsize=(6.5, 4))

	        ax.plot(
	            x,
	            y,
	            color="black",
	            linewidth=2,
	            marker="o",
	            markersize=4,
	        )

	        ax.set_xlabel(x_label)
	        ax.set_ylabel(label)
	        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
	        ax.grid(True, linestyle=":", alpha=0.4)

	        fig.tight_layout()
	        fig.savefig(out, dpi=300, facecolor="white", bbox_inches="tight")
	        plt.close(fig)


	def _build_parser():
	    """Create the argparse parser describing the script's command line."""
	    parser = argparse.ArgumentParser(
	        description="Normal and correlation graphing (black & white)"
	    )
	    parser.add_argument(
	        "csv",
	        nargs="+",
	        help="Input CSV file(s)",
	    )
	    parser.add_argument(
	        "--correlate",
	        action="store_true",
	        help="Correlate multiple CSV files (normalised overlay)",
	    )
	    parser.add_argument(
	        "--separate",
	        action="store_true",
	        help="Generate separate graphs per series",
	    )
	    parser.add_argument(
	        "-o", "--output",
	        required=True,
	        help="Output PNG (or prefix when --separate is used)",
	    )
	    return parser


	def main(argv=None):
	    """
	    Parse the command line, load the CSV file(s) and write the requested plots.

	    Args:
	      argv: argument list to parse; None (the default) lets argparse read
	        sys.argv[1:].

	    Modes:
	      one CSV                  all columns overlaid in one image
	      one CSV + --separate     one image per column
	      many CSVs + --separate   one image per CSV, "<prefix>_<csv stem>.png"
	      many CSVs + --correlate  z-scored overlay, one single-column CSV each

	    All CSV files must share the same X column name and X values. Invalid
	    flag combinations and bad input terminate the process through die().
	    """
	    args = _build_parser().parse_args(argv)
	    n_files = len(args.csv)

	    # -----------------------------
	    # VALIDATE INTENT
	    # -----------------------------
	    if n_files == 1:
	        if args.correlate:
	            die("--correlate requires multiple CSV files")
	    elif args.correlate and args.separate:
	        die("--correlate and --separate are mutually exclusive")
	    elif not (args.correlate or args.separate):
	        die(
	            "multiple CSV files provided but neither --correlate nor "
	            "--separate specified (ambiguous intent)"
	        )

	    # -----------------------------
	    # READ DATA
	    # -----------------------------
	    datasets = [read_csv_columns(p) for p in args.csv]

	    # Enforce shared X label and values
	    x_label_ref, x_ref, _ = datasets[0]

	    for x_label, x, _ in datasets[1:]:
	        if x_label != x_label_ref:
	            die(f"X column name mismatch: '{x_label}' vs '{x_label_ref}'")
	        if x != x_ref:
	            die("X values mismatch between CSV files")

	    # -----------------------------
	    # CASE A: SINGLE CSV
	    # -----------------------------
	    if n_files == 1:
	        y_series = datasets[0][2]

	        if args.separate:
	            if len(y_series) == 1:
	                die("--separate makes no sense with a single data column")
	            plot_separate(x_ref, y_series, x_label_ref, args.output)
	        else:
	            plot_single(
	                x_ref,
	                y_series,
	                x_label_ref,
	                ensure_png(args.output),
	                normalised=False,
	            )
	        return

	    # -----------------------------
	    # CASE B: MULTIPLE CSV FILES
	    # -----------------------------
	    if args.separate:
	        # One graph per CSV (raw). Accept "out.png" as a prefix too: drop the
	        # extension so the files are named "out_<stem>.png" rather than
	        # "out.png_<stem>.png", in line with plot_separate().
	        prefix = args.output
	        if prefix.lower().endswith(".png"):
	            prefix = prefix[:-len(".png")]

	        for path, (_, x, y_series) in zip(args.csv, datasets):
	            plot_single(
	                x,
	                y_series,
	                x_label_ref,
	                Path(f"{prefix}_{Path(path).stem}.png"),
	                normalised=False,
	            )
	        return

	    if args.correlate:
	        # Correlation overlay (normalised); each series is labelled with the
	        # stem of the CSV file it came from.
	        combined = []
	        for (_, _, y_series), path in zip(datasets, args.csv):
	            if len(y_series) != 1:
	                die(
	                    f"{path}: correlation mode requires exactly one value column per CSV"
	                )
	            combined.append((Path(path).stem, zscore(y_series[0][1])))

	        plot_single(
	            x_ref,
	            combined,
	            x_label_ref,
	            ensure_png(args.output),
	            normalised=True,
	        )
	        return

	    # Not reachable after the validation above; kept as a safety net.
	    die("internal error")


	# Run the command-line interface only when executed as a script, not on import.
	if __name__ == "__main__":
	    main()
No results found