Skip to content

Instantly share code, notes, and snippets.

@jirib
Last active January 13, 2026 10:33
Show Gist options
  • Select an option

  • Save jirib/e3846e99bf4a3c4d8d63725234edfdc4 to your computer and use it in GitHub Desktop.

Select an option

Save jirib/e3846e99bf4a3c4d8d63725234edfdc4 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# /// script
# requires-python = ">3.12"
# dependencies = [
# "matplotlib",
# "numpy",
# ]
# ///
import argparse
import csv
import sys
from pathlib import Path
from statistics import mean, stdev
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
# Line/marker combinations handed to ax.plot(), one per series and reused
# cyclically by plot_single(). All series are drawn in black, so the pattern
# is the only thing that tells them apart. The string "None" (not the None
# object) is the linestyle value used here for the marker-only entries.
LINE_STYLES = [
    {"linestyle": pattern, "marker": symbol}
    for pattern, symbol in (
        ("-", None),
        ("--", None),
        (":", None),
        ("None", "x"),
        ("None", "o"),
    )
]
def die(msg):
    """Write ``error: <msg>`` to standard error and terminate with exit status 1."""
    print("error:", msg, file=sys.stderr)
    raise SystemExit(1)
def zscore(values):
    """Standardise *values* to zero mean and unit sample standard deviation.

    Args:
        values: Sized, re-iterable collection of numbers (main() passes one
            CSV value column as a list of floats).

    Returns:
        A new list holding ``(v - mean) / stdev`` for every input value, in
        the original order.

    Exits through die() (status 1) when the series cannot be normalised:
    fewer than two values, or all values identical.
    """
    # statistics.stdev() raises StatisticsError for fewer than two data
    # points; report that like any other input problem, not as a traceback.
    if len(values) < 2:
        die("cannot normalise series with fewer than two values")
    mu = mean(values)
    sigma = stdev(values)
    # A constant series has zero spread, which would divide by zero below.
    if sigma == 0:
        die("cannot normalise constant series")
    return [(v - mu) / sigma for v in values]
def read_csv_columns(path):
    """Load a CSV file whose first column is X and whose other columns are Y series.

    The first row is the header. Completely empty rows are skipped. Cells
    beyond the header width are ignored.

    Args:
        path: Location of the CSV file, as accepted by open().

    Returns:
        ``(x_label, x_values, [(y_label, y_values), ...])``. ``x_values`` are
        the raw strings from the first column; every ``y_values`` list holds
        floats.

    Exits through die() (status 1) when the file cannot be opened, the header
    has fewer than two columns, a data row has fewer cells than the header,
    or a value cell is not a valid float.
    """
    try:
        f = open(path, newline="")
    except OSError as exc:
        die(f"{path}: cannot open file ({exc.strerror or exc})")

    with f:
        reader = csv.reader(f)
        header = next(reader, None)
        if not header or len(header) < 2:
            die(f"{path}: CSV must have at least two columns")

        n_cols = len(header)
        rows = []
        for row in reader:
            if not row:
                continue
            # A short row would otherwise surface later as a bare IndexError.
            if len(row) < n_cols:
                die(
                    f"{path}: line {reader.line_num}: expected {n_cols} "
                    f"columns, found {len(row)}"
                )
            rows.append(row)

    x_label, *y_labels = header
    # NOTE(review): X stays textual, so matplotlib will treat it as
    # categorical data rather than numeric positions -- confirm intended.
    x = [row[0] for row in rows]

    ys = []
    for idx, label in enumerate(y_labels, start=1):
        try:
            column = [float(row[idx]) for row in rows]
        except ValueError:
            die(f"{path}: non-numeric value in column '{label}'")
        ys.append((label, column))
    return x_label, x, ys
def ensure_png(path):
    """Return *path* as a Path, appending ``.png`` only when it has no suffix.

    A path that already carries any suffix (not necessarily ``.png``) is
    returned unchanged.
    """
    candidate = Path(path)
    if not candidate.suffix:
        candidate = candidate.with_suffix(".png")
    return candidate
def plot_single(x, y_series, x_label, output, normalised=False):
    """Overlay every series of *y_series* on one black-and-white figure and save it.

    Args:
        x: X values shared by all series.
        y_series: Iterable of ``(label, y_values)`` pairs; each pair becomes
            one black line whose pattern is taken from LINE_STYLES, cycling
            when there are more series than styles.
        x_label: Caption for the X axis.
        output: Destination handed to ``plt.savefig``.
        normalised: When true the Y axis is captioned as a z-score,
            otherwise as a plain value.
    """
    fig, ax = plt.subplots(figsize=(6.5, 4))

    n_styles = len(LINE_STYLES)
    for position, (name, values) in enumerate(y_series):
        # Each LINE_STYLES entry carries exactly the "linestyle" and "marker"
        # keywords, so it can be unpacked straight into the plot call.
        ax.plot(
            x,
            values,
            color="black",
            linewidth=2,
            label=name,
            markersize=6,
            markeredgewidth=1.5,
            **LINE_STYLES[position % n_styles],
        )

    if normalised:
        y_caption = "Normalised value (z-score)"
    else:
        y_caption = "Value"

    ax.set_xlabel(x_label)
    ax.set_ylabel(y_caption)
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.legend(frameon=False)
    ax.grid(True, linestyle=":", alpha=0.4)

    plt.tight_layout()
    plt.savefig(output, dpi=300, facecolor="white", bbox_inches="tight")
    plt.close(fig)
def plot_separate(x, series, x_label, output_prefix):
    """Save one figure per series, named ``<stem>_<label><suffix>``.

    ``<stem>`` and ``<suffix>`` come from *output_prefix* after ensure_png()
    has supplied a ``.png`` suffix when none was given.

    Args:
        x: X values shared by all series.
        series: Iterable of ``(label, y_values)`` pairs; the label is used
            both as the Y-axis caption and in the output file name.
        x_label: Caption for the X axis.
        output_prefix: Output path or prefix the per-series names derive from.
    """
    base = ensure_png(output_prefix)
    for label, y in series:
        fig, ax = plt.subplots(figsize=(6.5, 4))
        ax.plot(
            x,
            y,
            color="black",
            linewidth=2,
            marker="o",
            markersize=4,
        )
        ax.set_xlabel(x_label)
        ax.set_ylabel(label)
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.grid(True, linestyle=":", alpha=0.4)

        # Column labels are free text (e.g. "km/h"); a path separator inside
        # the name makes Path.with_name() raise ValueError, so neutralise it
        # for the file name only. The axis caption keeps the original label.
        safe_label = label.replace("/", "_").replace("\\", "_")
        out = base.with_name(f"{base.stem}_{safe_label}{base.suffix}")

        plt.tight_layout()
        plt.savefig(out, dpi=300, facecolor="white", bbox_inches="tight")
        plt.close(fig)
def main():
    """Parse the command line, load the CSV file(s) and write the graph(s).

    Supported combinations:
      * one CSV, no flag        -> all value columns overlaid in one graph
      * one CSV, --separate     -> one graph per value column
      * many CSVs, --separate   -> one raw graph per CSV file
      * many CSVs, --correlate  -> z-scored overlay, one series per CSV file

    All CSV files must share the same X column name and X values. Invalid
    flag combinations and inconsistent input are reported through die(),
    which terminates the process with exit status 1.
    """
    parser = argparse.ArgumentParser(
        description="Normal and correlation graphing (black & white)"
    )
    parser.add_argument(
        "csv",
        nargs="+",
        help="Input CSV file(s)"
    )
    parser.add_argument(
        "--correlate",
        action="store_true",
        help="Correlate multiple CSV files (normalised overlay)"
    )
    parser.add_argument(
        "--separate",
        action="store_true",
        help="Generate separate graphs per series"
    )
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="Output PNG (or prefix when --separate is used)"
    )
    args = parser.parse_args()
    n_files = len(args.csv)

    # -----------------------------
    # VALIDATE INTENT
    # -----------------------------
    if n_files == 1:
        if args.correlate:
            die("--correlate requires multiple CSV files")
    else:
        if args.correlate and args.separate:
            die("--correlate and --separate are mutually exclusive")
        if not args.correlate and not args.separate:
            die(
                "multiple CSV files provided but neither --correlate nor "
                "--separate specified (ambiguous intent)"
            )

    # -----------------------------
    # READ DATA
    # -----------------------------
    datasets = [read_csv_columns(p) for p in args.csv]

    # Enforce shared X label and values
    x_label_ref, x_ref, _ = datasets[0]
    for x_label, x, _ in datasets[1:]:
        if x_label != x_label_ref:
            die(f"X column name mismatch: '{x_label}' vs '{x_label_ref}'")
        if x != x_ref:
            die("X values mismatch between CSV files")

    # -----------------------------
    # CASE A: SINGLE CSV
    # -----------------------------
    if n_files == 1:
        _, x, y_series = datasets[0]
        if not args.separate:
            plot_single(
                x,
                y_series,
                x_label_ref,
                ensure_png(args.output),
                normalised=False
            )
        elif len(y_series) == 1:
            die("--separate makes no sense with a single data column")
        else:
            plot_separate(x, y_series, x_label_ref, args.output)
        return

    # -----------------------------
    # CASE B: MULTIPLE CSV FILES
    # -----------------------------
    if args.separate:
        # One graph per CSV (raw). Names follow the same
        # "<stem>_<part><suffix>" scheme as plot_separate(), so "-o out.png"
        # yields "out_gdp.png" rather than "out.png_gdp.png".
        base = ensure_png(args.output)
        for path, (_, x, y_series) in zip(args.csv, datasets):
            out = base.with_name(
                f"{base.stem}_{Path(path).stem}{base.suffix}"
            )
            plot_single(
                x,
                y_series,
                x_label_ref,
                out,
                normalised=False
            )
        return

    # Validation above guarantees --correlate here: several files, and
    # exactly one of the two flags set.
    # Correlation overlay (normalised); each series is named after its file.
    combined = []
    for path, (_, _, y_series) in zip(args.csv, datasets):
        if len(y_series) != 1:
            die(
                f"{path}: correlation mode requires exactly one value column per CSV"
            )
        _, y = y_series[0]
        combined.append((Path(path).stem, zscore(y)))
    plot_single(
        x_ref,
        combined,
        x_label_ref,
        ensure_png(args.output),
        normalised=True
    )
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
@jirib
Copy link
Author

jirib commented Jan 12, 2026

test

@jirib
Copy link
Author

jirib commented Jan 12, 2026

Install notes:

  • Mise
    • Linux (any)
      • curl https://mise.run | sh
    • Windows
      • (powershell) Set-ExecutionPolicy RemoteSigned -scope CurrentUser
      • (powershell) (irm https://astral.sh/uv/install.ps1) -replace '\bexit\b', '#exit removed' | iex
  • Python & UV
    • mise install python@3.13 # or any other version that satisfies 'requires-python' above
    • mise use python@3.13 # or any other version that satisfies 'requires-python' above
    • mise use -g uv@latest
  • Running graph.py (under normal user!)
    • uv run --python python<version> graph.py --help # just to get deps
# the input CSV data
$ cat /tmp/input.csv 
x,sin,cos,shifted_sin,x_markers
0.0,0.0000,1.0000,0.4794,0.8776
0.5,0.4794,0.8776,0.8415,0.5403
1.0,0.8415,0.5403,0.9975,0.0707
1.5,0.9975,0.0707,0.9093,-0.4161
2.0,0.9093,-0.4161,0.5985,-0.8011
2.5,0.5985,-0.8011,0.1411,-0.9899
3.0,0.1411,-0.9899,-0.3508,-0.9365
3.5,-0.3508,-0.9365,-0.7568,-0.6536
4.0,-0.7568,-0.6536,-0.9775,-0.2108
4.5,-0.9775,-0.2108,-0.9589,0.2837
5.0,-0.9589,0.2837,-0.7055,0.7087

# running the script
$ uv run graph.py /tmp/input.csv -o /tmp/test.png
$ file /tmp/test.png
/tmp/test.png: PNG image data, 1770 x 1170, 8-bit/color RGBA, non-interlaced

@jirib
Copy link
Author

jirib commented Jan 13, 2026

$ grep -H '' {gdp,murders}.csv
gdp.csv:year,gdp
gdp.csv:1,300
gdp.csv:2,330
gdp.csv:3,350
gdp.csv:4,320
gdp.csv:5,390
murders.csv:year,murders
murders.csv:1,10
murders.csv:2,5
murders.csv:3,7
murders.csv:4,2
murders.csv:5,20

$ uv run graph.py --help
usage: graph.py [-h] [--correlate] [--separate] -o OUTPUT csv [csv ...]

Normal and correlation graphing (black & white)

positional arguments:
  csv                   Input CSV file(s)

options:
  -h, --help            show this help message and exit
  --correlate           Correlate multiple CSV files (normalised overlay)
  --separate            Generate separate graphs per series
  -o, --output OUTPUT   Output PNG (or prefix when --separate is used)
test

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment