Last active
January 13, 2026 10:33
-
-
Save jirib/e3846e99bf4a3c4d8d63725234edfdc4 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python3 | |
| # /// script | |
| # requires-python = ">3.12" | |
| # dependencies = [ | |
| # "matplotlib", | |
| # "numpy", | |
| # ] | |
| # /// | |
| import argparse | |
| import csv | |
| import sys | |
| from pathlib import Path | |
| from statistics import mean, stdev | |
| import matplotlib.pyplot as plt | |
| from matplotlib.ticker import MaxNLocator | |
# Black-and-white friendly styles, picked by series index (modulo the table
# length). The first three entries are line-only; the last two are
# marker-only, using the string "None" as matplotlib's "draw no line" style.
LINE_STYLES = [
    {"linestyle": linestyle, "marker": marker}
    for linestyle, marker in (
        ("-", None),
        ("--", None),
        (":", None),
        ("None", "x"),
        ("None", "o"),
    )
]
def die(msg):
    """Write ``error: <msg>`` to standard error and exit with status 1.

    Never returns: the process is terminated through ``SystemExit``.
    """
    sys.stderr.write(f"error: {msg}\n")
    raise SystemExit(1)
def zscore(values):
    """Standardise *values* to zero mean and unit sample standard deviation.

    Args:
        values: Sequence of numbers to normalise.

    Returns:
        A new list holding ``(v - mean) / stdev`` for every ``v`` in
        *values*, in the original order.

    Exits through ``die`` (status 1) when the series has fewer than two
    values or when all values are equal, because no z-score is defined in
    either case.
    """
    # statistics.stdev() raises StatisticsError for fewer than two points;
    # report that as a normal input error instead of a traceback.
    if len(values) < 2:
        die("cannot normalise series with fewer than two values")

    mu = mean(values)
    sigma = stdev(values)
    if sigma == 0:
        # A constant series would divide by zero below.
        die("cannot normalise constant series")
    return [(v - mu) / sigma for v in values]
def read_csv_columns(path):
    """Read a CSV file into one X column and one or more numeric Y columns.

    The first row is the header. Its first field names the X column and
    every remaining field names a Y series. Empty rows are skipped.

    Args:
        path: Location of the CSV file.

    Returns:
        ``(x_label, x_values, [(y_label, y_values), ...])`` where
        ``x_values`` are the raw strings from the first column and each
        ``y_values`` is a list of floats.

    Exits through ``die`` (status 1) when the file cannot be read, has
    fewer than two header columns, has a row that is missing a value for
    some column, or contains a non-numeric Y value.
    """
    try:
        with open(path, newline="") as f:
            reader = csv.reader(f)
            header = next(reader, None)
            # Drop empty rows such as blank lines at the end of the file.
            rows = [row for row in reader if row]
    except OSError as exc:
        die(f"{path}: cannot read file ({exc.strerror or exc})")

    if not header or len(header) < 2:
        die(f"{path}: CSV must have at least two columns")

    x_label = header[0]
    y_labels = header[1:]
    # X values are kept as strings; only the Y columns are parsed as numbers.
    x = [row[0] for row in rows]

    ys = []
    for idx, label in enumerate(y_labels, start=1):
        try:
            values = [float(row[idx]) for row in rows]
        except ValueError:
            die(f"{path}: non-numeric value in column '{label}'")
        except IndexError:
            # A row shorter than the header has no field for this column.
            die(f"{path}: missing value in column '{label}'")
        ys.append((label, values))
    return x_label, x, ys
def ensure_png(path):
    """Return *path* as a ``Path``, appending ``.png`` only if it has no suffix.

    A path that already carries any suffix is returned unchanged.
    """
    target = Path(path)
    if target.suffix:
        return target
    return target.with_suffix(".png")
def plot_single(x, y_series, x_label, output, normalised=False):
    """Draw all series on one shared black-and-white axes and save the image.

    Args:
        x: X values shared by every series.
        y_series: Iterable of ``(label, y_values)`` pairs; each label becomes
            a legend entry.
        x_label: Text for the X axis.
        output: Destination passed to ``savefig``.
        normalised: When true, the Y axis is labelled as z-scores instead of
            the generic "Value".
    """
    fig, ax = plt.subplots(figsize=(6.5, 4))
    try:
        for idx, (label, y) in enumerate(y_series):
            # Everything is drawn in black, so series are told apart by line
            # style or marker; wrap around when there are more series than
            # styles.
            style = LINE_STYLES[idx % len(LINE_STYLES)]
            ax.plot(
                x,
                y,
                color="black",
                linewidth=2,
                label=label,
                linestyle=style["linestyle"],
                marker=style["marker"],
                markersize=6,
                markeredgewidth=1.5,
            )

        ax.set_xlabel(x_label)
        ax.set_ylabel("Normalised value (z-score)" if normalised else "Value")
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.legend(frameon=False)
        ax.grid(True, linestyle=":", alpha=0.4)

        fig.tight_layout()
        fig.savefig(output, dpi=300, facecolor="white", bbox_inches="tight")
    finally:
        # Release the figure even when plotting or saving fails.
        plt.close(fig)
def plot_separate(x, series, x_label, output_prefix):
    """Save one black-and-white graph per series.

    Each image is written next to *output_prefix* as
    ``<stem>_<label><suffix>``, where stem and suffix come from
    ``ensure_png(output_prefix)`` and ``<label>`` is the series label with
    path separators replaced by underscores.

    Args:
        x: X values shared by every series.
        series: Iterable of ``(label, y_values)`` pairs; the label is also
            used as the Y-axis title.
        x_label: Text for the X axis.
        output_prefix: Output path or prefix for the generated files.
    """
    base = ensure_png(output_prefix)
    for label, y in series:
        fig, ax = plt.subplots(figsize=(6.5, 4))
        try:
            ax.plot(
                x,
                y,
                color="black",
                linewidth=2,
                marker="o",
                markersize=4,
            )
            ax.set_xlabel(x_label)
            ax.set_ylabel(label)
            ax.xaxis.set_major_locator(MaxNLocator(integer=True))
            ax.grid(True, linestyle=":", alpha=0.4)

            # A separator inside the label (e.g. "km/h") is not a valid file
            # name component and would make with_name() raise.
            safe_label = label.replace("/", "_").replace("\\", "_")
            out = base.with_name(f"{base.stem}_{safe_label}{base.suffix}")

            fig.tight_layout()
            fig.savefig(out, dpi=300, facecolor="white", bbox_inches="tight")
        finally:
            # Release the figure even when plotting or saving fails, so a
            # long run does not accumulate open figures.
            plt.close(fig)
def _build_parser():
    """Create the command-line parser for the graphing script."""
    parser = argparse.ArgumentParser(
        description="Normal and correlation graphing (black & white)"
    )
    parser.add_argument(
        "csv",
        nargs="+",
        help="Input CSV file(s)",
    )
    parser.add_argument(
        "--correlate",
        action="store_true",
        help="Correlate multiple CSV files (normalised overlay)",
    )
    parser.add_argument(
        "--separate",
        action="store_true",
        help="Generate separate graphs per series",
    )
    parser.add_argument(
        "-o", "--output",
        required=True,
        help="Output PNG (or prefix when --separate is used)",
    )
    return parser


def _validate_intent(args):
    """Exit with an error when the flag combination is invalid or ambiguous."""
    if len(args.csv) == 1:
        if args.correlate:
            die("--correlate requires multiple CSV files")
        return

    if args.correlate and args.separate:
        die("--correlate and --separate are mutually exclusive")
    if not args.correlate and not args.separate:
        die(
            "multiple CSV files provided but neither --correlate nor "
            "--separate specified (ambiguous intent)"
        )


def main():
    """Command-line entry point: parse arguments, read the CSVs and plot.

    Modes:
        * one CSV: all value columns on one graph, or one graph per column
          with ``--separate``;
        * several CSVs with ``--separate``: one raw graph per file, named
          ``<output stem>_<csv stem><output suffix>``;
        * several CSVs with ``--correlate``: a single overlay of the
          z-scored value column of each file.

    Every input error ends the process through ``die`` (status 1).
    """
    args = _build_parser().parse_args()
    _validate_intent(args)

    datasets = [read_csv_columns(p) for p in args.csv]

    # All files must share the same X column, both name and values, so their
    # series can be compared on a common axis.
    x_label_ref, x_ref, _ = datasets[0]
    for x_label, x, _ in datasets[1:]:
        if x_label != x_label_ref:
            die(f"X column name mismatch: '{x_label}' vs '{x_label_ref}'")
        if x != x_ref:
            die("X values mismatch between CSV files")

    # -----------------------------
    # CASE A: SINGLE CSV
    # -----------------------------
    if len(args.csv) == 1:
        _, x, y_series = datasets[0]
        if args.separate:
            if len(y_series) == 1:
                die("--separate makes no sense with a single data column")
            plot_separate(x, y_series, x_label_ref, args.output)
        else:
            plot_single(
                x,
                y_series,
                x_label_ref,
                ensure_png(args.output),
                normalised=False,
            )
        return

    # -----------------------------
    # CASE B: MULTIPLE CSV FILES
    # -----------------------------
    if args.separate:
        # One raw graph per CSV. Derive the names from the stem and suffix of
        # the output (as plot_separate does) so that "-o out.png" produces
        # "out_<csv>.png" rather than "out.png_<csv>.png".
        base = ensure_png(args.output)
        for path, (_, x, y_series) in zip(args.csv, datasets):
            out = base.with_name(f"{base.stem}_{Path(path).stem}{base.suffix}")
            plot_single(x, y_series, x_label_ref, out, normalised=False)
        return

    if args.correlate:
        # Overlay of normalised series; each file contributes exactly one
        # series, labelled with the file name without its extension.
        combined = []
        for path, (_, _, y_series) in zip(args.csv, datasets):
            if len(y_series) != 1:
                die(
                    f"{path}: correlation mode requires exactly one value column per CSV"
                )
            _, y = y_series[0]
            combined.append((Path(path).stem, zscore(y)))
        plot_single(
            x_ref,
            combined,
            x_label_ref,
            ensure_png(args.output),
            normalised=True,
        )
        return

    # Not reachable after _validate_intent(); kept as a safety net.
    die("internal error")
# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    sys.exit(main())
Author
jirib
commented
Jan 12, 2026
Author
Install notes:
- Mise
- Linux (any)
curl https://mise.run | sh
- Windows
- (powershell)
Set-ExecutionPolicy RemoteSigned -Scope CurrentUser
- (powershell)
(irm https://astral.sh/uv/install.ps1) -replace '\bexit\b', '#exit removed' | iex
- (powershell)
- Linux (any)
- Python & UV
mise install python@3.13 # or any version that satisfies 'requires-python' above
mise use python@3.13 # or any version that satisfies 'requires-python' above
mise use -g uv@latest
- Running graph.py (under normal user!)
uv run --python python<version> graph.py --help # just to get deps
# the input CSV data
$ cat /tmp/input.csv
x,sin,cos,shifted_sin,x_markers
0.0,0.0000,1.0000,0.4794,0.8776
0.5,0.4794,0.8776,0.8415,0.5403
1.0,0.8415,0.5403,0.9975,0.0707
1.5,0.9975,0.0707,0.9093,-0.4161
2.0,0.9093,-0.4161,0.5985,-0.8011
2.5,0.5985,-0.8011,0.1411,-0.9899
3.0,0.1411,-0.9899,-0.3508,-0.9365
3.5,-0.3508,-0.9365,-0.7568,-0.6536
4.0,-0.7568,-0.6536,-0.9775,-0.2108
4.5,-0.9775,-0.2108,-0.9589,0.2837
5.0,-0.9589,0.2837,-0.7055,0.7087
# running the script
$ uv run graph.py /tmp/input.csv -o /tmp/test.png
$ file /tmp/test.png
/tmp/test.png: PNG image data, 1770 x 1170, 8-bit/color RGBA, non-interlaced
Author
$ grep -H '' {gdp,murders}.csv
gdp.csv:year,gdp
gdp.csv:1,300
gdp.csv:2,330
gdp.csv:3,350
gdp.csv:4,320
gdp.csv:5,390
murders.csv:year,murders
murders.csv:1,10
murders.csv:2,5
murders.csv:3,7
murders.csv:4,2
murders.csv:5,20
$ uv run graph.py --help
usage: graph.py [-h] [--correlate] [--separate] -o OUTPUT csv [csv ...]
Normal and correlation graphing (black & white)
positional arguments:
csv Input CSV file(s)
options:
-h, --help show this help message and exit
--correlate Correlate multiple CSV files (normalised overlay)
--separate Generate separate graphs per series
-o, --output OUTPUT Output PNG (or prefix when --separate is used)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment