Skip to content

Instantly share code, notes, and snippets.

@myociss
Created August 15, 2025 17:30
Show Gist options
  • Select an option

  • Save myociss/ffc537daf95d55ad3d1be7d25b8e00a3 to your computer and use it in GitHub Desktop.

Select an option

Save myociss/ffc537daf95d55ad3d1be7d25b8e00a3 to your computer and use it in GitHub Desktop.
Lightning Flash Clustering: load .dat.gz file
import numpy as np
from typing import List, Dict, Tuple
import gzip
import datetime
array_type = np.ndarray[tuple[float], np.dtype[np.float64]]
def load_dat(gz_file_path: str, min_stations: int=7, max_chi_squared: float=1.0, max_altitude: float=20e3) -> Tuple[array_type, datetime.datetime]:
with gzip.open(gz_file_path, 'rt') as f:
lines = f.readlines()
start_time_str = ""
for l in lines:
if "Data start time:" in l:
start_time_str = l.replace("Data start time:", "").strip()
start_time = datetime.datetime.strptime(start_time_str, "%m/%d/%y %H:%M:%S")
idx = 1 + lines.index("*** data ***\n")
lines = lines[idx:]
data = np.zeros((len(lines), 7))
for line_idx, l in enumerate(lines):
splt = l.strip().split()
for j in range(6):
data[line_idx,j] = float(splt[j])
data[line_idx,6] = float(int(splt[6], 0).bit_count())
# from https://github.com/deeplycloudy/lmatools/blob/8d55e11dfbbe040f58f9a393f83e33e2a4b84b4c/examples/flashsort/clustertests/lma.py#L144
return data[(data[:,6] >= min_stations) & (data[:,4] <= max_chi_squared) & (data[:,4] < max_altitude)], start_time
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment