myociss/load_dat.py

## load_dat.py
import numpy as np
from typing import List, Dict, Tuple
import gzip
import datetime

array_type = np.ndarray[tuple[float], np.dtype[np.float64]]

def load_dat(gz_file_path: str, min_stations: int=7, max_chi_squared: float=1.0, max_altitude: float=20e3) -> Tuple[array_type, datetime.datetime]:

    with gzip.open(gz_file_path, 'rt') as f:
        lines = f.readlines()

    start_time_str = ""
    for l in lines:
        if "Data start time:" in l:
            start_time_str = l.replace("Data start time:", "").strip()

    start_time = datetime.datetime.strptime(start_time_str, "%m/%d/%y %H:%M:%S")
    idx = 1 + lines.index("*** data ***\n")
    lines = lines[idx:]
    data = np.zeros((len(lines), 7))

    for line_idx, l in enumerate(lines):
        splt = l.strip().split()
        for j in range(6):
            data[line_idx,j] = float(splt[j])
        data[line_idx,6] = float(int(splt[6], 0).bit_count())

    # from https://github.com/deeplycloudy/lmatools/blob/8d55e11dfbbe040f58f9a393f83e33e2a4b84b4c/examples/flashsort/clustertests/lma.py#L144
    return data[(data[:,6] >= min_stations) & (data[:,4] <= max_chi_squared) & (data[:,4] < max_altitude)], start_time
	import numpy as np
	from typing import List, Dict, Tuple
	import gzip
	import datetime

	array_type = np.ndarray[tuple[float], np.dtype[np.float64]]

	def load_dat(gz_file_path: str, min_stations: int=7, max_chi_squared: float=1.0, max_altitude: float=20e3) -> Tuple[array_type, datetime.datetime]:

	with gzip.open(gz_file_path, 'rt') as f:
	lines = f.readlines()

	start_time_str = ""
	for l in lines:
	if "Data start time:" in l:
	start_time_str = l.replace("Data start time:", "").strip()

	start_time = datetime.datetime.strptime(start_time_str, "%m/%d/%y %H:%M:%S")
	idx = 1 + lines.index("* data *\n")
	lines = lines[idx:]
	data = np.zeros((len(lines), 7))

	for line_idx, l in enumerate(lines):
	splt = l.strip().split()
	for j in range(6):
	data[line_idx,j] = float(splt[j])
	data[line_idx,6] = float(int(splt[6], 0).bit_count())

	# from https://github.com/deeplycloudy/lmatools/blob/8d55e11dfbbe040f58f9a393f83e33e2a4b84b4c/examples/flashsort/clustertests/lma.py#L144
	return data[(data[:,6] >= min_stations) & (data[:,4] <= max_chi_squared) & (data[:,4] < max_altitude)], start_time
No results found