Last active
February 13, 2026 17:01
-
-
Save dutta-alankar/2888b0b75097170e034cc08eec2349fa to your computer and use it in GitHub Desktop.
Cross-match properties across snapshots of `Gadget 4` cosmological-simulation halos
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python3 | |
| # -*- coding: utf-8 -*- | |
| """ | |
| Created on Sat Jan 31 13:23:48 2026 | |
| @author: alankar. | |
| Usage: time python tag-halos.py | |
| """ | |
| import numpy as np | |
| import h5py | |
| import os | |
| import sys | |
| # import numba | |
class tag_halos:
    """Cross-match FoF group (halo) membership between two Gadget-4 snapshots.

    Scans the multi-file ``fof_subhalo_tab_*`` group catalogs and
    ``snapshot_*`` particle files of one snapshot, and can accumulate a
    mass-weighted particle property of a second (typically earlier)
    snapshot onto this snapshot's halos.  Particles are matched across
    snapshots by their rank in ID-sorted order, which is equivalent to
    matching on ParticleIDs when both snapshots contain the same IDs.

    NOTE(review): for ``snap_num == 0`` (initial conditions) no group
    catalog is read, so group-related attributes and methods are not
    available on such an instance — TODO confirm this matches the
    simulation output layout.
    """

    def __init__(self, directory: str, snap_num: int = 7):
        """Scan headers of the group catalog and snapshot part files.

        Parameters
        ----------
        directory : str
            Simulation output directory containing the ``groups_XXX``
            and ``snapdir_XXX`` subdirectories.
        snap_num : int
            Snapshot number to load; 0 means the initial conditions
            (file name carries an ``ics_`` infix, no group catalog).
        """
        # Gadget-4 particle-type indices (PartType0 = gas/baryons,
        # PartType1 = dark matter)
        self.dark_matter = 1
        self.baryon = 0
        self._total_particle_types = 2
        self.directory = directory
        self.snap_num = snap_num
        self._sort = True
        # one flag per particle type; only ONE type's sorted index is held
        # in memory at a time, so id_sort() resets the other flag on purpose
        self._already_sorted = [False, False]
        self._cache = True
        self._save_cache = True
        self._cache_dir = "./halo_cache"
        self.other_snap = None
        # when False, relate_other_snap() does a "dummy run" that only
        # accumulates group masses and particle counts
        self._load_prop = True
        self.log_freq = 10000
        # Initialization
        print(directory)
        print(f"SnapNum: {snap_num}")
        if snap_num != 0:
            with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.0.hdf5", 'r') as group_data:
                Ngroups_Total = group_data["/Header"].attrs["Ngroups_Total"]
                NumFiles_Group = group_data["/Header"].attrs["NumFiles"]
                Redshift = group_data["/Header"].attrs["Redshift"]
            # store the number of halos in each group part file
            self.num_groups_part = np.zeros(NumFiles_Group, dtype=np.int64)
            self.Redshift = Redshift
            for group_part_file in range(0, NumFiles_Group):
                with h5py.File(f"{directory}/groups_{snap_num:03d}/fof_subhalo_tab_{snap_num:03d}.{group_part_file}.hdf5", 'r') as group_data:
                    Ngroups_ThisFile = group_data["/Header"].attrs["Ngroups_ThisFile"]
                    self.num_groups_part[group_part_file] = Ngroups_ThisFile
            # sanity check: per-file counts must add up to the header total
            # (assert is stripped under ``python -O``)
            assert int(np.sum(self.num_groups_part)) == Ngroups_Total
        extra = "ics_" if snap_num == 0 else ""
        with h5py.File(f"{directory}/snapdir_{snap_num:03d}/snapshot_{extra}{snap_num:03d}.0.hdf5", 'r') as snap_data:
            NumFilesPerSnapshot = snap_data["/Header"].attrs["NumFilesPerSnapshot"]
            NumPart_Total = snap_data["/Header"].attrs["NumPart_Total"]
        print("Particles: ", NumPart_Total[:self._total_particle_types])
        # store the number of particles (per type) in each snapshot part file
        self.num_particle_len = np.zeros((NumFilesPerSnapshot, self._total_particle_types), dtype=np.int64)
        for snap_file in range(0, NumFilesPerSnapshot):
            with h5py.File(f"{directory}/snapdir_{snap_num:03d}/snapshot_{extra}{snap_num:03d}.{snap_file}.hdf5", 'r') as snap_data:
                NumPart_ThisFile = snap_data["/Header"].attrs["NumPart_ThisFile"]
                self.num_particle_len[snap_file, self.baryon] = NumPart_ThisFile[self.baryon]
                self.num_particle_len[snap_file, self.dark_matter] = NumPart_ThisFile[self.dark_matter]
        assert np.sum(np.sum(self.num_particle_len, axis=0) == NumPart_Total[:self._total_particle_types]) == self._total_particle_types
        self.cumulative_particle_count = np.cumsum(self.num_particle_len, axis=0)
        if snap_num != 0:
            # store the particle count of every group
            self._group_sizes()
        print("Intialization complete!")

    def group_index_to_global(self, group_index_local: int, group_part_number: int) -> int:
        """Convert a group index local to one catalog part file into a global index.

        BUGFIX(review): the offset for part file 0 was ``cumsum(...)[-1]``
        (i.e. the TOTAL group count); it must be 0.
        """
        assert group_part_number <= (self.num_groups_part.shape[0] - 1)
        cumulative_halo_count = np.cumsum(self.num_groups_part)
        offset = 0 if group_part_number == 0 else cumulative_halo_count[group_part_number - 1]
        return offset + group_index_local

    def group_index_to_local(self, group_index_global: int) -> int:
        """Convert a global group index into an index local to its catalog part file.

        BUGFIX(review): the containing part file is now located with
        ``searchsorted``; the previous ``sum(cumsum <= idx+1) - 1`` formula
        misassigned boundary indices and used a wrong offset for file 0.
        """
        cumulative_halo_count = np.cumsum(self.num_groups_part)
        group_part_number = np.searchsorted(cumulative_halo_count, group_index_global, side='right')
        offset = 0 if group_part_number == 0 else cumulative_halo_count[group_part_number - 1]
        return group_index_global - offset

    def particle_index_to_global(self, particle_index_local: int, snap_part_number: int, particle_type: int) -> int:
        """Convert a particle index local to one snapshot part file into a global index.

        BUGFIX(review): offset for part file 0 is 0, not ``cumsum(...)[-1]``.
        """
        assert snap_part_number <= (self.num_particle_len.shape[0] - 1)
        cumulative = np.cumsum(self.num_particle_len[:, particle_type])
        offset = 0 if snap_part_number == 0 else cumulative[snap_part_number - 1]
        return offset + particle_index_local

    def particle_index_to_local(self, particle_index_global: int, particle_type: int) -> int:
        """Convert a global particle index into an index local to its snapshot part file.

        BUGFIX(review): part file located with ``searchsorted`` and offset 0
        for file 0 (see group_index_to_local).
        """
        cumulative_particle_count_by_type = self.cumulative_particle_count[:, particle_type]
        snap_part_file = np.searchsorted(cumulative_particle_count_by_type, particle_index_global, side='right')
        offset = 0 if snap_part_file == 0 else cumulative_particle_count_by_type[snap_part_file - 1]
        return particle_index_global - offset

    def id_sort(self, particle_type: int):
        """Build (or load from cache) the argsort of ParticleIDs for one type.

        Populates ``self.ID_indx_sorted`` — a flat array for THIS type only;
        any previously held type's index is discarded and its
        ``_already_sorted`` flag cleared.
        """
        if self._already_sorted[particle_type]:
            # no disk IO/calculation is needed: sorted index already in memory
            return
        # clear memory if present
        self.ID_indx_sorted = None
        extra = "ics_" if self.snap_num == 0 else ""
        ID_sorted_file = f"{self._cache_dir}/argsort-ID_snap{self.snap_num}_PType{particle_type}.npy"
        if not (self._cache and os.path.isfile(ID_sorted_file)):
            # no usable cache: read the raw IDs from every snapshot part file
            IDs = np.zeros(np.sum(self.num_particle_len[:, particle_type]), dtype=np.uint64)
            print("Loading IDs for sorting ...")
            print(f"/PartType{particle_type}")
            for snap_part_file in range(self.num_particle_len.shape[0]):
                print(f"part file: {snap_part_file}", end='\r' if snap_part_file < (self.num_particle_len.shape[0] - 1) else ' ... Done!\n')
                cumulative_particle_count_by_type = self.cumulative_particle_count[:, particle_type]
                start = 0 if snap_part_file == 0 else cumulative_particle_count_by_type[snap_part_file - 1]
                stop = cumulative_particle_count_by_type[snap_part_file] - 1
                if start == (stop + 1):  # empty particle
                    continue
                with h5py.File(f"{self.directory}/snapdir_{self.snap_num:03d}/snapshot_{extra}{self.snap_num:03d}.{snap_part_file}.hdf5", 'r') as snap_data:
                    IDs[start:stop + 1] = np.array(snap_data[f"/PartType{particle_type}/ParticleIDs"])
        if self._cache and os.path.isfile(ID_sorted_file):
            print(f"Loading file {ID_sorted_file}")
            self.ID_indx_sorted = np.load(ID_sorted_file)
        else:
            os.makedirs(self._cache_dir, exist_ok=True)
            if self._sort:
                print(f"Sorting particle type {particle_type} by ID")
                self.ID_indx_sorted = np.argsort(IDs)
                print(f"Sorting completed")
                if self._save_cache:
                    print(f"Saving file {ID_sorted_file}")
                    np.save(ID_sorted_file, self.ID_indx_sorted)
                IDs = None  # clear the memory
            else:
                # dummy identity permutation (no sorting requested)
                self.ID_indx_sorted = np.arange(0, np.sum(self.num_particle_len[:, particle_type]), dtype=np.int64)
        # only one type's index is kept in memory, so invalidate the other
        self._already_sorted = [False, False]
        self._already_sorted[particle_type] = True

    def global_particle_index_to_group_index(self, global_particle_index: int, particle_type: int) -> int:
        """Map a global (ID-unsorted) particle index to the group that owns it.

        Relies on Gadget-4 storing group members contiguously in group order;
        the last bin is the unclassified "fuzz" pseudo-group.
        """
        cumulative_group_particle_len_by_type = self.cumulative_group_particle_len[:, particle_type]
        return np.searchsorted(cumulative_group_particle_len_by_type, global_particle_index, side='right')

    def _group_sizes(self) -> None:
        """Read GroupLenType from every catalog part file.

        Fills ``self.num_group_particle_len`` and its cumulative sum; the
        last row collects all unclassified fuzz particles.

        BUGFIX(review): the h5py.File path used bare ``directory`` /
        ``snap_num`` (module globals that only exist when running this
        file as a script) instead of ``self.directory`` / ``self.snap_num``.
        """
        NumFiles_Group = self.num_groups_part.shape[0]
        # the last halo is actually a collection of all unclassified fuzz particles
        num_all_halos = np.sum(self.num_groups_part) + 1
        cumulative_group_count = np.cumsum(self.num_groups_part)
        self.num_group_particle_len = np.zeros((num_all_halos, self._total_particle_types), dtype=np.int64)
        for group_part_file in range(0, NumFiles_Group):
            start = 0 if group_part_file == 0 else cumulative_group_count[group_part_file - 1]
            stop = cumulative_group_count[group_part_file] - 1
            if start == (stop + 1):  # empty group
                continue
            with h5py.File(f"{self.directory}/groups_{self.snap_num:03d}/fof_subhalo_tab_{self.snap_num:03d}.{group_part_file}.hdf5", 'r') as group_data:
                num_particles_per_group = np.array(group_data["/Group/GroupLenType"])
                self.num_group_particle_len[start:stop + 1, self.baryon] = num_particles_per_group[:, self.baryon]
                self.num_group_particle_len[start:stop + 1, self.dark_matter] = num_particles_per_group[:, self.dark_matter]
        # fuzz particles: whatever is not accounted for by any group
        self.num_group_particle_len[-1, self.baryon] = np.sum(self.num_particle_len[:, self.baryon]) - np.sum(self.num_group_particle_len[:-1, self.baryon])
        self.num_group_particle_len[-1, self.dark_matter] = np.sum(self.num_particle_len[:, self.dark_matter]) - np.sum(self.num_group_particle_len[:-1, self.dark_matter])
        self.cumulative_group_particle_len = np.cumsum(self.num_group_particle_len, axis=0)

    # typically relate to an earlier progenitor snap
    def relate_other_snap(self, snap_num: int, halo_prop: str, particle_type: int) -> None:
        """Accumulate a particle property of another snapshot onto this snap's halos.

        Parameters
        ----------
        snap_num : int
            Snapshot to draw the property from (e.g. 0 = initial conditions).
        halo_prop : str
            Dataset name under ``/PartType{particle_type}``, e.g. "Velocities".
        particle_type : int
            Gadget particle type (0 = baryon, 1 = dark matter).
        """
        self.id_sort(particle_type)
        if self.other_snap is None:
            self.other_snap = tag_halos(self.directory, snap_num)
        # id_sort caches per type, so calling it on a reused other_snap is cheap
        self.other_snap.id_sort(particle_type)
        print(f"Cross-matching to snap {self.snap_num} with snap {snap_num} for particle type {particle_type} for halo prop: {halo_prop}")
        if not (self._load_prop):
            print("Dummy run, no props loaded!")
        extra = "ics_" if snap_num == 0 else ""
        if self._load_prop:
            # peek at the first part file to learn the property's column count
            with h5py.File(f"{self.other_snap.directory}/snapdir_{self.other_snap.snap_num:03d}/snapshot_{extra}{self.other_snap.snap_num:03d}.0.hdf5", 'r') as snap_data:
                prop_data = snap_data[f"/PartType{particle_type}/{halo_prop}"]
                if prop_data.ndim > 1:
                    prop_length = snap_data[f"/PartType{particle_type}/{halo_prop}"].shape[1]
                else:
                    prop_length = 1
            prop_array = np.zeros((np.sum(self.other_snap.num_particle_len[:, particle_type]), prop_length), dtype=np.float64)
        # masses are needed even in a dummy run (group mass accumulation)
        particle_masses = np.zeros(np.sum(self.other_snap.num_particle_len[:, particle_type]), dtype=np.float64)
        if self._load_prop:
            print(f"Loading particle property {halo_prop} for SnapNum: {self.other_snap.snap_num}")
        if not (self._load_prop):
            print("Dummy run, no props loaded!")
        for snap_part_file in range(self.other_snap.num_particle_len.shape[0]):
            cumulative_particle_count_by_type = self.other_snap.cumulative_particle_count[:, particle_type]
            start = 0 if snap_part_file == 0 else cumulative_particle_count_by_type[snap_part_file - 1]
            stop = cumulative_particle_count_by_type[snap_part_file] - 1
            if start == (stop + 1):  # empty particle
                continue
            # BUGFIX(review): the end-of-loop marker compared against
            # self's file count while looping over other_snap's files
            print(f"part file: {snap_part_file}", end='\r' if snap_part_file < (self.other_snap.num_particle_len.shape[0] - 1) else ' ... Done!\n')
            with h5py.File(f"{self.other_snap.directory}/snapdir_{self.other_snap.snap_num:03d}/snapshot_{extra}{self.other_snap.snap_num:03d}.{snap_part_file}.hdf5", 'r') as snap_data:
                if self._load_prop:
                    prop_array[start:stop + 1, :] = np.array(snap_data[f"/PartType{particle_type}/{halo_prop}"])
                particle_masses[start:stop + 1] = np.array(snap_data[f"/PartType{particle_type}/Masses"])
        print("Calling group accumulator")
        if self._load_prop:
            self.calculate_group_props(prop_array, particle_masses, halo_prop, particle_type)
        else:
            self.calculate_group_props(None, particle_masses, halo_prop, particle_type)

    def calculate_group_props(self, prop_array: np.ndarray, particle_masses: np.ndarray, halo_prop: str, particle_type: int) -> None:
        """Accumulate a mass-weighted property per halo and save the results.

        For every particle of this snapshot (walked in ID-sorted order),
        the property of the SAME-RANK particle in the other snapshot's
        ID-sorted order is added to the owning halo, weighted by that
        particle's mass.  Saves the normalized property, group masses and
        group particle counts as ``.npy`` files in the cache directory.

        prop_array may be None (dummy run): only masses/counts are saved.
        """
        halo_prop_file = f"{self._cache_dir}/Group_{halo_prop}_snap{self.other_snap.snap_num}_PType{particle_type}.npy"
        print("Calculating group properties ...")
        # the last halo is actually a collection of all unclassified fuzz particles
        num_all_halos = np.sum(self.num_groups_part) + 1
        if self._load_prop and prop_array is not None:
            prop_length = prop_array.shape[1]
            group_props = np.zeros((num_all_halos, prop_length), dtype=np.float64)
        group_mass = np.zeros(num_all_halos, dtype=np.float64)
        group_particle_count = np.zeros(num_all_halos, dtype=np.int64)
        _newline = True
        # NOTE(review): this per-particle Python loop is the hot spot; it
        # could be vectorized with np.add.at, at the cost of the progress log
        for count, indx in enumerate(self.ID_indx_sorted):
            if (count % self.log_freq) == 0:
                print(f"count = {count+1}/{self.ID_indx_sorted.shape[0]} ({((count+1)/self.ID_indx_sorted.shape[0]*100):.1f}%)",
                      end='\r' if count < (self.ID_indx_sorted.shape[0] - 1) else '\n')
                if count >= (self.ID_indx_sorted.shape[0] - 1):
                    # last progress line already ended with a newline
                    _newline = False
            halo_indx = self.global_particle_index_to_group_index(indx, particle_type)
            particle_indx_other_snap = self.other_snap.ID_indx_sorted[count]
            if self._load_prop and prop_array is not None:
                group_props[halo_indx, :] += (prop_array[particle_indx_other_snap, :] * particle_masses[particle_indx_other_snap])
            group_mass[halo_indx] += particle_masses[particle_indx_other_snap]
            group_particle_count[halo_indx] += 1
        # normalize
        if self._load_prop and prop_array is not None:
            # BUGFIX(review): the original f-string held a backslash inside
            # the expression braces, a SyntaxError on Python < 3.12
            print(("\n" if _newline else "") + "Normalizing group properties ...")
            # mass-weighted mean; a zero-mass group yields nan/inf exactly
            # as the original per-row division did
            group_props = group_props / group_mass[:, None]
            if prop_length == 1:
                group_props = group_props.flatten()
            print(f"Saving file {halo_prop_file}")
            np.save(halo_prop_file, group_props)
        print(f"Saving file {halo_prop_file[:-4]}-mass.npy")
        np.save(f"{halo_prop_file[:-4]}-mass.npy", group_mass)
        print(f"Saving file {halo_prop_file[:-4]}-particle-count.npy")
        np.save(f"{halo_prop_file[:-4]}-particle-count.npy", group_particle_count)
        # free memory
        prop_array = None
        particle_masses = None
        group_particle_count = None
        group_props = None

    def refresh(self) -> None:
        """Drop the cached companion snapshot so the next call reloads it."""
        self.other_snap = None

    # NOTE(review): method name keeps the original's "exixts" misspelling
    # because external callers may rely on it
    def group_prop_exixts(self, other_snap_num: int, halo_prop: str, particle_type: int) -> bool:
        """Return True if the cached group-property file already exists on disk."""
        halo_prop_file = f"{self._cache_dir}/Group_{halo_prop}_snap{other_snap_num}_PType{particle_type}.npy"
        return os.path.isfile(halo_prop_file)

    def group_prop_difference(self, other_snap_num: int, halo_prop: str) -> np.ndarray:
        """Return |dark-matter property - baryon property| per halo.

        Loads both cached per-type group property files; exits the process
        if either is missing.
        """
        halo_prop_file_dm = f"{self._cache_dir}/Group_{halo_prop}_snap{other_snap_num}_PType{self.dark_matter}.npy"
        if not self.group_prop_exixts(other_snap_num, halo_prop, self.dark_matter):
            print(f"{halo_prop_file_dm} does not exist")
            sys.exit(1)
        halo_prop_dm = np.load(halo_prop_file_dm)
        halo_prop_file_baryon = f"{self._cache_dir}/Group_{halo_prop}_snap{other_snap_num}_PType{self.baryon}.npy"
        if not self.group_prop_exixts(other_snap_num, halo_prop, self.baryon):
            print(f"{halo_prop_file_baryon} does not exist")
            sys.exit(1)
        halo_prop_baryon = np.load(halo_prop_file_baryon)
        velocity_rel = np.abs(halo_prop_dm - halo_prop_baryon)
        return velocity_rel

    def debug(self, particle_type: int = None):
        """Print the ID-sorted index array(s) and total per-type particle counts.

        BUGFIX(review): the original called ``id_sort()`` without its
        required particle-type argument and indexed ``ID_indx_sorted`` (a
        flat single-type array) by particle type, so it always crashed.
        Pass ``particle_type`` to inspect one type, or omit it for both.
        """
        types = [self.baryon, self.dark_matter] if particle_type is None else [particle_type]
        for ptype in types:
            self.id_sort(ptype)
            print(self.ID_indx_sorted)
            print(self.ID_indx_sorted.shape)
        print(np.sum(self.num_particle_len, axis=0))
| if __name__ == "__main__": | |
| dark_matter = 1 | |
| baryon = 0 | |
| directory = "/orion/ptmp/adutt/arepo-multi-zoom/gadget-files/gadget4_development/examples/multi-zoom/streaming/parent/output_dumps" | |
| snap_num = 7 | |
| halo_prop = "Velocities" | |
| other_snap_num = 0 | |
| halo = tag_halos(directory, snap_num) | |
| if not halo.group_prop_exixts(other_snap_num, halo_prop, dark_matter): | |
| print("Dark matter: ", dark_matter) | |
| halo.relate_other_snap(other_snap_num, halo_prop, dark_matter) | |
| if not halo.group_prop_exixts(other_snap_num, halo_prop, baryon): | |
| print("Baryon: ", baryon) | |
| halo.relate_other_snap(other_snap_num, halo_prop, baryon) | |
| print("Calculating streaming velocity ... ") | |
| streaming_velocities = halo.group_prop_difference(other_snap_num, halo_prop) | |
| # print(streaming_velocities) | |
| speed = np.sqrt(np.sum(streaming_velocities**2, axis=1)) | |
| # print(speed) | |
| # print(speed.shape) | |
| print(f"Max streaming speed of {np.max(speed[:-1]):.2e} for halo {int(np.argmax(speed[:-1]))}") | |
| sorted_halos_by_streaming = np.argsort(speed[:-1])[::-1] | |
| sorted_streaming_speed = np.sort(speed[:-1])[::-1] | |
| # print(sorted_halos_by_streaming) | |
| # print(sorted_streaming_speed) | |
| streaming_file_name = f"streaming_in_snap{other_snap_num}_halos_from_snap{snap_num}.txt" | |
| print(f"Saving {streaming_file_name}") | |
| np.savetxt(streaming_file_name, | |
| np.c_[sorted_halos_by_streaming, sorted_streaming_speed], | |
| header="haloID streaming_speed (code units)", | |
| fmt="%d %.6e") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment