Created
May 13, 2025 16:07
-
-
Save AdrianoPereira/d6618812c63079d7c309b6213a435103 to your computer and use it in GitHub Desktop.
Parallel code to load, crop, and save GSMaP data from the global grid to a specific regional bounding box
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import os | |
| import numpy as np | |
| import gzip | |
| from pathlib import Path | |
| from concurrent.futures import ProcessPoolExecutor, as_completed | |
| from tqdm import tqdm | |
# Input directory tree that is scanned and output directory tree that
# receives the cropped files (same relative layout).
input_base_dir = "/storage/hokusai/GSMaP_NOW/now/half_hour/2024"
output_base_dir = "/storage/hokusai/GSMaP_NOW/SA_2024"

# Latitude/longitude extent covered by the full (global) grid.
glat_min, glat_max = -60.0, 60.0
glon_min, glon_max = -180.0, 180.0

# Latitude/longitude extent of the region to extract.
rlat_min, rlat_max = -55.0, 6.0
rlon_min, rlon_max = -83.0, -33.0
def read_data(filepath, shape=(1200, 3600)):
    """Load a gzip-compressed float32 grid and reorient it.

    The decompressed payload is interpreted as a flat sequence of float32
    values, reshaped to ``shape``, rotated by half the number of columns
    along the column axis, and finally reversed along the row axis.

    Args:
        filepath: Path of the gzip-compressed binary file.
        shape: ``(rows, columns)`` of the stored grid. Defaults to
            ``(1200, 3600)``.

    Returns:
        A 2-D float32 array with shape ``shape``.

    Raises:
        ValueError: If the decompressed size does not match ``shape``.
    """
    n_rows, n_cols = shape
    with gzip.open(filepath, mode='rb') as handle:
        raw = handle.read()

    # Fail with a message that names the file instead of the generic reshape
    # error; truncated or foreign files are then easy to spot in the logs.
    expected_bytes = n_rows * n_cols * np.dtype(np.float32).itemsize
    if len(raw) != expected_bytes:
        raise ValueError(
            f"{filepath}: expected {expected_bytes} bytes for a "
            f"{n_rows}x{n_cols} float32 grid, got {len(raw)}"
        )

    data = np.frombuffer(raw, dtype=np.float32).reshape(n_rows, n_cols)
    # Rotate the columns by half a row so the second half of each row comes
    # first, then reverse the row order.
    # NOTE(review): presumably the roll converts a 0..360 longitude layout to
    # -180..180. crop_data() indexes rows assuming row 0 sits at glat_max;
    # confirm that the row flip here leaves the grid in that orientation.
    data = np.roll(data, shift=n_cols // 2, axis=1)[::-1]
    return data
def crop_data(data, lat_min, lat_max, lon_min, lon_max, grid_bounds=None):
    """Extract the sub-grid covering a latitude/longitude box.

    Row 0 of ``data`` is taken to lie at the grid's maximum latitude and
    column 0 at its minimum longitude; the cell size is the grid extent
    divided by the array shape.

    Args:
        data: 2-D array covering the full grid.
        lat_min: Lower latitude bound of the box.
        lat_max: Upper latitude bound of the box.
        lon_min: Lower longitude bound of the box.
        lon_max: Upper longitude bound of the box.
        grid_bounds: Optional ``(lat_min, lat_max, lon_min, lon_max)`` extent
            of ``data``. When omitted, the module-level ``glat_min``,
            ``glat_max``, ``glon_min`` and ``glon_max`` are used.

    Returns:
        The slice ``data[row_start:row_stop, col_start:col_stop]`` (a view of
        ``data``). Bounds that fall outside the grid are clamped to its edges.
    """
    if grid_bounds is None:
        grid_bounds = (glat_min, glat_max, glon_min, glon_max)
    grid_lat_min, grid_lat_max, grid_lon_min, grid_lon_max = grid_bounds

    n_rows, n_cols = data.shape[0], data.shape[1]
    lat_resolution = (grid_lat_max - grid_lat_min) / n_rows
    lon_resolution = (grid_lon_max - grid_lon_min) / n_cols

    def _to_index(offset, resolution, size):
        # The small tolerance absorbs floating-point error in the division
        # (e.g. 0.3 / 0.1 evaluates to 2.9999999999999996, which plain
        # truncation would turn into 2 instead of 3).
        index = int(np.floor(offset / resolution + 1e-9))
        # Clamp so out-of-grid bounds cannot become negative indices, which
        # would silently wrap around when slicing.
        return min(max(index, 0), size)

    row_start = _to_index(grid_lat_max - lat_max, lat_resolution, n_rows)
    row_stop = _to_index(grid_lat_max - lat_min, lat_resolution, n_rows)
    col_start = _to_index(lon_min - grid_lon_min, lon_resolution, n_cols)
    col_stop = _to_index(lon_max - grid_lon_min, lon_resolution, n_cols)

    return data[row_start:row_stop, col_start:col_stop]
def process_file(file_info):
    """Crop one input file and write the result as a gzip-compressed file.

    The output path mirrors the input file's path relative to the input
    directory, placed under the output directory. Missing parent directories
    are created.

    Args:
        file_info: Tuple ``(input_file, input_dir, output_dir)`` of
            ``pathlib.Path`` objects.

    Returns:
        ``(True, input_file)`` on success, or
        ``(False, (input_file, exception))`` if any step failed.
    """
    input_file, input_dir, output_dir = file_info
    try:
        # Path computation and directory creation live inside the try block
        # so that their failures are reported like any other per-file error
        # instead of escaping the worker and aborting the whole run.
        relative_path = input_file.relative_to(input_dir)
        output_file = output_dir / relative_path
        output_file.parent.mkdir(parents=True, exist_ok=True)

        data = read_data(input_file)
        cropped_data = crop_data(data, rlat_min, rlat_max, rlon_min, rlon_max)
        with gzip.open(output_file, 'wb') as f_out:
            f_out.write(cropped_data.astype(np.float32).tobytes())
        return True, input_file
    except Exception as e:
        # Worker boundary: hand the error back to the parent process rather
        # than raising, so one bad file does not stop the batch.
        return False, (input_file, e)
def get_file_list(input_dir):
    """Collect one work item per ``.gz`` file found under ``input_dir``.

    Args:
        input_dir: Directory to walk recursively.

    Returns:
        A list of ``(file_path, input_dir, output_dir)`` tuples of
        ``pathlib.Path`` objects, where ``output_dir`` is built from the
        module-level ``output_base_dir``.
    """
    base_dir = Path(input_dir)
    return [
        (Path(dirpath) / name, base_dir, Path(output_base_dir))
        for dirpath, _, names in os.walk(base_dir)
        for name in names
        if name.endswith('.gz')
    ]
def main():
    """Crop every input file in parallel and report the failures.

    Builds the work list from ``input_base_dir``, submits one task per file
    to a process pool, and prints an error line for each file that could not
    be processed while a progress bar tracks completion.
    """
    files = get_file_list(input_base_dir)
    print(f"Total de arquivos a processar: {len(files)}")
    with ProcessPoolExecutor() as executor:
        # Map each future back to its input path so a failure can be
        # attributed to a file even when the worker returned nothing.
        futures = {executor.submit(process_file, file_info): file_info[0] for file_info in files}
        for future in tqdm(as_completed(futures), total=len(futures), desc="Processando arquivos"):
            try:
                success, result = future.result()
            except Exception as error:
                # The task itself failed (e.g. the worker process died);
                # report it and keep draining the remaining futures.
                print(f"Erro ao processar {futures[future]}: {error}")
                continue
            if not success:
                file_path, error = result
                print(f"Erro ao processar {file_path}: {error}")
| if __name__ == "__main__": | |
| main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment