Skip to content

Instantly share code, notes, and snippets.

@AdrianoPereira
Created May 13, 2025 16:08
Show Gist options
  • Select an option

  • Save AdrianoPereira/e9c087460b26200efec0ef807d6f4b2d to your computer and use it in GitHub Desktop.

Select an option

Save AdrianoPereira/e9c087460b26200efec0ef807d6f4b2d to your computer and use it in GitHub Desktop.
Single-process code to load, crop, and save GSMaP data from the global grid to a specific regional bounding box
import os
import numpy as np
import gzip
from pathlib import Path
# Root directories: where the source .gz files are read from and where the
# cropped copies are written to.
input_base_dir, output_base_dir = (
    "/storage/hokusai/GSMaP_NOW/now/half_hour/2024",
    "/storage/hokusai/GSMaP_NOW/SA_2024",
)

# Global grid coordinates (min, max).
glat_min, glat_max = -60.0, 60.0
glon_min, glon_max = -180.0, 180.0

# Region-of-interest coordinates (min, max).
rlat_min, rlat_max = -55.0, 6.0
rlon_min, rlon_max = -83.0, -33.0
def read_data(filepath, shape=(1200, 3600)):
    """Load one gzip-compressed float32 grid and reorient it.

    The file is decompressed fully, interpreted as a flat array of
    native-endian ``float32`` values and reshaped to ``shape``. The columns
    are then rolled by half the grid width and the row order is reversed.

    NOTE(review): the half-width roll presumably moves the grid from a
    0..360 longitude layout to -180..180 -- confirm against the data format.

    Args:
        filepath: Path to the ``.gz`` file to read.
        shape: ``(rows, columns)`` of the stored grid. Defaults to the
            1200 x 3600 grid this script was written for.

    Returns:
        A 2-D ``float32`` array of the requested shape, columns rolled by
        ``columns // 2`` and rows reversed.

    Raises:
        ValueError: If the decompressed payload does not contain exactly
            ``rows * columns`` float32 values.
    """
    n_rows, n_cols = shape
    with gzip.open(filepath, mode='rb') as handle:
        raw = handle.read()

    values = np.frombuffer(raw, dtype=np.float32)
    if values.size != n_rows * n_cols:
        # Fail with a message that names the file instead of the opaque
        # error reshape() would raise.
        raise ValueError(
            f"{filepath}: expected {n_rows * n_cols} float32 values for a "
            f"{n_rows}x{n_cols} grid, found {values.size}"
        )

    grid = values.reshape(n_rows, n_cols)
    # np.roll returns a new array, so the read-only buffer is not aliased.
    return np.roll(grid, shift=n_cols // 2, axis=1)[::-1]
def _grid_index(offset, resolution, size):
    """Turn a coordinate offset from the grid origin into an index in [0, size]."""
    # The tolerance (one millionth of a cell) absorbs floating-point noise:
    # e.g. (60 - 59.7) / 0.1 evaluates to 2.99999..., which plain truncation
    # would turn into 2 instead of 3.
    index = int(np.floor(offset / resolution + 1e-6))
    # Clamp so that bounds outside the grid cannot yield negative indices,
    # which NumPy slicing would silently interpret as "count from the end".
    return min(max(index, 0), size)


def crop_data(data, lat_min, lat_max, lon_min, lon_max, grid_bounds=None):
    """Cut a latitude/longitude box out of a 2-D grid.

    Row 0 is treated as the grid's maximum latitude and column 0 as its
    minimum longitude; cell size is derived from the grid extent divided by
    the array shape. The requested box is snapped to whole cells and clipped
    to the grid.

    NOTE(review): read_data reverses the row order before returning its
    array -- confirm that the rows reaching this function really run from
    maximum to minimum latitude.

    Args:
        data: 2-D array covering the full grid.
        lat_min: Lower latitude bound of the box.
        lat_max: Upper latitude bound of the box.
        lon_min: Lower longitude bound of the box.
        lon_max: Upper longitude bound of the box.
        grid_bounds: Optional ``(lat_min, lat_max, lon_min, lon_max)`` extent
            of ``data``. Defaults to the module-level ``glat_min``,
            ``glat_max``, ``glon_min`` and ``glon_max``.

    Returns:
        The slice of ``data`` covering the requested box (a view, not a copy).
    """
    if grid_bounds is None:
        grid_bounds = (glat_min, glat_max, glon_min, glon_max)
    grid_lat_min, grid_lat_max, grid_lon_min, grid_lon_max = grid_bounds

    n_rows, n_cols = data.shape[0], data.shape[1]

    # Latitude (vertical dimension): indices grow as latitude decreases.
    lat_resolution = (grid_lat_max - grid_lat_min) / n_rows
    row_start = _grid_index(grid_lat_max - lat_max, lat_resolution, n_rows)
    row_stop = _grid_index(grid_lat_max - lat_min, lat_resolution, n_rows)

    # Longitude (horizontal dimension): indices grow with longitude.
    lon_resolution = (grid_lon_max - grid_lon_min) / n_cols
    col_start = _grid_index(lon_min - grid_lon_min, lon_resolution, n_cols)
    col_stop = _grid_index(lon_max - grid_lon_min, lon_resolution, n_cols)

    return data[row_start:row_stop, col_start:col_stop]
def process_files(input_dir, output_dir):
    """Crop every ``.gz`` file under ``input_dir`` into ``output_dir``.

    The input tree is walked recursively. Each file whose name ends in
    ``.gz`` is read, cropped to the module-level region bounds and written,
    gzip-compressed as float32, to the same relative path below
    ``output_dir``. A failure on one file is printed and does not stop the
    remaining files from being processed.

    Args:
        input_dir: Root directory containing the source files.
        output_dir: Root directory that receives the cropped files.
    """
    input_dir = Path(input_dir)
    output_dir = Path(output_dir)

    for dirpath, _subdirs, filenames in os.walk(input_dir):
        for filename in filenames:
            if not filename.endswith('.gz'):
                continue

            input_file = Path(dirpath, filename)
            output_file = output_dir / input_file.relative_to(input_dir)

            # Create the destination folder if it does not exist yet.
            output_file.parent.mkdir(parents=True, exist_ok=True)

            try:
                region = crop_data(
                    read_data(input_file),
                    rlat_min, rlat_max, rlon_min, rlon_max,
                )
                # Save the cropped grid.
                with gzip.open(output_file, 'wb') as sink:
                    sink.write(region.astype(np.float32).tobytes())
                print(f"Processado: {input_file} -> {output_file}")
            except Exception as exc:
                # Best effort: report the failure and move on to the next file.
                print(f"Erro ao processar {input_file}: {exc}")
# Run: crop the whole input tree into the output tree.
process_files(input_dir=input_base_dir, output_dir=output_base_dir)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment