Created
October 10, 2025 18:56
-
-
Save matteodefelice/109d6c12e844bac9fcfc5a1027b7aced to your computer and use it in GitHub Desktop.
Example using h3 grid
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import marimo | |
| __generated_with = "0.16.5" | |
| app = marimo.App(width="medium") | |
| @app.cell | |
| def _(mo): | |
| mo.md( | |
| r""" | |
| The idea is to find a way to visualise a raster file in an interesting and efficient way. | |
| With [this function](https://h3geo.org/docs/api/indexing/) you can have a specific index string for each location. Ah [H3 index](https://h3geo.org/docs/core-library/h3Indexing) is a 64-bit integer that can be represented by an hex string. | |
| The library [h3ronpy](https://h3ronpy.readthedocs.io/en/latest/#) provides functions to convert raster to H3 and vice versa. | |
| """ | |
| ) | |
| return | |
| @app.cell | |
| def _(): | |
| import marimo as mo | |
| import h3 | |
| import rasterio | |
| import rioxarray | |
| import xarray as xr | |
| import xarray.tutorial | |
| import numpy as np | |
| import pandas as pd | |
| import geopandas as gpd | |
| from shapely.geometry import Polygon | |
| from h3ronpy.raster import nearest_h3_resolution | |
| from h3ronpy.pandas.raster import raster_to_dataframe | |
| from h3ronpy.pandas.vector import geodataframe_to_cells, cells_dataframe_to_geodataframe | |
| return ( | |
| Polygon, | |
| cells_dataframe_to_geodataframe, | |
| geodataframe_to_cells, | |
| gpd, | |
| h3, | |
| mo, | |
| nearest_h3_resolution, | |
| pd, | |
| raster_to_dataframe, | |
| xarray, | |
| ) | |
| @app.cell | |
| def _(xarray): | |
| # eobs = xr.open_dataset("C:\\Users\\matte\\data\\EOBS\\rr_0.25deg_reg_v17.0.IP.nc") | |
| # eobs | |
| sample_grid = xarray.tutorial.open_dataset("eraint_uvz").isel(month = 0, level = 2).sel(longitude = slice(-20, 40), latitude = slice(50, 20)) | |
| return (sample_grid,) | |
| @app.cell | |
| def _(sample_grid): | |
| sample_grid['u'].plot(vmin = -10, vmax = 10, cmap = 'RdBu') | |
| return | |
| @app.cell | |
| def _(sample_grid): | |
| test = sample_grid['u'] | |
| return (test,) | |
| @app.cell | |
| def _(mo): | |
| mo.md( | |
| r""" | |
| The [documentation](https://h3ronpy.readthedocs.io/en/latest/api/pandas.html#module-h3ronpy.pandas.raster) says: | |
| > Make use the nearest_h3_resolution function to convert to the H3 resolution nearest to the pixel size of the raster. After that the cell resolution can be changed using the change_resolution function and dataframe libraries can be used to perform the desired aggregations. This can be a rather memory-intensive process. | |
| """ | |
| ) | |
| return | |
| @app.cell | |
| def _(nearest_h3_resolution, test): | |
| h3_res = nearest_h3_resolution(test.values.shape, test.rio.transform(), search_mode="smaller_than_pixel") | |
| print(f"Using H3 resolution {h3_res}") | |
| return (h3_res,) | |
| @app.cell | |
| def _(mo): | |
| mo.md( | |
| r""" | |
| `h3ronpy.pandas.raster.raster_to_dataframe(in_raster: ndarray, transform, h3_resolution: int, nodata_value=None, axis_order: str = 'yx', compact: bool = True, geo: bool = False)→ GeoDataFrame | DataFrame` | |
| Convert a raster/array to a pandas DataFrame containing H3 indexes | |
| This function is parallelized and uses the available CPUs by distributing tiles to a thread pool. | |
| The input geometry must be in WGS84. | |
| Parameters: | |
| - `in_raster` (ndarray) – input 2-d array | |
| - `transform` – the affine transformation | |
| - `nodata_value` – the nodata value. For these cells of the array there will be no h3 indexes generated | |
| - `axis_order` (str) – axis order of the 2d array. Either “xy” or “yx” | |
| - `h3_resolution` (int) – target h3 resolution | |
| - `compact` (bool) – Return compacted h3 indexes (see H3 docs). This results in mixed H3 resolutions, but also can reduce the amount of required memory. | |
| - `geo` (bool) – Return a geopandas GeoDataFrame with geometries. increases the memory usage. | |
| Returns: | |
| pandas DataFrame or GeoDataFrame | |
| Return type: | |
| GeoDataFrame | DataFrame | |
| """ | |
| ) | |
| return | |
| @app.cell | |
| def _(h3_res, raster_to_dataframe, test): | |
| h3_df = raster_to_dataframe( | |
| # 1*np.round(test.values/1), | |
| test.values, | |
| test.rio.transform(), | |
| h3_res, | |
| compact=False, | |
| geo=True | |
| ) | |
| h3_df.dropna().plot( | |
| column="value", linewidth=0.2, edgecolor="black", figsize = (12, 8), | |
| legend = True, vmin = -10, vmax = 10, cmap = 'RdBu' | |
| ) | |
| return (h3_df,) | |
| @app.cell | |
| def _(h3_df): | |
| h3_df.dropna().head() | |
| return | |
| @app.cell | |
| def _(mo): | |
| mo.md(r"""Here converting the int index into a string""") | |
| return | |
| @app.cell | |
| def _(h3, h3_df): | |
| h3_df['strid'] = h3_df['cell'].map(h3.int_to_str) | |
| return | |
| @app.cell | |
| def _(Polygon, gpd, h3): | |
| def h3_to_geopandas(h3_cells): | |
| """ | |
| Convert a list of H3 cells to a GeoDataFrame. | |
| Parameters: | |
| ----------- | |
| h3_cells : list | |
| List of H3 cell identifiers (strings) | |
| Returns: | |
| -------- | |
| geopandas.GeoDataFrame | |
| GeoDataFrame with H3 cells as geometries | |
| """ | |
| geometries = [] | |
| cell_ids = [] | |
| for cell in h3_cells: | |
| # Get the boundary coordinates for each H3 cell | |
| boundary = h3.cell_to_boundary(cell) | |
| # Convert to (lon, lat) format for Shapely | |
| # H3 returns (lat, lon), so we need to swap | |
| polygon = Polygon([(lon, lat) for lat, lon in boundary]) | |
| geometries.append(polygon) | |
| cell_ids.append(cell) | |
| # Create GeoDataFrame | |
| gdf = gpd.GeoDataFrame( | |
| {'h3_cell': cell_ids}, | |
| geometry=geometries, | |
| crs='EPSG:4326' # WGS84 coordinate system | |
| ) | |
| return gdf | |
| return | |
| @app.cell(hide_code=True) | |
| def _(mo): | |
| mo.md( | |
| r""" | |
| ## Things to do | |
| 1. Find all the cells in a shapefile https://h3ronpy.readthedocs.io/en/latest/usage/vector.html | |
| 2. Find the cell in a specific coordinate | |
| 3. Aggregate this with another dataset | |
| """ | |
| ) | |
| return | |
| @app.cell(hide_code=True) | |
| def _(mo): | |
| mo.md(r"""# Cells in a shapefile""") | |
| return | |
| @app.cell | |
| def _(gpd): | |
| # GADM provides detailed administrative boundaries | |
| # ADM2 for Spain (level 2 = provinces) | |
| url = "https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_ESP_2.json" | |
| spain_adm2 = gpd.read_file(url) | |
| spain_adm2.query("NAME_1 == 'Cataluña'").plot() | |
| return (spain_adm2,) | |
| @app.cell | |
| def _( | |
| cells_dataframe_to_geodataframe, | |
| geodataframe_to_cells, | |
| h3_res, | |
| spain_adm2, | |
| ): | |
| df = geodataframe_to_cells(spain_adm2.query("NAME_1 == 'Cataluña'"), h3_res) | |
| gdf = cells_dataframe_to_geodataframe(df) | |
| gdf.plot("NAME_1") | |
| return (gdf,) | |
| @app.cell | |
| def _(gdf, h3_df, pd): | |
| merged = pd.merge( | |
| left = h3_df, | |
| right = gdf, | |
| on = ['cell', 'geometry'] | |
| ) | |
| merged.plot('value') | |
| return | |
| if __name__ == "__main__": | |
| app.run() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment