matteodefelice/example.py

## example.py
import marimo

__generated_with = "0.16.5"
app = marimo.App(width="medium")


@app.cell
def _(mo):
    mo.md(
        r"""
    The idea is to find a way to visualise a raster file in an interesting and efficient way.
    With [this function](https://h3geo.org/docs/api/indexing/) you can have a specific index string for each location. Ah [H3 index](https://h3geo.org/docs/core-library/h3Indexing) is a 64-bit integer that can be represented by an hex string.

    The library [h3ronpy](https://h3ronpy.readthedocs.io/en/latest/#) provides functions to convert raster to H3 and vice versa.
    """
    )
    return


@app.cell
def _():
    import marimo as mo
    import h3
    import rasterio
    import rioxarray
    import xarray as xr
    import xarray.tutorial
    import numpy as np
    import pandas as pd
    import geopandas as gpd
    from shapely.geometry import Polygon

    from h3ronpy.raster import nearest_h3_resolution
    from h3ronpy.pandas.raster import raster_to_dataframe
    from h3ronpy.pandas.vector import geodataframe_to_cells, cells_dataframe_to_geodataframe
    return (
        Polygon,
        cells_dataframe_to_geodataframe,
        geodataframe_to_cells,
        gpd,
        h3,
        mo,
        nearest_h3_resolution,
        pd,
        raster_to_dataframe,
        xarray,
    )


@app.cell
def _(xarray):
    # eobs = xr.open_dataset("C:\\Users\\matte\\data\\EOBS\\rr_0.25deg_reg_v17.0.IP.nc")
    # eobs
    sample_grid = xarray.tutorial.open_dataset("eraint_uvz").isel(month = 0, level = 2).sel(longitude = slice(-20, 40), latitude = slice(50, 20))
    return (sample_grid,)


@app.cell
def _(sample_grid):
    sample_grid['u'].plot(vmin = -10, vmax = 10, cmap = 'RdBu')
    return


@app.cell
def _(sample_grid):
    test = sample_grid['u']
    return (test,)


@app.cell
def _(mo):
    mo.md(
        r"""
    The [documentation](https://h3ronpy.readthedocs.io/en/latest/api/pandas.html#module-h3ronpy.pandas.raster) says:
    > Make use the nearest_h3_resolution function to convert to the H3 resolution nearest to the pixel size of the raster. After that   the cell resolution can be changed using the change_resolution function and dataframe libraries can be used to perform the    desired aggregations. This can be a rather memory-intensive process.
    """
    )
    return


@app.cell
def _(nearest_h3_resolution, test):
    h3_res = nearest_h3_resolution(test.values.shape, test.rio.transform(), search_mode="smaller_than_pixel")
    print(f"Using H3 resolution {h3_res}")
    return (h3_res,)


@app.cell
def _(mo):
    mo.md(
        r"""
    `h3ronpy.pandas.raster.raster_to_dataframe(in_raster: ndarray, transform, h3_resolution: int, nodata_value=None, axis_order: str = 'yx', compact: bool = True, geo: bool = False)→ GeoDataFrame | DataFrame`
    Convert a raster/array to a pandas DataFrame containing H3 indexes

    This function is parallelized and uses the available CPUs by distributing tiles to a thread pool.

    The input geometry must be in WGS84.

    Parameters:

    - `in_raster` (ndarray) – input 2-d array
    - `transform` – the affine transformation
    - `nodata_value` – the nodata value. For these cells of the array there will be no h3 indexes generated
    - `axis_order` (str) – axis order of the 2d array. Either “xy” or “yx”
    - `h3_resolution` (int) – target h3 resolution
    - `compact` (bool) – Return compacted h3 indexes (see H3 docs). This results in mixed H3 resolutions, but also can reduce the amount of required memory.
    - `geo` (bool) – Return a geopandas GeoDataFrame with geometries. increases the memory usage.

    Returns:
    pandas DataFrame or GeoDataFrame

    Return type:
    GeoDataFrame | DataFrame
    """
    )
    return


@app.cell
def _(h3_res, raster_to_dataframe, test):

    h3_df = raster_to_dataframe(
        # 1*np.round(test.values/1),
        test.values,
        test.rio.transform(),
        h3_res,
        compact=False,
        geo=True
    )

    h3_df.dropna().plot(
        column="value", linewidth=0.2, edgecolor="black", figsize = (12, 8),
        legend = True, vmin = -10, vmax = 10, cmap = 'RdBu'
    )
    return (h3_df,)


@app.cell
def _(h3_df):
    h3_df.dropna().head()
    return


@app.cell
def _(mo):
    mo.md(r"""Here converting the int index into a string""")
    return


@app.cell
def _(h3, h3_df):
    h3_df['strid'] = h3_df['cell'].map(h3.int_to_str)
    return


@app.cell
def _(Polygon, gpd, h3):
    def h3_to_geopandas(h3_cells):
        """
        Convert a list of H3 cells to a GeoDataFrame.

        Parameters:
        -----------
        h3_cells : list
            List of H3 cell identifiers (strings)

        Returns:
        --------
        geopandas.GeoDataFrame
            GeoDataFrame with H3 cells as geometries
        """
        geometries = []
        cell_ids = []

        for cell in h3_cells:
            # Get the boundary coordinates for each H3 cell
            boundary = h3.cell_to_boundary(cell)
            # Convert to (lon, lat) format for Shapely
            # H3 returns (lat, lon), so we need to swap
            polygon = Polygon([(lon, lat) for lat, lon in boundary])
            geometries.append(polygon)
            cell_ids.append(cell)

        # Create GeoDataFrame
        gdf = gpd.GeoDataFrame(
            {'h3_cell': cell_ids},
            geometry=geometries,
            crs='EPSG:4326'  # WGS84 coordinate system
        )

        return gdf
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
    ## Things to do

    1. Find all the cells in a shapefile https://h3ronpy.readthedocs.io/en/latest/usage/vector.html
    2. Find the cell in a specific coordinate
    3. Aggregate this with another dataset
    """
    )
    return


@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""# Cells in a shapefile""")
    return


@app.cell
def _(gpd):
    # GADM provides detailed administrative boundaries
    # ADM2 for Spain (level 2 = provinces)
    url = "https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_ESP_2.json"
    spain_adm2 = gpd.read_file(url)

    spain_adm2.query("NAME_1 == 'Cataluña'").plot()
    return (spain_adm2,)


@app.cell
def _(
    cells_dataframe_to_geodataframe,
    geodataframe_to_cells,
    h3_res,
    spain_adm2,
):
    df = geodataframe_to_cells(spain_adm2.query("NAME_1 == 'Cataluña'"), h3_res)
    gdf = cells_dataframe_to_geodataframe(df)
    gdf.plot("NAME_1")
    return (gdf,)


@app.cell
def _(gdf, h3_df, pd):
    merged = pd.merge(
        left = h3_df,
        right = gdf,
        on = ['cell', 'geometry']
    )
    merged.plot('value')
    return


if __name__ == "__main__":
    app.run()
	import marimo

	__generated_with = "0.16.5"
	app = marimo.App(width="medium")


	@app.cell
	def _(mo):
	mo.md(
	r"""
	The idea is to find a way to visualise a raster file in an interesting and efficient way.
	With [this function](https://h3geo.org/docs/api/indexing/) you can have a specific index string for each location. Ah [H3 index](https://h3geo.org/docs/core-library/h3Indexing) is a 64-bit integer that can be represented by an hex string.

	The library [h3ronpy](https://h3ronpy.readthedocs.io/en/latest/#) provides functions to convert raster to H3 and vice versa.
	"""
	)
	return


	@app.cell
	def _():
	import marimo as mo
	import h3
	import rasterio
	import rioxarray
	import xarray as xr
	import xarray.tutorial
	import numpy as np
	import pandas as pd
	import geopandas as gpd
	from shapely.geometry import Polygon

	from h3ronpy.raster import nearest_h3_resolution
	from h3ronpy.pandas.raster import raster_to_dataframe
	from h3ronpy.pandas.vector import geodataframe_to_cells, cells_dataframe_to_geodataframe
	return (
	Polygon,
	cells_dataframe_to_geodataframe,
	geodataframe_to_cells,
	gpd,
	h3,
	mo,
	nearest_h3_resolution,
	pd,
	raster_to_dataframe,
	xarray,
	)


	@app.cell
	def _(xarray):
	# eobs = xr.open_dataset("C:\\Users\\matte\\data\\EOBS\\rr_0.25deg_reg_v17.0.IP.nc")
	# eobs
	sample_grid = xarray.tutorial.open_dataset("eraint_uvz").isel(month = 0, level = 2).sel(longitude = slice(-20, 40), latitude = slice(50, 20))
	return (sample_grid,)


	@app.cell
	def _(sample_grid):
	sample_grid['u'].plot(vmin = -10, vmax = 10, cmap = 'RdBu')
	return


	@app.cell
	def _(sample_grid):
	test = sample_grid['u']
	return (test,)


	@app.cell
	def _(mo):
	mo.md(
	r"""
	The [documentation](https://h3ronpy.readthedocs.io/en/latest/api/pandas.html#module-h3ronpy.pandas.raster) says:
	> Make use the nearest_h3_resolution function to convert to the H3 resolution nearest to the pixel size of the raster. After that the cell resolution can be changed using the change_resolution function and dataframe libraries can be used to perform the desired aggregations. This can be a rather memory-intensive process.
	"""
	)
	return


	@app.cell
	def _(nearest_h3_resolution, test):
	h3_res = nearest_h3_resolution(test.values.shape, test.rio.transform(), search_mode="smaller_than_pixel")
	print(f"Using H3 resolution {h3_res}")
	return (h3_res,)


	@app.cell
	def _(mo):
	mo.md(
	r"""
	`h3ronpy.pandas.raster.raster_to_dataframe(in_raster: ndarray, transform, h3_resolution: int, nodata_value=None, axis_order: str = 'yx', compact: bool = True, geo: bool = False)→ GeoDataFrame \| DataFrame`
	Convert a raster/array to a pandas DataFrame containing H3 indexes

	This function is parallelized and uses the available CPUs by distributing tiles to a thread pool.

	The input geometry must be in WGS84.

	Parameters:

	- `in_raster` (ndarray) – input 2-d array
	- `transform` – the affine transformation
	- `nodata_value` – the nodata value. For these cells of the array there will be no h3 indexes generated
	- `axis_order` (str) – axis order of the 2d array. Either “xy” or “yx”
	- `h3_resolution` (int) – target h3 resolution
	- `compact` (bool) – Return compacted h3 indexes (see H3 docs). This results in mixed H3 resolutions, but also can reduce the amount of required memory.
	- `geo` (bool) – Return a geopandas GeoDataFrame with geometries. increases the memory usage.

	Returns:
	pandas DataFrame or GeoDataFrame

	Return type:
	GeoDataFrame \| DataFrame
	"""
	)
	return


	@app.cell
	def _(h3_res, raster_to_dataframe, test):

	h3_df = raster_to_dataframe(
	# 1*np.round(test.values/1),
	test.values,
	test.rio.transform(),
	h3_res,
	compact=False,
	geo=True
	)

	h3_df.dropna().plot(
	column="value", linewidth=0.2, edgecolor="black", figsize = (12, 8),
	legend = True, vmin = -10, vmax = 10, cmap = 'RdBu'
	)
	return (h3_df,)


	@app.cell
	def _(h3_df):
	h3_df.dropna().head()
	return


	@app.cell
	def _(mo):
	mo.md(r"""Here converting the int index into a string""")
	return


	@app.cell
	def _(h3, h3_df):
	h3_df['strid'] = h3_df['cell'].map(h3.int_to_str)
	return


	@app.cell
	def _(Polygon, gpd, h3):
	def h3_to_geopandas(h3_cells):
	"""
	Convert a list of H3 cells to a GeoDataFrame.

	Parameters:
	-----------
	h3_cells : list
	List of H3 cell identifiers (strings)

	Returns:
	--------
	geopandas.GeoDataFrame
	GeoDataFrame with H3 cells as geometries
	"""
	geometries = []
	cell_ids = []

	for cell in h3_cells:
	# Get the boundary coordinates for each H3 cell
	boundary = h3.cell_to_boundary(cell)
	# Convert to (lon, lat) format for Shapely
	# H3 returns (lat, lon), so we need to swap
	polygon = Polygon([(lon, lat) for lat, lon in boundary])
	geometries.append(polygon)
	cell_ids.append(cell)

	# Create GeoDataFrame
	gdf = gpd.GeoDataFrame(
	{'h3_cell': cell_ids},
	geometry=geometries,
	crs='EPSG:4326' # WGS84 coordinate system
	)

	return gdf
	return


	@app.cell(hide_code=True)
	def _(mo):
	mo.md(
	r"""
	## Things to do

	1. Find all the cells in a shapefile https://h3ronpy.readthedocs.io/en/latest/usage/vector.html
	2. Find the cell in a specific coordinate
	3. Aggregate this with another dataset
	"""
	)
	return


	@app.cell(hide_code=True)
	def _(mo):
	mo.md(r"""# Cells in a shapefile""")
	return


	@app.cell
	def _(gpd):
	# GADM provides detailed administrative boundaries
	# ADM2 for Spain (level 2 = provinces)
	url = "https://geodata.ucdavis.edu/gadm/gadm4.1/json/gadm41_ESP_2.json"
	spain_adm2 = gpd.read_file(url)

	spain_adm2.query("NAME_1 == 'Cataluña'").plot()
	return (spain_adm2,)


	@app.cell
	def _(
	cells_dataframe_to_geodataframe,
	geodataframe_to_cells,
	h3_res,
	spain_adm2,
	):
	df = geodataframe_to_cells(spain_adm2.query("NAME_1 == 'Cataluña'"), h3_res)
	gdf = cells_dataframe_to_geodataframe(df)
	gdf.plot("NAME_1")
	return (gdf,)


	@app.cell
	def _(gdf, h3_df, pd):
	merged = pd.merge(
	left = h3_df,
	right = gdf,
	on = ['cell', 'geometry']
	)
	merged.plot('value')
	return


	if __name__ == "__main__":
	app.run()
No results found