Last active
November 11, 2025 15:20
-
-
Save do-me/46fe0fbd5898ceb85bf410f1a13d6362 to your computer and use it in GitHub Desktop.
Filter OSM US Layercake boundary.parquet remotely over DuckDB and export e.g. a region to e.g. a parquet or json file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env bash | |
| # | |
| # duckdb_boundary.sh — run DuckDB spatial queries by region | |
| # | |
| # Usage: | |
| # ./duckdb_boundary.sh --query "Sicilia" [--format PARQUET|CSV|JSON] | |
| # | |
| # Description: | |
| # - Ensures DuckDB 'spatial' extension is installed and loaded | |
| # - Reads OpenStreetMap boundary data (remote parquet) | |
| # - Filters by region name (passed via --query) | |
| # - Writes results to an output file in chosen format | |
| # | |
| # Requirements: | |
| # - DuckDB >= 0.9.0 | |
| # - Internet access for remote parquet | |
| # | |
| set -euo pipefail | |
| REGION_NAME="" | |
| FORMAT="PARQUET" | |
| # Parse CLI flags | |
| while [[ $# -gt 0 ]]; do | |
| case "$1" in | |
| -q|--query) | |
| REGION_NAME="$2" | |
| shift 2 | |
| ;; | |
| -f|--format) | |
| FORMAT=$(echo "$2" | tr '[:lower:]' '[:upper:]') # normalize to uppercase | |
| shift 2 | |
| ;; | |
| -h|--help) | |
| echo "Usage: $0 --query <RegionName> [--format <FORMAT>]" | |
| echo | |
| echo "Options:" | |
| echo " -q, --query Region name to extract (e.g. Sicilia, Lombardia)" | |
| echo " -f, --format Output format (PARQUET, CSV, JSON). Default: PARQUET" | |
| exit 0 | |
| ;; | |
| *) | |
| echo "Unknown argument: $1" | |
| echo "Try '$0 --help'" | |
| exit 1 | |
| ;; | |
| esac | |
| done | |
| if [[ -z "$REGION_NAME" ]]; then | |
| echo "❌ Error: --query flag is required (e.g. --query Sicilia)" | |
| exit 1 | |
| fi | |
| # Normalize output filename (lowercase, underscores) | |
| SAFE_NAME=$(echo "$REGION_NAME" | tr '[:upper:]' '[:lower:]' | tr ' ' '_' ) | |
| FORMAT_LOWER=$(echo "$FORMAT" | tr '[:upper:]' '[:lower:]') | |
| OUTPUT_FILE="${SAFE_NAME}_boundaries.${FORMAT_LOWER}" | |
| # Build DuckDB query | |
| QUERY=" | |
| INSTALL spatial; | |
| LOAD spatial; | |
| COPY ( | |
| SELECT | |
| *, | |
| ST_Force2D(geometry) AS geometry | |
| FROM read_parquet('https://data.openstreetmap.us/layercake/boundaries.parquet') | |
| WHERE list_contains(\"tags\"['name'], '${REGION_NAME}') | |
| ) | |
| TO '${OUTPUT_FILE}' (FORMAT '${FORMAT}'); | |
| " | |
| # Run the query | |
| echo "▶ Running DuckDB query for region: '${REGION_NAME}' (format: ${FORMAT})" | |
| duckdb -c "$QUERY" | |
| echo "✅ Query completed successfully." | |
| echo "📄 Output written to: ${OUTPUT_FILE}" |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Usage
Install DuckDB, download this script, then make it executable and run it.
Alternatively, without downloading the script you can just execute the script directly from this gist (but keep in mind that this is not safe as you're executing random code):
Saves a
sicila_boundaries.parquetfile to disk. Tested for the following regions:Visualized with https://do-me.github.io/geoparquet-visualizer/.