Skip to content

Instantly share code, notes, and snippets.

@geobabbler
Created January 14, 2026 23:21
Show Gist options
  • Select an option

  • Save geobabbler/1c66dff4dceb5af07631a2497aa74d15 to your computer and use it in GitHub Desktop.

Select an option

Save geobabbler/1c66dff4dceb5af07631a2497aa74d15 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
"""
Point-in-Polygon Analysis Script for PostGIS
Performs spatial analysis to determine which points fall within which polygons
and generates a summary report.
Usage:
python point_in_polygon.py --host localhost --port 5432 --database mydb \
--user postgres --password secret \
--point-table stores --point-geom geom --point-name name \
--polygon-table territories --polygon-geom geom --polygon-name territory_name
"""
import argparse
import sys
from typing import Dict, List, Tuple
import psycopg2
from psycopg2 import sql
from psycopg2.extras import RealDictCursor
def get_table_srid(cursor, table: str, geom_column: str) -> int:
"""Get the SRID of a geometry column."""
query = """
SELECT Find_SRID(%s, %s, %s) as srid
"""
# Extract schema and table if schema is provided
parts = table.split('.')
if len(parts) == 2:
schema, table_name = parts
else:
schema = 'public'
table_name = table
cursor.execute(query, (schema, table_name, geom_column))
result = cursor.fetchone()
return result['srid']
def check_spatial_index(cursor, table: str, geom_column: str) -> bool:
"""Check if a spatial index exists on the geometry column."""
parts = table.split('.')
if len(parts) == 2:
schema, table_name = parts
else:
schema = 'public'
table_name = table
query = """
SELECT COUNT(*) as count
FROM pg_indexes
WHERE schemaname = %s
AND tablename = %s
AND indexdef LIKE %s
"""
cursor.execute(query, (schema, table_name, f'%{geom_column}%GIST%'))
result = cursor.fetchone()
return result['count'] > 0
def perform_point_in_polygon(
cursor,
point_table: str,
point_geom: str,
point_name: str,
polygon_table: str,
polygon_geom: str,
polygon_name: str
) -> List[Dict]:
"""
Perform point-in-polygon analysis and return summary results.
Returns a list of dictionaries with polygon names and point counts.
"""
# Get SRIDs
point_srid = get_table_srid(cursor, point_table, point_geom)
polygon_srid = get_table_srid(cursor, polygon_table, polygon_geom)
print(f"\n📍 Point table SRID: {point_srid}")
print(f"🔲 Polygon table SRID: {polygon_srid}")
# Check for spatial indexes
point_indexed = check_spatial_index(cursor, point_table, point_geom)
polygon_indexed = check_spatial_index(cursor, polygon_table, polygon_geom)
print(f"\n🔍 Point table spatial index: {'✓' if point_indexed else '✗ (recommend creating)'}")
print(f"🔍 Polygon table spatial index: {'✓' if polygon_indexed else '✗ (recommend creating)'}")
# Build the query
# If SRIDs differ, transform point geometries to match polygon SRID
if point_srid != polygon_srid:
print(f"\n⚠️ Different SRIDs detected - will transform points from {point_srid} to {polygon_srid}")
point_geom_transformed = f"ST_Transform({point_geom}, {polygon_srid})"
else:
point_geom_transformed = point_geom
query = sql.SQL("""
SELECT
p.{polygon_name} as polygon_name,
COUNT(pt.{point_name}) as point_count
FROM {polygon_table} p
LEFT JOIN {point_table} pt
ON ST_Contains(p.{polygon_geom}, {point_geom_transformed})
GROUP BY p.{polygon_name}
ORDER BY point_count DESC
""").format(
polygon_name=sql.Identifier(polygon_name),
point_name=sql.Identifier(point_name),
polygon_table=sql.Identifier(*point_table.split('.')),
point_table=sql.Identifier(*polygon_table.split('.')),
polygon_geom=sql.Identifier(polygon_geom),
point_geom_transformed=sql.SQL(point_geom_transformed)
)
print(f"\n🔄 Executing spatial query...")
cursor.execute(query)
results = cursor.fetchall()
return results
def print_summary_report(results: List[Dict], point_name: str, polygon_name: str):
"""Print a formatted summary report."""
print("\n" + "="*60)
print("POINT-IN-POLYGON ANALYSIS SUMMARY")
print("="*60)
total_points = sum(r['point_count'] for r in results)
total_polygons = len(results)
polygons_with_points = sum(1 for r in results if r['point_count'] > 0)
print(f"\n📊 Overall Statistics:")
print(f" Total polygons: {total_polygons}")
print(f" Polygons with points: {polygons_with_points}")
print(f" Empty polygons: {total_polygons - polygons_with_points}")
print(f" Total points assigned: {total_points}")
print(f"\n📍 Points per Polygon:")
print(f" {'Polygon Name':<40} {'Point Count':>10}")
print(f" {'-'*40} {'-'*10}")
for result in results:
polygon = str(result['polygon_name'])[:40]
count = result['point_count']
print(f" {polygon:<40} {count:>10,}")
print("\n" + "="*60)
def main():
parser = argparse.ArgumentParser(
description='Perform point-in-polygon analysis on PostGIS tables'
)
# Database connection parameters
parser.add_argument('--host', required=True, help='Database host')
parser.add_argument('--port', type=int, default=5432, help='Database port')
parser.add_argument('--database', required=True, help='Database name')
parser.add_argument('--user', required=True, help='Database user')
parser.add_argument('--password', required=True, help='Database password')
# Table and column parameters
parser.add_argument('--point-table', required=True, help='Point table name (schema.table or table)')
parser.add_argument('--point-geom', required=True, help='Point geometry column name')
parser.add_argument('--point-name', required=True, help='Point name/identifier column')
parser.add_argument('--polygon-table', required=True, help='Polygon table name (schema.table or table)')
parser.add_argument('--polygon-geom', required=True, help='Polygon geometry column name')
parser.add_argument('--polygon-name', required=True, help='Polygon name/identifier column')
args = parser.parse_args()
try:
# Connect to database
print(f"🔌 Connecting to database: {args.host}:{args.port}/{args.database}")
conn = psycopg2.connect(
host=args.host,
port=args.port,
database=args.database,
user=args.user,
password=args.password
)
with conn.cursor(cursor_factory=RealDictCursor) as cursor:
results = perform_point_in_polygon(
cursor,
args.point_table,
args.point_geom,
args.point_name,
args.polygon_table,
args.polygon_geom,
args.polygon_name
)
print_summary_report(results, args.point_name, args.polygon_name)
conn.close()
print("\n✅ Analysis complete!")
return 0
except psycopg2.Error as e:
print(f"\n❌ Database error: {e}", file=sys.stderr)
return 1
except Exception as e:
print(f"\n❌ Error: {e}", file=sys.stderr)
return 1
if __name__ == '__main__':
sys.exit(main())

PostGIS Spatial Query Reference

Point-in-Polygon Functions

Primary Functions

ST_Contains(polygon_geom, point_geom)

  • Returns TRUE if point is completely inside polygon
  • Most common for point-in-polygon analysis
  • Usage: WHERE ST_Contains(polygons.geom, points.geom)

ST_Within(point_geom, polygon_geom)

  • Inverse of ST_Contains
  • Returns TRUE if point is completely inside polygon
  • Usage: WHERE ST_Within(points.geom, polygons.geom)

ST_Intersects(geom1, geom2)

  • More general - returns TRUE if geometries share any space
  • Includes points on boundaries
  • Usage: WHERE ST_Intersects(polygons.geom, points.geom)

When to Use Each

  • ST_Contains: Standard point-in-polygon (point must be inside, not on boundary)
  • ST_Within: Same as ST_Contains but reversed arguments
  • ST_Intersects: When boundary points should be included

Spatial Indexes (GIST)

Why Spatial Indexes Matter

Without spatial indexes, PostGIS performs full table scans for spatial queries - checking every geometry pair. With indexes, performance improves dramatically (often 100-1000x faster).

Creating Spatial Indexes

-- Create GIST index on geometry column
CREATE INDEX idx_table_geom ON schema.table USING GIST (geom);

-- For large tables, create index concurrently (allows reads during creation)
CREATE INDEX CONCURRENTLY idx_table_geom ON schema.table USING GIST (geom);

Checking for Indexes

-- Check if spatial index exists
SELECT schemaname, tablename, indexname, indexdef
FROM pg_indexes
WHERE tablename = 'your_table'
  AND indexdef LIKE '%GIST%';

Best Practices

  1. Always index geometry columns used in spatial queries
  2. Index both tables in a spatial join for optimal performance
  3. Rebuild indexes periodically on frequently updated tables:
    REINDEX INDEX idx_table_geom;

Coordinate Systems (SRID)

Understanding SRID

SRID (Spatial Reference System Identifier) defines the coordinate system. Common SRIDs:

  • 4326: WGS84 (latitude/longitude) - GPS coordinates
  • 3857: Web Mercator - web mapping (Google Maps, OpenStreetMap)
  • 2163: US National Atlas Equal Area - US analysis
  • Regional UTM zones for accurate distance measurements

Checking SRID

-- Find SRID of a geometry column
SELECT Find_SRID('schema_name', 'table_name', 'geom_column');

-- Or query directly
SELECT ST_SRID(geom) FROM table_name LIMIT 1;

Transforming Between SRIDs

-- Transform geometry from one SRID to another
ST_Transform(geom, target_srid)

-- Example: Transform from 4326 to 3857
SELECT ST_Transform(geom, 3857) FROM points WHERE ST_SRID(geom) = 4326;

SRID Best Practices

  1. Always transform to matching SRID before spatial operations
  2. Transform the smaller table in joins (usually points, not polygons)
  3. Store in consistent SRID when possible to avoid repeated transformations
  4. Use appropriate SRID for your analysis:
    • 4326 for global data, lat/lng coordinates
    • Local projected systems for accurate distance/area calculations

Query Optimization Patterns

Basic Point-in-Polygon Query

SELECT 
    p.name as polygon_name,
    COUNT(pt.id) as point_count
FROM polygons p
LEFT JOIN points pt
    ON ST_Contains(p.geom, pt.geom)
GROUP BY p.name;

With SRID Transformation

SELECT 
    p.name as polygon_name,
    COUNT(pt.id) as point_count
FROM polygons p
LEFT JOIN points pt
    ON ST_Contains(p.geom, ST_Transform(pt.geom, ST_SRID(p.geom)))
GROUP BY p.name;

With Spatial Index Hint (for very large tables)

-- Use bounding box pre-filter (automatic with GIST index)
SELECT 
    p.name,
    COUNT(pt.id)
FROM polygons p
LEFT JOIN points pt
    ON p.geom && pt.geom  -- Bounding box operator (index-optimized)
    AND ST_Contains(p.geom, pt.geom)  -- Exact spatial test
GROUP BY p.name;

Finding Unassigned Points

-- Points not in any polygon
SELECT pt.*
FROM points pt
LEFT JOIN polygons p
    ON ST_Contains(p.geom, pt.geom)
WHERE p.id IS NULL;

Performance Tips

  1. Use EXPLAIN ANALYZE to check query plans:

    EXPLAIN ANALYZE
    SELECT COUNT(*) FROM points pt
    JOIN polygons p ON ST_Contains(p.geom, pt.geom);
  2. Look for "Index Scan using idx_" - confirms index usage

  3. Avoid ST_Transform in WHERE clause - transform beforehand if possible

  4. Batch large operations - process in chunks for very large datasets

  5. Use appropriate geometry types:

    • POINT for points
    • POLYGON/MULTIPOLYGON for polygons
    • Don't use GEOMETRY when specific type is known

Common Pitfalls

  1. Missing spatial indexes - Most common performance issue
  2. SRID mismatches - Queries fail or return unexpected results
  3. NULL geometries - Always handle with WHERE geom IS NOT NULL
  4. Incorrect function - Using ST_Intersects when ST_Contains is needed
  5. Not using LEFT JOIN - Missing polygons with zero points in summary

Troubleshooting

Query is slow

  • Check for spatial indexes with \di in psql
  • Run EXPLAIN ANALYZE to see query plan
  • Ensure SRIDs match

No results returned

  • Check SRIDs match or transform correctly
  • Verify geometry validity: SELECT ST_IsValid(geom) FROM table;
  • Check for NULL geometries

Unexpected results

  • Verify correct spatial function (Contains vs Intersects vs Within)
  • Check SRID matches geographic region
  • Validate geometry topology
name description
postgis-spatial-analysis
Perform point-in-polygon spatial analysis on PostGIS databases to determine which points fall within which polygons and generate summary reports. Use when working with PostGIS databases for spatial queries that assign points to polygons, analyze geographic relationships between point and polygon tables, or generate summaries of spatial distributions. Handles SRID transformations, spatial index checks, and optimized query generation.

PostGIS Spatial Analysis

Overview

Perform point-in-polygon analysis on PostGIS databases to assign points from one table to polygons in another, handling coordinate system transformations and generating summary reports.

Quick Start

Use the provided script for standard point-in-polygon analysis:

python scripts/point_in_polygon.py \
    --host localhost --port 5432 --database mydb \
    --user postgres --password secret \
    --point-table stores --point-geom geom --point-name store_name \
    --polygon-table territories --polygon-geom geom --polygon-name territory_name

The script will:

  1. Check SRIDs and transform if needed
  2. Check for spatial indexes and warn if missing
  3. Perform the spatial join
  4. Generate a summary report showing point counts per polygon

Workflow

1. Understand the Requirements

Identify the tables and columns:

  • Point table name and geometry column
  • Polygon table name and geometry column
  • Name/identifier columns for the report
  • Database connection details

2. Check Prerequisites

Before running analysis, verify:

Spatial indexes exist (critical for performance):

-- Check for indexes
SELECT schemaname, tablename, indexname
FROM pg_indexes
WHERE tablename IN ('points_table', 'polygons_table')
  AND indexdef LIKE '%GIST%';

-- Create if missing
CREATE INDEX idx_points_geom ON points_table USING GIST (geom);
CREATE INDEX idx_polygons_geom ON polygons_table USING GIST (geom);

Verify geometry validity:

-- Check for invalid geometries
SELECT COUNT(*) FROM points_table WHERE NOT ST_IsValid(geom) OR geom IS NULL;
SELECT COUNT(*) FROM polygons_table WHERE NOT ST_IsValid(geom) OR geom IS NULL;

3. Run the Analysis

Execute the script with appropriate parameters. The script automatically:

  • Detects SRIDs for both tables
  • Transforms coordinates if SRIDs differ (transforms points to match polygons)
  • Checks for spatial indexes and warns if missing
  • Performs optimized spatial join using ST_Contains
  • Generates formatted summary report

4. Interpret Results

The output includes:

  • Overall statistics (total polygons, polygons with/without points)
  • Detailed breakdown of point counts per polygon
  • Warnings about missing indexes or SRID mismatches

Manual Query Construction

For custom queries or when script modification is needed, see references/postgis_best_practices.md for:

  • Spatial function reference (ST_Contains, ST_Within, ST_Intersects)
  • SRID transformation patterns
  • Query optimization techniques
  • Performance troubleshooting

Basic pattern:

SELECT 
    p.name as polygon_name,
    COUNT(pt.id) as point_count
FROM polygons p
LEFT JOIN points pt
    ON ST_Contains(p.geom, ST_Transform(pt.geom, ST_SRID(p.geom)))
GROUP BY p.name
ORDER BY point_count DESC;

Common Issues and Solutions

Slow query performance:

  • Verify spatial indexes exist on both tables
  • Check query plan with EXPLAIN ANALYZE
  • Ensure SRID transformation happens on smaller table (usually points)

No results or unexpected results:

  • Verify SRIDs match or are being transformed correctly
  • Check for NULL geometries
  • Confirm using correct spatial function (ST_Contains vs ST_Intersects)

SRID mismatch warnings:

  • Script automatically handles this via ST_Transform
  • For manual queries, always transform to matching SRID

Dependencies

Python dependencies required for script:

pip install psycopg2-binary

Resources

scripts/point_in_polygon.py: Main analysis script with parameterized database connection, automatic SRID handling, spatial index checking, and formatted output.

references/postgis_best_practices.md: Comprehensive guide to PostGIS spatial functions, indexing strategies, SRID handling, query optimization patterns, and troubleshooting tips.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment