Skip to content

Instantly share code, notes, and snippets.

@derhuerst
Last active January 12, 2026 23:18
Show Gist options
  • Select an option

  • Save derhuerst/c953c3b594df809520651e35dfe9b1a4 to your computer and use it in GitHub Desktop.

Select an option

Save derhuerst/c953c3b594df809520651e35dfe9b1a4 to your computer and use it in GitHub Desktop.
match GTFS-RT TripDescriptors using DuckDB

This uses gtfs-via-duckdb.

curl -fsSL \
  'https://mobility-api.mobility-database.fintraffic.fi/gtfs-realtime/v2/' \
  -H 'accept: application/protobuf' \
  -H "x-api-key: $api_key" \
  | ~/web/print-gtfs-rt-cli/cli.js -j \
  | duckdb -readonly -csv -f match.sql gtfs/fintraffic.gtfs.duckdb \
  | qsv select '!item' | head -n 10
entity_id,kind,trip_id,route_id,direction_id,start_date,start_time,service_id
73e836b3-1fc1-4ea7-a921-717b21b6cf9b,vp,10003_00012999__1001060106,20734,1,2026-01-10,20:45:00,10003_1001060106
37cbb4e2-183f-4574-9eba-d1e2f3d1463d,vp,10003_00025602__1004062106,20738,0,2026-01-10,18:15:00,10003_1004062106
800b6ea4-6e22-4a75-9277-9d1819acd8f1,vp,10003_00018181__1028021106,20809,1,2026-01-10,23:08:00,10003_1028021106
5a5da731-f65e-400d-bb6a-489b8ba4e948,vp,10003_00016530__10K3010106,103675,1,2026-01-10,15:15:00,10003_10K3010106
2ec6df91-f6a0-471f-bb25-681fe8485640,vp,10003_00020704__2901041106,103638,1,2026-01-10,18:10:00,10003_2901041106
04b09ef0-3500-4e65-86a6-4283ff37aabe,vp,10003_00020663__3721010106,103583,1,2026-01-10,15:20:00,10003_3721010106
aacf71a6-935c-482b-85ff-b3d7326b0cba,tu,10008_Kevättalvi_La_02_1_235500_245000_0,21794,1,2026-01-10,23:55:00,10008_Kevättalvi_La
082ee248-adf0-4fc6-973b-03165c54ef26,tu,10008_Kevättalvi_La_02_0_235500_245000_0,21794,0,2026-01-10,23:55:00,10008_Kevättalvi_La
d2e37721-d9f4-40e3-825e-f66428e9f05e,vp,10008_Kevättalvi_La_02_1_235500_245000_0,21794,1,2026-01-10,23:55:00,10008_Kevättalvi_La
WITH
_rt AS (
SELECT
-- patch for Fintraffic's GTFS-RT
gen_random_uuid() AS id,
trip_update, -- TripUpdate
vehicle -- VehiclePosition
-- todo: alert
-- todo: shape
-- todo: stop
-- todo: trip_modifications
FROM read_json(
'/dev/stdin'
)
),
rt AS (
SELECT
*
REPLACE (
array_slice(start_date, 0, 4) || '-' || array_slice(start_date, 5, 6) || '-' || array_slice(start_date, 7, 8) AS start_date
)
FROM (
SELECT
id AS entity_id,
'tu' AS kind,
unnest(trip_update.trip, max_depth := 1),
trip_update AS item
FROM _rt
WHERE trip_update IS NOT NULL
UNION ALL BY NAME
SELECT
id AS entity_id,
'vp' AS kind,
unnest(vehicle.trip, max_depth := 1),
vehicle AS item
FROM _rt
WHERE vehicle IS NOT NULL
) _rt
)
-- matching by trip_id & start_date
SELECT
entity_id,
kind,
rt.trip_id,
trips.route_id,
trips.direction_id,
rt.start_date,
stop_times.trip_start_time AS start_time,
trips.service_id,
item
FROM (
SELECT *
FROM rt
WHERE rt.trip_id IS NOT NULL
AND rt.start_date IS NOT NULL
) rt
JOIN trips ON trips.trip_id = rt.trip_id
JOIN service_days ON (
service_days.service_id = trips.service_id
AND service_days.date = rt.start_date
)
JOIN stop_times ON (
stop_times.stop_sequence_consec = 0
AND stop_times.trip_id = trips.trip_id
)
UNION ALL BY NAME
-- matching by route_id & co
SELECT
entity_id,
kind,
rt.route_id,
rt.direction_id,
trips.trip_id,
rt.start_date,
rt.start_time,
trips.service_id,
item
FROM (
SELECT *
FROM rt
WHERE rt.route_id IS NOT NULL
AND rt.direction_id IS NOT NULL
AND rt.start_date IS NOT NULL
AND rt.start_time IS NOT NULL
) rt
JOIN routes ON routes.route_id = rt.route_id
JOIN trips ON (
trips.route_id = rt.route_id
AND trips.direction_id = rt.direction_id
)
JOIN service_days ON (
service_days.service_id = trips.service_id
AND service_days.date = rt.start_date
)
JOIN stop_times ON (
stop_times.stop_sequence_consec = 0
AND stop_times.trip_id = trips.trip_id
AND stop_times.trip_start_time = rt.start_time
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment