Skip to content

Instantly share code, notes, and snippets.

@novica
Last active September 20, 2025 19:51
Show Gist options
  • Select an option

  • Save novica/ff55fc4d79c3007bb1acba40e74e9d25 to your computer and use it in GitHub Desktop.

Select an option

Save novica/ff55fc4d79c3007bb1acba40e74e9d25 to your computer and use it in GitHub Desktop.
library("duckdb")
#### Creating the Ducklake
con <- dbConnect(duckdb(), dbdir=":memory:")
dbExecute(con, "INSTALL ducklake;")
dbExecute(con, "ATTACH 'ducklake:metadata.ducklake' AS r_ducklake;")
dbExecute(con, "USE r_ducklake;")
#dbDisconnect(con)
#### Writing data
# Bronze: raw data
dbWriteTable(con, "bronze_mtcars", mtcars)
# Silver: filtered / cleaned
dbExecute(con, "
CREATE TABLE silver_mtcars AS
SELECT *, (mpg/mean(mpg) OVER()) AS mpg_norm
FROM bronze_mtcars;
")
# Gold: aggregated summary
dbExecute(con, "
CREATE TABLE gold_mtcars AS
SELECT cyl, AVG(mpg) AS avg_mpg, AVG(mpg_norm) AS avg_mpg_norm
FROM silver_mtcars
GROUP BY cyl;
")
#### Lineage table
dbExecute(con, "
CREATE TABLE IF NOT EXISTS ducklake_lineage (
parent_table VARCHAR,
child_table VARCHAR,
created_at TIMESTAMP,
description VARCHAR
);
")
# After creating a silver table from bronze
dbExecute(con, "
INSERT INTO ducklake_lineage (parent_table, child_table, created_at, description)
VALUES ('bronze_mtcars', 'silver_mtcars', NOW(), 'normalization / filtering');
")
# After creating gold table from silver
dbExecute(con, "
INSERT INTO ducklake_lineage (parent_table, child_table, created_at, description)
VALUES ('silver_mtcars', 'gold_mtcars', NOW(), 'aggregation');
")
#### Some more pretend transformations
dbExecute(con, "
INSERT INTO ducklake_lineage (parent_table, child_table, created_at, description)
VALUES ('bronze_mtcars', 'silver_mtcars_2', NOW(), 'normalization / filtering');
")
dbExecute(con, "
INSERT INTO ducklake_lineage (parent_table, child_table, created_at, description)
VALUES ('bronze_mtcars', 'silver_mtcars_3', NOW(), 'normalization / filtering');
")
dbExecute(con, "
INSERT INTO ducklake_lineage (parent_table, child_table, created_at, description)
VALUES ('silver_mtcars', 'gold_mtcars_2', NOW(), 'aggregation');
")
#### Visualizing the medallion lineage
library(igraph)
library(ggraph)
edges <- dbGetQuery(con, "SELECT parent_table AS from_table, child_table AS to_table FROM ducklake_lineage")
g <- graph_from_data_frame(edges, directed = TRUE)
ggraph(g, layout = "tree") +
geom_edge_diagonal(arrow = arrow(length = unit(4, 'mm')), end_cap = circle(3, 'mm')) +
geom_node_point(shape = 15, size = 8, color = "steelblue", fill = "lightblue") +
geom_node_text(aes(label = name), vjust = -1.2, size = 4) +
coord_flip() +
scale_y_reverse() +
theme_void() +
ggtitle("DuckLake Medallion Lineage")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment