Last active
September 8, 2022 15:14
-
-
Save thisisnic/14fb9c1001261f2cf249f9317cda6466 to your computer and use it in GitHub Desktop.
lazy_query from dbplyr
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # query details copied from https://github.com/voltrondata-labs/arrowbench/blob/main/R/tpch-queries.R | |
| query_results <- lineitem_db %>% | |
| select(l_shipdate, l_returnflag, l_linestatus, l_quantity, | |
| l_extendedprice, l_discount, l_tax) %>% | |
| # kludge, should be: filter(l_shipdate <= "1998-12-01" - interval x day) %>% | |
| # where x is between 60 and 120, 90 is the only one that will validate. | |
| filter(l_shipdate <= as.Date("1998-09-02")) %>% | |
| select(l_returnflag, l_linestatus, l_quantity, l_extendedprice, l_discount, l_tax) %>% | |
| group_by(l_returnflag, l_linestatus) %>% | |
| summarise( | |
| sum_qty = sum(l_quantity), | |
| sum_base_price = sum(l_extendedprice), | |
| sum_disc_price = sum(l_extendedprice * (1 - l_discount)), | |
| sum_charge = sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), | |
| avg_qty = mean(l_quantity), | |
| avg_price = mean(l_extendedprice), | |
| avg_disc = mean(l_discount), | |
| count_order = n() | |
| ) %>% | |
| ungroup() %>% | |
| arrange(l_returnflag, l_linestatus) | |
| # This is the output lazy_query | |
| structure( | |
| list( | |
| x = structure( | |
| list( | |
| x = structure( | |
| list( | |
| x = structure( | |
| list( | |
| x = structure( | |
| list( | |
| x = structure("lineitem", class = c("ident", | |
| "character")), | |
| vars = c( | |
| "l_orderkey", | |
| "l_partkey", | |
| "l_suppkey", | |
| "l_linenumber", | |
| "l_quantity", | |
| "l_extendedprice", | |
| "l_discount", | |
| "l_tax", | |
| "l_returnflag", | |
| "l_linestatus", | |
| "l_shipdate", | |
| "l_commitdate", | |
| "l_receiptdate", | |
| "l_shipinstruct", | |
| "l_shipmode", | |
| "l_comment" | |
| ), | |
| group_vars = character(0), | |
| order_vars = NULL, | |
| frame = NULL | |
| ), | |
| class = c("lazy_base_remote_query", | |
| "lazy_base_query", "lazy_query") | |
| ), | |
| select = structure( | |
| list( | |
| name = c( | |
| "l_shipdate", | |
| "l_returnflag", | |
| "l_linestatus", | |
| "l_quantity", | |
| "l_extendedprice", | |
| "l_discount", | |
| "l_tax" | |
| ), | |
| expr = list( | |
| l_shipdate, | |
| l_returnflag, | |
| l_linestatus, | |
| l_quantity, | |
| l_extendedprice, | |
| l_discount, | |
| l_tax | |
| ), | |
| group_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, | |
| NULL), | |
| order_vars = list(NULL, NULL, NULL, NULL, | |
| NULL, NULL, NULL), | |
| frame = list(NULL, NULL, NULL, | |
| NULL, NULL, NULL, NULL) | |
| ), | |
| class = c("tbl_df", "tbl", | |
| "data.frame"), | |
| row.names = c(NA,-7L) | |
| ), | |
| where = NULL, | |
| group_by = NULL, | |
| order_by = NULL, | |
| distinct = FALSE, | |
| limit = NULL, | |
| select_operation = "mutate", | |
| last_op = "select", | |
| message_summarise = NULL, | |
| group_vars = character(0), | |
| order_vars = NULL, | |
| frame = NULL | |
| ), | |
| class = c("lazy_select_query", | |
| "lazy_query") | |
| ), | |
| select = structure( | |
| list( | |
| name = c( | |
| "l_shipdate", | |
| "l_returnflag", | |
| "l_linestatus", | |
| "l_quantity", | |
| "l_extendedprice", | |
| "l_discount", | |
| "l_tax" | |
| ), | |
| expr = list( | |
| l_shipdate, | |
| l_returnflag, | |
| l_linestatus, | |
| l_quantity, | |
| l_extendedprice, | |
| l_discount, | |
| l_tax | |
| ), | |
| group_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL), | |
| order_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL), | |
| frame = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL) | |
| ), | |
| class = c("tbl_df", | |
| "tbl", "data.frame"), | |
| row.names = c(NA,-7L) | |
| ), | |
| where = structure(list(~ l_shipdate <= as.Date("1998-09-02")), names = ""), | |
| group_by = NULL, | |
| order_by = NULL, | |
| distinct = FALSE, | |
| limit = NULL, | |
| select_operation = "mutate", | |
| last_op = "filter", | |
| message_summarise = NULL, | |
| group_vars = character(0), | |
| order_vars = NULL, | |
| frame = NULL | |
| ), | |
| class = c("lazy_select_query", | |
| "lazy_query") | |
| ), | |
| select = structure( | |
| list( | |
| name = c( | |
| "l_returnflag", | |
| "l_linestatus", | |
| "l_quantity", | |
| "l_extendedprice", | |
| "l_discount", | |
| "l_tax" | |
| ), | |
| expr = list( | |
| l_returnflag, | |
| l_linestatus, | |
| l_quantity, | |
| l_extendedprice, | |
| l_discount, | |
| l_tax | |
| ), | |
| group_vars = list(NULL, | |
| NULL, NULL, NULL, NULL, NULL), | |
| order_vars = list(NULL, NULL, | |
| NULL, NULL, NULL, NULL), | |
| frame = list(NULL, NULL, NULL, NULL, | |
| NULL, NULL) | |
| ), | |
| class = c("tbl_df", "tbl", "data.frame"), | |
| row.names = c(NA,-6L) | |
| ), | |
| where = NULL, | |
| group_by = NULL, | |
| order_by = NULL, | |
| distinct = FALSE, | |
| limit = NULL, | |
| select_operation = "mutate", | |
| last_op = "select", | |
| message_summarise = NULL, | |
| group_vars = c("l_returnflag", | |
| "l_linestatus"), | |
| order_vars = NULL, | |
| frame = NULL | |
| ), | |
| class = c("lazy_select_query", | |
| "lazy_query") | |
| ), | |
| select = structure( | |
| list( | |
| name = c( | |
| "l_returnflag", | |
| "l_linestatus", | |
| "sum_qty", | |
| "sum_base_price", | |
| "sum_disc_price", | |
| "sum_charge", | |
| "avg_qty", | |
| "avg_price", | |
| "avg_disc", | |
| "count_order" | |
| ), | |
| expr = list( | |
| l_returnflag, | |
| l_linestatus, | |
| ~ sum(l_quantity), | |
| ~ sum(l_extendedprice), | |
| ~ sum(l_extendedprice * (1 - l_discount)), | |
| ~ sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)), | |
| ~ mean(l_quantity), | |
| ~ mean(l_extendedprice), | |
| ~ mean(l_discount), | |
| ~ n() | |
| ), | |
| group_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL), | |
| order_vars = list(NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
| NULL, NULL, NULL), | |
| frame = list(NULL, NULL, NULL, NULL, | |
| NULL, NULL, NULL, NULL, NULL, NULL) | |
| ), | |
| class = c("tbl_df", | |
| "tbl", "data.frame"), | |
| row.names = c(NA,-10L) | |
| ), | |
| where = NULL, | |
| group_by = list(l_returnflag, l_linestatus), | |
| order_by = list(~ l_returnflag, ~ l_linestatus), | |
| distinct = FALSE, | |
| limit = NULL, | |
| select_operation = "summarise", | |
| last_op = "summarise", | |
| message_summarise = "\033[38;5;232m`summarise()` has grouped output by \033[34m\"l_returnflag\"\033[38;5;232m. You can override using the `.groups` argument.\033[39m", | |
| group_vars = character(0), | |
| order_vars = list( ~ l_returnflag, | |
| ~ l_linestatus), | |
| frame = NULL | |
| ), | |
| class = c("lazy_select_query", | |
| "lazy_query") | |
| ) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Reprex :)
Created on 2022-09-08 by the reprex package (v2.0.1)