Created
June 1, 2023 13:40
-
-
Save vibhatha/e8408546dc3464023dd352bdf904769a to your computer and use it in GitHub Desktop.
Sample substrait plan
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| extension_uris { | |
| extension_uri_anchor: 1 | |
| uri: "https://github.com/substrait-io/substrait/blob/main/extensions/functions_boolean.yaml" | |
| } | |
| extension_uris { | |
| extension_uri_anchor: 2 | |
| uri: "https://github.com/substrait-io/substrait/blob/main/extensions/functions_comparison.yaml" | |
| } | |
| extensions { | |
| extension_function { | |
| extension_uri_reference: 1 | |
| function_anchor: 1 | |
| name: "and" | |
| } | |
| } | |
| extensions { | |
| extension_function { | |
| extension_uri_reference: 2 | |
| function_anchor: 2 | |
| name: "equal" | |
| } | |
| } | |
| extensions { | |
| extension_function { | |
| extension_uri_reference: 2 | |
| function_anchor: 3 | |
| name: "gt" | |
| } | |
| } | |
| relations { | |
| root { | |
| input { | |
| sort { | |
| input { | |
| filter { | |
| input { | |
| project { | |
| common { | |
| emit { | |
| output_mapping: 12 | |
| output_mapping: 13 | |
| output_mapping: 14 | |
| output_mapping: 15 | |
| output_mapping: 16 | |
| } | |
| } | |
| input { | |
| join { | |
| left { | |
| project { | |
| common { | |
| emit { | |
| output_mapping: 3 | |
| output_mapping: 4 | |
| output_mapping: 5 | |
| } | |
| } | |
| input { | |
| read { | |
| common { | |
| direct { | |
| } | |
| } | |
| base_schema { | |
| names: "tconst" | |
| names: "averageRating" | |
| names: "numVotes" | |
| struct { | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| nullability: NULLABILITY_REQUIRED | |
| } | |
| } | |
| named_table { | |
| names: "ratings" | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| cast { | |
| type { | |
| fp64 { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| input { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 1 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| failure_behavior: FAILURE_BEHAVIOR_THROW_EXCEPTION | |
| } | |
| } | |
| expressions { | |
| cast { | |
| type { | |
| i64 { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| input { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 2 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| failure_behavior: FAILURE_BEHAVIOR_THROW_EXCEPTION | |
| } | |
| } | |
| } | |
| } | |
| right { | |
| project { | |
| common { | |
| emit { | |
| output_mapping: 9 | |
| output_mapping: 10 | |
| output_mapping: 11 | |
| } | |
| } | |
| input { | |
| filter { | |
| input { | |
| read { | |
| common { | |
| direct { | |
| } | |
| } | |
| base_schema { | |
| names: "tconst" | |
| names: "titleType" | |
| names: "primaryTitle" | |
| names: "originalTitle" | |
| names: "isAdult" | |
| names: "startYear" | |
| names: "endYear" | |
| names: "runtimeMinutes" | |
| names: "genres" | |
| struct { | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| types { | |
| string { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| nullability: NULLABILITY_REQUIRED | |
| } | |
| } | |
| named_table { | |
| names: "basics" | |
| } | |
| } | |
| } | |
| condition { | |
| scalar_function { | |
| function_reference: 1 | |
| output_type { | |
| bool { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| arguments { | |
| value { | |
| scalar_function { | |
| function_reference: 2 | |
| output_type { | |
| bool { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| arguments { | |
| value { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 1 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| arguments { | |
| value { | |
| literal { | |
| string: "movie" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| arguments { | |
| value { | |
| scalar_function { | |
| function_reference: 2 | |
| output_type { | |
| bool { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| arguments { | |
| value { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 4 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| arguments { | |
| value { | |
| literal { | |
| string: "0" | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 2 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 5 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| } | |
| expression { | |
| scalar_function { | |
| function_reference: 2 | |
| output_type { | |
| bool { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| arguments { | |
| value { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| arguments { | |
| value { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 3 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| type: JOIN_TYPE_INNER | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 1 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 2 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 4 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| expressions { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 5 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| } | |
| condition { | |
| scalar_function { | |
| function_reference: 3 | |
| output_type { | |
| bool { | |
| nullability: NULLABILITY_NULLABLE | |
| } | |
| } | |
| arguments { | |
| value { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 2 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| } | |
| arguments { | |
| value { | |
| literal { | |
| i32: 200000 | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| sorts { | |
| expr { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 1 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| direction: SORT_DIRECTION_DESC_NULLS_FIRST | |
| } | |
| sorts { | |
| expr { | |
| selection { | |
| direct_reference { | |
| struct_field { | |
| field: 2 | |
| } | |
| } | |
| root_reference { | |
| } | |
| } | |
| } | |
| direction: SORT_DIRECTION_DESC_NULLS_FIRST | |
| } | |
| } | |
| } | |
| names: "tconst" | |
| names: "avg_rating" | |
| names: "num_votes" | |
| names: "primaryTitle" | |
| names: "startYear" | |
| } | |
| } | |
| version { | |
| minor_number: 24 | |
| producer: "ibis-substrait" | |
| } |
Author
I get a bit different plan, using the following code (tables downloaded from https://datasets.imdbws.com/)
import duckdb
from duckdb import DuckDBPyConnection
import pandas as pd
from converter import convert_tsv_to_csv
from google.protobuf import json_format
import ibis
# Setup
def convert_tsv_to_csv(src: str, dst: str) -> None:
"""
A simple python method to convert a tsc file into a csv file.
Args:
src (str): source path for the .tsv file
dst (str): desired destination path
"""
try:
table = pd.read_table(filepath_or_buffer=src, sep="\t")
table.to_csv(dst, index=False)
except Exception as e:
raise Exception(f"Failed to convert TSV to CSV: {str(e)}")
def prepare_tables():
convert_tsv_to_csv(src="../datasets/title.ratings.tsv", dst="../datasets/ratings.csv")
convert_tsv_to_csv(src="../datasets/title.basics.tsv", dst="../datasets/basics.csv")
def load_tables(connection: DuckDBPyConnection):
ratings_df = pd.read_csv("../datasets/ratings.csv")
connection.execute(
"CREATE TABLE ratings AS SELECT * FROM ratings_df"
)
basics_df = pd.read_csv(filepath_or_buffer="../datasets/basics.csv")
connection.execute(
"CREATE TABLE basics AS SELECT * FROM basics_df"
)
duckdb_conn = duckdb.connect(database="imdb.db",read_only=False)
load_tables(duckdb_conn)
import ibis
from ibis import _
from ibis_substrait.compiler.core import SubstraitCompiler
ratings = ibis.table(
[
("tconst", "str"),
("averageRating", "str"),
("numVotes", "str"),
],
name="ratings",
)
ratings = ratings.select(
tconst=ratings.tconst,
avg_rating = ratings.averageRating.cast('float'),
num_votes=ratings.numVotes.cast("int")
)
basics = ibis.table(
[
("tconst", "str"),
("titleType", "str"),
("primaryTitle", "str"),
("originalTitle", "str"),
("isAdult", "str"),
("startYear", "str"),
("endYear", "str"),
("runtimeMinutes", "str"),
("genres", "str"),
],
name="basics",
)
basics = basics.filter([basics.titleType == "movie", basics.isAdult == "0"]).select(
"tconst",
"primaryTitle",
"startYear",
)
topfilms = (
ratings.join(basics, "tconst")
.order_by([_.avg_rating.desc(), _.num_votes.desc()])
.filter(_.num_votes > 200_000)
)
compiler = SubstraitCompiler()
plan = compiler.compile(topfilms)
with open("topfilms.proto", "wb") as f:
f.write(plan.SerializeToString())
duckdb_conn.install_extension("substrait")
duckdb_conn.load_extension("substrait")
with open("topfilms.proto", "rb") as f:
plan_blob = f.read()
plan_result = duckdb_conn.from_substrait(plan_blob)
print(plan_result)
# Good resultsThe plan I get is a bit different (used some text comparison tool):
extension_uris {
extension_uri_anchor: 1
}
extensions {
extension_function {
extension_uri_reference: 1
function_anchor: 1
name: "and"
}
}
extensions {
extension_function {
extension_uri_reference: 1
function_anchor: 2
name: "equal"
}
}
extensions {
extension_function {
extension_uri_reference: 1
function_anchor: 3
name: "gt"
}
}
relations {
root {
input {
sort {
input {
filter {
input {
project {
common {
emit {
output_mapping: 12
output_mapping: 13
output_mapping: 14
output_mapping: 15
output_mapping: 16
}
}
input {
join {
left {
project {
common {
emit {
output_mapping: 3
output_mapping: 4
output_mapping: 5
}
}
input {
read {
common {
direct {
}
}
base_schema {
names: "tconst"
names: "averageRating"
names: "numVotes"
struct {
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
nullability: NULLABILITY_REQUIRED
}
}
named_table {
names: "ratings"
}
}
}
expressions {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
expressions {
cast {
type {
fp64 {
nullability: NULLABILITY_NULLABLE
}
}
input {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
}
}
expressions {
cast {
type {
i64 {
nullability: NULLABILITY_NULLABLE
}
}
input {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
}
}
}
}
right {
project {
common {
emit {
output_mapping: 9
output_mapping: 10
output_mapping: 11
}
}
input {
filter {
input {
read {
common {
direct {
}
}
base_schema {
names: "tconst"
names: "titleType"
names: "primaryTitle"
names: "originalTitle"
names: "isAdult"
names: "startYear"
names: "endYear"
names: "runtimeMinutes"
names: "genres"
struct {
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
types {
string {
nullability: NULLABILITY_NULLABLE
}
}
nullability: NULLABILITY_REQUIRED
}
}
named_table {
names: "basics"
}
}
}
condition {
scalar_function {
function_reference: 1
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
scalar_function {
function_reference: 2
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
}
arguments {
value {
literal {
string: "movie"
}
}
}
}
}
}
arguments {
value {
scalar_function {
function_reference: 2
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 4
}
}
root_reference {
}
}
}
}
arguments {
value {
literal {
string: "0"
}
}
}
}
}
}
}
}
}
}
expressions {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 5
}
}
root_reference {
}
}
}
}
}
expression {
scalar_function {
function_reference: 2
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 3
}
}
root_reference {
}
}
}
}
}
}
type: JOIN_TYPE_INNER
}
}
expressions {
selection {
direct_reference {
struct_field {
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 4
}
}
root_reference {
}
}
}
expressions {
selection {
direct_reference {
struct_field {
field: 5
}
}
root_reference {
}
}
}
}
}
condition {
scalar_function {
function_reference: 3
output_type {
bool {
nullability: NULLABILITY_NULLABLE
}
}
arguments {
value {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
}
arguments {
value {
literal {
i32: 200000
}
}
}
}
}
}
}
sorts {
expr {
selection {
direct_reference {
struct_field {
field: 1
}
}
root_reference {
}
}
}
direction: SORT_DIRECTION_DESC_NULLS_FIRST
}
sorts {
expr {
selection {
direct_reference {
struct_field {
field: 2
}
}
root_reference {
}
}
}
direction: SORT_DIRECTION_DESC_NULLS_FIRST
}
}
}
names: "tconst"
names: "avg_rating"
names: "num_votes"
names: "primaryTitle"
names: "startYear"
}
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Generated using the following code