This gist contains two utilities:
- parquet2json: a simple macOS bash utility for converting parquet files to JSON using duckdb.
- from-parquet.nu: adds a `from parquet` command to nushell.
# Parse parquet data into a nushell table via the external `parquet2json` tool.
#
# Examples:
#   from parquet data.parquet
#   open --raw data.parquet | from parquet | where column == 'value'
def "from parquet" [
    file?: path  # Optional parquet file path (reads from stdin if not provided)
] {
    if not ($file | is-empty) {
        # A path was given: let parquet2json read the file itself.
        ^parquet2json $file | lines | each { |row| $row | from json }
    } else {
        # No path: forward the piped-in bytes to parquet2json. Produce that
        # input with `open --raw` so the file is passed along as binary data.
        $in | ^parquet2json | lines | each { |row| $row | from json }
    }
}
#!/bin/bash
#
# parquet2json: convert parquet data to JSON on stdout using duckdb.
#
# Usage:
#   parquet2json FILE     convert the given parquet file
#   parquet2json [-]      convert parquet data read from stdin
#
# Exit status: 1 if duckdb is missing; otherwise non-zero if buffering stdin
# or the duckdb conversion fails.

set -euo pipefail

# Check if duckdb is installed
if ! command -v duckdb &> /dev/null; then
  echo "Error: duckdb is not installed" >&2
  echo "Install it with: brew install duckdb" >&2
  exit 1
fi

# Private scratch directory; only created when reading from stdin.
tmpdir=""

# Remove the scratch directory on every exit path (success, error, interrupt).
cleanup() {
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap cleanup EXIT

# Run the duckdb export for one input path.
# Arguments: $1 - path handed to duckdb as a SQL string literal
# Outputs:   JSON on stdout
# Returns:   duckdb's exit status
to_json() {
  local path=$1
  local sq="'"
  # Double any single quote so the path cannot terminate the SQL literal.
  local quoted=${path//$sq/$sq$sq}
  duckdb -c "COPY (SELECT * FROM '${quoted}') TO '/dev/stdout' (FORMAT JSON);"
}

if [[ $# -eq 0 || "${1:-}" == "-" ]]; then
  # Read from stdin. duckdb is given a file path, so buffer the stream first.
  # The X's must be at the end of the template: BSD/macOS mktemp only
  # substitutes trailing X's, so a ".parquet" suffix after them would yield
  # a fixed, predictable name. Create a unique directory instead and keep
  # the ".parquet" extension on the file inside it.
  tmpdir=$(mktemp -d "${TMPDIR:-/tmp}/parquet2json.XXXXXX")
  temp="${tmpdir}/stdin.parquet"
  cat > "$temp"
  to_json "$temp"
else
  # Read from file argument
  to_json "$1"
fi