Last active
May 19, 2022 20:26
-
-
Save kylebarron/73d4a126499ef113add6ea1dca04e79f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This file is automatically @generated by Cargo. | |
| # It is not intended for manual editing. | |
| version = 3 | |
| [[package]] | |
| name = "adler" | |
| version = "1.0.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" | |
| [[package]] | |
| name = "alloc-no-stdlib" | |
| version = "2.0.3" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" | |
| [[package]] | |
| name = "alloc-stdlib" | |
| version = "0.2.1" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" | |
| dependencies = [ | |
| "alloc-no-stdlib", | |
| ] | |
| [[package]] | |
| name = "async-stream" | |
| version = "0.3.3" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" | |
| dependencies = [ | |
| "async-stream-impl", | |
| "futures-core", | |
| ] | |
| [[package]] | |
| name = "async-stream-impl" | |
| version = "0.3.3" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" | |
| dependencies = [ | |
| "proc-macro2", | |
| "quote", | |
| "syn", | |
| ] | |
| [[package]] | |
| name = "async-trait" | |
| version = "0.1.53" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" | |
| dependencies = [ | |
| "proc-macro2", | |
| "quote", | |
| "syn", | |
| ] | |
| [[package]] | |
| name = "bitpacking" | |
| version = "0.8.4" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" | |
| dependencies = [ | |
| "crunchy", | |
| ] | |
| [[package]] | |
| name = "brotli" | |
| version = "3.3.4" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" | |
| dependencies = [ | |
| "alloc-no-stdlib", | |
| "alloc-stdlib", | |
| "brotli-decompressor", | |
| ] | |
| [[package]] | |
| name = "brotli-decompressor" | |
| version = "2.3.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" | |
| dependencies = [ | |
| "alloc-no-stdlib", | |
| "alloc-stdlib", | |
| ] | |
| [[package]] | |
| name = "cc" | |
| version = "1.0.73" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" | |
| dependencies = [ | |
| "jobserver", | |
| ] | |
| [[package]] | |
| name = "cfg-if" | |
| version = "1.0.0" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | |
| [[package]] | |
| name = "crc32fast" | |
| version = "1.3.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" | |
| dependencies = [ | |
| "cfg-if", | |
| ] | |
| [[package]] | |
| name = "crunchy" | |
| version = "0.2.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" | |
| [[package]] | |
| name = "fallible-streaming-iterator" | |
| version = "0.1.9" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" | |
| [[package]] | |
| name = "flate2" | |
| version = "1.0.23" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" | |
| dependencies = [ | |
| "cfg-if", | |
| "crc32fast", | |
| "libc", | |
| "miniz_oxide", | |
| ] | |
| [[package]] | |
| name = "futures" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" | |
| dependencies = [ | |
| "futures-channel", | |
| "futures-core", | |
| "futures-executor", | |
| "futures-io", | |
| "futures-sink", | |
| "futures-task", | |
| "futures-util", | |
| ] | |
| [[package]] | |
| name = "futures-channel" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" | |
| dependencies = [ | |
| "futures-core", | |
| "futures-sink", | |
| ] | |
| [[package]] | |
| name = "futures-core" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" | |
| [[package]] | |
| name = "futures-executor" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" | |
| dependencies = [ | |
| "futures-core", | |
| "futures-task", | |
| "futures-util", | |
| ] | |
| [[package]] | |
| name = "futures-io" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" | |
| [[package]] | |
| name = "futures-macro" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" | |
| dependencies = [ | |
| "proc-macro2", | |
| "quote", | |
| "syn", | |
| ] | |
| [[package]] | |
| name = "futures-sink" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" | |
| [[package]] | |
| name = "futures-task" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" | |
| [[package]] | |
| name = "futures-util" | |
| version = "0.3.21" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" | |
| dependencies = [ | |
| "futures-channel", | |
| "futures-core", | |
| "futures-io", | |
| "futures-macro", | |
| "futures-sink", | |
| "futures-task", | |
| "memchr", | |
| "pin-project-lite", | |
| "pin-utils", | |
| "slab", | |
| ] | |
| [[package]] | |
| name = "integer-encoding" | |
| version = "3.0.3" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "0e85a1509a128c855368e135cffcde7eac17d8e1083f41e2b98c58bc1a5074be" | |
| dependencies = [ | |
| "async-trait", | |
| "futures-util", | |
| ] | |
| [[package]] | |
| name = "jobserver" | |
| version = "0.1.24" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" | |
| dependencies = [ | |
| "libc", | |
| ] | |
| [[package]] | |
| name = "libc" | |
| version = "0.2.126" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" | |
| [[package]] | |
| name = "lz4" | |
| version = "1.23.3" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" | |
| dependencies = [ | |
| "libc", | |
| "lz4-sys", | |
| ] | |
| [[package]] | |
| name = "lz4-sys" | |
| version = "1.9.3" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" | |
| dependencies = [ | |
| "cc", | |
| "libc", | |
| ] | |
| [[package]] | |
| name = "memchr" | |
| version = "2.5.0" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" | |
| [[package]] | |
| name = "miniz_oxide" | |
| version = "0.5.1" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" | |
| dependencies = [ | |
| "adler", | |
| ] | |
| [[package]] | |
| name = "parquet-format-async-temp" | |
| version = "0.3.0" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf" | |
| dependencies = [ | |
| "async-trait", | |
| "futures", | |
| "integer-encoding", | |
| ] | |
| [[package]] | |
| name = "parquet-metadata-demo" | |
| version = "0.1.0" | |
| dependencies = [ | |
| "parquet2", | |
| ] | |
| [[package]] | |
| name = "parquet2" | |
| version = "0.12.0" | |
| source = "git+https://github.com/jorgecarleitao/parquet2?branch=improve_meta_read#9427962ca7af01f99ccf5b960dc4bb3484ec9c3d" | |
| dependencies = [ | |
| "async-stream", | |
| "bitpacking", | |
| "brotli", | |
| "flate2", | |
| "futures", | |
| "lz4", | |
| "parquet-format-async-temp", | |
| "snap", | |
| "streaming-decompression", | |
| "xxhash-rust", | |
| "zstd", | |
| ] | |
| [[package]] | |
| name = "pin-project-lite" | |
| version = "0.2.9" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" | |
| [[package]] | |
| name = "pin-utils" | |
| version = "0.1.0" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" | |
| [[package]] | |
| name = "proc-macro2" | |
| version = "1.0.39" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" | |
| dependencies = [ | |
| "unicode-ident", | |
| ] | |
| [[package]] | |
| name = "quote" | |
| version = "1.0.18" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" | |
| dependencies = [ | |
| "proc-macro2", | |
| ] | |
| [[package]] | |
| name = "slab" | |
| version = "0.4.6" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" | |
| [[package]] | |
| name = "snap" | |
| version = "1.0.5" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" | |
| [[package]] | |
| name = "streaming-decompression" | |
| version = "0.1.0" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "9bc687acd5dc742c4a7094f2927a8614a68e4743ef682e7a2f9f0f711656cc92" | |
| dependencies = [ | |
| "fallible-streaming-iterator", | |
| ] | |
| [[package]] | |
| name = "syn" | |
| version = "1.0.95" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" | |
| dependencies = [ | |
| "proc-macro2", | |
| "quote", | |
| "unicode-ident", | |
| ] | |
| [[package]] | |
| name = "unicode-ident" | |
| version = "1.0.0" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" | |
| [[package]] | |
| name = "xxhash-rust" | |
| version = "0.8.5" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "074914ea4eec286eb8d1fd745768504f420a1f7b7919185682a4a267bed7d2e7" | |
| [[package]] | |
| name = "zstd" | |
| version = "0.11.2+zstd.1.5.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" | |
| dependencies = [ | |
| "zstd-safe", | |
| ] | |
| [[package]] | |
| name = "zstd-safe" | |
| version = "5.0.2+zstd.1.5.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" | |
| dependencies = [ | |
| "libc", | |
| "zstd-sys", | |
| ] | |
| [[package]] | |
| name = "zstd-sys" | |
| version = "2.0.1+zstd.1.5.2" | |
| source = "registry+https://github.com/rust-lang/crates.io-index" | |
| checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" | |
| dependencies = [ | |
| "cc", | |
| "libc", | |
| ] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [package] | |
| name = "parquet-metadata-demo" | |
| version = "0.1.0" | |
| edition = "2021" | |
| # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
| [dependencies] | |
| parquet2 = {git = "https://github.com/jorgecarleitao/parquet2", branch = "improve_meta_read"} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| from io import BytesIO | |
| import pyarrow as pa | |
| import pyarrow.parquet as pq | |
def create_example_file_meta_data():
    """Build a small four-column table and capture the metadata pyarrow collects for it.

    The table is written to a throwaway in-memory buffer; the write exists only
    so that ``pq.write_table`` fills the metadata collector list.

    Returns:
        A ``(schema, metadata)`` tuple: the schema of the example table and the
        first entry gathered in the collector during the write.
    """
    columns = {
        "str": pa.array(["a", "b", "c", "d"], type=pa.string()),
        "uint8": pa.array([1, 2, 3, 4], type=pa.uint8()),
        "int32": pa.array([0, -2147483638, 2147483637, 1], type=pa.int32()),
        "bool": pa.array([True, True, False, False], type=pa.bool_()),
    }
    example = pa.table(columns)

    # The buffer's contents are never read back; only the collector matters.
    sink = BytesIO()
    collected = []
    pq.write_table(example, sink, metadata_collector=collected)

    schema = example.schema
    return schema, collected[0]
def main():
    """Write a ``_metadata`` file that repeats one example metadata entry 30,000 times."""
    schema, meta = create_example_file_meta_data()
    print('created collector')

    # Every slot refers to the same metadata object; nothing is copied.
    copies = 30_000
    repeated = [meta for _ in range(copies)]

    print('writing meta')
    pq.write_metadata(schema, '_metadata', metadata_collector=repeated)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| use std::{fs::File, time::Instant, io::BufReader}; | |
| use parquet2::read::read_metadata; | |
| fn main() { | |
| let mut file = BufReader::new(File::open("_metadata").unwrap()); | |
| let now = Instant::now(); | |
| let _ = read_metadata(&mut file).unwrap(); | |
| println!("Time to parse metadata: {}", now.elapsed().as_secs_f32()); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment