Last active
May 19, 2022 20:26
-
-
Save kylebarron/73d4a126499ef113add6ea1dca04e79f to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This file is automatically @generated by Cargo. | |
# It is not intended for manual editing. | |
version = 3 | |
[[package]] | |
name = "adler" | |
version = "1.0.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" | |
[[package]] | |
name = "alloc-no-stdlib" | |
version = "2.0.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "35ef4730490ad1c4eae5c4325b2a95f521d023e5c885853ff7aca0a6a1631db3" | |
[[package]] | |
name = "alloc-stdlib" | |
version = "0.2.1" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "697ed7edc0f1711de49ce108c541623a0af97c6c60b2f6e2b65229847ac843c2" | |
dependencies = [ | |
"alloc-no-stdlib", | |
] | |
[[package]] | |
name = "async-stream" | |
version = "0.3.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "dad5c83079eae9969be7fadefe640a1c566901f05ff91ab221de4b6f68d9507e" | |
dependencies = [ | |
"async-stream-impl", | |
"futures-core", | |
] | |
[[package]] | |
name = "async-stream-impl" | |
version = "0.3.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "10f203db73a71dfa2fb6dd22763990fa26f3d2625a6da2da900d23b87d26be27" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"syn", | |
] | |
[[package]] | |
name = "async-trait" | |
version = "0.1.53" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "ed6aa3524a2dfcf9fe180c51eae2b58738348d819517ceadf95789c51fff7600" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"syn", | |
] | |
[[package]] | |
name = "bitpacking" | |
version = "0.8.4" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "a8c7d2ac73c167c06af4a5f37e6e59d84148d57ccbe4480b76f0273eefea82d7" | |
dependencies = [ | |
"crunchy", | |
] | |
[[package]] | |
name = "brotli" | |
version = "3.3.4" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" | |
dependencies = [ | |
"alloc-no-stdlib", | |
"alloc-stdlib", | |
"brotli-decompressor", | |
] | |
[[package]] | |
name = "brotli-decompressor" | |
version = "2.3.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "59ad2d4653bf5ca36ae797b1f4bb4dbddb60ce49ca4aed8a2ce4829f60425b80" | |
dependencies = [ | |
"alloc-no-stdlib", | |
"alloc-stdlib", | |
] | |
[[package]] | |
name = "cc" | |
version = "1.0.73" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" | |
dependencies = [ | |
"jobserver", | |
] | |
[[package]] | |
name = "cfg-if" | |
version = "1.0.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" | |
[[package]] | |
name = "crc32fast" | |
version = "1.3.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" | |
dependencies = [ | |
"cfg-if", | |
] | |
[[package]] | |
name = "crunchy" | |
version = "0.2.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" | |
[[package]] | |
name = "fallible-streaming-iterator" | |
version = "0.1.9" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" | |
[[package]] | |
name = "flate2" | |
version = "1.0.23" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "b39522e96686d38f4bc984b9198e3a0613264abaebaff2c5c918bfa6b6da09af" | |
dependencies = [ | |
"cfg-if", | |
"crc32fast", | |
"libc", | |
"miniz_oxide", | |
] | |
[[package]] | |
name = "futures" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "f73fe65f54d1e12b726f517d3e2135ca3125a437b6d998caf1962961f7172d9e" | |
dependencies = [ | |
"futures-channel", | |
"futures-core", | |
"futures-executor", | |
"futures-io", | |
"futures-sink", | |
"futures-task", | |
"futures-util", | |
] | |
[[package]] | |
name = "futures-channel" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" | |
dependencies = [ | |
"futures-core", | |
"futures-sink", | |
] | |
[[package]] | |
name = "futures-core" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" | |
[[package]] | |
name = "futures-executor" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" | |
dependencies = [ | |
"futures-core", | |
"futures-task", | |
"futures-util", | |
] | |
[[package]] | |
name = "futures-io" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" | |
[[package]] | |
name = "futures-macro" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"syn", | |
] | |
[[package]] | |
name = "futures-sink" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" | |
[[package]] | |
name = "futures-task" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" | |
[[package]] | |
name = "futures-util" | |
version = "0.3.21" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" | |
dependencies = [ | |
"futures-channel", | |
"futures-core", | |
"futures-io", | |
"futures-macro", | |
"futures-sink", | |
"futures-task", | |
"memchr", | |
"pin-project-lite", | |
"pin-utils", | |
"slab", | |
] | |
[[package]] | |
name = "integer-encoding" | |
version = "3.0.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "0e85a1509a128c855368e135cffcde7eac17d8e1083f41e2b98c58bc1a5074be" | |
dependencies = [ | |
"async-trait", | |
"futures-util", | |
] | |
[[package]] | |
name = "jobserver" | |
version = "0.1.24" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa" | |
dependencies = [ | |
"libc", | |
] | |
[[package]] | |
name = "libc" | |
version = "0.2.126" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" | |
[[package]] | |
name = "lz4" | |
version = "1.23.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "4edcb94251b1c375c459e5abe9fb0168c1c826c3370172684844f8f3f8d1a885" | |
dependencies = [ | |
"libc", | |
"lz4-sys", | |
] | |
[[package]] | |
name = "lz4-sys" | |
version = "1.9.3" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d7be8908e2ed6f31c02db8a9fa962f03e36c53fbfde437363eae3306b85d7e17" | |
dependencies = [ | |
"cc", | |
"libc", | |
] | |
[[package]] | |
name = "memchr" | |
version = "2.5.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" | |
[[package]] | |
name = "miniz_oxide" | |
version = "0.5.1" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d2b29bd4bc3f33391105ebee3589c19197c4271e3e5a9ec9bfe8127eeff8f082" | |
dependencies = [ | |
"adler", | |
] | |
[[package]] | |
name = "parquet-format-async-temp" | |
version = "0.3.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "488c8b5f43521d019fade4bcc0ce88cce5da5fd26eb1d38b933807041f5930bf" | |
dependencies = [ | |
"async-trait", | |
"futures", | |
"integer-encoding", | |
] | |
[[package]] | |
name = "parquet-metadata-demo" | |
version = "0.1.0" | |
dependencies = [ | |
"parquet2", | |
] | |
[[package]] | |
name = "parquet2" | |
version = "0.12.0" | |
source = "git+https://github.com/jorgecarleitao/parquet2?branch=improve_meta_read#9427962ca7af01f99ccf5b960dc4bb3484ec9c3d" | |
dependencies = [ | |
"async-stream", | |
"bitpacking", | |
"brotli", | |
"flate2", | |
"futures", | |
"lz4", | |
"parquet-format-async-temp", | |
"snap", | |
"streaming-decompression", | |
"xxhash-rust", | |
"zstd", | |
] | |
[[package]] | |
name = "pin-project-lite" | |
version = "0.2.9" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" | |
[[package]] | |
name = "pin-utils" | |
version = "0.1.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" | |
[[package]] | |
name = "proc-macro2" | |
version = "1.0.39" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "c54b25569025b7fc9651de43004ae593a75ad88543b17178aa5e1b9c4f15f56f" | |
dependencies = [ | |
"unicode-ident", | |
] | |
[[package]] | |
name = "quote" | |
version = "1.0.18" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "a1feb54ed693b93a84e14094943b84b7c4eae204c512b7ccb95ab0c66d278ad1" | |
dependencies = [ | |
"proc-macro2", | |
] | |
[[package]] | |
name = "slab" | |
version = "0.4.6" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" | |
[[package]] | |
name = "snap" | |
version = "1.0.5" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "45456094d1983e2ee2a18fdfebce3189fa451699d0502cb8e3b49dba5ba41451" | |
[[package]] | |
name = "streaming-decompression" | |
version = "0.1.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "9bc687acd5dc742c4a7094f2927a8614a68e4743ef682e7a2f9f0f711656cc92" | |
dependencies = [ | |
"fallible-streaming-iterator", | |
] | |
[[package]] | |
name = "syn" | |
version = "1.0.95" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "fbaf6116ab8924f39d52792136fb74fd60a80194cf1b1c6ffa6453eef1c3f942" | |
dependencies = [ | |
"proc-macro2", | |
"quote", | |
"unicode-ident", | |
] | |
[[package]] | |
name = "unicode-ident" | |
version = "1.0.0" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "d22af068fba1eb5edcb4aea19d382b2a3deb4c8f9d475c589b6ada9e0fd493ee" | |
[[package]] | |
name = "xxhash-rust" | |
version = "0.8.5" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "074914ea4eec286eb8d1fd745768504f420a1f7b7919185682a4a267bed7d2e7" | |
[[package]] | |
name = "zstd" | |
version = "0.11.2+zstd.1.5.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" | |
dependencies = [ | |
"zstd-safe", | |
] | |
[[package]] | |
name = "zstd-safe" | |
version = "5.0.2+zstd.1.5.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" | |
dependencies = [ | |
"libc", | |
"zstd-sys", | |
] | |
[[package]] | |
name = "zstd-sys" | |
version = "2.0.1+zstd.1.5.2" | |
source = "registry+https://github.com/rust-lang/crates.io-index" | |
checksum = "9fd07cbbc53846d9145dbffdf6dd09a7a0aa52be46741825f5c97bdd4f73f12b" | |
dependencies = [ | |
"cc", | |
"libc", | |
] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[package] | |
name = "parquet-metadata-demo" | |
version = "0.1.0" | |
edition = "2021" | |
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html | |
[dependencies] | |
parquet2 = {git = "https://github.com/jorgecarleitao/parquet2", branch = "improve_meta_read"} | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from io import BytesIO | |
import pyarrow as pa | |
import pyarrow.parquet as pq | |
def create_example_file_meta_data():
    """Build a tiny four-column table and capture the metadata of writing it.

    The table is written as Parquet into a throwaway in-memory buffer; only
    the entry gathered through ``metadata_collector`` is kept.

    Returns:
        A ``(schema, metadata)`` tuple: the table's schema and the first
        entry that ``pq.write_table`` appended to the collector list.
    """
    # (column name, values, arrow type) -- one row per column of the table.
    column_specs = (
        ("str", ["a", "b", "c", "d"], pa.string()),
        ("uint8", [1, 2, 3, 4], pa.uint8()),
        ("int32", [0, -2147483638, 2147483637, 1], pa.int32()),
        ("bool", [True, True, False, False], pa.bool_()),
    )
    columns = {
        name: pa.array(values, type=arrow_type)
        for name, values, arrow_type in column_specs
    }
    table = pa.table(columns)

    # The written bytes are discarded; only the collected metadata matters.
    collected = []
    sink = BytesIO()
    pq.write_table(table, sink, metadata_collector=collected)
    return table.schema, collected[0]
def main():
    """Write a ``_metadata`` file made of 30,000 copies of one file's metadata.

    The same metadata object is referenced 30,000 times and handed to
    ``pq.write_metadata`` together with the example schema. Progress is
    printed before building the list and before writing.
    """
    schema, file_meta = create_example_file_meta_data()
    print('created collector')

    # Every slot refers to the same metadata object; no copies are made here.
    repeated_meta = [file_meta for _ in range(30_000)]
    print('writing meta')
    pq.write_metadata(schema, '_metadata', metadata_collector=repeated_meta)


if __name__ == '__main__':
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
use std::{fs::File, time::Instant, io::BufReader}; | |
use parquet2::read::read_metadata; | |
fn main() { | |
let mut file = BufReader::new(File::open("_metadata").unwrap()); | |
let now = Instant::now(); | |
let _ = read_metadata(&mut file).unwrap(); | |
println!("Time to parse metadata: {}", now.elapsed().as_secs_f32()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment