diff --git a/.gitignore b/.gitignore index 40171ac..fdf4c4b 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ *.log .DS_Store *.fdb_latexmk +.luarc.json \ No newline at end of file diff --git a/introductory/experiments/late_materialization/.gitignore b/introductory/experiments/late_materialization/.gitignore new file mode 100644 index 0000000..68f93bf --- /dev/null +++ b/introductory/experiments/late_materialization/.gitignore @@ -0,0 +1,6 @@ +venv +__pycache__ +target +data +lance_trace.json +.benchmarks \ No newline at end of file diff --git a/introductory/experiments/late_materialization/Cargo.lock b/introductory/experiments/late_materialization/Cargo.lock new file mode 100644 index 0000000..242a5f8 --- /dev/null +++ b/introductory/experiments/late_materialization/Cargo.lock @@ -0,0 +1,2465 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c3a9648d43b9cd48db467b3f87fdd6e146bcc88ab0180006cef2179fe11d01" +dependencies = [ + "cfg-if", + "const-random", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aho-corasick" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b2969dcb958b36655471fc61f7e416fa76033bdd4bfed0678d8fee1e2d07a1f0" +dependencies = [ + "memchr", +] + +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + +[[package]] +name = "arrayref" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6b4930d2cb77ce62f89ee5d5289b4ac049559b1c45539271f5ed4fdc7db34545" + +[[package]] +name = "arrayvec" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" + +[[package]] +name = "arrow" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bc25126d18a012146a888a0298f2c22e1150327bd2765fc76d710a556b2d614" +dependencies = [ + "ahash", + "arrow-arith", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-csv", + "arrow-data", + "arrow-ipc", + "arrow-json", + "arrow-ord", + "arrow-row", + "arrow-schema", + "arrow-select", + "arrow-string", +] + +[[package]] +name = "arrow-arith" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34ccd45e217ffa6e53bbb0080990e77113bdd4e91ddb84e97b77649810bcf1a7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "num", +] + +[[package]] +name = "arrow-array" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bda9acea48b25123c08340f3a8ac361aa0f74469bb36f5ee9acf923fce23e9d" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "chrono-tz", + "half", + "hashbrown", + "num", +] + +[[package]] +name = "arrow-buffer" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01a0fc21915b00fc6c2667b069c1b64bdd920982f426079bc4a7cab86822886c" +dependencies = [ + "bytes", + "half", + "num", +] + +[[package]] +name = "arrow-cast" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5dc0368ed618d509636c1e3cc20db1281148190a78f43519487b2daf07b63b4a" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "base64", + "chrono", + "comfy-table", + "half", + "lexical-core", + "num", +] + +[[package]] +name = "arrow-csv" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e09aa6246a1d6459b3f14baeaa49606cfdbca34435c46320e14054d244987ca" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "csv", + "csv-core", + "lazy_static", + "lexical-core", + "regex", +] + +[[package]] +name = "arrow-data" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "907fafe280a3874474678c1858b9ca4cb7fd83fb8034ff5b6d6376205a08c634" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num", +] + +[[package]] +name = "arrow-ipc" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79a43d6808411886b8c7d4f6f7dd477029c1e77ffffffb7923555cc6579639cd" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "flatbuffers", +] + +[[package]] +name = "arrow-json" +version = "49.0.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82565c91fd627922ebfe2810ee4e8346841b6f9361b87505a9acea38b614fee" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "indexmap", + "lexical-core", + "num", + "serde", + "serde_json", +] + +[[package]] +name = "arrow-ord" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b23b0e53c0db57c6749997fd343d4c0354c994be7eca67152dd2bdb9a3e1bb4" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "half", + "num", +] + +[[package]] +name = "arrow-row" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "361249898d2d6d4a6eeb7484be6ac74977e48da12a4dd81a708d620cc558117a" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "half", + "hashbrown", +] + +[[package]] +name = "arrow-schema" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09e28a5e781bf1b0f981333684ad13f5901f4cd2f20589eab7cf1797da8fc167" + +[[package]] +name = "arrow-select" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f6208466590960efc1d2a7172bc4ff18a67d6e25c529381d7f96ddaf0dc4036" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num", +] + +[[package]] +name = "arrow-string" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4a48149c63c11c9ff571e50ab8f017d2a7cb71037a882b42f6354ed2da9acc7" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "num", + "regex", + "regex-syntax", +] + +[[package]] +name = "async-compression" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"bc2d0cfb2a7388d34f590e76686704c494ed7aaceed62ee1ba35cbf363abc2a5" +dependencies = [ + "bzip2", + "flate2", + "futures-core", + "futures-io", + "memchr", + "pin-project-lite", + "tokio", + "xz2", + "zstd", + "zstd-safe", +] + +[[package]] +name = "async-trait" +version = "0.1.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.21.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35636a1494ede3b646cc98f74f8e62c773a38a659ebc777a2cf26b9b74171df9" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bitflags" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" + +[[package]] +name = "blake2" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +dependencies = [ + "digest", +] + +[[package]] +name = "blake3" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0231f06152bf547e9c2b5194f247cd97aacf6dcd8b15d8e5ec0663f64580da87" +dependencies = [ + "arrayref", + "arrayvec", + "cc", + "cfg-if", + "constant_time_eq", +] + +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + +[[package]] +name = "brotli" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "516074a47ef4bce09577a3b379392300159ce5b1ba2e501ff1c819950066100f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e2e4afe60d7dd600fdd3de8d0f08c2b7ec039712e3b6137ff98b7004e82de4f" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + +[[package]] +name = "bumpalo" +version = "3.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" + +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + +[[package]] +name = "bytes" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" + +[[package]] +name = "bzip2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" +dependencies = [ + "bzip2-sys", + "libc", +] + +[[package]] +name = "bzip2-sys" +version = "0.1.11+1.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "cc" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +dependencies = [ + "jobserver", + "libc", +] + 
+[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f2c685bad3eb3d45a01354cedb7d5faa66194d1d58ba6e267a8de788f79db38" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets 0.48.5", +] + +[[package]] +name = "chrono-tz" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "91d7b79e99bfaa0d47da0687c43aa3b7381938a62ad3a6498599039321f660b7" +dependencies = [ + "chrono", + "chrono-tz-build", + "phf", +] + +[[package]] +name = "chrono-tz-build" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "433e39f13c9a060046954e0592a8d0a4bcb1040125cbf91cb8ee58964cfb350f" +dependencies = [ + "parse-zoneinfo", + "phf", + "phf_codegen", +] + +[[package]] +name = "comfy-table" +version = "7.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c64043d6c7b7a4c58e39e7efccfdea7b93d885a795d0c054a69dbbf4dd52686" +dependencies = [ + "strum", + "strum_macros", + "unicode-width", +] + +[[package]] +name = "const-random" +version = "0.1.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aaf16c9c2c612020bcfd042e170f6e32de9b9d75adb5277cdbbd2e2c8c8299a" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom", + "once_cell", + "tiny-keccak", +] + +[[package]] +name = "constant_time_eq" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f7144d30dcf0fafbce74250a3963025d8d52177934239851c917d29f1df280c2" + +[[package]] +name = "core-foundation-sys" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" + +[[package]] +name = "cpufeatures" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce420fe07aecd3e67c5f910618fe65e94158f6dcc0adf44e00d69ce2bdfe0fd0" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crunchy" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" + +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "csv" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac574ff4d437a7b5ad237ef331c17ccca63c46479e5b5453eb8e10bb99a759fe" +dependencies = [ + "csv-core", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "csv-core" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70" +dependencies = [ + "memchr", +] + +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] 
+name = "datafusion" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-schema", + "async-compression", + "async-trait", + "bytes", + "bzip2", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-optimizer", + "datafusion-physical-expr", + "datafusion-physical-plan", + "datafusion-sql", + "flate2", + "futures", + "glob", + "half", + "hashbrown", + "indexmap", + "itertools 0.12.0", + "log", + "num_cpus", + "object_store", + "parking_lot", + "parquet", + "pin-project-lite", + "rand", + "sqlparser", + "tempfile", + "tokio", + "tokio-util", + "url", + "uuid", + "xz2", + "zstd", +] + +[[package]] +name = "datafusion-common" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "chrono", + "half", + "libc", + "num_cpus", + "object_store", + "parquet", + "sqlparser", +] + +[[package]] +name = "datafusion-execution" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d" +dependencies = [ + "arrow", + "chrono", + "dashmap", + "datafusion-common", + "datafusion-expr", + "futures", + "hashbrown", + "log", + "object_store", + "parking_lot", + "rand", + "tempfile", + "url", +] + +[[package]] +name = "datafusion-expr" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "datafusion-common", + "paste", + "sqlparser", + "strum", + "strum_macros", +] + +[[package]] +name = 
"datafusion-optimizer" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618" +dependencies = [ + "arrow", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-expr", + "datafusion-physical-expr", + "hashbrown", + "itertools 0.12.0", + "log", + "regex-syntax", +] + +[[package]] +name = "datafusion-physical-expr" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-ord", + "arrow-schema", + "base64", + "blake2", + "blake3", + "chrono", + "datafusion-common", + "datafusion-expr", + "half", + "hashbrown", + "hex", + "indexmap", + "itertools 0.12.0", + "log", + "md-5", + "paste", + "petgraph", + "rand", + "regex", + "sha2", + "unicode-segmentation", + "uuid", +] + +[[package]] +name = "datafusion-physical-plan" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631" +dependencies = [ + "ahash", + "arrow", + "arrow-array", + "arrow-buffer", + "arrow-schema", + "async-trait", + "chrono", + "datafusion-common", + "datafusion-execution", + "datafusion-expr", + "datafusion-physical-expr", + "futures", + "half", + "hashbrown", + "indexmap", + "itertools 0.12.0", + "log", + "once_cell", + "parking_lot", + "pin-project-lite", + "rand", + "tokio", + "uuid", +] + +[[package]] +name = "datafusion-sql" +version = "34.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5" +dependencies = [ + "arrow", + "arrow-schema", + "datafusion-common", + "datafusion-expr", + "log", + "sqlparser", +] + +[[package]] +name = "digest" +version = "0.10.7" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + +[[package]] +name = "doc-comment" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" + +[[package]] +name = "either" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + +[[package]] +name = "errno" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a258e46cdc063eb8519c00b9fc845fc47bcfca4130e2f08e88665ceda8474245" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + +[[package]] +name = "fastrand" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" + +[[package]] +name = "fixedbitset" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" + +[[package]] +name = "flatbuffers" +version = "23.5.26" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dac53e22462d78c16d64a1cd22371b54cc3fe94aa15e7886a2fa6e5d1ab8640" +dependencies = [ + "bitflags 1.3.2", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = 
"form_urlencoded" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e13624c2627564efccf4934284bdd98cbaa14e79b0b5a141218e507b3a823456" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "futures" +version = "0.3.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7e43a803dae2fa37c1f6a8fe121e1f7bf9548b4dfc0522a42f34145dadfc27" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" + +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" + +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "futures-sink" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fb8e00e87438d937621c1c6269e53f536c14d3fbd6a042bb24879e57d474fb5" + 
+[[package]] +name = "futures-task" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" + +[[package]] +name = "futures-util" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe9006bed769170c11f845cf00c7c1e9092aeb3f268e007c3e760ac68008070f" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "glob" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" + +[[package]] +name = "half" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", +] + +[[package]] +name = "hashbrown" +version = "0.14.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] + +[[package]] +name = "heck" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" 
+ +[[package]] +name = "hermit-abi" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d77f7ec81a6d05a3abb01ab6eb7590f6083d08449fe5a1c8b1e620283546ccb7" + +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "iana-time-zone" +version = "0.1.59" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + +[[package]] +name = "idna" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d530e1a18b1cb4c484e6e34556a0d948706958449fca0cab753d649f2bce3d1f" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "indoc" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e186cfbae8084e513daff4240b4797e342f988cecda4fb6c939150f96315fd8" + +[[package]] +name = "integer-encoding" +version = "3.0.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" + +[[package]] +name = "itertools" +version = "0.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" +dependencies = [ + "either", +] + +[[package]] +name = "itertools" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +dependencies = [ + "either", +] + +[[package]] +name = "itoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" + +[[package]] +name = "jobserver" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" +dependencies = [ + "libc", +] + +[[package]] +name = "js-sys" +version = "0.3.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cee9c64da59eae3b50095c18d3e74f8b73c0b86d2792824ff01bbce68ba229ca" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "late_materialization" +version = "0.1.0" +dependencies = [ + "async-trait", + "bytes", + "datafusion", + "futures", + "lazy_static", + "object_store", + "pyo3", + "tokio", + "url", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "lexical-core" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cde5de06e8d4c2faabc400238f9ae1c74d5412d03a7bd067645ccbc47070e46" +dependencies = [ + "lexical-parse-float", + "lexical-parse-integer", + "lexical-util", + "lexical-write-float", + 
"lexical-write-integer", +] + +[[package]] +name = "lexical-parse-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683b3a5ebd0130b8fb52ba0bdc718cc56815b6a097e28ae5a6997d0ad17dc05f" +dependencies = [ + "lexical-parse-integer", + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-parse-integer" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d0994485ed0c312f6d965766754ea177d07f9c00c9b82a5ee62ed5b47945ee9" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "lexical-util" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5255b9ff16ff898710eb9eb63cb39248ea8a5bb036bea8085b1a767ff6c4e3fc" +dependencies = [ + "static_assertions", +] + +[[package]] +name = "lexical-write-float" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accabaa1c4581f05a3923d1b4cfd124c329352288b7b9da09e766b0668116862" +dependencies = [ + "lexical-util", + "lexical-write-integer", + "static_assertions", +] + +[[package]] +name = "lexical-write-integer" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1b6f3d1f4422866b68192d62f77bc5c700bee84f3069f2469d7bc8c77852446" +dependencies = [ + "lexical-util", + "static_assertions", +] + +[[package]] +name = "libc" +version = "0.2.151" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" + +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + +[[package]] +name = "linux-raw-sys" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"c4cd1a83af159aa67994778be9070f0ae1bd732942279cabb14f86f986a21456" + +[[package]] +name = "lock_api" +version = "0.4.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" + +[[package]] +name = "lz4_flex" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ea9b256699eda7b0387ffbc776dd625e28bde3918446381781245b7a50349d8" +dependencies = [ + "twox-hash", +] + +[[package]] +name = "lzma-sys" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" +dependencies = [ + "cc", + "libc", + "pkg-config", +] + +[[package]] +name = "md-5" +version = "0.10.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" +dependencies = [ + "cfg-if", + "digest", +] + +[[package]] +name = "memchr" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "523dc4f511e55ab87b694dc30d0f820d60906ef06413f93d4d7a1385599cc149" + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +dependencies = [ + "adler", +] + +[[package]] +name = "num" +version = "0.4.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +dependencies = [ + "num-bigint", + "num-complex", + "num-integer", + "num-iter", + "num-rational", + "num-traits", +] + +[[package]] +name = "num-bigint" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "608e7659b5c3d7cba262d894801b9ec9d00de989e8a82bd4bef91d08da45cdc0" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-iter" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" +dependencies = [ + "autocfg", + "num-bigint", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +dependencies = [ + "autocfg", + "libm", +] + +[[package]] +name = "num_cpus" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "object_store" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2524735495ea1268be33d200e1ee97455096a0846295a21548cd2f3541de7050" +dependencies = [ + "async-trait", + "bytes", + "chrono", + "futures", + "humantime", + "itertools 0.11.0", + "parking_lot", + "percent-encoding", + "snafu", + "tokio", + "tracing", + "url", + "walkdir", +] + +[[package]] +name = "once_cell" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" + +[[package]] +name = "ordered-float" +version = "2.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets 0.48.5", +] + +[[package]] +name = "parquet" +version = "49.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af88740a842787da39b3d69ce5fbf6fce97d20211d3b299fee0a0da6430c74d4" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-cast", + "arrow-data", + "arrow-ipc", + "arrow-schema", + "arrow-select", + "base64", + "brotli", + "bytes", + "chrono", + "flate2", + "futures", + "hashbrown", + "lz4_flex", + "num", + 
"num-bigint", + "object_store", + "paste", + "seq-macro", + "snap", + "thrift", + "tokio", + "twox-hash", + "zstd", +] + +[[package]] +name = "parse-zoneinfo" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c705f256449c60da65e11ff6626e0c16a0a0b96aaa348de61376b249bc340f41" +dependencies = [ + "regex", +] + +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + +[[package]] +name = "percent-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" + +[[package]] +name = "petgraph" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" +dependencies = [ + "fixedbitset", + "indexmap", +] + +[[package]] +name = "phf" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc" +dependencies = [ + "phf_shared", +] + +[[package]] +name = "phf_codegen" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_shared" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" +dependencies = [ + "siphasher", +] + +[[package]] 
+name = "pin-project-lite" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69d3587f8a9e599cc7ec2c00e331f71c4e69a5f9a4b8a6efd5b07466b9736f9a" + +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + +[[package]] +name = "proc-macro2" +version = "1.0.74" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2de98502f212cfcea8d0bb305bd0f49d7ebdd75b64ba0a68f937d888f4e0d6db" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e82ad98ce1991c9c70c3464ba4187337b9c45fcbbb060d46dca15f0c075e14e2" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5503d0b3aee2c7a8dbb389cd87cd9649f675d4c7f60ca33699a3e3859d81a891" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a79e8d80486a00d11c0dcb27cd2aa17c022cc95c677b461f01797226ba8f41" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.20.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4b0dc7eaa578604fab11c8c7ff8934c71249c61d4def8e272c76ed879f03d4" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.20.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "816a4f709e29ddab2e3cdfe94600d554c5556cad0ddfeea95c47b580c3247fa4" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "quote" +version = "1.0.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + +[[package]] +name = "redox_syscall" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" +dependencies = [ + "bitflags 1.3.2", +] + +[[package]] +name = "regex" +version = "1.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + 
"regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" + +[[package]] +name = "rustc_version" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa0f585226d2e68097d4f95d113b15b83a82e819ab25717ec0590d9584ef366" +dependencies = [ + "semver", +] + +[[package]] +name = "rustix" +version = "0.38.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72e572a5e8ca657d7366229cdde4bd14c4eb5499a9573d4d366fe1b599daa316" +dependencies = [ + "bitflags 2.4.1", + "errno", + "libc", + "linux-raw-sys", + "windows-sys 0.52.0", +] + +[[package]] +name = "rustversion" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ffc183a10b4478d04cbbbfc96d0873219d962dd5accaff2ffbd4ceb7df837f4" + +[[package]] +name = "ryu" +version = "1.0.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "semver" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"b97ed7a9823b74f99c7742f5336af7be5ecd3eeafcb1507d1fa93347b1d589b0" + +[[package]] +name = "seq-macro" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" + +[[package]] +name = "serde" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b114498256798c94a0689e1a15fec6005dee8ac1f41de56404b67afc2a4b773" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.194" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3385e45322e8f9931410f01b3031ec534c3947d0e94c18049af4d9f9907d4e0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "serde_json" +version = "1.0.110" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fbd975230bada99c8bb618e0c365c2eefa219158d5c6c29610fd09ff1833257" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "slab" +version = "0.4.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" +dependencies = [ + "autocfg", +] + +[[package]] +name = "smallvec" +version = "1.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dccd0940a2dcdf68d092b8cbab7dc0ad8fa938bf95787e1b916b0e3d0e8e970" + +[[package]] +name = "snafu" +version = "0.7.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +dependencies = [ + "doc-comment", + "snafu-derive", +] + +[[package]] +name = "snafu-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "snap" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" + +[[package]] +name = "sqlparser" +version = "0.40.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c80afe31cdb649e56c0d9bb5503be9166600d68a852c38dd445636d126858e5" +dependencies = [ + "log", + "sqlparser_derive", +] + +[[package]] +name = "sqlparser_derive" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + +[[package]] +name = "strum" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" +dependencies = [ + "strum_macros", +] + +[[package]] +name = "strum_macros" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23dc1fa9ac9c169a78ba62f0b841814b7abae11bdd047b9c58f893439e309ea0" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn 2.0.46", +] + +[[package]] +name = "subtle" +version = "2.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "syn" +version = "2.0.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89456b690ff72fddcecf231caedbe615c59480c93358a93dfae7fc29e3ebbf0e" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69758bda2e78f098e4ccb393021a0963bb3442eac05f135c30f61b7370bbafae" + +[[package]] +name = "tempfile" +version = "3.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01ce4141aa927a6d1bd34a041795abd0db1cccba5d5f24b009f694bdf3a1f3fa" +dependencies = [ + "cfg-if", + "fastrand", + "redox_syscall", + "rustix", + "windows-sys 0.52.0", +] + +[[package]] +name = "thrift" +version = "0.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" +dependencies = [ + "byteorder", + "integer-encoding", + "ordered-float", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokio" +version = "1.28.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3c786bf8134e5a3a166db9b29ab8f48134739014a3eca7bc6bfa95d673b136f" +dependencies = [ + "autocfg", + "bytes", + "num_cpus", + "parking_lot", + "pin-project-lite", + "tokio-macros", + "windows-sys 0.48.0", +] + +[[package]] +name = "tokio-macros" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "630bdcf245f78637c13ec01ffae6187cca34625e8c63150d424b59e55af2675e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "tokio-util" +version = "0.7.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419f34732d9eb6ee4c3578b7989078579b7f039cbbb9ca2c4da015749371e15" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "tracing-core" +version = "0.1.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", +] + +[[package]] +name = "twox-hash" +version = "1.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675" +dependencies = [ + "cfg-if", + "static_assertions", +] + +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "unicode-bidi" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f2528f27a9eb2b21e69c95319b30bd0efd85d09c379741b0f78ea1d86be2416" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unicode-normalization" +version = "0.1.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c5713f0fc4b5db668a2ac63cdb7bb4469d8c9fed047b1d0292cc7b0ce2ba921" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-segmentation" +version = "1.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1dd624098567895118886609431a7c3b8f516e41d30e0643f03d94592a147e36" + +[[package]] +name = "unicode-width" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85" + +[[package]] +name = "unindent" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" + +[[package]] +name = "url" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e6302e3bb753d46e83516cae55ae196fc0c309407cf11ab35cc51a4c2a4633" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", +] + +[[package]] +name = "uuid" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +dependencies = [ + "getrandom", +] + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "walkdir" +version = "2.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +dependencies = [ + "same-file", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ed0d4f68a3015cc185aff4db9506a015f4b96f95303897bfa23f846db54064e" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b56f625e64f3a1084ded111c4d5f477df9f8c92df113852fa5a374dbda78826" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn 2.0.46", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0162dbf37223cd2afce98f3d0785506dcb8d266223983e4b5b525859e6e182b2" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0eb82fcb7930ae6219a7ecfd55b217f5f0893484b7a13022ebb2b2bf20b5283" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name 
= "wasm-bindgen-shared" +version = "0.2.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ab9b36309365056cd639da3134bf87fa8f3d86008abf99e612384a6eecd459f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-sys" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9" +dependencies = [ + "windows-targets 0.48.5", +] + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", +] + +[[package]] +name = "windows-targets" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" +dependencies = [ + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04" + +[[package]] +name = "xz2" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" +dependencies = [ + "lzma-sys", +] + +[[package]] +name = "zerocopy" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.46", +] + +[[package]] +name = "zstd" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bffb3309596d527cfcba7dfc6ed6052f1d39dfbd7c867aa2e865e4a449c10110" +dependencies = [ + "zstd-safe", +] + +[[package]] +name = "zstd-safe" +version = "7.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43747c7422e2924c11144d5229878b98180ef8b06cca4ab5af37afc8a8d8ea3e" +dependencies = [ + "zstd-sys", +] + +[[package]] +name = "zstd-sys" +version = "2.0.9+zstd.1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e16efa8a874a0481a574084d34cc26fdb3b99627480f785888deb6386506656" +dependencies = [ + "cc", + "pkg-config", +] diff --git a/introductory/experiments/late_materialization/Cargo.toml b/introductory/experiments/late_materialization/Cargo.toml new file mode 100644 index 0000000..47e47f3 --- /dev/null +++ b/introductory/experiments/late_materialization/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "late_materialization" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[lib] +name = "late_materialization" +crate-type = ["cdylib"] + +# These are pinned for the sake of reproducibility. 
+[dependencies] +async-trait = "=0.1.77" +bytes = "=1.5.0" +datafusion = "=34.0.0" +object_store = "=0.8.0" +tokio = { version = "=1.28.0", features = ["rt-multi-thread"] } +pyo3 = { version = "=0.20.1", features = ["extension-module"] } +lazy_static = "=1.4.0" +futures = "=0.3.15" +url = "=2.5.0" + +[profile.release] +lto = "thin" diff --git a/introductory/experiments/late_materialization/Makefile b/introductory/experiments/late_materialization/Makefile new file mode 100644 index 0000000..2b39952 --- /dev/null +++ b/introductory/experiments/late_materialization/Makefile @@ -0,0 +1,23 @@ +# These commands can be run from top to bottom to generate the report. + +# Install Python dependencies to run the experiments. +setup: + pip install -r requirements.txt + maturin develop --release +.PHONY: setup + +# Generate the datasets. +datagen: + python datagen.py --verbose +.PHONY: datagen + +# Run the benchmarks. +bench: + pytest bench.py -v --benchmark-save=data + python postprocess.py +.PHONY: bench + +# Clean up test data and ancillary files.
+clean: + rm -rf data +.PHONY: clean \ No newline at end of file diff --git a/introductory/experiments/late_materialization/bench.py b/introductory/experiments/late_materialization/bench.py new file mode 100644 index 0000000..8e33042 --- /dev/null +++ b/introductory/experiments/late_materialization/bench.py @@ -0,0 +1,260 @@ +import csv +import json +import multiprocessing +import os +from typing import List, NamedTuple + +import pytest +import fsspec +import lance +import pyarrow.dataset as pa_ds +import pyarrow.fs as pa_fs +from late_materialization import scan_datafusion +from lance.lance import trace_to_chrome + +from metered_fs import MeteredFSHandler + +multiprocessing.set_start_method("spawn", force=True) + + +def lance_scan(ds, columns, predicate, late_materialization): + reader = ds.scanner( + columns=columns, + filter=predicate, + use_late_materialization=late_materialization, + use_stats=False, + batch_size=1024 * 10, + ).to_batches() + num_rows = 0 + for batch in reader: + num_rows += batch.num_rows + return num_rows + + +def pyarrow_scan(ds, columns, predicate): + reader = ds.scanner( + columns=columns, + filter=predicate, + ).to_batches() + num_rows = 0 + for batch in reader: + num_rows += batch.num_rows + return num_rows + + +# Runtime benchmarks +@pytest.mark.parametrize("project", ["int", "vec", "img"]) +@pytest.mark.parametrize("min_value", [10 * 1024, 25 * 1024, 50 * 1024, 75 * 1024, 90 * 1024]) +@pytest.mark.parametrize("library", ["Lance", "PyArrow", "DataFusion"]) +@pytest.mark.parametrize("late_materialization", [True, False]) +def test_runtime(benchmark, project, min_value, library, late_materialization): + columns = [project] + if library == "Lance": + ds = lance.dataset("data/lance") + num_rows = benchmark( + lance_scan, + ds, + columns, + predicate=f"id >= {min_value}", + late_materialization=late_materialization, + ) + elif library == "PyArrow": + if late_materialization: + pytest.skip("PyArrow does not support late materialization") + 
ds = pa_ds.dataset("data/parquet", format="parquet") + num_rows = benchmark( + pyarrow_scan, + ds, + columns, + predicate=pa_ds.field("id") >= min_value, + ) + elif library == "DataFusion": + if columns == ["vec"]: + # See: https://github.com/apache/arrow-datafusion/issues/8742 + pytest.skip("DataFusion does not support vector columns in projection") + num_rows, _ = benchmark( + scan_datafusion, + "data/parquet", + columns, + min_value, + late_materialization=late_materialization, + measure_io=False, + explain=False, + ) + + assert num_rows == 1024 * 100 - min_value + + +class IOResult(NamedTuple): + library: str + columns: str + predicate: str + late_materialization: bool + selectivity: float + num_ios: int + total_bytes: int + + +@pytest.fixture(scope="session") +def io_results(): + data = [] + yield data + with open("io_results.csv", "w") as f: + writer = csv.writer(f) + writer.writerow(IOResult._fields) + for res in data: + assert isinstance(res, IOResult) + writer.writerow(res) + + +def measure_lance_io( + path: str, + columns: List[str], + predicate: str, + late_materialization: bool, +) -> IOResult: + dataset = lance.dataset(path) + + # Enable tracing so we can record IOs + guard = trace_to_chrome("lance_trace.json", "debug") + num_rows = lance_scan( + dataset, + columns=columns, + predicate=predicate, + late_materialization=late_materialization, + ) + guard.finish_tracing() + + with open("lance_trace.json") as f: + trace = json.load(f) + num_ios = 0 + total_bytes = 0 + for event in trace: + # ph is the event type. 
b is for begin, e is for end + if event["name"] == "get_range" and event["ph"] == "b": + num_ios += 1 + byte_range = event["args"]["range"] + start, end = byte_range.split("..") + total_bytes += int(end) - int(start) + + return IOResult( + library="Lance", + columns=",".join(columns), + predicate=predicate, + late_materialization=late_materialization, + selectivity=num_rows / dataset.count_rows(), + num_ios=num_ios, + total_bytes=total_bytes, + ) + + +def measure_parquet_io( + path: str, + columns: List[str], + predicate: str, +) -> IOResult: + handler = MeteredFSHandler(pa_fs.FSSpecHandler(fsspec.filesystem("file"))) + path = os.path.abspath(path) + dataset = pa_ds.dataset( + path, format="parquet", filesystem=pa_fs.PyFileSystem(handler) + ) + + num_rows = pyarrow_scan( + dataset, + columns=columns, + predicate=predicate, + ) + + return IOResult( + library="PyArrow", + columns=",".join(columns), + predicate=predicate, + late_materialization=False, + selectivity=num_rows / dataset.count_rows(), + num_ios=handler.num_ios, + total_bytes=handler.total_bytes, + ) + + +def measure_datafusion_io( + path: str, + columns: List[str], + min_value: int, + late_materialization: bool, +) -> IOResult: + num_rows, (num_ios, io_bytes) = scan_datafusion( + path, + columns, + min_value, + late_materialization=late_materialization, + measure_io=True, + explain=False, + ) + + dataset = pa_ds.dataset("data/parquet", format="parquet") + + return IOResult( + library="DataFusion", + columns=",".join(columns), + predicate=f"id >= {min_value}", + late_materialization=late_materialization, + selectivity=num_rows / dataset.count_rows(), + num_ios=num_ios, + total_bytes=io_bytes, + ) + + +@pytest.mark.parametrize("project", ["int", "vec", "img"]) +@pytest.mark.parametrize("min_value", list(range(0, 100 * 1024, 2 * 1024))) +@pytest.mark.parametrize("library", ["Lance", "PyArrow", "DataFusion"]) +@pytest.mark.parametrize("late_materialization", [True, False]) +def test_io(io_results, project, 
min_value, library, late_materialization): + columns = [project] + if library == "Lance": + # Useful for debugging: + # import logging + # logger = multiprocessing.log_to_stderr() + # logger.setLevel(logging.DEBUG) + res = run_in_process( + measure_lance_io, + "data/lance", + columns, + predicate=f"id >= {min_value}", + late_materialization=late_materialization, + ) + elif library == "PyArrow": + if late_materialization: + pytest.skip("PyArrow does not support late materialization") + ds = pa_ds.dataset("data/parquet", format="parquet") + res = measure_parquet_io( + "data/parquet", + columns, + predicate=pa_ds.field("id") >= min_value, + ) + elif library == "DataFusion": + if columns == ["vec"]: + # See: https://github.com/apache/arrow-datafusion/issues/8742 + pytest.skip("DataFusion does not support vector columns in projection") + res = measure_datafusion_io( + "data/parquet", + columns, + min_value, + late_materialization=late_materialization, + ) + + io_results.append(res) + + +def run_in_process(func, *args, **kwargs): + q = multiprocessing.Queue() + p = multiprocessing.Process( + target=run_process_inner, args=(q, func, *args), kwargs=kwargs + ) + p.start() + p.join() + return q.get() + + +def run_process_inner(queue, func, *args, **kwargs): + result = func(*args, **kwargs) + queue.put(result) diff --git a/introductory/experiments/late_materialization/datagen.py b/introductory/experiments/late_materialization/datagen.py new file mode 100644 index 0000000..7ff07f1 --- /dev/null +++ b/introductory/experiments/late_materialization/datagen.py @@ -0,0 +1,63 @@ +import argparse +import random +import shutil +import logging + +import lance +import pyarrow as pa +import pyarrow.compute as pc +import pyarrow.parquet as pq +import pyarrow.dataset as ds + +nrows = 100 * 1024 +ndims = 768 + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("--verbose", action="store_true") + args = parser.parse_args() + + if args.verbose: + 
logging.basicConfig(level=logging.INFO) + + values = pc.random(nrows * ndims).cast(pa.float32()) + vectors = pa.FixedSizeListArray.from_arrays(values, ndims) + + logging.info("Generating %i rows of data", nrows) + tab = pa.table( + { + "id": pa.array(range(nrows)), + "int": pa.array((random.randint(0, nrows) for _ in range(nrows)), pa.int64()), + "vec": vectors, + "img": pa.array([random.randbytes(40 * 1024) for _ in range(nrows)]), + } + ) + + file_format = ds.ParquetFileFormat() + + shutil.rmtree("data", ignore_errors=True) + + # We write a version of Parquet without statistics so we can isolate pushdown + # performance from late materialization. + for with_stats in [True, False]: + logging.info("Writing parquet with stats=%s", with_stats) + parquet_name = "data/parquet" + + if with_stats: + parquet_name += "_stats" + + ds.write_dataset( + tab, + parquet_name, + format="parquet", + file_options=file_format.make_write_options( + write_statistics=with_stats, + write_page_index=with_stats, + ), + max_rows_per_group=10 * 1024, + ) + + # We can disable use of statistics at read time for Lance + logging.info("Writing Lance") + lance.write_dataset(tab, "data/lance", max_rows_per_group=10 * 1024) diff --git a/introductory/experiments/late_materialization/io_results.csv b/introductory/experiments/late_materialization/io_results.csv new file mode 100644 index 0000000..2d9d1ac --- /dev/null +++ b/introductory/experiments/late_materialization/io_results.csv @@ -0,0 +1,651 @@ +library,columns,predicate,late_materialization,selectivity,num_ios,total_bytes +Lance,int,id >= 0,True,1.0,23,1643328 +Lance,vec,id >= 0,True,1.0,23,315396928 +Lance,img,id >= 0,True,1.0,33,4195947408 +Lance,int,id >= 2048,True,0.98,23,1626944 +Lance,vec,id >= 2048,True,0.98,23,309105472 +Lance,img,id >= 2048,True,0.98,33,4112044944 +Lance,int,id >= 4096,True,0.96,23,1610560 +Lance,vec,id >= 4096,True,0.96,23,302814016 +Lance,img,id >= 4096,True,0.96,33,4028142480 +Lance,int,id >= 
6144,True,0.94,23,1594176 +Lance,vec,id >= 6144,True,0.94,23,296522560 +Lance,img,id >= 6144,True,0.94,33,3944240016 +Lance,int,id >= 8192,True,0.92,23,1577792 +Lance,vec,id >= 8192,True,0.92,23,290231104 +Lance,img,id >= 8192,True,0.92,33,3860337552 +Lance,int,id >= 10240,True,0.9,22,1561408 +Lance,vec,id >= 10240,True,0.9,22,283939648 +Lance,img,id >= 10240,True,0.9,31,3776435080 +Lance,int,id >= 12288,True,0.88,22,1545024 +Lance,vec,id >= 12288,True,0.88,22,277648192 +Lance,img,id >= 12288,True,0.88,31,3692532616 +Lance,int,id >= 14336,True,0.86,22,1528640 +Lance,vec,id >= 14336,True,0.86,22,271356736 +Lance,img,id >= 14336,True,0.86,31,3608630152 +Lance,int,id >= 16384,True,0.84,22,1512256 +Lance,vec,id >= 16384,True,0.84,22,265065280 +Lance,img,id >= 16384,True,0.84,31,3524727688 +Lance,int,id >= 18432,True,0.82,22,1495872 +Lance,vec,id >= 18432,True,0.82,22,258773824 +Lance,img,id >= 18432,True,0.82,31,3440825224 +Lance,int,id >= 20480,True,0.8,21,1479488 +Lance,vec,id >= 20480,True,0.8,21,252482368 +Lance,img,id >= 20480,True,0.8,29,3356922752 +Lance,int,id >= 22528,True,0.78,21,1463104 +Lance,vec,id >= 22528,True,0.78,21,246190912 +Lance,img,id >= 22528,True,0.78,29,3273020288 +Lance,int,id >= 24576,True,0.76,21,1446720 +Lance,vec,id >= 24576,True,0.76,21,239899456 +Lance,img,id >= 24576,True,0.76,29,3189117824 +Lance,int,id >= 26624,True,0.74,21,1430336 +Lance,vec,id >= 26624,True,0.74,21,233608000 +Lance,img,id >= 26624,True,0.74,29,3105215360 +Lance,int,id >= 28672,True,0.72,21,1413952 +Lance,vec,id >= 28672,True,0.72,21,227316544 +Lance,img,id >= 28672,True,0.72,29,3021312896 +Lance,int,id >= 30720,True,0.7,20,1397568 +Lance,vec,id >= 30720,True,0.7,20,221025088 +Lance,img,id >= 30720,True,0.7,27,2937410424 +Lance,int,id >= 32768,True,0.68,20,1381184 +Lance,vec,id >= 32768,True,0.68,20,214733632 +Lance,img,id >= 32768,True,0.68,27,2853507960 +Lance,int,id >= 34816,True,0.66,20,1364800 +Lance,vec,id >= 34816,True,0.66,20,208442176 +Lance,img,id >= 
34816,True,0.66,27,2769605496 +Lance,int,id >= 36864,True,0.64,20,1348416 +Lance,vec,id >= 36864,True,0.64,20,202150720 +Lance,img,id >= 36864,True,0.64,27,2685703032 +Lance,int,id >= 38912,True,0.62,20,1332032 +Lance,vec,id >= 38912,True,0.62,20,195859264 +Lance,img,id >= 38912,True,0.62,27,2601800568 +Lance,int,id >= 40960,True,0.6,19,1315648 +Lance,vec,id >= 40960,True,0.6,19,189567808 +Lance,img,id >= 40960,True,0.6,25,2517898096 +Lance,int,id >= 43008,True,0.58,19,1299264 +Lance,vec,id >= 43008,True,0.58,19,183276352 +Lance,img,id >= 43008,True,0.58,25,2433995632 +Lance,int,id >= 45056,True,0.56,19,1282880 +Lance,vec,id >= 45056,True,0.56,19,176984896 +Lance,img,id >= 45056,True,0.56,25,2350093168 +Lance,int,id >= 47104,True,0.54,19,1266496 +Lance,vec,id >= 47104,True,0.54,19,170693440 +Lance,img,id >= 47104,True,0.54,25,2266190704 +Lance,int,id >= 49152,True,0.52,19,1250112 +Lance,vec,id >= 49152,True,0.52,19,164401984 +Lance,img,id >= 49152,True,0.52,25,2182288240 +Lance,int,id >= 51200,True,0.5,18,1233728 +Lance,vec,id >= 51200,True,0.5,18,158110528 +Lance,img,id >= 51200,True,0.5,23,2098385768 +Lance,int,id >= 53248,True,0.48,18,1217344 +Lance,vec,id >= 53248,True,0.48,18,151819072 +Lance,img,id >= 53248,True,0.48,23,2014483304 +Lance,int,id >= 55296,True,0.46,18,1200960 +Lance,vec,id >= 55296,True,0.46,18,145527616 +Lance,img,id >= 55296,True,0.46,23,1930580840 +Lance,int,id >= 57344,True,0.44,18,1184576 +Lance,vec,id >= 57344,True,0.44,18,139236160 +Lance,img,id >= 57344,True,0.44,23,1846678376 +Lance,int,id >= 59392,True,0.42,18,1168192 +Lance,vec,id >= 59392,True,0.42,18,132944704 +Lance,img,id >= 59392,True,0.42,23,1762775912 +Lance,int,id >= 61440,True,0.4,17,1151808 +Lance,vec,id >= 61440,True,0.4,17,126653248 +Lance,img,id >= 61440,True,0.4,21,1678873440 +Lance,int,id >= 63488,True,0.38,17,1135424 +Lance,vec,id >= 63488,True,0.38,17,120361792 +Lance,img,id >= 63488,True,0.38,21,1594970976 +Lance,int,id >= 65536,True,0.36,17,1119040 +Lance,vec,id >= 
65536,True,0.36,17,114070336 +Lance,img,id >= 65536,True,0.36,21,1511068512 +Lance,int,id >= 67584,True,0.34,17,1102656 +Lance,vec,id >= 67584,True,0.34,17,107778880 +Lance,img,id >= 67584,True,0.34,21,1427166048 +Lance,int,id >= 69632,True,0.32,17,1086272 +Lance,vec,id >= 69632,True,0.32,17,101487424 +Lance,img,id >= 69632,True,0.32,21,1343263584 +Lance,int,id >= 71680,True,0.3,16,1069888 +Lance,vec,id >= 71680,True,0.3,16,95195968 +Lance,img,id >= 71680,True,0.3,19,1259361112 +Lance,int,id >= 73728,True,0.28,16,1053504 +Lance,vec,id >= 73728,True,0.28,16,88904512 +Lance,img,id >= 73728,True,0.28,19,1175458648 +Lance,int,id >= 75776,True,0.26,16,1037120 +Lance,vec,id >= 75776,True,0.26,16,82613056 +Lance,img,id >= 75776,True,0.26,19,1091556184 +Lance,int,id >= 77824,True,0.24,16,1020736 +Lance,vec,id >= 77824,True,0.24,16,76321600 +Lance,img,id >= 77824,True,0.24,19,1007653720 +Lance,int,id >= 79872,True,0.22,16,1004352 +Lance,vec,id >= 79872,True,0.22,16,70030144 +Lance,img,id >= 79872,True,0.22,19,923751256 +Lance,int,id >= 81920,True,0.2,15,987968 +Lance,vec,id >= 81920,True,0.2,15,63738688 +Lance,img,id >= 81920,True,0.2,17,839848784 +Lance,int,id >= 83968,True,0.18,15,971584 +Lance,vec,id >= 83968,True,0.18,15,57447232 +Lance,img,id >= 83968,True,0.18,17,755946320 +Lance,int,id >= 86016,True,0.16,15,955200 +Lance,vec,id >= 86016,True,0.16,15,51155776 +Lance,img,id >= 86016,True,0.16,17,672043856 +Lance,int,id >= 88064,True,0.14,15,938816 +Lance,vec,id >= 88064,True,0.14,15,44864320 +Lance,img,id >= 88064,True,0.14,17,588141392 +Lance,int,id >= 90112,True,0.12,15,922432 +Lance,vec,id >= 90112,True,0.12,15,38572864 +Lance,img,id >= 90112,True,0.12,17,504238928 +Lance,int,id >= 92160,True,0.1,14,906048 +Lance,vec,id >= 92160,True,0.1,14,32281408 +Lance,img,id >= 92160,True,0.1,15,420336456 +Lance,int,id >= 94208,True,0.08,14,889664 +Lance,vec,id >= 94208,True,0.08,14,25989952 +Lance,img,id >= 94208,True,0.08,15,336433992 +Lance,int,id >= 
96256,True,0.06,14,873280 +Lance,vec,id >= 96256,True,0.06,14,19698496 +Lance,img,id >= 96256,True,0.06,15,252531528 +Lance,int,id >= 98304,True,0.04,14,856896 +Lance,vec,id >= 98304,True,0.04,14,13407040 +Lance,img,id >= 98304,True,0.04,15,168629064 +Lance,int,id >= 100352,True,0.02,14,840512 +Lance,vec,id >= 100352,True,0.02,14,7115584 +Lance,img,id >= 100352,True,0.02,15,84726600 +DataFusion,int,id >= 0,True,1.0,44,1252459 +DataFusion,img,id >= 0,True,1.0,44,4195556602 +DataFusion,int,id >= 2048,True,0.98,44,1252459 +DataFusion,img,id >= 2048,True,0.98,44,4195556602 +DataFusion,int,id >= 4096,True,0.96,44,1252459 +DataFusion,img,id >= 4096,True,0.96,44,4195556602 +DataFusion,int,id >= 6144,True,0.94,44,1252459 +DataFusion,img,id >= 6144,True,0.94,44,4195556602 +DataFusion,int,id >= 8192,True,0.92,44,1252459 +DataFusion,img,id >= 8192,True,0.92,44,4195556602 +DataFusion,int,id >= 10240,True,0.9,43,1190211 +DataFusion,img,id >= 10240,True,0.9,43,3776064336 +DataFusion,int,id >= 12288,True,0.88,43,1190211 +DataFusion,img,id >= 12288,True,0.88,43,3776064336 +DataFusion,int,id >= 14336,True,0.86,43,1190211 +DataFusion,img,id >= 14336,True,0.86,43,3776064336 +DataFusion,int,id >= 16384,True,0.84,43,1190211 +DataFusion,img,id >= 16384,True,0.84,43,3776064336 +DataFusion,int,id >= 18432,True,0.82,43,1190211 +DataFusion,img,id >= 18432,True,0.82,43,3776064336 +DataFusion,int,id >= 20480,True,0.8,42,1127918 +DataFusion,img,id >= 20480,True,0.8,42,3356572070 +DataFusion,int,id >= 22528,True,0.78,42,1127918 +DataFusion,img,id >= 22528,True,0.78,42,3356572070 +DataFusion,int,id >= 24576,True,0.76,42,1127918 +DataFusion,img,id >= 24576,True,0.76,42,3356572070 +DataFusion,int,id >= 26624,True,0.74,42,1127918 +DataFusion,img,id >= 26624,True,0.74,42,3356572070 +DataFusion,int,id >= 28672,True,0.72,42,1127918 +DataFusion,img,id >= 28672,True,0.72,42,3356572070 +DataFusion,int,id >= 30720,True,0.7,41,1065734 +DataFusion,img,id >= 30720,True,0.7,41,2937079804 +DataFusion,int,id >= 
32768,True,0.68,40,1053631 +DataFusion,img,id >= 32768,True,0.68,40,2853180306 +DataFusion,int,id >= 34816,True,0.66,40,1053631 +DataFusion,img,id >= 34816,True,0.66,40,2853180306 +DataFusion,int,id >= 36864,True,0.64,40,1053631 +DataFusion,img,id >= 36864,True,0.64,40,2853180306 +DataFusion,int,id >= 38912,True,0.62,40,1053631 +DataFusion,img,id >= 38912,True,0.62,40,2853180306 +DataFusion,int,id >= 40960,True,0.6,40,1053631 +DataFusion,img,id >= 40960,True,0.6,40,2853180306 +DataFusion,int,id >= 43008,True,0.58,39,991491 +DataFusion,img,id >= 43008,True,0.58,39,2433688038 +DataFusion,int,id >= 45056,True,0.56,39,991491 +DataFusion,img,id >= 45056,True,0.56,39,2433688038 +DataFusion,int,id >= 47104,True,0.54,39,991491 +DataFusion,img,id >= 47104,True,0.54,39,2433688038 +DataFusion,int,id >= 49152,True,0.52,39,991491 +DataFusion,img,id >= 49152,True,0.52,39,2433688038 +DataFusion,int,id >= 51200,True,0.5,39,991491 +DataFusion,img,id >= 51200,True,0.5,39,2433688038 +DataFusion,int,id >= 53248,True,0.48,38,933984 +DataFusion,img,id >= 53248,True,0.48,38,2047787791 +DataFusion,int,id >= 55296,True,0.46,38,933984 +DataFusion,img,id >= 55296,True,0.46,38,2047787791 +DataFusion,int,id >= 57344,True,0.44,38,933984 +DataFusion,img,id >= 57344,True,0.44,38,2047787791 +DataFusion,int,id >= 59392,True,0.42,38,933984 +DataFusion,img,id >= 59392,True,0.42,38,2047787791 +DataFusion,int,id >= 61440,True,0.4,38,933984 +DataFusion,img,id >= 61440,True,0.4,38,2047787791 +DataFusion,int,id >= 63488,True,0.38,37,871905 +DataFusion,img,id >= 63488,True,0.38,37,1628295525 +DataFusion,int,id >= 65536,True,0.36,37,871905 +DataFusion,img,id >= 65536,True,0.36,37,1628295525 +DataFusion,int,id >= 67584,True,0.34,37,871905 +DataFusion,img,id >= 67584,True,0.34,37,1628295525 +DataFusion,int,id >= 69632,True,0.32,37,871905 +DataFusion,img,id >= 69632,True,0.32,37,1628295525 +DataFusion,int,id >= 71680,True,0.3,37,871905 +DataFusion,img,id >= 71680,True,0.3,37,1628295525 +DataFusion,int,id >= 
73728,True,0.28,36,809729 +DataFusion,img,id >= 73728,True,0.28,36,1208803259 +DataFusion,int,id >= 75776,True,0.26,36,809729 +DataFusion,img,id >= 75776,True,0.26,36,1208803259 +DataFusion,int,id >= 77824,True,0.24,36,809729 +DataFusion,img,id >= 77824,True,0.24,36,1208803259 +DataFusion,int,id >= 79872,True,0.22,36,809729 +DataFusion,img,id >= 79872,True,0.22,36,1208803259 +DataFusion,int,id >= 81920,True,0.2,36,809729 +DataFusion,img,id >= 81920,True,0.2,36,1208803259 +DataFusion,int,id >= 83968,True,0.18,35,747639 +DataFusion,img,id >= 83968,True,0.18,35,789310993 +DataFusion,int,id >= 86016,True,0.16,34,735499 +DataFusion,img,id >= 86016,True,0.16,34,705411495 +DataFusion,int,id >= 88064,True,0.14,34,735499 +DataFusion,img,id >= 88064,True,0.14,34,705411495 +DataFusion,int,id >= 90112,True,0.12,34,735499 +DataFusion,img,id >= 90112,True,0.12,34,705411495 +DataFusion,int,id >= 92160,True,0.1,34,735499 +DataFusion,img,id >= 92160,True,0.1,34,705411495 +DataFusion,int,id >= 94208,True,0.08,34,735499 +DataFusion,img,id >= 94208,True,0.08,34,705411495 +DataFusion,int,id >= 96256,True,0.06,33,673205 +DataFusion,img,id >= 96256,True,0.06,33,285919229 +DataFusion,int,id >= 98304,True,0.04,33,673205 +DataFusion,img,id >= 98304,True,0.04,33,285919229 +DataFusion,int,id >= 100352,True,0.02,33,673205 +DataFusion,img,id >= 100352,True,0.02,33,285919229 +Lance,int,id >= 0,False,1.0,23,1643328 +Lance,vec,id >= 0,False,1.0,23,315396928 +Lance,img,id >= 0,False,1.0,33,4195947408 +Lance,int,id >= 2048,False,0.98,23,1643328 +Lance,vec,id >= 2048,False,0.98,23,315396928 +Lance,img,id >= 2048,False,0.98,33,4195947408 +Lance,int,id >= 4096,False,0.96,23,1643328 +Lance,vec,id >= 4096,False,0.96,23,315396928 +Lance,img,id >= 4096,False,0.96,33,4195947408 +Lance,int,id >= 6144,False,0.94,23,1643328 +Lance,vec,id >= 6144,False,0.94,23,315396928 +Lance,img,id >= 6144,False,0.94,33,4195947408 +Lance,int,id >= 8192,False,0.92,23,1643328 +Lance,vec,id >= 8192,False,0.92,23,315396928 
+Lance,img,id >= 8192,False,0.92,33,4195947408 +Lance,int,id >= 10240,False,0.9,23,1643328 +Lance,vec,id >= 10240,False,0.9,23,315396928 +Lance,img,id >= 10240,False,0.9,33,4195947408 +Lance,int,id >= 12288,False,0.88,23,1643328 +Lance,vec,id >= 12288,False,0.88,23,315396928 +Lance,img,id >= 12288,False,0.88,33,4195947408 +Lance,int,id >= 14336,False,0.86,23,1643328 +Lance,vec,id >= 14336,False,0.86,23,315396928 +Lance,img,id >= 14336,False,0.86,33,4195947408 +Lance,int,id >= 16384,False,0.84,23,1643328 +Lance,vec,id >= 16384,False,0.84,23,315396928 +Lance,img,id >= 16384,False,0.84,33,4195947408 +Lance,int,id >= 18432,False,0.82,23,1643328 +Lance,vec,id >= 18432,False,0.82,23,315396928 +Lance,img,id >= 18432,False,0.82,33,4195947408 +Lance,int,id >= 20480,False,0.8,23,1643328 +Lance,vec,id >= 20480,False,0.8,23,315396928 +Lance,img,id >= 20480,False,0.8,33,4195947408 +Lance,int,id >= 22528,False,0.78,23,1643328 +Lance,vec,id >= 22528,False,0.78,23,315396928 +Lance,img,id >= 22528,False,0.78,33,4195947408 +Lance,int,id >= 24576,False,0.76,23,1643328 +Lance,vec,id >= 24576,False,0.76,23,315396928 +Lance,img,id >= 24576,False,0.76,33,4195947408 +Lance,int,id >= 26624,False,0.74,23,1643328 +Lance,vec,id >= 26624,False,0.74,23,315396928 +Lance,img,id >= 26624,False,0.74,33,4195947408 +Lance,int,id >= 28672,False,0.72,23,1643328 +Lance,vec,id >= 28672,False,0.72,23,315396928 +Lance,img,id >= 28672,False,0.72,33,4195947408 +Lance,int,id >= 30720,False,0.7,23,1643328 +Lance,vec,id >= 30720,False,0.7,23,315396928 +Lance,img,id >= 30720,False,0.7,33,4195947408 +Lance,int,id >= 32768,False,0.68,23,1643328 +Lance,vec,id >= 32768,False,0.68,23,315396928 +Lance,img,id >= 32768,False,0.68,33,4195947408 +Lance,int,id >= 34816,False,0.66,23,1643328 +Lance,vec,id >= 34816,False,0.66,23,315396928 +Lance,img,id >= 34816,False,0.66,33,4195947408 +Lance,int,id >= 36864,False,0.64,23,1643328 +Lance,vec,id >= 36864,False,0.64,23,315396928 +Lance,img,id >= 36864,False,0.64,33,4195947408 
+Lance,int,id >= 38912,False,0.62,23,1643328 +Lance,vec,id >= 38912,False,0.62,23,315396928 +Lance,img,id >= 38912,False,0.62,33,4195947408 +Lance,int,id >= 40960,False,0.6,23,1643328 +Lance,vec,id >= 40960,False,0.6,23,315396928 +Lance,img,id >= 40960,False,0.6,33,4195947408 +Lance,int,id >= 43008,False,0.58,23,1643328 +Lance,vec,id >= 43008,False,0.58,23,315396928 +Lance,img,id >= 43008,False,0.58,33,4195947408 +Lance,int,id >= 45056,False,0.56,23,1643328 +Lance,vec,id >= 45056,False,0.56,23,315396928 +Lance,img,id >= 45056,False,0.56,33,4195947408 +Lance,int,id >= 47104,False,0.54,23,1643328 +Lance,vec,id >= 47104,False,0.54,23,315396928 +Lance,img,id >= 47104,False,0.54,33,4195947408 +Lance,int,id >= 49152,False,0.52,23,1643328 +Lance,vec,id >= 49152,False,0.52,23,315396928 +Lance,img,id >= 49152,False,0.52,33,4195947408 +Lance,int,id >= 51200,False,0.5,23,1643328 +Lance,vec,id >= 51200,False,0.5,23,315396928 +Lance,img,id >= 51200,False,0.5,33,4195947408 +Lance,int,id >= 53248,False,0.48,23,1643328 +Lance,vec,id >= 53248,False,0.48,23,315396928 +Lance,img,id >= 53248,False,0.48,33,4195947408 +Lance,int,id >= 55296,False,0.46,23,1643328 +Lance,vec,id >= 55296,False,0.46,23,315396928 +Lance,img,id >= 55296,False,0.46,33,4195947408 +Lance,int,id >= 57344,False,0.44,23,1643328 +Lance,vec,id >= 57344,False,0.44,23,315396928 +Lance,img,id >= 57344,False,0.44,33,4195947408 +Lance,int,id >= 59392,False,0.42,23,1643328 +Lance,vec,id >= 59392,False,0.42,23,315396928 +Lance,img,id >= 59392,False,0.42,33,4195947408 +Lance,int,id >= 61440,False,0.4,23,1643328 +Lance,vec,id >= 61440,False,0.4,23,315396928 +Lance,img,id >= 61440,False,0.4,33,4195947408 +Lance,int,id >= 63488,False,0.38,23,1643328 +Lance,vec,id >= 63488,False,0.38,23,315396928 +Lance,img,id >= 63488,False,0.38,33,4195947408 +Lance,int,id >= 65536,False,0.36,23,1643328 +Lance,vec,id >= 65536,False,0.36,23,315396928 +Lance,img,id >= 65536,False,0.36,33,4195947408 +Lance,int,id >= 67584,False,0.34,23,1643328 
+Lance,vec,id >= 67584,False,0.34,23,315396928 +Lance,img,id >= 67584,False,0.34,33,4195947408 +Lance,int,id >= 69632,False,0.32,23,1643328 +Lance,vec,id >= 69632,False,0.32,23,315396928 +Lance,img,id >= 69632,False,0.32,33,4195947408 +Lance,int,id >= 71680,False,0.3,23,1643328 +Lance,vec,id >= 71680,False,0.3,23,315396928 +Lance,img,id >= 71680,False,0.3,33,4195947408 +Lance,int,id >= 73728,False,0.28,23,1643328 +Lance,vec,id >= 73728,False,0.28,23,315396928 +Lance,img,id >= 73728,False,0.28,33,4195947408 +Lance,int,id >= 75776,False,0.26,23,1643328 +Lance,vec,id >= 75776,False,0.26,23,315396928 +Lance,img,id >= 75776,False,0.26,33,4195947408 +Lance,int,id >= 77824,False,0.24,23,1643328 +Lance,vec,id >= 77824,False,0.24,23,315396928 +Lance,img,id >= 77824,False,0.24,33,4195947408 +Lance,int,id >= 79872,False,0.22,23,1643328 +Lance,vec,id >= 79872,False,0.22,23,315396928 +Lance,img,id >= 79872,False,0.22,33,4195947408 +Lance,int,id >= 81920,False,0.2,23,1643328 +Lance,vec,id >= 81920,False,0.2,23,315396928 +Lance,img,id >= 81920,False,0.2,33,4195947408 +Lance,int,id >= 83968,False,0.18,23,1643328 +Lance,vec,id >= 83968,False,0.18,23,315396928 +Lance,img,id >= 83968,False,0.18,33,4195947408 +Lance,int,id >= 86016,False,0.16,23,1643328 +Lance,vec,id >= 86016,False,0.16,23,315396928 +Lance,img,id >= 86016,False,0.16,33,4195947408 +Lance,int,id >= 88064,False,0.14,23,1643328 +Lance,vec,id >= 88064,False,0.14,23,315396928 +Lance,img,id >= 88064,False,0.14,33,4195947408 +Lance,int,id >= 90112,False,0.12,23,1643328 +Lance,vec,id >= 90112,False,0.12,23,315396928 +Lance,img,id >= 90112,False,0.12,33,4195947408 +Lance,int,id >= 92160,False,0.1,23,1643328 +Lance,vec,id >= 92160,False,0.1,23,315396928 +Lance,img,id >= 92160,False,0.1,33,4195947408 +Lance,int,id >= 94208,False,0.08,23,1643328 +Lance,vec,id >= 94208,False,0.08,23,315396928 +Lance,img,id >= 94208,False,0.08,33,4195947408 +Lance,int,id >= 96256,False,0.06,23,1643328 +Lance,vec,id >= 96256,False,0.06,23,315396928 
+Lance,img,id >= 96256,False,0.06,33,4195947408 +Lance,int,id >= 98304,False,0.04,23,1643328 +Lance,vec,id >= 98304,False,0.04,23,315396928 +Lance,img,id >= 98304,False,0.04,33,4195947408 +Lance,int,id >= 100352,False,0.02,23,1643328 +Lance,vec,id >= 100352,False,0.02,23,315396928 +Lance,img,id >= 100352,False,0.02,33,4195947408 +PyArrow,int,(id >= 0),False,1.0,15,1406232 +PyArrow,vec,(id >= 0),False,1.0,27,322882603 +PyArrow,img,(id >= 0),False,1.0,27,4195709630 +PyArrow,int,(id >= 2048),False,0.98,15,1406232 +PyArrow,vec,(id >= 2048),False,0.98,27,322882603 +PyArrow,img,(id >= 2048),False,0.98,27,4195709630 +PyArrow,int,(id >= 4096),False,0.96,15,1406232 +PyArrow,vec,(id >= 4096),False,0.96,27,322882603 +PyArrow,img,(id >= 4096),False,0.96,27,4195709630 +PyArrow,int,(id >= 6144),False,0.94,15,1406232 +PyArrow,vec,(id >= 6144),False,0.94,27,322882603 +PyArrow,img,(id >= 6144),False,0.94,27,4195709630 +PyArrow,int,(id >= 8192),False,0.92,15,1406232 +PyArrow,vec,(id >= 8192),False,0.92,27,322882603 +PyArrow,img,(id >= 8192),False,0.92,27,4195709630 +PyArrow,int,(id >= 10240),False,0.9,15,1406232 +PyArrow,vec,(id >= 10240),False,0.9,27,322882603 +PyArrow,img,(id >= 10240),False,0.9,27,4195709630 +PyArrow,int,(id >= 12288),False,0.88,15,1406232 +PyArrow,vec,(id >= 12288),False,0.88,27,322882603 +PyArrow,img,(id >= 12288),False,0.88,27,4195709630 +PyArrow,int,(id >= 14336),False,0.86,15,1406232 +PyArrow,vec,(id >= 14336),False,0.86,27,322882603 +PyArrow,img,(id >= 14336),False,0.86,27,4195709630 +PyArrow,int,(id >= 16384),False,0.84,15,1406232 +PyArrow,vec,(id >= 16384),False,0.84,27,322882603 +PyArrow,img,(id >= 16384),False,0.84,27,4195709630 +PyArrow,int,(id >= 18432),False,0.82,15,1406232 +PyArrow,vec,(id >= 18432),False,0.82,27,322882603 +PyArrow,img,(id >= 18432),False,0.82,27,4195709630 +PyArrow,int,(id >= 20480),False,0.8,15,1406232 +PyArrow,vec,(id >= 20480),False,0.8,27,322882603 +PyArrow,img,(id >= 20480),False,0.8,27,4195709630 +PyArrow,int,(id >= 
22528),False,0.78,15,1406232 +PyArrow,vec,(id >= 22528),False,0.78,27,322882603 +PyArrow,img,(id >= 22528),False,0.78,27,4195709630 +PyArrow,int,(id >= 24576),False,0.76,15,1406232 +PyArrow,vec,(id >= 24576),False,0.76,27,322882603 +PyArrow,img,(id >= 24576),False,0.76,27,4195709630 +PyArrow,int,(id >= 26624),False,0.74,15,1406232 +PyArrow,vec,(id >= 26624),False,0.74,27,322882603 +PyArrow,img,(id >= 26624),False,0.74,27,4195709630 +PyArrow,int,(id >= 28672),False,0.72,15,1406232 +PyArrow,vec,(id >= 28672),False,0.72,27,322882603 +PyArrow,img,(id >= 28672),False,0.72,27,4195709630 +PyArrow,int,(id >= 30720),False,0.7,15,1406232 +PyArrow,vec,(id >= 30720),False,0.7,27,322882603 +PyArrow,img,(id >= 30720),False,0.7,27,4195709630 +PyArrow,int,(id >= 32768),False,0.68,15,1406232 +PyArrow,vec,(id >= 32768),False,0.68,27,322882603 +PyArrow,img,(id >= 32768),False,0.68,27,4195709630 +PyArrow,int,(id >= 34816),False,0.66,15,1406232 +PyArrow,vec,(id >= 34816),False,0.66,27,322882603 +PyArrow,img,(id >= 34816),False,0.66,27,4195709630 +PyArrow,int,(id >= 36864),False,0.64,15,1406232 +PyArrow,vec,(id >= 36864),False,0.64,27,322882603 +PyArrow,img,(id >= 36864),False,0.64,27,4195709630 +PyArrow,int,(id >= 38912),False,0.62,15,1406232 +PyArrow,vec,(id >= 38912),False,0.62,27,322882603 +PyArrow,img,(id >= 38912),False,0.62,27,4195709630 +PyArrow,int,(id >= 40960),False,0.6,15,1406232 +PyArrow,vec,(id >= 40960),False,0.6,27,322882603 +PyArrow,img,(id >= 40960),False,0.6,27,4195709630 +PyArrow,int,(id >= 43008),False,0.58,15,1406232 +PyArrow,vec,(id >= 43008),False,0.58,27,322882603 +PyArrow,img,(id >= 43008),False,0.58,27,4195709630 +PyArrow,int,(id >= 45056),False,0.56,15,1406232 +PyArrow,vec,(id >= 45056),False,0.56,27,322882603 +PyArrow,img,(id >= 45056),False,0.56,27,4195709630 +PyArrow,int,(id >= 47104),False,0.54,15,1406232 +PyArrow,vec,(id >= 47104),False,0.54,27,322882603 +PyArrow,img,(id >= 47104),False,0.54,27,4195709630 +PyArrow,int,(id >= 49152),False,0.52,15,1406232 
+PyArrow,vec,(id >= 49152),False,0.52,27,322882603 +PyArrow,img,(id >= 49152),False,0.52,27,4195709630 +PyArrow,int,(id >= 51200),False,0.5,15,1406232 +PyArrow,vec,(id >= 51200),False,0.5,27,322882603 +PyArrow,img,(id >= 51200),False,0.5,27,4195709630 +PyArrow,int,(id >= 53248),False,0.48,15,1406232 +PyArrow,vec,(id >= 53248),False,0.48,27,322882603 +PyArrow,img,(id >= 53248),False,0.48,27,4195709630 +PyArrow,int,(id >= 55296),False,0.46,15,1406232 +PyArrow,vec,(id >= 55296),False,0.46,27,322882603 +PyArrow,img,(id >= 55296),False,0.46,27,4195709630 +PyArrow,int,(id >= 57344),False,0.44,15,1406232 +PyArrow,vec,(id >= 57344),False,0.44,27,322882603 +PyArrow,img,(id >= 57344),False,0.44,27,4195709630 +PyArrow,int,(id >= 59392),False,0.42,15,1406232 +PyArrow,vec,(id >= 59392),False,0.42,27,322882603 +PyArrow,img,(id >= 59392),False,0.42,27,4195709630 +PyArrow,int,(id >= 61440),False,0.4,15,1406232 +PyArrow,vec,(id >= 61440),False,0.4,27,322882603 +PyArrow,img,(id >= 61440),False,0.4,27,4195709630 +PyArrow,int,(id >= 63488),False,0.38,15,1406232 +PyArrow,vec,(id >= 63488),False,0.38,27,322882603 +PyArrow,img,(id >= 63488),False,0.38,27,4195709630 +PyArrow,int,(id >= 65536),False,0.36,15,1406232 +PyArrow,vec,(id >= 65536),False,0.36,27,322882603 +PyArrow,img,(id >= 65536),False,0.36,27,4195709630 +PyArrow,int,(id >= 67584),False,0.34,15,1406232 +PyArrow,vec,(id >= 67584),False,0.34,27,322882603 +PyArrow,img,(id >= 67584),False,0.34,27,4195709630 +PyArrow,int,(id >= 69632),False,0.32,15,1406232 +PyArrow,vec,(id >= 69632),False,0.32,27,322882603 +PyArrow,img,(id >= 69632),False,0.32,27,4195709630 +PyArrow,int,(id >= 71680),False,0.3,15,1406232 +PyArrow,vec,(id >= 71680),False,0.3,27,322882603 +PyArrow,img,(id >= 71680),False,0.3,27,4195709630 +PyArrow,int,(id >= 73728),False,0.28,15,1406232 +PyArrow,vec,(id >= 73728),False,0.28,27,322882603 +PyArrow,img,(id >= 73728),False,0.28,27,4195709630 +PyArrow,int,(id >= 75776),False,0.26,15,1406232 +PyArrow,vec,(id >= 
75776),False,0.26,27,322882603 +PyArrow,img,(id >= 75776),False,0.26,27,4195709630 +PyArrow,int,(id >= 77824),False,0.24,15,1406232 +PyArrow,vec,(id >= 77824),False,0.24,27,322882603 +PyArrow,img,(id >= 77824),False,0.24,27,4195709630 +PyArrow,int,(id >= 79872),False,0.22,15,1406232 +PyArrow,vec,(id >= 79872),False,0.22,27,322882603 +PyArrow,img,(id >= 79872),False,0.22,27,4195709630 +PyArrow,int,(id >= 81920),False,0.2,15,1406232 +PyArrow,vec,(id >= 81920),False,0.2,27,322882603 +PyArrow,img,(id >= 81920),False,0.2,27,4195709630 +PyArrow,int,(id >= 83968),False,0.18,15,1406232 +PyArrow,vec,(id >= 83968),False,0.18,27,322882603 +PyArrow,img,(id >= 83968),False,0.18,27,4195709630 +PyArrow,int,(id >= 86016),False,0.16,15,1406232 +PyArrow,vec,(id >= 86016),False,0.16,27,322882603 +PyArrow,img,(id >= 86016),False,0.16,27,4195709630 +PyArrow,int,(id >= 88064),False,0.14,15,1406232 +PyArrow,vec,(id >= 88064),False,0.14,27,322882603 +PyArrow,img,(id >= 88064),False,0.14,27,4195709630 +PyArrow,int,(id >= 90112),False,0.12,15,1406232 +PyArrow,vec,(id >= 90112),False,0.12,27,322882603 +PyArrow,img,(id >= 90112),False,0.12,27,4195709630 +PyArrow,int,(id >= 92160),False,0.1,15,1406232 +PyArrow,vec,(id >= 92160),False,0.1,27,322882603 +PyArrow,img,(id >= 92160),False,0.1,27,4195709630 +PyArrow,int,(id >= 94208),False,0.08,15,1406232 +PyArrow,vec,(id >= 94208),False,0.08,27,322882603 +PyArrow,img,(id >= 94208),False,0.08,27,4195709630 +PyArrow,int,(id >= 96256),False,0.06,15,1406232 +PyArrow,vec,(id >= 96256),False,0.06,27,322882603 +PyArrow,img,(id >= 96256),False,0.06,27,4195709630 +PyArrow,int,(id >= 98304),False,0.04,15,1406232 +PyArrow,vec,(id >= 98304),False,0.04,27,322882603 +PyArrow,img,(id >= 98304),False,0.04,27,4195709630 +PyArrow,int,(id >= 100352),False,0.02,15,1406232 +PyArrow,vec,(id >= 100352),False,0.02,27,322882603 +PyArrow,img,(id >= 100352),False,0.02,27,4195709630 +DataFusion,int,id >= 0,False,1.0,44,1252459 +DataFusion,img,id >= 0,False,1.0,44,4195556602 
+DataFusion,int,id >= 2048,False,0.98,44,1252459 +DataFusion,img,id >= 2048,False,0.98,44,4195556602 +DataFusion,int,id >= 4096,False,0.96,44,1252459 +DataFusion,img,id >= 4096,False,0.96,44,4195556602 +DataFusion,int,id >= 6144,False,0.94,44,1252459 +DataFusion,img,id >= 6144,False,0.94,44,4195556602 +DataFusion,int,id >= 8192,False,0.92,44,1252459 +DataFusion,img,id >= 8192,False,0.92,44,4195556602 +DataFusion,int,id >= 10240,False,0.9,44,1252459 +DataFusion,img,id >= 10240,False,0.9,44,4195556602 +DataFusion,int,id >= 12288,False,0.88,44,1252459 +DataFusion,img,id >= 12288,False,0.88,44,4195556602 +DataFusion,int,id >= 14336,False,0.86,44,1252459 +DataFusion,img,id >= 14336,False,0.86,44,4195556602 +DataFusion,int,id >= 16384,False,0.84,44,1252459 +DataFusion,img,id >= 16384,False,0.84,44,4195556602 +DataFusion,int,id >= 18432,False,0.82,44,1252459 +DataFusion,img,id >= 18432,False,0.82,44,4195556602 +DataFusion,int,id >= 20480,False,0.8,44,1252459 +DataFusion,img,id >= 20480,False,0.8,44,4195556602 +DataFusion,int,id >= 22528,False,0.78,44,1252459 +DataFusion,img,id >= 22528,False,0.78,44,4195556602 +DataFusion,int,id >= 24576,False,0.76,44,1252459 +DataFusion,img,id >= 24576,False,0.76,44,4195556602 +DataFusion,int,id >= 26624,False,0.74,44,1252459 +DataFusion,img,id >= 26624,False,0.74,44,4195556602 +DataFusion,int,id >= 28672,False,0.72,44,1252459 +DataFusion,img,id >= 28672,False,0.72,44,4195556602 +DataFusion,int,id >= 30720,False,0.7,44,1252459 +DataFusion,img,id >= 30720,False,0.7,44,4195556602 +DataFusion,int,id >= 32768,False,0.68,44,1252459 +DataFusion,img,id >= 32768,False,0.68,44,4195556602 +DataFusion,int,id >= 34816,False,0.66,44,1252459 +DataFusion,img,id >= 34816,False,0.66,44,4195556602 +DataFusion,int,id >= 36864,False,0.64,44,1252459 +DataFusion,img,id >= 36864,False,0.64,44,4195556602 +DataFusion,int,id >= 38912,False,0.62,44,1252459 +DataFusion,img,id >= 38912,False,0.62,44,4195556602 +DataFusion,int,id >= 40960,False,0.6,44,1252459 
+DataFusion,img,id >= 40960,False,0.6,44,4195556602 +DataFusion,int,id >= 43008,False,0.58,44,1252459 +DataFusion,img,id >= 43008,False,0.58,44,4195556602 +DataFusion,int,id >= 45056,False,0.56,44,1252459 +DataFusion,img,id >= 45056,False,0.56,44,4195556602 +DataFusion,int,id >= 47104,False,0.54,44,1252459 +DataFusion,img,id >= 47104,False,0.54,44,4195556602 +DataFusion,int,id >= 49152,False,0.52,44,1252459 +DataFusion,img,id >= 49152,False,0.52,44,4195556602 +DataFusion,int,id >= 51200,False,0.5,44,1252459 +DataFusion,img,id >= 51200,False,0.5,44,4195556602 +DataFusion,int,id >= 53248,False,0.48,44,1252459 +DataFusion,img,id >= 53248,False,0.48,44,4195556602 +DataFusion,int,id >= 55296,False,0.46,44,1252459 +DataFusion,img,id >= 55296,False,0.46,44,4195556602 +DataFusion,int,id >= 57344,False,0.44,44,1252459 +DataFusion,img,id >= 57344,False,0.44,44,4195556602 +DataFusion,int,id >= 59392,False,0.42,44,1252459 +DataFusion,img,id >= 59392,False,0.42,44,4195556602 +DataFusion,int,id >= 61440,False,0.4,44,1252459 +DataFusion,img,id >= 61440,False,0.4,44,4195556602 +DataFusion,int,id >= 63488,False,0.38,44,1252459 +DataFusion,img,id >= 63488,False,0.38,44,4195556602 +DataFusion,int,id >= 65536,False,0.36,44,1252459 +DataFusion,img,id >= 65536,False,0.36,44,4195556602 +DataFusion,int,id >= 67584,False,0.34,44,1252459 +DataFusion,img,id >= 67584,False,0.34,44,4195556602 +DataFusion,int,id >= 69632,False,0.32,44,1252459 +DataFusion,img,id >= 69632,False,0.32,44,4195556602 +DataFusion,int,id >= 71680,False,0.3,44,1252459 +DataFusion,img,id >= 71680,False,0.3,44,4195556602 +DataFusion,int,id >= 73728,False,0.28,44,1252459 +DataFusion,img,id >= 73728,False,0.28,44,4195556602 +DataFusion,int,id >= 75776,False,0.26,44,1252459 +DataFusion,img,id >= 75776,False,0.26,44,4195556602 +DataFusion,int,id >= 77824,False,0.24,44,1252459 +DataFusion,img,id >= 77824,False,0.24,44,4195556602 +DataFusion,int,id >= 79872,False,0.22,44,1252459 +DataFusion,img,id >= 
79872,False,0.22,44,4195556602 +DataFusion,int,id >= 81920,False,0.2,44,1252459 +DataFusion,img,id >= 81920,False,0.2,44,4195556602 +DataFusion,int,id >= 83968,False,0.18,44,1252459 +DataFusion,img,id >= 83968,False,0.18,44,4195556602 +DataFusion,int,id >= 86016,False,0.16,44,1252459 +DataFusion,img,id >= 86016,False,0.16,44,4195556602 +DataFusion,int,id >= 88064,False,0.14,44,1252459 +DataFusion,img,id >= 88064,False,0.14,44,4195556602 +DataFusion,int,id >= 90112,False,0.12,44,1252459 +DataFusion,img,id >= 90112,False,0.12,44,4195556602 +DataFusion,int,id >= 92160,False,0.1,44,1252459 +DataFusion,img,id >= 92160,False,0.1,44,4195556602 +DataFusion,int,id >= 94208,False,0.08,44,1252459 +DataFusion,img,id >= 94208,False,0.08,44,4195556602 +DataFusion,int,id >= 96256,False,0.06,44,1252459 +DataFusion,img,id >= 96256,False,0.06,44,4195556602 +DataFusion,int,id >= 98304,False,0.04,44,1252459 +DataFusion,img,id >= 98304,False,0.04,44,4195556602 +DataFusion,int,id >= 100352,False,0.02,44,1252459 +DataFusion,img,id >= 100352,False,0.02,44,4195556602 diff --git a/introductory/experiments/late_materialization/metered_fs.py b/introductory/experiments/late_materialization/metered_fs.py new file mode 100644 index 0000000..dd2a788 --- /dev/null +++ b/introductory/experiments/late_materialization/metered_fs.py @@ -0,0 +1,80 @@ +import io + +import pyarrow as pa +import pyarrow.fs as pa_fs + + +class MeteredFSHandler(pa_fs.FileSystemHandler): + """A FileSystemHandler that counts the number of IOs and bytes read.""" + def __init__(self, fs): + self.fs = fs + self.num_ios = 0 + self.total_bytes = 0 + + def open_input_stream(self, path): + f = self.fs.open_input_stream(path) + return pa.PythonFile(MeteredInputFile(self, f), mode="rb") + + def open_input_file(self, path): + f = self.fs.open_input_file(path) + return pa.PythonFile(MeteredInputFile(self, f), mode="rb") + + def copy_file(self, src, dest): + return self.fs.copy_file(src, dest) + + def move_file(self, src, dest): + return 
class MeteredInputFile:
    """File-like wrapper that reports its reads to a shared counter object.

    Parameters
    ----------
    fs
        Object holding the running counters. Its ``num_ios`` and
        ``total_bytes`` attributes are incremented on every metered read.
        ``MeteredFSHandler`` passes itself here when it opens a file.
    f
        The wrapped file object. Any attribute not defined on this class is
        looked up on ``f``.
    """

    def __init__(self, fs, f):
        self.fs = fs
        self.f = f

    def _record(self, data):
        """Count one IO of ``len(data)`` bytes and hand ``data`` back."""
        self.fs.num_ios += 1
        self.fs.total_bytes += len(data)
        return data

    def read_buffer(self, n=-1):
        """Call ``read_buffer(n)`` on the wrapped file and meter the result."""
        return self._record(self.f.read_buffer(n))

    def read(self, *args, **kwargs):
        """Call ``read`` on the wrapped file and meter the result.

        Metered explicitly so that callers using ``read`` instead of
        ``read_buffer`` are not silently left out of the IO counts.
        """
        return self._record(self.f.read(*args, **kwargs))

    # Delegate all other attributes to the underlying file object.
    def __getattr__(self, attr):
        # For debugging, can see which methods are being called
        # print(attr)
        return getattr(self.f, attr)
class TimeResult(typing.NamedTuple):
    """Row format of the runtime results CSV file"""

    library: str
    columns: str
    predicate: str
    late_materialization: bool
    selectivity: float
    runtime: float


def iter_runtime_benches(path):
    """Yield each benchmark entry whose name starts with ``test_runtime``.

    ``path`` is a JSON file with a top-level ``"benchmarks"`` list, as
    written by pytest-benchmark.
    """
    with open(path) as f:
        results = json.load(f)
    for benchmark in results["benchmarks"]:
        if benchmark["name"].startswith("test_runtime"):
            yield benchmark


def iter_runtime_bench_data(path):
    """Yield one ``TimeResult`` per runtime benchmark found in ``path``.

    Selectivity is the fraction of rows with ``id >= min_value``, computed
    against the row count returned by ``get_row_count()``. The reported
    runtime is the minimum over the benchmark's rounds.
    """
    total_rows = get_row_count()

    for benchmark in iter_runtime_benches(path):
        # The parameters are encoded in the test id between the brackets, e.g.
        # "test_runtime[True-Lance-10240-img]" -> (True, "Lance", 10240, "img")
        bench_parameters = benchmark["name"].split("[")[1][:-1]
        late_materialization, library, min_value, project = bench_parameters.split("-")
        late_materialization = late_materialization == "True"
        min_value = int(min_value)
        selectivity = (total_rows - min_value) / total_rows

        yield TimeResult(
            library=library,
            columns=project,
            predicate=f"id >= {min_value}",
            late_materialization=late_materialization,
            selectivity=selectivity,
            runtime=benchmark["stats"]["min"],
        )


def find_latest_benchmark(root=".benchmarks"):
    """Return the path of the newest benchmark JSON file under ``root``.

    ``root`` is expected to contain one sub-directory per machine, each
    holding the saved runs. Non-directory entries (e.g. ``.DS_Store``) and
    non-JSON files are ignored, and directory listings are sorted so the
    choice does not depend on filesystem ordering.

    Raises ``FileNotFoundError`` when no benchmark file can be found.
    """
    machine_dirs = sorted(
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    )
    if not machine_dirs:
        raise FileNotFoundError("No benchmark directories found in {}".format(root))

    benches_directory = os.path.join(root, machine_dirs[0])
    runs = sorted(
        name for name in os.listdir(benches_directory) if name.endswith(".json")
    )
    if not runs:
        raise FileNotFoundError(
            "No benchmark JSON files found in {}".format(benches_directory)
        )
    # Saved runs are prefixed with an increasing counter, so the last name in
    # sorted order is the most recent run.
    return os.path.join(benches_directory, runs[-1])


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data", type=str)
    args = parser.parse_args()

    if args.data is None:
        # Get the latest benchmark data, if not specified
        args.data = find_latest_benchmark()
        print("Using latest benchmark data: {}".format(args.data))

    # newline="" lets the csv module control line endings itself, as the csv
    # documentation requires for files handed to csv.writer.
    with open("runtime_results.csv", "w", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(TimeResult._fields)

        for benchmark in iter_runtime_bench_data(args.data):
            writer.writerow(benchmark)
0000000..1a10ecd --- /dev/null +++ b/introductory/experiments/late_materialization/requirements.txt @@ -0,0 +1,7 @@ +# TODO: fix version of pylance +fsspec==2023.12.2 +maturin==0.14.17 +pyarrow==14.0.1 +pylance +pytest==7.4.3 +pytest-benchmark==4.0.0 \ No newline at end of file diff --git a/introductory/experiments/late_materialization/runtime_results.csv b/introductory/experiments/late_materialization/runtime_results.csv new file mode 100644 index 0000000..168f8fa --- /dev/null +++ b/introductory/experiments/late_materialization/runtime_results.csv @@ -0,0 +1,66 @@ +library,columns,predicate,late_materialization,selectivity,runtime +Lance,int,id >= 10240,True,0.9,0.00040191703010350466 +Lance,vec,id >= 10240,True,0.9,0.03369570802897215 +Lance,img,id >= 10240,True,0.9,2.209484374965541 +Lance,int,id >= 25600,True,0.75,0.00038204097654670477 +Lance,vec,id >= 25600,True,0.75,0.028473292011767626 +Lance,img,id >= 25600,True,0.75,0.7128469579620287 +Lance,int,id >= 51200,True,0.5,0.000303708016872406 +Lance,vec,id >= 51200,True,0.5,0.019318207981996238 +Lance,img,id >= 51200,True,0.5,0.35833695891778916 +Lance,int,id >= 76800,True,0.25,0.0002703750506043434 +Lance,vec,id >= 76800,True,0.25,0.010238458053208888 +Lance,img,id >= 76800,True,0.25,0.17970950005110353 +Lance,int,id >= 92160,True,0.1,0.00021337508223950863 +Lance,vec,id >= 92160,True,0.1,0.005455250036902726 +Lance,img,id >= 92160,True,0.1,0.09942162490915507 +DataFusion,int,id >= 10240,True,0.9,0.014016999979503453 +DataFusion,img,id >= 10240,True,0.9,5.431098834029399 +DataFusion,int,id >= 25600,True,0.75,0.013177874963730574 +DataFusion,img,id >= 25600,True,0.75,4.556813124916516 +DataFusion,int,id >= 51200,True,0.5,0.012065374990925193 +DataFusion,img,id >= 51200,True,0.5,3.272184625035152 +DataFusion,int,id >= 76800,True,0.25,0.011561291990801692 +DataFusion,img,id >= 76800,True,0.25,3.0411849999800324 +DataFusion,int,id >= 92160,True,0.1,0.010487042018212378 +DataFusion,img,id >= 
92160,True,0.1,2.1525846250588074 +Lance,int,id >= 10240,False,0.9,0.0002829169388860464 +Lance,vec,id >= 10240,False,0.9,0.03062666696496308 +Lance,img,id >= 10240,False,0.9,2.1366003330331296 +Lance,int,id >= 25600,False,0.75,0.0002649170346558094 +Lance,vec,id >= 25600,False,0.75,0.029531584004871547 +Lance,img,id >= 25600,False,0.75,1.9548250419320539 +Lance,int,id >= 51200,False,0.5,0.00029333296697586775 +Lance,vec,id >= 51200,False,0.5,0.02944324992131442 +Lance,img,id >= 51200,False,0.5,1.7181180840125307 +Lance,int,id >= 76800,False,0.25,0.00026287499349564314 +Lance,vec,id >= 76800,False,0.25,0.029723874991759658 +Lance,img,id >= 76800,False,0.25,1.3059692919487134 +Lance,int,id >= 92160,False,0.1,0.00023033295292407274 +Lance,vec,id >= 92160,False,0.1,0.028916749986819923 +Lance,img,id >= 92160,False,0.1,1.425571333966218 +PyArrow,int,id >= 10240,False,0.9,0.0007146249990910292 +PyArrow,vec,id >= 10240,False,0.9,0.16967920796014369 +PyArrow,img,id >= 10240,False,0.9,5.599715124932118 +PyArrow,int,id >= 25600,False,0.75,0.0007245830493047833 +PyArrow,vec,id >= 25600,False,0.75,0.1609455409925431 +PyArrow,img,id >= 25600,False,0.75,4.311369417002425 +PyArrow,int,id >= 51200,False,0.5,0.0007243750151246786 +PyArrow,vec,id >= 51200,False,0.5,0.14830416697077453 +PyArrow,img,id >= 51200,False,0.5,4.588611916988157 +PyArrow,int,id >= 76800,False,0.25,0.0006674999604001641 +PyArrow,vec,id >= 76800,False,0.25,0.1381762500386685 +PyArrow,img,id >= 76800,False,0.25,4.5173566250596195 +PyArrow,int,id >= 92160,False,0.1,0.0007242090068757534 +PyArrow,vec,id >= 92160,False,0.1,0.15397108299657702 +PyArrow,img,id >= 92160,False,0.1,4.775401583989151 +DataFusion,int,id >= 10240,False,0.9,0.011413750005885959 +DataFusion,img,id >= 10240,False,0.9,5.682922625099309 +DataFusion,int,id >= 25600,False,0.75,0.01098295790143311 +DataFusion,img,id >= 25600,False,0.75,5.934684582985938 +DataFusion,int,id >= 51200,False,0.5,0.011468165903352201 +DataFusion,img,id >= 
51200,False,0.5,6.377666833926924 +DataFusion,int,id >= 76800,False,0.25,0.010876082931645215 +DataFusion,img,id >= 76800,False,0.25,5.774112624931149 +DataFusion,int,id >= 92160,False,0.1,0.010726541979238391 +DataFusion,img,id >= 92160,False,0.1,5.944229874992743 diff --git a/introductory/experiments/late_materialization/src/lib.rs b/introductory/experiments/late_materialization/src/lib.rs new file mode 100644 index 0000000..e7f993a --- /dev/null +++ b/introductory/experiments/late_materialization/src/lib.rs @@ -0,0 +1,117 @@ +use std::sync::Arc; + +use datafusion::error::Result as DFResult; +use datafusion::execution::object_store::{DefaultObjectStoreRegistry, ObjectStoreRegistry}; +use datafusion::execution::runtime_env::RuntimeEnv; +use datafusion::prelude::*; +use futures::stream::StreamExt; +use pyo3::prelude::*; +use url::Url; + +use crate::metered_store::ReadMetrics; + +mod metered_store; + +lazy_static::lazy_static! { + /// The async runtime. This will default to a multi-threaded runtime. + static ref RT: tokio::runtime::Runtime = tokio::runtime::Runtime::new().unwrap(); + + static ref METRICS: Arc = Arc::new(ReadMetrics::default()); + + /// The DataFusion session context. 
// TODO: it might be an unfair comparison to make the `read_parquet` call here.
// We should try to put the initialization in another call.
//
// Scans the Parquet data at `path` with DataFusion, keeping rows where
// `id >= min_value` and projecting `columns`, and counts the rows produced.
//
// Returns `(row_count, io)`, where `io` is `Some((read_count, read_bytes))`
// taken from the global `METRICS` when `measure_io` is set and `None`
// otherwise. `config` carries the optional keyword arguments
// (`late_materialization`, `measure_io`, `explain`); when it is absent the
// `Default` value of the config struct is used.
//
// DataFusion errors are converted into a Python exception whose message is
// the error's `Debug` output. The `unwrap` calls in the `measure_io` path
// panic instead of returning an error if the path cannot be canonicalized,
// is not valid UTF-8, or cannot be turned into a file URL.
#[pyfunction]
#[pyo3(signature = (path, columns, min_value, **config))]
fn scan_datafusion(
    path: String,
    columns: Vec,
    min_value: u64,
    config: Option,
) -> PyResult<(usize, Option<(usize, usize)>)> {
    let config = config.unwrap_or_default();

    // When measuring IO, rewrite the path into a `metered://` URL so that
    // the scan is served by the metering object store registered on `CTX`.
    let path = if config.measure_io {
        // Convert path to absolute path
        let path = std::fs::canonicalize(path).unwrap();
        let path = path.to_str().unwrap();
        let path = Url::from_file_path(path).unwrap();
        let path = path.to_string();
        // Note: the trailing slash is important to get DataFusion to search the
        // directory for parquet files, rather than treat the path as a single file.
        // `&path[4..]` drops the leading `file` scheme; the rest of the URL is
        // kept and prefixed with `metered`.
        format!("metered{}/", &path[4..])
    } else {
        path
    };

    // Run the async scan to completion on the shared Tokio runtime.
    let res: DFResult<(usize, Option<(usize, usize)>)> = RT.block_on(async move {
        // NOTE(review): `late_materialization` only toggles `parquet_pruning`
        // here; the session-level `pushdown_filters` option is set once on
        // `CTX` and is not changed per call. Confirm this is the intended
        // early/late switch.
        let read_options = ParquetReadOptions {
            parquet_pruning: Some(config.late_materialization),
            ..Default::default()
        };

        let df = CTX.read_parquet(path, read_options).await?;

        let df = df.filter(col("id").gt_eq(lit(min_value)))?;

        let columns = columns.iter().map(col).collect::>();
        let df = df.select(columns)?;

        // Optionally print the physical plan and the EXPLAIN output.
        if config.explain {
            println!("{:?}", df.clone().create_physical_plan().await?);
            println!("{:?}", df.clone().explain(false, false)?.collect().await);
        }

        // Drain the result stream; only the number of rows is kept.
        let mut row_count = 0;
        let mut stream = df.execute_stream().await?;
        while let Some(batch) = stream.next().await {
            row_count += batch?.num_rows();
        }

        if config.measure_io {
            // Read the counters, then reset them so the next measured call
            // starts from zero. The counters are not reset before the scan,
            // so they cover everything recorded since the previous reset.
            let (io_count, io_bytes) = METRICS.metrics();
            METRICS.reset();
            Ok((row_count, Some((io_count, io_bytes))))
        } else {
            Ok((row_count, None))
        }
    });

    res.map_err(|e| PyErr::new::(format!("{:?}", e)))
}
+#[pymodule] +fn late_materialization(_py: Python<'_>, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(scan_datafusion, m)?)?; + Ok(()) +} diff --git a/introductory/experiments/late_materialization/src/metered_store.rs b/introductory/experiments/late_materialization/src/metered_store.rs new file mode 100644 index 0000000..6729a54 --- /dev/null +++ b/introductory/experiments/late_materialization/src/metered_store.rs @@ -0,0 +1,135 @@ +use std::sync::{ + atomic::{AtomicUsize, Ordering}, + Arc, +}; + +use bytes::Bytes; +use futures::stream::BoxStream; +use object_store::Result as OSResult; +use object_store::{ + path::Path, GetOptions, GetResult, ListResult, MultipartId, ObjectMeta, ObjectStore, + PutOptions, PutResult, +}; +use tokio::io::AsyncWrite; + +#[derive(Debug, Default)] +pub struct ReadMetrics { + read_count: AtomicUsize, + read_bytes: AtomicUsize, +} + +impl ReadMetrics { + fn record_read(&self, bytes: usize) { + self.read_count.fetch_add(1, Ordering::Relaxed); + self.read_bytes.fetch_add(bytes, Ordering::Relaxed); + } + + pub fn metrics(&self) -> (usize, usize) { + ( + self.read_count.load(Ordering::Relaxed), + self.read_bytes.load(Ordering::Relaxed), + ) + } + + pub fn reset(&self) { + self.read_count.store(0, Ordering::Relaxed); + self.read_bytes.store(0, Ordering::Relaxed); + } +} + +#[derive(Debug)] +pub struct MeteredObjectStore { + metrics: Arc, + inner: Arc, +} + +impl MeteredObjectStore { + pub fn new(inner: Arc, metrics: Arc) -> Self { + Self { metrics, inner } + } +} + +impl std::fmt::Display for MeteredObjectStore { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.inner) + } +} + +#[async_trait::async_trait] +impl ObjectStore for MeteredObjectStore { + async fn get_opts(&self, location: &Path, options: GetOptions) -> OSResult { + let res = self.inner.get_opts(location, options).await?; + self.metrics.record_read(res.meta.size); + Ok(res) + } + + async fn get_range(&self, location: 
&Path, range: std::ops::Range) -> OSResult { + let bytes = self.inner.get_range(location, range).await?; + self.metrics.record_read(bytes.len()); + Ok(bytes) + } + + async fn get_ranges( + &self, + location: &Path, + ranges: &[std::ops::Range], + ) -> OSResult> { + let bytes = self.inner.get_ranges(location, ranges).await?; + for b in &bytes { + self.metrics.record_read(b.len()); + } + Ok(bytes) + } + + async fn put(&self, location: &Path, bytes: Bytes) -> OSResult { + self.inner.put(location, bytes).await + } + + async fn put_opts( + &self, + location: &Path, + bytes: Bytes, + options: PutOptions, + ) -> OSResult { + self.inner.put_opts(location, bytes, options).await + } + + async fn put_multipart( + &self, + location: &Path, + ) -> OSResult<(MultipartId, Box)> { + self.inner.put_multipart(location).await + } + + async fn abort_multipart(&self, location: &Path, multipart_id: &MultipartId) -> OSResult<()> { + self.inner.abort_multipart(location, multipart_id).await + } + + async fn head(&self, location: &Path) -> OSResult { + self.inner.head(location).await + } + + async fn delete(&self, location: &Path) -> OSResult<()> { + self.inner.delete(location).await + } + + fn list(&self, prefix: Option<&Path>) -> BoxStream<'_, OSResult> { + self.inner.list(prefix) + } + + async fn list_with_delimiter(&self, prefix: Option<&Path>) -> OSResult { + self.inner.list_with_delimiter(prefix).await + } + + async fn copy(&self, from: &Path, to: &Path) -> OSResult<()> { + self.inner.copy(from, to).await + } + + async fn rename(&self, from: &Path, to: &Path) -> OSResult<()> { + self.inner.rename(from, to).await + } + + async fn copy_if_not_exists(&self, from: &Path, to: &Path) -> OSResult<()> { + self.inner.copy_if_not_exists(from, to).await + } +} diff --git a/introductory/experiments/overall_scans/.gitignore b/introductory/experiments/overall_scans/.gitignore index 5c0985f..05af73d 100644 --- a/introductory/experiments/overall_scans/.gitignore +++ 
b/introductory/experiments/overall_scans/.gitignore @@ -1,2 +1,4 @@ data -*.pyc \ No newline at end of file +*.pyc +venv +.benchmarks \ No newline at end of file diff --git a/introductory/paper/experiments.qmd b/introductory/paper/experiments.qmd index 29440b1..b448076 100644 --- a/introductory/paper/experiments.qmd +++ b/introductory/paper/experiments.qmd @@ -6,6 +6,9 @@ Based on experiments in [@zeng2023empirical]. ## Scanning +Scanning data is used in both OLAP queries and Data Loader workloads. As a comparison we'll be using Parquet, a common OLAP format, and TFRecord, a common Data Loader format. + +There are two major optimizations in Lance aimed at vector and unstructured data. The first is late materialization in queries with filters, which can save IO calls. The second is strong alignment of the on-disk format with the in-memory format. ### Overall scan performance @@ -21,6 +24,8 @@ library(ggplot2) library(tidyr) library(dplyr) +ggplot2::theme_set(ggplot2::theme_minimal()) + # data <- read.csv("introductory/experiments/overall_scans/results.csv") data <- read.csv("../experiments/overall_scans/results.csv") @@ -66,3 +71,148 @@ ggplot(data, aes(x = format, y = scan_time)) + y="Runtime (s)", ) ``` + + +### Late materialization + +Late materialization can reduce IO costs by deferring the decision whether to load certain cells depending on the result of a filter. This is especially important when the projected columns are large, since the potential IO cost savings are substantial. + +Late materialization is an engine optimization, and can be applied to any columnar format. However, the performance benefit of this optimization depends on the page structure of the format. If pages are large and cannot be sliced, then late materialization will only be beneficial to the extent that whole pages can be skipped. Put another way, any IO savings brought by late materialization can be outweighed by the read amplification from the serialization format.
In Lance, vector and binary columns are laid out in a flat layout, which can be sliced at the cell level. Therefore, Lance can read these large columns with zero read amplification, if we choose to. In practice, there is often a minimum IO size, which means that a small amount of read amplification is actually beneficial to reduce the total number of IO calls. + +To demonstrate the performance benefit, we measured the performance of early versus late materialization strategies in Lance and Parquet. We compare against both PyArrow and DataFusion's Parquet scanners. PyArrow is commonly used in Parquet benchmarks in the literature but, unlike DataFusion, lacks a late materialization implementation. Therefore, we only provide results for late materialization in DataFusion. DataFusion currently does not support scanning vector columns (`FixedSizeList` in Arrow parlance), so its results for vector embeddings are omitted as well. + +As a test dataset, we used a synthetic dataset with four columns of varying cell sizes: + +* `id`, an incrementing int64 column (8 bytes wide) +* `int`, a random int64 column (8 bytes wide) +* `vec`, a 768-dimensional 32-bit vector (~3KB wide) +* `img`, a random 40KB binary blob (40KB wide) + +Vector embeddings (which `vec` represents) and compressed images (which `img` represents) are examples where compression doesn't help much but the cost of read amplification is high. There might be other types of columns, such as text documents, where there is a large size and cross-cell compression might be useful. But within-cell compression is likely to be a good tradeoff when considered with the requirement of low read amplification. + +The tables are written with 102,400 rows, with ten row groups of 10,240 rows. To isolate the effect of late materialization from statistics-based pruning, we write Parquet files without statistics. In Lance, we can disable the use of statistics when performing the scan.
+```{r} +library(ggplot2) +library(dplyr) +library(gt) +library(tidyr) + +# data_dir <- "introductory/experiments/late_materialization/" +data_dir <- "../experiments/late_materialization/" + +runtime_results <- read.csv(paste0(data_dir, "runtime_results.csv")) +runtime_results$materialization_type <- as.factor(ifelse(runtime_results$late_materialization, "Late", "Early")) +runtime_results$columns <- factor(factor(runtime_results$columns), levels=c("int", "vec", "img")) + +io_results <- read.csv(paste0(data_dir, "io_results.csv")) +io_results$materialization_type <- as.factor(ifelse(io_results$late_materialization, "Late", "Early")) +io_results$columns <- factor(factor(io_results$columns), levels=c("int", "vec", "img")) +``` + +In @fig-late-mat-runtime, we compare the time to scan the dataset with filters of varying selectivity for Parquet and Lance. For Lance, we show both early and late materialization to illustrate the impact of late materialization. + +At this row group size, Lance performs the scan faster than the two Parquet implementations, even for the small `int` column. In the early materialization case, Lance is reading roughly as many bytes from disk as the Parquet scans, or more, as shown in @fig-late-mat-total-bytes. Despite this, Lance is able to read the image column with 4.5x less latency than Parquet (@fig-late-mat-table). One possible explanation for this difference is that Lance's encodings require less decoding than Parquet to read into Arrow format. In fact, beyond some concatenation of buffers, Lance requires no transformation of binary columns. + +In cases where the projection contains a large column and the filter is relatively selective, Lance is even faster. For the `img` column with 12.1% selectivity, Lance scanned 21 times faster than DataFusion. A significant portion of this difference comes from the amount of data read from disk: during the scan, Lance reads 70% less data than DataFusion does.
This difference is enabled by Lance's ability to push down slicing at the IO level, reading only the relevant parts of the pages from disk. Meanwhile, DataFusion can only slice Parquet at the row group boundaries. (It's possible a future implementation of Parquet could slice at the page boundaries.) This pattern is clearly shown in @fig-late-mat-total-bytes, where the bytes read from disk by Lance scale smoothly with the number of rows selected by the filter, while DataFusion's jump each time a row group boundary is crossed. + +Because the filter being used selects a contiguous range of rows, DataFusion will only ever have one partial read of a group, limiting the read amplification to no more than 400MB in the case of the `img` column. However, if the filter results are more fragmented, the read amplification will be stronger. + +```{r} +#| label: fig-late-mat-runtime +#| fig-cap: Scan time for early versus late materialization. +#| fig-height: 3.5 +#| fig-width: 5 +ggplot(runtime_results, aes(color=library, linetype=materialization_type, x=selectivity, y=runtime)) + + geom_line() + + facet_wrap(~ columns, scales = "free_y", labeller = "label_both") + + scale_x_continuous(label=scales::label_percent()) + + theme(legend.position = "bottom") + + labs( + linetype="Materialization type", + x="Filter selectivity", + y="Runtime (s)", + color="Format", + ) +``` + + + +```{r} +#| label: fig-late-mat-table +#| fig-cap: Read performance for `img` column.
+chosen_selectivity <- sort(unique(runtime_results$selectivity))[1] + +low_selectivity_results <- runtime_results |> + filter(selectivity == chosen_selectivity & columns == "img") |> + left_join(io_results, by=c("library", "columns", "materialization_type", "selectivity")) |> + select(library, materialization_type, runtime, total_bytes) + +lance_baseline <- low_selectivity_results |> filter(library == "Lance") |> + rename(runtime_lance = runtime, total_bytes_lance = total_bytes) |> + select(materialization_type, runtime_lance, total_bytes_lance) + +low_selectivity_results |> + left_join(lance_baseline, by=c("materialization_type")) |> + mutate( + runtime_ratio = runtime / runtime_lance, + total_bytes_ratio = total_bytes / total_bytes_lance, + total_bytes = total_bytes / 1024 / 1024, + materialization_type = paste(materialization_type, "Materialization") + ) |> + select(materialization_type, library, runtime, runtime_ratio, total_bytes, total_bytes_ratio) |> + group_by(materialization_type) |> + gt(rowname_col="library") |> + fmt_number(runtime) |> + fmt_number(columns = c("runtime_ratio", "total_bytes_ratio"), decimals=1, pattern="{x}x") |> + fmt_number(total_bytes, decimals=0) |> + cols_label( + ends_with("ratio") ~ "", + runtime = "Latency (s)", + total_bytes = "Bytes Read (MB)", + ) #|> + # tab_header( + # title = sprintf("Read performance for img column at %.01f%% selectivity", chosen_selectivity * 100) + # ) +``` + + +In @fig-late-mat-total-bytes, we compare the total bytes read during the scans performed above. Both DataFusion and Lance are able to skip a substantial amount of IO, but Lance is able to smoothly scale while DataFusion jumps up each time it reaches a new row group boundary. + + + +```{r} +#| label: fig-late-mat-total-bytes +#| fig-cap: Bytes read for early versus late materialization. 
+#| fig-height: 3.5 +#| fig-width: 5 +ggplot(io_results, aes(color=library, linetype=materialization_type, x=selectivity, y=total_bytes)) + + geom_line() + + facet_wrap(~ columns, scales = "free_y", labeller = "label_both") + + scale_y_continuous(label=scales::label_bytes(unit="MB")) + + scale_x_continuous(label=scales::label_percent()) + + theme(legend.position = "bottom") + + labs( + linetype="Materialization type", + x="Filter selectivity", + y="Total bytes read", + color="Library", + ) +``` + + +In @fig-late-mat-num-ios, we compare the total number of IO calls performed during the scans performed above. + +```{r} +#| label: fig-late-mat-num-ios +#| fig-cap: The total number of IO calls made while scanning a single column with filter. +#| fig-height: 4 +ggplot(io_results, aes(color=library, linetype=materialization_type, x=selectivity, y=num_ios)) + + geom_line() + + facet_wrap(~ columns, scales = "free_y") + + scale_x_continuous(label=scales::label_percent()) + + theme(legend.position = "bottom") +``` + + +This reduction of IO is important for three factors: (1) query latency, (2) cost, and (3) scalability. The reduction in query latency is shown directly in @fig-late-mat-runtime. The reduction in IOs has direct cost benefits in object storage systems like S3 that charge per request. A 40% reduction in requests translates directly into reduction in costs. Finally, reduced IO calls also means the throughput limits of the storage system aren't hit as soon, improving the ability to scale the system.