This is an automated email from the ASF dual-hosted git repository.
guanmingchiu pushed a commit to branch dev-qdp
in repository https://gitbox.apache.org/repos/asf/mahout.git
The following commit(s) were added to refs/heads/dev-qdp by this push:
new 64637fa99 [QDP] Integrate Apache Arrow and Parquet for data processing
(#680)
64637fa99 is described below
commit 64637fa9977b50e326ec951f967744330e7f269c
Author: Guan-Ming (Wesley) Chiu <[email protected]>
AuthorDate: Fri Dec 5 23:14:54 2025 +0800
[QDP] Integrate Apache Arrow and Parquet for data processing (#680)
* Integrate Apache Arrow & Parquet for data processing
* Optimize Arrow Float64Array handling in io
* Add chunked Arrow Float64Array support
* Refactor encoding to support chunked Arrow Float64Array input
* Refactor I/O and encoding documentation to remove zero-copy
---
qdp/Cargo.lock | 1164 ++++++++++++++++++++++++++++++++-
qdp/Cargo.toml | 4 +
qdp/qdp-core/Cargo.toml | 2 +
qdp/qdp-core/src/error.rs | 3 +
qdp/qdp-core/src/gpu/encodings/mod.rs | 15 +
qdp/qdp-core/src/io.rs | 272 ++++++++
qdp/qdp-core/src/lib.rs | 48 ++
qdp/qdp-core/tests/parquet_io.rs | 163 +++++
8 files changed, 1640 insertions(+), 31 deletions(-)
diff --git a/qdp/Cargo.lock b/qdp/Cargo.lock
index 5d8cedd4c..c3694bab9 100644
--- a/qdp/Cargo.lock
+++ b/qdp/Cargo.lock
@@ -2,12 +2,333 @@
# It is not intended for manual editing.
version = 4
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "ahash"
+version = "0.8.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
+dependencies = [
+ "cfg-if",
+ "const-random",
+ "getrandom 0.3.4",
+ "once_cell",
+ "version_check",
+ "zerocopy",
+]
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "alloc-no-stdlib"
+version = "2.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3"
+
+[[package]]
+name = "alloc-stdlib"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece"
+dependencies = [
+ "alloc-no-stdlib",
+]
+
+[[package]]
+name = "android_system_properties"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "arrow"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b5ec52ba94edeed950e4a41f75d35376df196e8cb04437f7280a5aa49f20f796"
+dependencies = [
+ "arrow-arith",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-csv",
+ "arrow-data",
+ "arrow-ipc",
+ "arrow-json",
+ "arrow-ord",
+ "arrow-row",
+ "arrow-schema",
+ "arrow-select",
+ "arrow-string",
+]
+
+[[package]]
+name = "arrow-arith"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fc766fdacaf804cb10c7c70580254fcdb5d55cdfda2bc57b02baf5223a3af9e"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "num",
+]
+
+[[package]]
+name = "arrow-array"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a12fcdb3f1d03f69d3ec26ac67645a8fe3f878d77b5ebb0b15d64a116c212985"
+dependencies = [
+ "ahash",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "half",
+ "hashbrown 0.15.5",
+ "num",
+]
+
+[[package]]
+name = "arrow-buffer"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "263f4801ff1839ef53ebd06f99a56cecd1dbaf314ec893d93168e2e860e0291c"
+dependencies = [
+ "bytes",
+ "half",
+ "num",
+]
+
+[[package]]
+name = "arrow-cast"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ede6175fbc039dfc946a61c1b6d42fd682fcecf5ab5d148fbe7667705798cac9"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+ "atoi",
+ "base64",
+ "chrono",
+ "half",
+ "lexical-core",
+ "num",
+ "ryu",
+]
+
+[[package]]
+name = "arrow-csv"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1644877d8bc9a0ef022d9153dc29375c2bda244c39aec05a91d0e87ccf77995f"
+dependencies = [
+ "arrow-array",
+ "arrow-cast",
+ "arrow-schema",
+ "chrono",
+ "csv",
+ "csv-core",
+ "lazy_static",
+ "regex",
+]
+
+[[package]]
+name = "arrow-data"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61cfdd7d99b4ff618f167e548b2411e5dd2c98c0ddebedd7df433d34c20a4429"
+dependencies = [
+ "arrow-buffer",
+ "arrow-schema",
+ "half",
+ "num",
+]
+
+[[package]]
+name = "arrow-ipc"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62ff528658b521e33905334723b795ee56b393dbe9cf76c8b1f64b648c65a60c"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "flatbuffers",
+]
+
+[[package]]
+name = "arrow-json"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ee5b4ca98a7fb2efb9ab3309a5d1c88b5116997ff93f3147efdc1062a6158e9"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-schema",
+ "chrono",
+ "half",
+ "indexmap",
+ "lexical-core",
+ "memchr",
+ "num",
+ "serde",
+ "serde_json",
+ "simdutf8",
+]
+
+[[package]]
+name = "arrow-ord"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0a3334a743bd2a1479dbc635540617a3923b4b2f6870f37357339e6b5363c21"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+]
+
+[[package]]
+name = "arrow-row"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8d1d7a7291d2c5107e92140f75257a99343956871f3d3ab33a7b41532f79cb68"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "half",
+]
+
+[[package]]
+name = "arrow-schema"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cfaf5e440be44db5413b75b72c2a87c1f8f0627117d110264048f2969b99e9"
+
+[[package]]
+name = "arrow-select"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69efcd706420e52cd44f5c4358d279801993846d1c2a8e52111853d61d55a619"
+dependencies = [
+ "ahash",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "num",
+]
+
+[[package]]
+name = "arrow-string"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a21546b337ab304a32cfc0770f671db7411787586b45b78b4593ae78e64e2b03"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "arrow-select",
+ "memchr",
+ "num",
+ "regex",
+ "regex-syntax",
+]
+
+[[package]]
+name = "atoi"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528"
+dependencies = [
+ "num-traits",
+]
+
[[package]]
name = "autocfg"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "brotli"
+version = "7.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc97b8f16f944bba54f0433f07e30be199b6dc2bd25937444bbad560bcea29bd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+ "brotli-decompressor",
+]
+
+[[package]]
+name = "brotli-decompressor"
+version = "4.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a334ef7c9e23abf0ce748e8cd309037da93e606ad52eb372e4ce327a0dcfbdfd"
+dependencies = [
+ "alloc-no-stdlib",
+ "alloc-stdlib",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
+
+[[package]]
+name = "byteorder"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
+
+[[package]]
+name = "bytes"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b35204fbdc0b3f4446b89fc1ac2cf84a8a68971995d0bf2e925ec7cd960f9cb3"
+
[[package]]
name = "cc"
version = "1.2.48"
@@ -15,6 +336,8 @@ source =
"registry+https://github.com/rust-lang/crates.io-index"
checksum = "c481bdbf0ed3b892f6f806287d72acd515b352a4ec27a208489b8c1bc839633a"
dependencies = [
"find-msvc-tools",
+ "jobserver",
+ "libc",
"shlex",
]
@@ -24,6 +347,52 @@ version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+[[package]]
+name = "chrono"
+version = "0.4.42"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "145052bdd345b87320e369255277e3fb5152762ad123a901ef5c262dd38fe8d2"
+dependencies = [
+ "iana-time-zone",
+ "num-traits",
+ "windows-link",
+]
+
+[[package]]
+name = "const-random"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359"
+dependencies = [
+ "const-random-macro",
+]
+
+[[package]]
+name = "const-random-macro"
+version = "0.1.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e"
+dependencies = [
+ "getrandom 0.2.16",
+ "once_cell",
+ "tiny-keccak",
+]
+
+[[package]]
+name = "core-foundation-sys"
+version = "0.8.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b"
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
@@ -44,70 +413,410 @@ dependencies = [
]
[[package]]
-name = "crossbeam-utils"
-version = "0.8.21"
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "csv"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52cd9d68cf7efc6ddfaaee42e7288d3a99d613d4b50f76ce9827ae0c6e14f938"
+dependencies = [
+ "csv-core",
+ "itoa",
+ "ryu",
+ "serde_core",
+]
+
+[[package]]
+name = "csv-core"
+version = "0.1.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "704a3c26996a80471189265814dbc2c257598b96b8a7feae2d31ace646bb9782"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "cudarc"
+version = "0.13.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "486c221362668c63a1636cfa51463b09574433b39029326cff40864b3ba12b6e"
+dependencies = [
+ "libloading",
+]
+
+[[package]]
+name = "either"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
+
+[[package]]
+name = "flatbuffers"
+version = "24.12.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4f1baf0dbf96932ec9a3038d57900329c015b0bfb7b63d904f3bc27e2b02a096"
+dependencies = [
+ "bitflags",
+ "rustc_version",
+]
+
+[[package]]
+name = "flate2"
+version = "1.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.2.16"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "wasi",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi",
+ "wasip2",
+]
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "num-traits",
+ "zerocopy",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+
+[[package]]
+name = "hashbrown"
+version = "0.16.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "iana-time-zone"
+version = "0.1.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33e57f83510bb73707521ebaffa789ec8caf86f9657cad665b092b581d40e9fb"
+dependencies = [
+ "android_system_properties",
+ "core-foundation-sys",
+ "iana-time-zone-haiku",
+ "js-sys",
+ "log",
+ "wasm-bindgen",
+ "windows-core",
+]
+
+[[package]]
+name = "iana-time-zone-haiku"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "indexmap"
+version = "2.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.16.1",
+]
+
+[[package]]
+name = "indoc"
+version = "2.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
+dependencies = [
+ "rustversion",
+]
+
+[[package]]
+name = "integer-encoding"
+version = "3.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02"
+
+[[package]]
+name = "itoa"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
+
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom 0.3.4",
+ "libc",
+]
+
+[[package]]
+name = "js-sys"
+version = "0.3.83"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "464a3709c7f55f1f721e5389aa6ea4e3bc6aba669353300af094b29ffbdde1d8"
+dependencies = [
+ "once_cell",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "lexical-core"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d8d125a277f807e55a77304455eb7b1cb52f2b18c143b60e766c120bd64a594"
+dependencies = [
+ "lexical-parse-float",
+ "lexical-parse-integer",
+ "lexical-util",
+ "lexical-write-float",
+ "lexical-write-integer",
+]
+
+[[package]]
+name = "lexical-parse-float"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52a9f232fbd6f550bc0137dcb5f99ab674071ac2d690ac69704593cb4abbea56"
+dependencies = [
+ "lexical-parse-integer",
+ "lexical-util",
+]
+
+[[package]]
+name = "lexical-parse-integer"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a7a039f8fb9c19c996cd7b2fcce303c1b2874fe1aca544edc85c4a5f8489b34"
+dependencies = [
+ "lexical-util",
+]
+
+[[package]]
+name = "lexical-util"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2604dd126bb14f13fb5d1bd6a66155079cb9fa655b37f875b3a742c705dbed17"
+
+[[package]]
+name = "lexical-write-float"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50c438c87c013188d415fbabbb1dceb44249ab81664efbd31b14ae55dabb6361"
+dependencies = [
+ "lexical-util",
+ "lexical-write-integer",
+]
+
+[[package]]
+name = "lexical-write-integer"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "409851a618475d2d5796377cad353802345cba92c867d9fbcde9cf4eac4e14df"
+dependencies = [
+ "lexical-util",
+]
+
+[[package]]
+name = "libc"
+version = "0.2.177"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
+
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link",
+]
+
+[[package]]
+name = "libm"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
+
+[[package]]
+name = "log"
+version = "0.4.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
+
+[[package]]
+name = "lz4_flex"
+version = "0.11.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
+dependencies = [
+ "twox-hash 2.1.2",
+]
+
+[[package]]
+name = "memchr"
+version = "2.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
+
+[[package]]
+name = "memoffset"
+version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
[[package]]
-name = "cudarc"
-version = "0.13.9"
+name = "miniz_oxide"
+version = "0.8.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "486c221362668c63a1636cfa51463b09574433b39029326cff40864b3ba12b6e"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
dependencies = [
- "libloading",
+ "adler2",
+ "simd-adler32",
]
[[package]]
-name = "either"
-version = "1.15.0"
+name = "num"
+version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
+checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23"
+dependencies = [
+ "num-bigint",
+ "num-complex",
+ "num-integer",
+ "num-iter",
+ "num-rational",
+ "num-traits",
+]
[[package]]
-name = "find-msvc-tools"
-version = "0.1.5"
+name = "num-bigint"
+version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
[[package]]
-name = "heck"
-version = "0.5.0"
+name = "num-complex"
+version = "0.4.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495"
+dependencies = [
+ "num-traits",
+]
[[package]]
-name = "indoc"
-version = "2.0.7"
+name = "num-integer"
+version = "0.1.46"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
dependencies = [
- "rustversion",
+ "num-traits",
]
[[package]]
-name = "libc"
-version = "0.2.177"
+name = "num-iter"
+version = "0.1.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
+checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf"
+dependencies = [
+ "autocfg",
+ "num-integer",
+ "num-traits",
+]
[[package]]
-name = "libloading"
-version = "0.8.9"
+name = "num-rational"
+version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
dependencies = [
- "cfg-if",
- "windows-link",
+ "num-bigint",
+ "num-integer",
+ "num-traits",
]
[[package]]
-name = "memoffset"
-version = "0.9.1"
+name = "num-traits"
+version = "0.2.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
dependencies = [
"autocfg",
+ "libm",
]
[[package]]
@@ -125,6 +834,60 @@ version = "1.21.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
+[[package]]
+name = "ordered-float"
+version = "2.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68f19d67e5a2795c94e73e0bb1cc1a7edeb2e28efd39e2e1c9b7a40c1108b11c"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "parquet"
+version = "54.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfb15796ac6f56b429fd99e33ba133783ad75b27c36b4b5ce06f1f82cc97754e"
+dependencies = [
+ "ahash",
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-cast",
+ "arrow-data",
+ "arrow-ipc",
+ "arrow-schema",
+ "arrow-select",
+ "base64",
+ "brotli",
+ "bytes",
+ "chrono",
+ "flate2",
+ "half",
+ "hashbrown 0.15.5",
+ "lz4_flex",
+ "num",
+ "num-bigint",
+ "paste",
+ "seq-macro",
+ "simdutf8",
+ "snap",
+ "thrift",
+ "twox-hash 1.6.3",
+ "zstd",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
+
[[package]]
name = "portable-atomic"
version = "1.11.1"
@@ -205,8 +968,10 @@ dependencies = [
name = "qdp-core"
version = "0.1.0"
dependencies = [
+ "arrow",
"cudarc",
"nvtx",
+ "parquet",
"qdp-kernels",
"rayon",
"thiserror",
@@ -237,6 +1002,12 @@ dependencies = [
"proc-macro2",
]
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
[[package]]
name = "rayon"
version = "1.11.0"
@@ -257,18 +1028,140 @@ dependencies = [
"crossbeam-utils",
]
+[[package]]
+name = "regex"
+version = "1.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+
+[[package]]
+name = "rustc_version"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
+dependencies = [
+ "semver",
+]
+
[[package]]
name = "rustversion"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+[[package]]
+name = "ryu"
+version = "1.0.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
+
+[[package]]
+name = "semver"
+version = "1.0.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d767eb0aabc880b29956c35734170f26ed551a859dbd361d140cdbeca61ab1e2"
+
+[[package]]
+name = "seq-macro"
+version = "0.3.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.145"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c"
+dependencies = [
+ "itoa",
+ "memchr",
+ "ryu",
+ "serde",
+ "serde_core",
+]
+
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+[[package]]
+name = "simd-adler32"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+
+[[package]]
+name = "simdutf8"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e"
+
+[[package]]
+name = "snap"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
+
+[[package]]
+name = "static_assertions"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
+
[[package]]
name = "syn"
version = "2.0.111"
@@ -306,6 +1199,42 @@ dependencies = [
"syn",
]
+[[package]]
+name = "thrift"
+version = "0.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
+dependencies = [
+ "byteorder",
+ "integer-encoding",
+ "ordered-float",
+]
+
+[[package]]
+name = "tiny-keccak"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
+dependencies = [
+ "crunchy",
+]
+
+[[package]]
+name = "twox-hash"
+version = "1.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97fee6b57c6a41524a810daee9286c02d7752c4253064d0b05472833a438f675"
+dependencies = [
+ "cfg-if",
+ "static_assertions",
+]
+
+[[package]]
+name = "twox-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c"
+
[[package]]
name = "unicode-ident"
version = "1.0.22"
@@ -318,8 +1247,181 @@ version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
+[[package]]
+name = "wasip2"
+version = "1.0.1+wasi-0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7"
+dependencies = [
+ "wit-bindgen",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d759f433fa64a2d763d1340820e46e111a7a5ab75f993d1852d70b03dbb80fd"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48cb0d2638f8baedbc542ed444afc0644a29166f1595371af4fecf8ce1e7eeb3"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cefb59d5cd5f92d9dcf80e4683949f15ca4b511f4ac0a6e14d4e1ac60c6ecd40"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cbc538057e648b67f72a982e708d485b2efa771e1ac05fec311f9f63e5800db4"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "windows-core"
+version = "0.62.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb"
+dependencies = [
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-result"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.46.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59"
+
+[[package]]
+name = "zerocopy"
+version = "0.8.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd74ec98b9250adb3ca554bdde269adf631549f51d8a8f8f0a10b50f1cb298c3"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8a8d209fdf45cf5138cbb5a506f6b52522a25afccc534d1475dad8e31105c6a"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zstd"
+version = "0.13.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
+dependencies = [
+ "zstd-safe",
+]
+
+[[package]]
+name = "zstd-safe"
+version = "7.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f49c4d5f0abb602a93fb8736af2a4f4dd9512e36f7f570d66e65ff867ed3b9d"
+dependencies = [
+ "zstd-sys",
+]
+
+[[package]]
+name = "zstd-sys"
+version = "2.0.16+zstd.1.5.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91e19ebc2adc8f83e43039e79776e3fda8ca919132d68a1fed6a5faca2683748"
+dependencies = [
+ "cc",
+ "pkg-config",
+]
diff --git a/qdp/Cargo.toml b/qdp/Cargo.toml
index 0411c8eb3..b934a5559 100644
--- a/qdp/Cargo.toml
+++ b/qdp/Cargo.toml
@@ -24,6 +24,10 @@ cc = "1.2"
thiserror = "2.0"
# Parallel computing (for CPU preprocessing)
rayon = "1.10"
+# Apache Arrow for columnar data format support
+arrow = "54"
+# Parquet support for Arrow
+parquet = "54"
# Release profile optimizations
[profile.release]
diff --git a/qdp/qdp-core/Cargo.toml b/qdp/qdp-core/Cargo.toml
index 721ed5e54..729873b2c 100644
--- a/qdp/qdp-core/Cargo.toml
+++ b/qdp/qdp-core/Cargo.toml
@@ -9,6 +9,8 @@ qdp-kernels = { path = "../qdp-kernels" }
thiserror = { workspace = true }
rayon = { workspace = true }
nvtx = { version = "1.3", optional = true }
+arrow = { workspace = true }
+parquet = { workspace = true }
[lib]
name = "qdp_core"
diff --git a/qdp/qdp-core/src/error.rs b/qdp/qdp-core/src/error.rs
index af2161bab..5cfaabedf 100644
--- a/qdp/qdp-core/src/error.rs
+++ b/qdp/qdp-core/src/error.rs
@@ -33,6 +33,9 @@ pub enum MahoutError {
#[error("DLPack operation failed: {0}")]
DLPack(String),
+
+ #[error("I/O error: {0}")]
+ Io(String),
}
/// Result type alias for Mahout operations
diff --git a/qdp/qdp-core/src/gpu/encodings/mod.rs
b/qdp/qdp-core/src/gpu/encodings/mod.rs
index 75cf57549..539355c2f 100644
--- a/qdp/qdp-core/src/gpu/encodings/mod.rs
+++ b/qdp/qdp-core/src/gpu/encodings/mod.rs
@@ -17,6 +17,7 @@
// Quantum encoding strategies (Strategy Pattern)
use std::sync::Arc;
+use arrow::array::Float64Array;
use cudarc::driver::CudaDevice;
use crate::error::Result;
use crate::gpu::memory::GpuStateVector;
@@ -33,6 +34,20 @@ pub trait QuantumEncoder: Send + Sync {
num_qubits: usize,
) -> Result<GpuStateVector>;
+ /// Encode from chunked Arrow arrays
+ ///
+ /// Default implementation flattens chunks. (TODO: Encoders can override
for true zero-copy.)
+ fn encode_chunked(
+ &self,
+ device: &Arc<CudaDevice>,
+ chunks: &[Float64Array],
+ num_qubits: usize,
+ ) -> Result<GpuStateVector> {
+ // Default: flatten and use regular encode
+ let data = crate::io::arrow_to_vec_chunked(chunks);
+ self.encode(device, &data, num_qubits)
+ }
+
/// Validate input data before encoding
fn validate_input(&self, data: &[f64], num_qubits: usize) -> Result<()> {
Preprocessor::validate_input(data, num_qubits)
diff --git a/qdp/qdp-core/src/io.rs b/qdp/qdp-core/src/io.rs
new file mode 100644
index 000000000..fc9f09cd4
--- /dev/null
+++ b/qdp/qdp-core/src/io.rs
@@ -0,0 +1,272 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//! I/O module for reading and writing quantum data
+//!
+//! This module provides efficient columnar data exchange with the data
science ecosystem via Apache Arrow and Parquet.
+
+use std::fs::File;
+use std::path::Path;
+use std::sync::Arc;
+
+use arrow::array::{Array, ArrayRef, Float64Array, RecordBatch};
+use arrow::datatypes::{DataType, Field, Schema};
+use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use parquet::arrow::ArrowWriter;
+use parquet::file::properties::WriterProperties;
+
+use crate::error::{MahoutError, Result};
+
+/// Convert Arrow Float64Array to Vec<f64>
+///
+/// Copies the value buffer in one step if there are no nulls; otherwise nulls become 0.0
+pub fn arrow_to_vec(array: &Float64Array) -> Vec<f64> {
+ if array.null_count() == 0 {
+ array.values().to_vec()
+ } else {
+ array.iter().map(|opt| opt.unwrap_or(0.0)).collect()
+ }
+}
+
+/// Convert chunked Arrow Float64Array to Vec<f64>
+///
+/// Flattens multiple Arrow arrays into a single preallocated Vec; nulls become 0.0
+pub fn arrow_to_vec_chunked(arrays: &[Float64Array]) -> Vec<f64> {
+ let total_len: usize = arrays.iter().map(|a| a.len()).sum();
+ let mut result = Vec::with_capacity(total_len);
+
+ for array in arrays {
+ if array.null_count() == 0 {
+ result.extend_from_slice(array.values());
+ } else {
+ result.extend(array.iter().map(|opt| opt.unwrap_or(0.0)));
+ }
+ }
+
+ result
+}
+
+/// Reads quantum data from a Parquet file.
+///
+/// Reads the first column, which must contain Float64 values; its name is not checked.
+/// This function performs one copy from Arrow to Vec.
+/// To work with the Arrow arrays themselves, use `read_parquet_to_arrow` instead.
+///
+/// # Arguments
+/// * `path` - Path to the Parquet file
+///
+/// # Returns
+/// Vector of f64 values from the first column
+///
+/// # Example
+/// ```no_run
+/// use qdp_core::io::read_parquet;
+///
+/// let data = read_parquet("quantum_data.parquet").unwrap();
+/// ```
+pub fn read_parquet<P: AsRef<Path>>(path: P) -> Result<Vec<f64>> {
+ let chunks = read_parquet_to_arrow(path)?;
+ Ok(arrow_to_vec_chunked(&chunks))
+}
+
+/// Writes quantum data to a Parquet file.
+///
+/// Creates a single Float64 column, named "data" unless `column_name` is given.
+///
+/// # Arguments
+/// * `path` - Path to write the Parquet file
+/// * `data` - Vector of f64 values to write
+/// * `column_name` - Optional column name (defaults to "data")
+///
+/// # Example
+/// ```no_run
+/// use qdp_core::io::write_parquet;
+///
+/// let data = vec![0.5, 0.5, 0.5, 0.5];
+/// write_parquet("quantum_data.parquet", &data, None).unwrap();
+/// ```
+pub fn write_parquet<P: AsRef<Path>>(
+ path: P,
+ data: &[f64],
+ column_name: Option<&str>,
+) -> Result<()> {
+ if data.is_empty() {
+ return Err(MahoutError::InvalidInput(
+ "Cannot write empty data to Parquet".to_string(),
+ ));
+ }
+
+ let col_name = column_name.unwrap_or("data");
+
+ // Create Arrow schema
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ col_name,
+ DataType::Float64,
+ false,
+ )]));
+
+ // Create Float64Array from slice
+ let array = Float64Array::from_iter_values(data.iter().copied());
+ let array_ref: ArrayRef = Arc::new(array);
+
+ // Create RecordBatch
+ let batch = RecordBatch::try_new(schema.clone(),
vec![array_ref]).map_err(|e| {
+ MahoutError::Io(format!("Failed to create RecordBatch: {}", e))
+ })?;
+
+ // Write to Parquet file
+ let file = File::create(path.as_ref()).map_err(|e| {
+ MahoutError::Io(format!("Failed to create Parquet file: {}", e))
+ })?;
+
+ let props = WriterProperties::builder().build();
+ let mut writer = ArrowWriter::try_new(file, schema,
Some(props)).map_err(|e| {
+ MahoutError::Io(format!("Failed to create Parquet writer: {}", e))
+ })?;
+
+ writer.write(&batch).map_err(|e| {
+ MahoutError::Io(format!("Failed to write Parquet batch: {}", e))
+ })?;
+
+ writer.close().map_err(|e| {
+ MahoutError::Io(format!("Failed to close Parquet writer: {}", e))
+ })?;
+
+ Ok(())
+}
+
+/// Reads quantum data from a Parquet file as Arrow arrays.
+///
+/// Returns Arrow arrays directly from Parquet batches.
+/// Each element in the returned Vec corresponds to one Parquet batch.
+///
+/// Only the first column of each batch is read, and it must be of type Float64.
+///
+/// # Arguments
+/// * `path` - Path to the Parquet file
+///
+/// # Returns
+/// Vector of Float64Arrays, one per Parquet batch
+pub fn read_parquet_to_arrow<P: AsRef<Path>>(path: P) ->
Result<Vec<Float64Array>> {
+ let file = File::open(path.as_ref()).map_err(|e| {
+ MahoutError::Io(format!("Failed to open Parquet file: {}", e))
+ })?;
+
+ let builder = ParquetRecordBatchReaderBuilder::try_new(file).map_err(|e| {
+ MahoutError::Io(format!("Failed to create Parquet reader: {}", e))
+ })?;
+
+ let mut reader = builder.build().map_err(|e| {
+ MahoutError::Io(format!("Failed to build Parquet reader: {}", e))
+ })?;
+
+ let mut arrays = Vec::new();
+
+ while let Some(batch_result) = reader.next() {
+ let batch = batch_result.map_err(|e| {
+ MahoutError::Io(format!("Failed to read Parquet batch: {}", e))
+ })?;
+
+ if batch.num_columns() == 0 {
+ return Err(MahoutError::Io(
+ "Parquet file has no columns".to_string(),
+ ));
+ }
+
+ let column = batch.column(0);
+ if !matches!(column.data_type(), DataType::Float64) {
+ return Err(MahoutError::Io(format!(
+ "Expected Float64 column, got {:?}",
+ column.data_type()
+ )));
+ }
+
+ // Clone the Float64Array (reference-counted, no data copy)
+ let float_array = column
+ .as_any()
+ .downcast_ref::<Float64Array>()
+ .ok_or_else(|| {
+ MahoutError::Io("Failed to downcast to
Float64Array".to_string())
+ })?
+ .clone();
+
+ arrays.push(float_array);
+ }
+
+ if arrays.is_empty() {
+ return Err(MahoutError::Io(
+ "Parquet file contains no data".to_string(),
+ ));
+ }
+
+ Ok(arrays)
+}
+
+/// Writes an Arrow Float64Array to a Parquet file.
+///
+/// Writes from Arrow format to Parquet.
+///
+/// # Arguments
+/// * `path` - Path to write the Parquet file
+/// * `array` - Float64Array to write
+/// * `column_name` - Optional column name (defaults to "data")
+pub fn write_arrow_to_parquet<P: AsRef<Path>>(
+ path: P,
+ array: &Float64Array,
+ column_name: Option<&str>,
+) -> Result<()> {
+ if array.is_empty() {
+ return Err(MahoutError::InvalidInput(
+ "Cannot write empty array to Parquet".to_string(),
+ ));
+ }
+
+ let col_name = column_name.unwrap_or("data");
+
+ // Create Arrow schema
+ let schema = Arc::new(Schema::new(vec![Field::new(
+ col_name,
+ DataType::Float64,
+ false,
+ )]));
+
+ let array_ref: ArrayRef = Arc::new(array.clone());
+
+ // Create RecordBatch
+ let batch = RecordBatch::try_new(schema.clone(),
vec![array_ref]).map_err(|e| {
+ MahoutError::Io(format!("Failed to create RecordBatch: {}", e))
+ })?;
+
+ // Write to Parquet file
+ let file = File::create(path.as_ref()).map_err(|e| {
+ MahoutError::Io(format!("Failed to create Parquet file: {}", e))
+ })?;
+
+ let props = WriterProperties::builder().build();
+ let mut writer = ArrowWriter::try_new(file, schema,
Some(props)).map_err(|e| {
+ MahoutError::Io(format!("Failed to create Parquet writer: {}", e))
+ })?;
+
+ writer.write(&batch).map_err(|e| {
+ MahoutError::Io(format!("Failed to write Parquet batch: {}", e))
+ })?;
+
+ writer.close().map_err(|e| {
+ MahoutError::Io(format!("Failed to close Parquet writer: {}", e))
+ })?;
+
+ Ok(())
+}
diff --git a/qdp/qdp-core/src/lib.rs b/qdp/qdp-core/src/lib.rs
index 6fa43af7e..406715edd 100644
--- a/qdp/qdp-core/src/lib.rs
+++ b/qdp/qdp-core/src/lib.rs
@@ -18,6 +18,7 @@ pub mod dlpack;
pub mod gpu;
pub mod error;
pub mod preprocessing;
+pub mod io;
#[macro_use]
mod profiling;
@@ -25,6 +26,7 @@ mod profiling;
pub use error::{MahoutError, Result};
use std::sync::Arc;
+use arrow::array::Float64Array;
use cudarc::driver::CudaDevice;
use crate::dlpack::DLManagedTensor;
use crate::gpu::get_encoder;
@@ -85,6 +87,52 @@ impl QdpEngine {
pub fn device(&self) -> &CudaDevice {
&self.device
}
+
+ /// Encode from chunked Arrow arrays (e.g. as read from Parquet)
+ ///
+ /// # Arguments
+ /// * `chunks` - Chunked Arrow Float64Arrays (from read_parquet_to_arrow)
+ /// * `num_qubits` - Number of qubits
+ /// * `encoding_method` - Strategy: "amplitude", "angle", or "basis"
+ ///
+ /// # Returns
+ /// DLPack pointer for zero-copy PyTorch integration
+ pub fn encode_chunked(
+ &self,
+ chunks: &[Float64Array],
+ num_qubits: usize,
+ encoding_method: &str,
+ ) -> Result<*mut DLManagedTensor> {
+ crate::profile_scope!("Mahout::EncodeChunked");
+
+ let encoder = get_encoder(encoding_method)?;
+ let state_vector = encoder.encode_chunked(&self.device, chunks,
num_qubits)?;
+ let dlpack_ptr = {
+ crate::profile_scope!("Mahout::CreateDLPack");
+ state_vector.to_dlpack()
+ };
+ Ok(dlpack_ptr)
+ }
+
+ /// Load data from Parquet file and encode into quantum state
+ ///
+ /// Reads the Parquet file into chunked Arrow arrays and encodes them via
`encode_chunked`.
+ ///
+ /// # Arguments
+ /// * `path` - Path to Parquet file
+ /// * `num_qubits` - Number of qubits
+ /// * `encoding_method` - Strategy: "amplitude", "angle", or "basis"
+ pub fn encode_from_parquet(
+ &self,
+ path: &str,
+ num_qubits: usize,
+ encoding_method: &str,
+ ) -> Result<*mut DLManagedTensor> {
+ crate::profile_scope!("Mahout::EncodeFromParquet");
+
+ let chunks = crate::io::read_parquet_to_arrow(path)?;
+ self.encode_chunked(&chunks, num_qubits, encoding_method)
+ }
}
// Re-export key types for convenience
diff --git a/qdp/qdp-core/tests/parquet_io.rs b/qdp/qdp-core/tests/parquet_io.rs
new file mode 100644
index 000000000..7b45573ba
--- /dev/null
+++ b/qdp/qdp-core/tests/parquet_io.rs
@@ -0,0 +1,163 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use qdp_core::io::{
+ read_parquet, read_parquet_to_arrow, write_arrow_to_parquet, write_parquet,
+};
+use arrow::array::Float64Array;
+use std::fs;
+
+mod common;
+
+#[test]
+fn test_write_and_read_parquet() {
+ let temp_path = "/tmp/test_quantum_data.parquet";
+ let data = common::create_test_data(4);
+
+ // Write data
+ write_parquet(temp_path, &data, None).unwrap();
+
+ // Read it back
+ let read_data = read_parquet(temp_path).unwrap();
+
+ // Verify
+ assert_eq!(data.len(), read_data.len());
+ for (original, read) in data.iter().zip(read_data.iter()) {
+ assert!((original - read).abs() < 1e-10);
+ }
+
+ // Cleanup
+ fs::remove_file(temp_path).unwrap();
+}
+
+#[test]
+fn test_write_with_custom_column_name() {
+ let temp_path = "/tmp/test_custom_column.parquet";
+ let data = vec![1.0, 2.0, 3.0, 4.0];
+
+ // Write with custom column name
+ write_parquet(temp_path, &data, Some("quantum_state")).unwrap();
+
+ // Read it back (column name doesn't matter for reading)
+ let read_data = read_parquet(temp_path).unwrap();
+
+ assert_eq!(data, read_data);
+
+ // Cleanup
+ fs::remove_file(temp_path).unwrap();
+}
+
+#[test]
+fn test_write_empty_data_fails() {
+ let temp_path = "/tmp/test_empty.parquet";
+ let data: Vec<f64> = vec![];
+
+ let result = write_parquet(temp_path, &data, None);
+ assert!(result.is_err());
+}
+
+#[test]
+fn test_read_nonexistent_file_fails() {
+ let result = read_parquet("/tmp/nonexistent_file_12345.parquet");
+ assert!(result.is_err());
+}
+
+#[test]
+fn test_arrow_roundtrip() {
+ let temp_path = "/tmp/test_arrow_roundtrip.parquet";
+ let data = common::create_test_data(8);
+ let array = Float64Array::from(data.clone());
+
+ // Write Arrow array
+ write_arrow_to_parquet(temp_path, &array, None).unwrap();
+
+ // Read back as Arrow arrays (chunked)
+ let read_chunks = read_parquet_to_arrow(temp_path).unwrap();
+
+ // Verify total length
+ let total_len: usize = read_chunks.iter().map(|c| c.len()).sum();
+ assert_eq!(array.len(), total_len);
+
+ // Verify data integrity
+ let mut offset = 0;
+ for chunk in &read_chunks {
+ for i in 0..chunk.len() {
+ assert!((array.value(offset + i) - chunk.value(i)).abs() < 1e-10);
+ }
+ offset += chunk.len();
+ }
+
+ // Cleanup
+ fs::remove_file(temp_path).unwrap();
+}
+
+#[test]
+fn test_write_empty_arrow_fails() {
+ let temp_path = "/tmp/test_empty_arrow.parquet";
+ let array = Float64Array::from(Vec::<f64>::new());
+
+ let result = write_arrow_to_parquet(temp_path, &array, None);
+ assert!(result.is_err());
+}
+
+#[test]
+fn test_large_dataset() {
+ let temp_path = "/tmp/test_large_dataset.parquet";
+ let size = 1024;
+ let data: Vec<f64> = (0..size).map(|i| i as f64 / size as f64).collect();
+
+ // Write
+ write_parquet(temp_path, &data, None).unwrap();
+
+ // Read
+ let read_data = read_parquet(temp_path).unwrap();
+
+ // Verify size and sample values
+ assert_eq!(data.len(), read_data.len());
+ assert!((data[0] - read_data[0]).abs() < 1e-10);
+ assert!((data[size - 1] - read_data[size - 1]).abs() < 1e-10);
+
+ // Cleanup
+ fs::remove_file(temp_path).unwrap();
+}
+
+#[test]
+fn test_chunked_read_api() {
+ let temp_path = "/tmp/test_chunked_api.parquet";
+ let data = common::create_test_data(16);
+
+ // Write test data
+ write_parquet(temp_path, &data, None).unwrap();
+ let chunks = read_parquet_to_arrow(temp_path).unwrap();
+ assert!(!chunks.is_empty());
+ let total_len: usize = chunks.iter().map(|c| c.len()).sum();
+ assert_eq!(total_len, data.len());
+ for chunk in &chunks {
+ let buffer_ptr = chunk.values().as_ptr();
+ assert!(!buffer_ptr.is_null());
+ assert_eq!(buffer_ptr as usize % std::mem::align_of::<f64>(), 0);
+
+ unsafe {
+ let slice = std::slice::from_raw_parts(buffer_ptr, chunk.len());
+ for (i, &value) in slice.iter().enumerate() {
+ assert_eq!(value, chunk.value(i));
+ }
+ }
+ }
+
+ // Cleanup
+ fs::remove_file(temp_path).unwrap();
+}