This is an automated email from the ASF dual-hosted git repository.
mbutrovich pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 97767c2c7 deps: upgrade DataFusion to 51, Arrow to 57, Iceberg to
latest, MSRV to 1.88 (#2729)
97767c2c7 is described below
commit 97767c2c7f0595948f460e9789494291490efe13
Author: Matt Butrovich <[email protected]>
AuthorDate: Thu Dec 11 13:49:40 2025 -0500
deps: upgrade DataFusion to 51, Arrow to 57, Iceberg to latest, MSRV to
1.88 (#2729)
---
native/Cargo.lock | 569 ++++++++++-----------
native/Cargo.toml | 15 +-
native/core/Cargo.toml | 4 +-
native/core/src/execution/planner.rs | 4 +-
native/core/src/parquet/encryption_support.rs | 4 +-
native/core/src/parquet/parquet_exec.rs | 2 +-
native/core/src/parquet/read/column.rs | 16 +-
native/core/src/parquet/util/jni.rs | 7 +-
native/spark-expr/Cargo.toml | 1 +
native/spark-expr/src/agg_funcs/avg.rs | 10 +-
native/spark-expr/src/agg_funcs/avg_decimal.rs | 19 +-
.../src/math_funcs/internal/checkoverflow.rs | 2 +-
.../src/math_funcs/internal/make_decimal.rs | 10 +-
.../apache/comet/iceberg/IcebergReflection.scala | 3 +
.../serde/operator/CometIcebergNativeScan.scala | 184 +++++--
.../org/apache/comet/CometIcebergNativeSuite.scala | 141 +++++
16 files changed, 618 insertions(+), 373 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index f17b71845..acdd27976 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -110,9 +110,9 @@ checksum =
"a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61"
[[package]]
name = "apache-avro"
-version = "0.20.0"
+version = "0.21.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a033b4ced7c585199fb78ef50fca7fe2f444369ec48080c5fd072efa1a03cc7"
+checksum = "36fa98bc79671c7981272d91a8753a928ff6a1cd8e4f20a44c45bd5d313840bf"
dependencies = [
"bigdecimal",
"bon",
@@ -126,8 +126,8 @@ dependencies = [
"serde",
"serde_bytes",
"serde_json",
- "strum 0.27.2",
- "strum_macros 0.27.2",
+ "strum",
+ "strum_macros",
"thiserror 2.0.17",
"uuid",
"zstd",
@@ -159,9 +159,9 @@ checksum =
"7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc"
+checksum = "cb372a7cbcac02a35d3fb7b3fc1f969ec078e871f9bb899bf00a2e1809bec8a3"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -180,23 +180,23 @@ dependencies = [
[[package]]
name = "arrow-arith"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8"
+checksum = "0f377dcd19e440174596d83deb49cd724886d91060c07fec4f67014ef9d54049"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
"chrono",
- "num",
+ "num-traits",
]
[[package]]
name = "arrow-array"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d"
+checksum = "a23eaff85a44e9fa914660fb0d0bb00b79c4a3d888b5334adb3ea4330c84f002"
dependencies = [
"ahash 0.8.12",
"arrow-buffer",
@@ -206,29 +206,33 @@ dependencies = [
"chrono-tz",
"half",
"hashbrown 0.16.1",
- "num",
+ "num-complex",
+ "num-integer",
+ "num-traits",
]
[[package]]
name = "arrow-buffer"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc"
+checksum = "a2819d893750cb3380ab31ebdc8c68874dd4429f90fd09180f3c93538bd21626"
dependencies = [
"bytes",
"half",
- "num",
+ "num-bigint",
+ "num-traits",
]
[[package]]
name = "arrow-cast"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023"
+checksum = "e3d131abb183f80c450d4591dc784f8d7750c50c6e2bc3fcaad148afc8361271"
dependencies = [
"arrow-array",
"arrow-buffer",
"arrow-data",
+ "arrow-ord",
"arrow-schema",
"arrow-select",
"atoi",
@@ -237,15 +241,15 @@ dependencies = [
"comfy-table",
"half",
"lexical-core",
- "num",
+ "num-traits",
"ryu",
]
[[package]]
name = "arrow-csv"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb"
+checksum = "2275877a0e5e7e7c76954669366c2aa1a829e340ab1f612e647507860906fb6b"
dependencies = [
"arrow-array",
"arrow-cast",
@@ -258,21 +262,22 @@ dependencies = [
[[package]]
name = "arrow-data"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0"
+checksum = "05738f3d42cb922b9096f7786f606fcb8669260c2640df8490533bb2fa38c9d3"
dependencies = [
"arrow-buffer",
"arrow-schema",
"half",
- "num",
+ "num-integer",
+ "num-traits",
]
[[package]]
name = "arrow-ipc"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5"
+checksum = "3d09446e8076c4b3f235603d9ea7c5494e73d441b01cd61fb33d7254c11964b3"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -280,14 +285,14 @@ dependencies = [
"arrow-schema",
"arrow-select",
"flatbuffers",
- "lz4_flex 0.11.5",
+ "lz4_flex",
]
[[package]]
name = "arrow-json"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b"
+checksum = "371ffd66fa77f71d7628c63f209c9ca5341081051aa32f9c8020feb0def787c0"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -297,19 +302,21 @@ dependencies = [
"chrono",
"half",
"indexmap 2.12.1",
+ "itoa",
"lexical-core",
"memchr",
- "num",
- "serde",
+ "num-traits",
+ "ryu",
+ "serde_core",
"serde_json",
"simdutf8",
]
[[package]]
name = "arrow-ord"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f"
+checksum = "cbc94fc7adec5d1ba9e8cd1b1e8d6f72423b33fe978bf1f46d970fafab787521"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -320,9 +327,9 @@ dependencies = [
[[package]]
name = "arrow-row"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753"
+checksum = "169676f317157dc079cc5def6354d16db63d8861d61046d2f3883268ced6f99f"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -333,34 +340,34 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe"
+checksum = "d27609cd7dd45f006abae27995c2729ef6f4b9361cde1ddd019dc31a5aa017e0"
dependencies = [
"bitflags 2.10.0",
- "serde",
+ "serde_core",
"serde_json",
]
[[package]]
name = "arrow-select"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a"
+checksum = "ae980d021879ea119dd6e2a13912d81e64abed372d53163e804dfe84639d8010"
dependencies = [
"ahash 0.8.12",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
- "num",
+ "num-traits",
]
[[package]]
name = "arrow-string"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d"
+checksum = "cf35e8ef49dcf0c5f6d175edee6b8af7b45611805333129c541a8b89a0fc0534"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -368,7 +375,7 @@ dependencies = [
"arrow-schema",
"arrow-select",
"memchr",
- "num",
+ "num-traits",
"regex",
"regex-syntax",
]
@@ -549,9 +556,9 @@ checksum =
"c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
[[package]]
name = "aws-config"
-version = "1.8.11"
+version = "1.8.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0149602eeaf915158e14029ba0c78dedb8c08d554b024d54c8f239aab46511d"
+checksum = "96571e6996817bf3d58f6b569e4b9fd2e9d2fcf9f7424eed07b2ce9bb87535e5"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -579,9 +586,9 @@ dependencies = [
[[package]]
name = "aws-credential-types"
-version = "1.2.10"
+version = "1.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b01c9521fa01558f750d183c8c68c81b0155b9d193a4ba7f84c36bd1b6d04a06"
+checksum = "3cd362783681b15d136480ad555a099e82ecd8e2d10a841e14dfd0078d67fee3"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -613,9 +620,9 @@ dependencies = [
[[package]]
name = "aws-runtime"
-version = "1.5.16"
+version = "1.5.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7ce527fb7e53ba9626fc47824f25e256250556c40d8f81d27dd92aa38239d632"
+checksum = "d81b5b2898f6798ad58f484856768bca817e3cd9de0974c24ae0f1113fe88f1b"
dependencies = [
"aws-credential-types",
"aws-sigv4",
@@ -637,9 +644,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sso"
-version = "1.90.0"
+version = "1.91.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4f18e53542c522459e757f81e274783a78f8c81acdfc8d1522ee8a18b5fb1c66"
+checksum = "8ee6402a36f27b52fe67661c6732d684b2635152b676aa2babbfb5204f99115d"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -659,9 +666,9 @@ dependencies = [
[[package]]
name = "aws-sdk-ssooidc"
-version = "1.92.0"
+version = "1.93.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "532f4d866012ffa724a4385c82e8dd0e59f0ca0e600f3f22d4c03b6824b34e4a"
+checksum = "a45a7f750bbd170ee3677671ad782d90b894548f4e4ae168302c57ec9de5cb3e"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -681,9 +688,9 @@ dependencies = [
[[package]]
name = "aws-sdk-sts"
-version = "1.94.0"
+version = "1.95.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1be6fbbfa1a57724788853a623378223fe828fc4c09b146c992f0c95b6256174"
+checksum = "55542378e419558e6b1f398ca70adb0b2088077e79ad9f14eb09441f2f7b2164"
dependencies = [
"aws-credential-types",
"aws-runtime",
@@ -704,9 +711,9 @@ dependencies = [
[[package]]
name = "aws-sigv4"
-version = "1.3.6"
+version = "1.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c35452ec3f001e1f2f6db107b6373f1f48f05ec63ba2c5c9fa91f07dad32af11"
+checksum = "69e523e1c4e8e7e8ff219d732988e22bfeae8a1cafdbe6d9eca1546fa080be7c"
dependencies = [
"aws-credential-types",
"aws-smithy-http",
@@ -726,9 +733,9 @@ dependencies = [
[[package]]
name = "aws-smithy-async"
-version = "1.2.6"
+version = "1.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "127fcfad33b7dfc531141fda7e1c402ac65f88aca5511a4d31e2e3d2cd01ce9c"
+checksum = "9ee19095c7c4dda59f1697d028ce704c24b2d33c6718790c7f1d5a3015b4107c"
dependencies = [
"futures-util",
"pin-project-lite",
@@ -737,9 +744,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http"
-version = "0.62.5"
+version = "0.62.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "445d5d720c99eed0b4aa674ed00d835d9b1427dd73e04adaf2f94c6b2d6f9fca"
+checksum = "826141069295752372f8203c17f28e30c464d22899a43a0c9fd9c458d469c88b"
dependencies = [
"aws-smithy-runtime-api",
"aws-smithy-types",
@@ -758,9 +765,9 @@ dependencies = [
[[package]]
name = "aws-smithy-http-client"
-version = "1.1.4"
+version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "623254723e8dfd535f566ee7b2381645f8981da086b5c4aa26c0c41582bb1d2c"
+checksum = "59e62db736db19c488966c8d787f52e6270be565727236fd5579eaa301e7bc4a"
dependencies = [
"aws-smithy-async",
"aws-smithy-runtime-api",
@@ -782,27 +789,27 @@ dependencies = [
[[package]]
name = "aws-smithy-json"
-version = "0.61.7"
+version = "0.61.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2db31f727935fc63c6eeae8b37b438847639ec330a9161ece694efba257e0c54"
+checksum = "a6864c190cbb8e30cf4b77b2c8f3b6dfffa697a09b7218d2f7cd3d4c4065a9f7"
dependencies = [
"aws-smithy-types",
]
[[package]]
name = "aws-smithy-observability"
-version = "0.1.4"
+version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2d1881b1ea6d313f9890710d65c158bdab6fb08c91ea825f74c1c8c357baf4cc"
+checksum = "17f616c3f2260612fe44cede278bafa18e73e6479c4e393e2c4518cf2a9a228a"
dependencies = [
"aws-smithy-runtime-api",
]
[[package]]
name = "aws-smithy-query"
-version = "0.60.8"
+version = "0.60.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d28a63441360c477465f80c7abac3b9c4d075ca638f982e605b7dc2a2c7156c9"
+checksum = "ae5d689cf437eae90460e944a58b5668530d433b4ff85789e69d2f2a556e057d"
dependencies = [
"aws-smithy-types",
"urlencoding",
@@ -810,9 +817,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime"
-version = "1.9.4"
+version = "1.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0bbe9d018d646b96c7be063dd07987849862b0e6d07c778aad7d93d1be6c1ef0"
+checksum = "a392db6c583ea4a912538afb86b7be7c5d8887d91604f50eb55c262ee1b4a5f5"
dependencies = [
"aws-smithy-async",
"aws-smithy-http",
@@ -834,9 +841,9 @@ dependencies = [
[[package]]
name = "aws-smithy-runtime-api"
-version = "1.9.2"
+version = "1.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec7204f9fd94749a7c53b26da1b961b4ac36bf070ef1e0b94bb09f79d4f6c193"
+checksum = "ab0d43d899f9e508300e587bf582ba54c27a452dd0a9ea294690669138ae14a2"
dependencies = [
"aws-smithy-async",
"aws-smithy-types",
@@ -851,9 +858,9 @@ dependencies = [
[[package]]
name = "aws-smithy-types"
-version = "1.3.4"
+version = "1.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "25f535879a207fce0db74b679cfc3e91a3159c8144d717d55f5832aea9eef46e"
+checksum = "905cb13a9895626d49cf2ced759b062d913834c7482c38e49557eac4e6193f01"
dependencies = [
"base64-simd",
"bytes",
@@ -874,18 +881,18 @@ dependencies = [
[[package]]
name = "aws-smithy-xml"
-version = "0.60.12"
+version = "0.60.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eab77cdd036b11056d2a30a7af7b775789fb024bf216acc13884c6c97752ae56"
+checksum = "11b2f670422ff42bf7065031e72b45bc52a3508bd089f743ea90731ca2b6ea57"
dependencies = [
"xmlparser",
]
[[package]]
name = "aws-types"
-version = "1.3.10"
+version = "1.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d79fb68e3d7fe5d4833ea34dc87d2e97d26d3086cb3da660bb6b1f76d98680b6"
+checksum = "1d980627d2dd7bfc32a3c025685a033eeab8d365cc840c631ef59d1b8f428164"
dependencies = [
"aws-credential-types",
"aws-smithy-async",
@@ -1373,12 +1380,11 @@ dependencies = [
[[package]]
name = "comfy-table"
-version = "7.1.2"
+version = "7.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56"
+checksum = "b03b7db8e0b4b2fdad6c551e634134e99ec000e5c8c3b6856c65e8bbaded7a3b"
dependencies = [
- "strum 0.26.3",
- "strum_macros 0.26.4",
+ "unicode-segmentation",
"unicode-width",
]
@@ -1667,12 +1673,11 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a"
+checksum = "8ba7cb113e9c0bedf9e9765926031e132fa05a1b09ba6e93a6d1a4d7044457b8"
dependencies = [
"arrow",
- "arrow-ipc",
"arrow-schema",
"async-trait",
"bytes",
@@ -1682,6 +1687,7 @@ dependencies = [
"datafusion-common",
"datafusion-common-runtime",
"datafusion-datasource",
+ "datafusion-datasource-arrow",
"datafusion-datasource-csv",
"datafusion-datasource-json",
"datafusion-datasource-parquet",
@@ -1702,7 +1708,6 @@ dependencies = [
"datafusion-session",
"datafusion-sql",
"futures",
- "hex",
"itertools 0.14.0",
"log",
"object_store",
@@ -1710,6 +1715,7 @@ dependencies = [
"parquet",
"rand 0.9.2",
"regex",
+ "rstest",
"sqlparser",
"tempfile",
"tokio",
@@ -1719,9 +1725,9 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621"
+checksum = "66a3a799f914a59b1ea343906a0486f17061f39509af74e874a866428951130d"
dependencies = [
"arrow",
"async-trait",
@@ -1734,7 +1740,6 @@ dependencies = [
"datafusion-physical-expr",
"datafusion-physical-plan",
"datafusion-session",
- "datafusion-sql",
"futures",
"itertools 0.14.0",
"log",
@@ -1745,9 +1750,9 @@ dependencies = [
[[package]]
name = "datafusion-catalog-listing"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83"
+checksum = "6db1b113c80d7a0febcd901476a57aef378e717c54517a163ed51417d87621b0"
dependencies = [
"arrow",
"async-trait",
@@ -1757,10 +1762,11 @@ dependencies = [
"datafusion-execution",
"datafusion-expr",
"datafusion-physical-expr",
+ "datafusion-physical-expr-adapter",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
- "datafusion-session",
"futures",
+ "itertools 0.14.0",
"log",
"object_store",
"tokio",
@@ -1795,13 +1801,13 @@ dependencies = [
"lazy_static",
"log",
"log4rs",
- "lz4_flex 0.12.0",
+ "lz4_flex",
"mimalloc",
"num",
"object_store",
"object_store_opendal",
"once_cell",
- "opendal 0.55.0",
+ "opendal",
"parking_lot",
"parquet",
"paste",
@@ -1883,14 +1889,13 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1"
+checksum = "7c10f7659e96127d25e8366be7c8be4109595d6a2c3eac70421f380a7006a1b0"
dependencies = [
"ahash 0.8.12",
"arrow",
"arrow-ipc",
- "base64",
"chrono",
"half",
"hashbrown 0.14.5",
@@ -1908,9 +1913,9 @@ dependencies = [
[[package]]
name = "datafusion-common-runtime"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e"
+checksum = "b92065bbc6532c6651e2f7dd30b55cba0c7a14f860c7e1d15f165c41a1868d95"
dependencies = [
"futures",
"log",
@@ -1919,9 +1924,9 @@ dependencies = [
[[package]]
name = "datafusion-datasource"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86"
+checksum = "fde13794244bc7581cd82f6fff217068ed79cdc344cafe4ab2c3a1c3510b38d6"
dependencies = [
"arrow",
"async-compression",
@@ -1944,9 +1949,7 @@ dependencies = [
"itertools 0.14.0",
"log",
"object_store",
- "parquet",
"rand 0.9.2",
- "tempfile",
"tokio",
"tokio-util",
"url",
@@ -1954,22 +1957,44 @@ dependencies = [
"zstd",
]
+[[package]]
+name = "datafusion-datasource-arrow"
+version = "51.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "804fa9b4ecf3157982021770617200ef7c1b2979d57bec9044748314775a9aea"
+dependencies = [
+ "arrow",
+ "arrow-ipc",
+ "async-trait",
+ "bytes",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "datafusion-session",
+ "futures",
+ "itertools 0.14.0",
+ "object_store",
+ "tokio",
+]
+
[[package]]
name = "datafusion-datasource-csv"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825"
+checksum = "61a1641a40b259bab38131c5e6f48fac0717bedb7dc93690e604142a849e0568"
dependencies = [
"arrow",
"async-trait",
"bytes",
- "datafusion-catalog",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-datasource",
"datafusion-execution",
"datafusion-expr",
- "datafusion-physical-expr",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"datafusion-session",
@@ -1981,74 +2006,67 @@ dependencies = [
[[package]]
name = "datafusion-datasource-json"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6"
+checksum = "adeacdb00c1d37271176f8fb6a1d8ce096baba16ea7a4b2671840c5c9c64fe85"
dependencies = [
"arrow",
"async-trait",
"bytes",
- "datafusion-catalog",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-datasource",
"datafusion-execution",
"datafusion-expr",
- "datafusion-physical-expr",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
"datafusion-session",
"futures",
"object_store",
- "serde_json",
"tokio",
]
[[package]]
name = "datafusion-datasource-parquet"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133"
+checksum = "43d0b60ffd66f28bfb026565d62b0a6cbc416da09814766a3797bba7d85a3cd9"
dependencies = [
"arrow",
"async-trait",
"bytes",
- "datafusion-catalog",
"datafusion-common",
"datafusion-common-runtime",
"datafusion-datasource",
"datafusion-execution",
"datafusion-expr",
- "datafusion-functions-aggregate",
+ "datafusion-functions-aggregate-common",
"datafusion-physical-expr",
"datafusion-physical-expr-adapter",
"datafusion-physical-expr-common",
- "datafusion-physical-optimizer",
"datafusion-physical-plan",
"datafusion-pruning",
"datafusion-session",
"futures",
- "hex",
"itertools 0.14.0",
"log",
"object_store",
"parking_lot",
"parquet",
- "rand 0.9.2",
"tokio",
]
[[package]]
name = "datafusion-doc"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429"
+checksum = "2b99e13947667b36ad713549237362afb054b2d8f8cc447751e23ec61202db07"
[[package]]
name = "datafusion-execution"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531"
+checksum = "63695643190679037bc946ad46a263b62016931547bf119859c511f7ff2f5178"
dependencies = [
"arrow",
"async-trait",
@@ -2067,9 +2085,9 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe"
+checksum = "f9a4787cbf5feb1ab351f789063398f67654a6df75c4d37d7f637dc96f951a91"
dependencies = [
"arrow",
"async-trait",
@@ -2081,6 +2099,7 @@ dependencies = [
"datafusion-functions-window-common",
"datafusion-physical-expr-common",
"indexmap 2.12.1",
+ "itertools 0.14.0",
"paste",
"serde_json",
"sqlparser",
@@ -2088,9 +2107,9 @@ dependencies = [
[[package]]
name = "datafusion-expr-common"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6"
+checksum = "5ce2fb1b8c15c9ac45b0863c30b268c69dc9ee7a1ee13ecf5d067738338173dc"
dependencies = [
"arrow",
"datafusion-common",
@@ -2101,9 +2120,9 @@ dependencies = [
[[package]]
name = "datafusion-functions"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977"
+checksum = "794a9db7f7b96b3346fc007ff25e994f09b8f0511b4cf7dff651fadfe3ebb28f"
dependencies = [
"arrow",
"arrow-buffer",
@@ -2121,6 +2140,7 @@ dependencies = [
"itertools 0.14.0",
"log",
"md-5",
+ "num-traits",
"rand 0.9.2",
"regex",
"sha2",
@@ -2130,9 +2150,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf"
+checksum = "1c25210520a9dcf9c2b2cbbce31ebd4131ef5af7fc60ee92b266dc7d159cb305"
dependencies = [
"ahash 0.8.12",
"arrow",
@@ -2151,9 +2171,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17"
+checksum = "62f4a66f3b87300bb70f4124b55434d2ae3fe80455f3574701d0348da040b55d"
dependencies = [
"ahash 0.8.12",
"arrow",
@@ -2164,9 +2184,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3"
+checksum = "ae5c06eed03918dc7fe7a9f082a284050f0e9ecf95d72f57712d1496da03b8c4"
dependencies = [
"arrow",
"arrow-ord",
@@ -2174,6 +2194,7 @@ dependencies = [
"datafusion-doc",
"datafusion-execution",
"datafusion-expr",
+ "datafusion-expr-common",
"datafusion-functions",
"datafusion-functions-aggregate",
"datafusion-functions-aggregate-common",
@@ -2186,9 +2207,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981"
+checksum = "db4fed1d71738fbe22e2712d71396db04c25de4111f1ec252b8f4c6d3b25d7f5"
dependencies = [
"arrow",
"async-trait",
@@ -2202,9 +2223,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b"
+checksum = "1d92206aa5ae21892f1552b4d61758a862a70956e6fd7a95cb85db1de74bc6d1"
dependencies = [
"arrow",
"datafusion-common",
@@ -2220,9 +2241,9 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f"
+checksum = "53ae9bcc39800820d53a22d758b3b8726ff84a5a3e24cecef04ef4e5fdf1c7cc"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -2230,20 +2251,20 @@ dependencies = [
[[package]]
name = "datafusion-macros"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730"
+checksum = "1063ad4c9e094b3f798acee16d9a47bd7372d9699be2de21b05c3bd3f34ab848"
dependencies = [
- "datafusion-expr",
+ "datafusion-doc",
"quote",
"syn 2.0.111",
]
[[package]]
name = "datafusion-optimizer"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a"
+checksum = "9f35f9ec5d08b87fd1893a30c2929f2559c2f9806ca072d8fefca5009dc0f06a"
dependencies = [
"arrow",
"chrono",
@@ -2260,9 +2281,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6"
+checksum = "c30cc8012e9eedcb48bbe112c6eff4ae5ed19cf3003cb0f505662e88b7014c5d"
dependencies = [
"ahash 0.8.12",
"arrow",
@@ -2275,7 +2296,6 @@ dependencies = [
"hashbrown 0.14.5",
"indexmap 2.12.1",
"itertools 0.14.0",
- "log",
"parking_lot",
"paste",
"petgraph 0.8.3",
@@ -2283,9 +2303,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-adapter"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0"
+checksum = "7f9ff2dbd476221b1f67337699eff432781c4e6e1713d2aefdaa517dfbf79768"
dependencies = [
"arrow",
"datafusion-common",
@@ -2298,9 +2318,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-common"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3"
+checksum = "90da43e1ec550b172f34c87ec68161986ced70fd05c8d2a2add66eef9c276f03"
dependencies = [
"ahash 0.8.12",
"arrow",
@@ -2312,9 +2332,9 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c"
+checksum = "ce9804f799acd7daef3be7aaffe77c0033768ed8fdbf5fb82fc4c5f2e6bc14e6"
dependencies = [
"arrow",
"datafusion-common",
@@ -2326,14 +2346,13 @@ dependencies = [
"datafusion-physical-plan",
"datafusion-pruning",
"itertools 0.14.0",
- "log",
]
[[package]]
name = "datafusion-physical-plan"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c"
+checksum = "0acf0ad6b6924c6b1aa7d213b181e012e2d3ec0a64ff5b10ee6282ab0f8532ac"
dependencies = [
"ahash 0.8.12",
"arrow",
@@ -2362,12 +2381,11 @@ dependencies = [
[[package]]
name = "datafusion-pruning"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65"
+checksum = "ac2c2498a1f134a9e11a9f5ed202a2a7d7e9774bd9249295593053ea3be999db"
dependencies = [
"arrow",
- "arrow-schema",
"datafusion-common",
"datafusion-datasource",
"datafusion-expr-common",
@@ -2380,35 +2398,26 @@ dependencies = [
[[package]]
name = "datafusion-session"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d"
+checksum = "8f96eebd17555386f459037c65ab73aae8df09f464524c709d6a3134ad4f4776"
dependencies = [
- "arrow",
"async-trait",
- "dashmap",
"datafusion-common",
- "datafusion-common-runtime",
"datafusion-execution",
"datafusion-expr",
- "datafusion-physical-expr",
"datafusion-physical-plan",
- "datafusion-sql",
- "futures",
- "itertools 0.14.0",
- "log",
- "object_store",
"parking_lot",
- "tokio",
]
[[package]]
name = "datafusion-spark"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "613efb6666a7d42fcb922b90cd0daa2b25ea486d141350e5d3e86e46df28309a"
+checksum = "97a8d6fed24c80dd403dcc6afec33766a599d1b72575f222237f01429b2e58ba"
dependencies = [
"arrow",
+ "bigdecimal",
"chrono",
"crc32fast",
"datafusion-catalog",
@@ -2416,21 +2425,21 @@ dependencies = [
"datafusion-execution",
"datafusion-expr",
"datafusion-functions",
- "datafusion-macros",
"log",
+ "rand 0.9.2",
"sha1",
"url",
- "xxhash-rust",
]
[[package]]
name = "datafusion-sql"
-version = "50.3.0"
+version = "51.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b"
+checksum = "3fc195fe60634b2c6ccfd131b487de46dc30eccae8a3c35a13f136e7f440414f"
dependencies = [
"arrow",
"bigdecimal",
+ "chrono",
"datafusion-common",
"datafusion-expr",
"indexmap 2.12.1",
@@ -2842,6 +2851,12 @@ version = "0.3.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988"
+[[package]]
+name = "futures-timer"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24"
+
[[package]]
name = "futures-util"
version = "0.3.31"
@@ -3204,7 +3219,7 @@ dependencies = [
[[package]]
name = "iceberg"
version = "0.7.0"
-source =
"git+https://github.com/apache/iceberg-rust?rev=a667539#a6675394c0b15f90e70b907b0742d5be6ac39a3e"
+source =
"git+https://github.com/apache/iceberg-rust?rev=16906c1#16906c127d521395a789a9019350e467cc34d063"
dependencies = [
"anyhow",
"apache-avro",
@@ -3234,7 +3249,7 @@ dependencies = [
"murmur3",
"num-bigint",
"once_cell",
- "opendal 0.54.1",
+ "opendal",
"ordered-float 4.6.0",
"parquet",
"rand 0.8.5",
@@ -3248,8 +3263,7 @@ dependencies = [
"serde_json",
"serde_repr",
"serde_with",
- "strum 0.27.2",
- "thrift",
+ "strum",
"tokio",
"typed-builder",
"url",
@@ -3305,9 +3319,9 @@ checksum =
"7aedcccd01fc5fe81e6b489c15b247b8b0690feb23304303a9e560f37efc560a"
[[package]]
name = "icu_properties"
-version = "2.1.1"
+version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e93fcd3157766c0c8da2f8cff6ce651a31f0810eaa1c51ec363ef790bbb5fb99"
+checksum = "020bfc02fe870ec3a66d93e677ccca0562506e5872c650f893269e08615d74ec"
dependencies = [
"icu_collections",
"icu_locale_core",
@@ -3319,9 +3333,9 @@ dependencies = [
[[package]]
name = "icu_properties_data"
-version = "2.1.1"
+version = "2.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02845b3647bb045f1100ecd6480ff52f34c35f82d9880e029d329c21d1054899"
+checksum = "616c294cf8d725c6afcd8f55abc17c56464ef6211f9ed59cccffe534129c77af"
[[package]]
name = "icu_provider"
@@ -3500,9 +3514,9 @@ dependencies = [
[[package]]
name = "jiff-tzdb"
-version = "0.1.4"
+version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524"
+checksum = "68971ebff725b9e2ca27a601c5eb38a4c5d64422c4cbab0c535f248087eda5c2"
[[package]]
name = "jiff-tzdb-platform"
@@ -3685,9 +3699,9 @@ dependencies = [
[[package]]
name = "libz-rs-sys"
-version = "0.5.3"
+version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b484ba8d4f775eeca644c452a56650e544bf7e617f1d170fe7298122ead5222"
+checksum = "15413ef615ad868d4d65dce091cb233b229419c7c0c4bcaa746c0901c49ff39c"
dependencies = [
"zlib-rs",
]
@@ -3770,15 +3784,6 @@ version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
-[[package]]
-name = "lz4_flex"
-version = "0.11.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a"
-dependencies = [
- "twox-hash",
-]
-
[[package]]
name = "lz4_flex"
version = "0.12.0"
@@ -4011,16 +4016,6 @@ dependencies = [
"libm",
]
-[[package]]
-name = "num_cpus"
-version = "1.17.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
-dependencies = [
- "hermit-abi",
- "libc",
-]
-
[[package]]
name = "object"
version = "0.37.3"
@@ -4079,7 +4074,7 @@ dependencies = [
"chrono",
"futures",
"object_store",
- "opendal 0.55.0",
+ "opendal",
"pin-project",
"tokio",
]
@@ -4096,34 +4091,6 @@ version = "11.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
-[[package]]
-name = "opendal"
-version = "0.54.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42afda58fa2cf50914402d132cc1caacff116a85d10c72ab2082bb7c50021754"
-dependencies = [
- "anyhow",
- "backon",
- "base64",
- "bytes",
- "chrono",
- "crc32c",
- "futures",
- "getrandom 0.2.16",
- "http 1.4.0",
- "http-body 1.0.1",
- "log",
- "md-5",
- "percent-encoding",
- "quick-xml 0.38.4",
- "reqsign",
- "reqwest",
- "serde",
- "serde_json",
- "tokio",
- "uuid",
-]
-
[[package]]
name = "opendal"
version = "0.55.0"
@@ -4134,6 +4101,7 @@ dependencies = [
"backon",
"base64",
"bytes",
+ "crc32c",
"futures",
"getrandom 0.2.16",
"hdrs",
@@ -4144,6 +4112,7 @@ dependencies = [
"md-5",
"percent-encoding",
"quick-xml 0.38.4",
+ "reqsign",
"reqwest",
"serde",
"serde_json",
@@ -4223,9 +4192,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "56.2.0"
+version = "57.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27"
+checksum = "be3e4f6d320dd92bfa7d612e265d7d08bba0a240bab86af3425e1d255a511d89"
dependencies = [
"ahash 0.8.12",
"arrow-array",
@@ -4243,9 +4212,10 @@ dependencies = [
"futures",
"half",
"hashbrown 0.16.1",
- "lz4_flex 0.11.5",
- "num",
+ "lz4_flex",
"num-bigint",
+ "num-integer",
+ "num-traits",
"object_store",
"parquet-variant",
"parquet-variant-compute",
@@ -4263,9 +4233,9 @@ dependencies = [
[[package]]
name = "parquet-variant"
-version = "0.1.0"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a56bf96fdaf5f9392b447cf1e60bfe4b72149ab3309aa3855c02813cc89ad93f"
+checksum = "066f3e371a47d5b0bfd9491992f2400e4f155511d0c9bcfaca04bac7f1e33600"
dependencies = [
"arrow-schema",
"chrono",
@@ -4277,23 +4247,25 @@ dependencies = [
[[package]]
name = "parquet-variant-compute"
-version = "0.1.0"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bc7ce683368c9f2672379c12e35b5cd664e81964e234d891b4073a8355015ce7"
+checksum = "497db08281ed670f7e5119116b961282d50645a28b2afb97686f18e2e4034414"
dependencies = [
"arrow",
"arrow-schema",
"chrono",
"half",
+ "indexmap 2.12.1",
"parquet-variant",
"parquet-variant-json",
+ "uuid",
]
[[package]]
name = "parquet-variant-json"
-version = "0.1.0"
+version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af8bde078e883e197efe49d315bfa0ecf8f68879d32a2abf34eb09f40ea88f21"
+checksum = "9958f58a045ec273727651a3d07e32b382a7732f7cae86d26d80e9ec0acf2e3b"
dependencies = [
"arrow-schema",
"base64",
@@ -4891,6 +4863,12 @@ version = "0.8.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58"
+[[package]]
+name = "relative-path"
+version = "1.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba39f3699c378cd8970968dcbff9c43159ea4cfbd88d43c00b22f2ef10a435d2"
+
[[package]]
name = "rend"
version = "0.4.2"
@@ -4931,9 +4909,9 @@ dependencies = [
[[package]]
name = "reqwest"
-version = "0.12.24"
+version = "0.12.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f"
+checksum = "b6eff9328d40131d43bd911d42d79eb6a47312002a4daefc9e37f17e74a7701a"
dependencies = [
"base64",
"bytes",
@@ -5034,6 +5012,35 @@ dependencies = [
"byteorder",
]
+[[package]]
+name = "rstest"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f5a3193c063baaa2a95a33f03035c8a72b83d97a54916055ba22d35ed3839d49"
+dependencies = [
+ "futures-timer",
+ "futures-util",
+ "rstest_macros",
+]
+
+[[package]]
+name = "rstest_macros"
+version = "0.26.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c845311f0ff7951c5506121a9ad75aec44d083c31583b2ea5a30bcb0b0abba0"
+dependencies = [
+ "cfg-if",
+ "glob",
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "relative-path",
+ "rustc_version",
+ "syn 2.0.111",
+ "unicode-ident",
+]
+
[[package]]
name = "rust-ini"
version = "0.21.3"
@@ -5441,9 +5448,9 @@ dependencies = [
[[package]]
name = "simd-adler32"
-version = "0.3.7"
+version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe"
+checksum = "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2"
[[package]]
name = "simdutf8"
@@ -5496,9 +5503,9 @@ dependencies = [
[[package]]
name = "sqlparser"
-version = "0.58.0"
+version = "0.59.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c"
+checksum = "4591acadbcf52f0af60eafbb2c003232b2b4cd8de5f0e9437cb8b1b59046cc0f"
dependencies = [
"log",
"sqlparser_derive",
@@ -5533,32 +5540,13 @@ version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
-[[package]]
-name = "strum"
-version = "0.26.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
-
[[package]]
name = "strum"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf"
dependencies = [
- "strum_macros 0.27.2",
-]
-
-[[package]]
-name = "strum_macros"
-version = "0.26.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
-dependencies = [
- "heck",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 2.0.111",
+ "strum_macros",
]
[[package]]
@@ -5719,15 +5707,6 @@ dependencies = [
"windows-sys 0.59.0",
]
-[[package]]
-name = "threadpool"
-version = "1.8.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d050e60b33d41c19108b32cea32164033a9013fe3b46cbd4457559bfbf77afaa"
-dependencies = [
- "num_cpus",
-]
-
[[package]]
name = "thrift"
version = "0.17.0"
@@ -5736,9 +5715,7 @@ checksum =
"7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09"
dependencies = [
"byteorder",
"integer-encoding",
- "log",
"ordered-float 2.10.1",
- "threadpool",
]
[[package]]
@@ -5909,9 +5886,9 @@ dependencies = [
[[package]]
name = "toml_edit"
-version = "0.23.7"
+version = "0.23.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6485ef6d0d9b5d0ec17244ff7eb05310113c3f316f2d14200d4de56b3cb98f8d"
+checksum = "5d7cbc3b4b49633d57a0509303158ca50de80ae32c265093b24c414705807832"
dependencies = [
"indexmap 2.12.1",
"toml_datetime",
@@ -5945,9 +5922,9 @@ dependencies = [
[[package]]
name = "tower-http"
-version = "0.6.7"
+version = "0.6.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9cf146f99d442e8e68e585f5d798ccd3cad9a7835b917e09728880a862706456"
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
dependencies = [
"bitflags 2.10.0",
"bytes",
@@ -6656,12 +6633,6 @@ version = "0.13.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "66fee0b777b0f5ac1c69bb06d361268faafa61cd4682ae064a171c16c433e9e4"
-[[package]]
-name = "xxhash-rust"
-version = "0.8.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3"
-
[[package]]
name = "xz2"
version = "0.1.7"
@@ -6776,9 +6747,9 @@ dependencies = [
[[package]]
name = "zlib-rs"
-version = "0.5.3"
+version = "0.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36134c44663532e6519d7a6dfdbbe06f6f8192bde8ae9ed076e9b213f0e31df7"
+checksum = "51f936044d677be1a1168fae1d03b583a285a5dd9d8cbf7b24c23aa1fc775235"
[[package]]
name = "zstd"
diff --git a/native/Cargo.toml b/native/Cargo.toml
index e07616b92..554534cc2 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -31,16 +31,16 @@ license = "Apache-2.0"
edition = "2021"
# Comet uses the same minimum Rust version as DataFusion
-rust-version = "1.86"
+rust-version = "1.88"
[workspace.dependencies]
-arrow = { version = "56.2.0", features = ["prettyprint", "ffi", "chrono-tz"] }
+arrow = { version = "57.0.0", features = ["prettyprint", "ffi", "chrono-tz"] }
async-trait = { version = "0.1" }
bytes = { version = "1.10.0" }
-parquet = { version = "56.2.0", default-features = false, features =
["experimental"] }
-datafusion = { version = "50.3.0", default-features = false, features =
["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] }
-datafusion-datasource = { version = "50.3.0" }
-datafusion-spark = { version = "50.3.0" }
+parquet = { version = "57.0.0", default-features = false, features =
["experimental"] }
+datafusion = { version = "51.0.0", default-features = false, features =
["unicode_expressions", "crypto_expressions", "nested_expressions", "parquet"] }
+datafusion-datasource = { version = "51.0.0" }
+datafusion-spark = { version = "51.0.0" }
datafusion-comet-spark-expr = { path = "spark-expr" }
datafusion-comet-proto = { path = "proto" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
@@ -54,8 +54,7 @@ object_store = { version = "0.12.3", features = ["gcp",
"azure", "aws", "http"]
url = "2.2"
aws-config = "1.8.10"
aws-credential-types = "1.2.9"
-iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "a667539" }
-iceberg-datafusion = { git = "https://github.com/apache/iceberg-rust", rev =
"a667539" }
+iceberg = { git = "https://github.com/apache/iceberg-rust", rev = "16906c1" }
[profile.release]
debug = true
diff --git a/native/core/Cargo.toml b/native/core/Cargo.toml
index 39b706b1a..c79d60309 100644
--- a/native/core/Cargo.toml
+++ b/native/core/Cargo.toml
@@ -59,7 +59,7 @@ bytes = { workspace = true }
tempfile = "3.8.0"
itertools = "0.14.0"
paste = "1.0.14"
-datafusion = { workspace = true, features = ["parquet_encryption"] }
+datafusion = { workspace = true, features = ["parquet_encryption", "sql"] }
datafusion-datasource = { workspace = true }
datafusion-spark = { workspace = true }
once_cell = "1.18.0"
@@ -95,7 +95,7 @@ jni = { version = "0.21", features = ["invocation"] }
lazy_static = "1.4"
assertables = "9"
hex = "0.4.3"
-datafusion-functions-nested = { version = "50.3.0" }
+datafusion-functions-nested = { version = "51.0.0" }
[features]
backtrace = ["datafusion/backtrace"]
diff --git a/native/core/src/execution/planner.rs
b/native/core/src/execution/planner.rs
index cc9231047..570cb6c47 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -1442,7 +1442,7 @@ impl PhysicalPlanner {
vec![], // No struct columns to unnest
output_schema,
unnest_options,
- ));
+ )?);
Ok((
scans,
@@ -2230,7 +2230,7 @@ impl PhysicalPlanner {
partition_by,
sort_phy_exprs,
window_frame.into(),
- input_schema.as_ref(),
+ input_schema,
false, // TODO: Ignore nulls
false, // TODO: Spark does not support DISTINCT ... OVER
None,
diff --git a/native/core/src/parquet/encryption_support.rs
b/native/core/src/parquet/encryption_support.rs
index ff67a3fcb..4540c217d 100644
--- a/native/core/src/parquet/encryption_support.rs
+++ b/native/core/src/parquet/encryption_support.rs
@@ -54,7 +54,7 @@ impl EncryptionFactory for CometEncryptionFactory {
_options: &EncryptionFactoryOptions,
_schema: &SchemaRef,
_file_path: &Path,
- ) -> Result<Option<FileEncryptionProperties>, DataFusionError> {
+ ) -> Result<Option<Arc<FileEncryptionProperties>>, DataFusionError> {
Err(DataFusionError::NotImplemented(
"Comet does not support Parquet encryption yet."
.parse()
@@ -69,7 +69,7 @@ impl EncryptionFactory for CometEncryptionFactory {
&self,
options: &EncryptionFactoryOptions,
file_path: &Path,
- ) -> Result<Option<FileDecryptionProperties>, DataFusionError> {
+ ) -> Result<Option<Arc<FileDecryptionProperties>>, DataFusionError> {
let config: CometEncryptionConfig = options.to_extension_options()?;
let full_path: String = config.uri_base + file_path.as_ref();
diff --git a/native/core/src/parquet/parquet_exec.rs
b/native/core/src/parquet/parquet_exec.rs
index 0a95ec999..ec18d227f 100644
--- a/native/core/src/parquet/parquet_exec.rs
+++ b/native/core/src/parquet/parquet_exec.rs
@@ -122,7 +122,7 @@ pub(crate) fn init_datasource_exec(
object_store_url,
file_source,
)
- .with_projection(Some(projection_vector))
+ .with_projection_indices(Some(projection_vector))
.with_table_partition_cols(partition_fields)
.build()
}
diff --git a/native/core/src/parquet/read/column.rs
b/native/core/src/parquet/read/column.rs
index 9988a89f1..c2b64bba5 100644
--- a/native/core/src/parquet/read/column.rs
+++ b/native/core/src/parquet/read/column.rs
@@ -124,7 +124,7 @@ impl ColumnReader {
match desc.physical_type() {
PhysicalType::BOOLEAN => typed_reader!(BoolColumnReader, Boolean),
PhysicalType::INT32 => {
- if let Some(ref logical_type) = desc.logical_type() {
+ if let Some(ref logical_type) = desc.logical_type_ref() {
match logical_type {
lt @ LogicalType::Integer {
bit_width,
@@ -282,7 +282,7 @@ impl ColumnReader {
}
}
PhysicalType::INT64 => {
- if let Some(ref logical_type) = desc.logical_type() {
+ if let Some(ref logical_type) = desc.logical_type_ref() {
match logical_type {
lt @ LogicalType::Integer {
bit_width,
@@ -331,19 +331,19 @@ impl ColumnReader {
None
};
match unit {
- ParquetTimeUnit::MILLIS(_) => {
+ ParquetTimeUnit::MILLIS => {
typed_reader!(
Int64TimestampMillisColumnReader,
ArrowDataType::Timestamp(time_unit,
time_zone)
)
}
- ParquetTimeUnit::MICROS(_) => {
+ ParquetTimeUnit::MICROS => {
typed_reader!(
Int64TimestampMicrosColumnReader,
ArrowDataType::Timestamp(time_unit,
time_zone)
)
}
- ParquetTimeUnit::NANOS(_) => {
+ ParquetTimeUnit::NANOS => {
typed_reader!(
Int64TimestampNanosColumnReader,
ArrowDataType::Int64
@@ -390,7 +390,7 @@ impl ColumnReader {
PhysicalType::DOUBLE => typed_reader!(DoubleColumnReader, Float64),
PhysicalType::BYTE_ARRAY => {
- if let Some(logical_type) = desc.logical_type() {
+ if let Some(logical_type) = desc.logical_type_ref() {
match logical_type {
LogicalType::String =>
typed_reader!(StringColumnReader, Utf8),
//
https://github.com/apache/parquet-format/blob/master/LogicalTypes.md
@@ -403,13 +403,13 @@ impl ColumnReader {
}
}
PhysicalType::FIXED_LEN_BYTE_ARRAY => {
- if let Some(logical_type) = desc.logical_type() {
+ if let Some(logical_type) = desc.logical_type_ref() {
match logical_type {
LogicalType::Decimal {
precision,
scale: _,
} => {
- if !use_decimal_128 && precision <=
DECIMAL_MAX_INT_DIGITS {
+ if !use_decimal_128 && precision <=
&DECIMAL_MAX_INT_DIGITS {
typed_reader!(FLBADecimal32ColumnReader, Int32)
} else if !use_decimal_128
&& promotion_info.precision <=
DECIMAL_MAX_LONG_DIGITS
diff --git a/native/core/src/parquet/util/jni.rs
b/native/core/src/parquet/util/jni.rs
index 966351e71..2223f508f 100644
--- a/native/core/src/parquet/util/jni.rs
+++ b/native/core/src/parquet/util/jni.rs
@@ -30,7 +30,6 @@ use datafusion::execution::object_store::ObjectStoreUrl;
use object_store::path::Path;
use parquet::{
basic::{Encoding, LogicalType, TimeUnit, Type as PhysicalType},
- format::{MicroSeconds, MilliSeconds, NanoSeconds},
schema::types::{ColumnDescriptor, ColumnPath, PrimitiveTypeBuilder},
};
use url::{ParseError, Url};
@@ -185,9 +184,9 @@ fn convert_logical_type(
fn convert_time_unit(time_unit: jint) -> TimeUnit {
match time_unit {
- 0 => TimeUnit::MILLIS(MilliSeconds::new()),
- 1 => TimeUnit::MICROS(MicroSeconds::new()),
- 2 => TimeUnit::NANOS(NanoSeconds::new()),
+ 0 => TimeUnit::MILLIS,
+ 1 => TimeUnit::MICROS,
+ 2 => TimeUnit::NANOS,
_ => panic!("Invalid time unit id for Parquet: {time_unit}"),
}
}
diff --git a/native/spark-expr/Cargo.toml b/native/spark-expr/Cargo.toml
index 961c7bec0..b3a46fd91 100644
--- a/native/spark-expr/Cargo.toml
+++ b/native/spark-expr/Cargo.toml
@@ -45,6 +45,7 @@ arrow = {workspace = true}
criterion = { version = "0.7", features = ["async", "async_tokio",
"async_std"] }
rand = { workspace = true}
tokio = { version = "1", features = ["rt-multi-thread"] }
+datafusion = { workspace = true, features = ["sql"] }
[lib]
name = "datafusion_comet_spark_expr"
diff --git a/native/spark-expr/src/agg_funcs/avg.rs
b/native/spark-expr/src/agg_funcs/avg.rs
index e8b90b4f4..e746aaf6e 100644
--- a/native/spark-expr/src/agg_funcs/avg.rs
+++ b/native/spark-expr/src/agg_funcs/avg.rs
@@ -25,8 +25,7 @@ use arrow::compute::sum;
use arrow::datatypes::{DataType, Field, FieldRef};
use datafusion::common::{not_impl_err, Result, ScalarValue};
use datafusion::logical_expr::{
- type_coercion::aggregates::avg_return_type, Accumulator, AggregateUDFImpl,
EmitTo,
- GroupsAccumulator, ReversedUDAF, Signature,
+ Accumulator, AggregateUDFImpl, EmitTo, GroupsAccumulator, ReversedUDAF,
Signature,
};
use datafusion::physical_expr::expressions::format_state_name;
use std::{any::Any, sync::Arc};
@@ -36,6 +35,13 @@ use datafusion::logical_expr::function::{AccumulatorArgs,
StateFieldsArgs};
use datafusion::logical_expr::Volatility::Immutable;
use DataType::*;
+fn avg_return_type(_name: &str, data_type: &DataType) -> Result<DataType> { // `_name` is unused; only `data_type` decides the result
+ match data_type {
+ Float64 => Ok(Float64), // a Float64 input yields a Float64 average
+ _ => not_impl_err!("Avg return type for {data_type}"), // every other input type returns a not-implemented error
+ }
+}
+
/// AVG aggregate expression
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Avg {
diff --git a/native/spark-expr/src/agg_funcs/avg_decimal.rs
b/native/spark-expr/src/agg_funcs/avg_decimal.rs
index 9f7c82641..b3b2731d9 100644
--- a/native/spark-expr/src/agg_funcs/avg_decimal.rs
+++ b/native/spark-expr/src/agg_funcs/avg_decimal.rs
@@ -32,13 +32,28 @@ use std::{any::Any, sync::Arc};
use crate::utils::{build_bool_state, is_valid_decimal_precision, unlikely};
use arrow::array::ArrowNativeTypeOp;
-use arrow::datatypes::{MAX_DECIMAL128_FOR_EACH_PRECISION,
MIN_DECIMAL128_FOR_EACH_PRECISION};
+use arrow::datatypes::{
+ DECIMAL128_MAX_PRECISION, DECIMAL128_MAX_SCALE,
MAX_DECIMAL128_FOR_EACH_PRECISION,
+ MIN_DECIMAL128_FOR_EACH_PRECISION,
+};
use datafusion::logical_expr::function::{AccumulatorArgs, StateFieldsArgs};
-use datafusion::logical_expr::type_coercion::aggregates::avg_return_type;
use datafusion::logical_expr::Volatility::Immutable;
use num::{integer::div_ceil, Integer};
use DataType::*;
+fn avg_return_type(_name: &str, data_type: &DataType) -> Result<DataType> { // `_name` is unused; only `data_type` decides the result
+ match data_type {
+ Decimal128(precision, scale) => {
+ // In Spark, the result type is DECIMAL(min(38,precision+4),
min(38,scale+4)).
+ // Ref:
https://github.com/apache/spark/blob/fcf636d9eb8d645c24be3db2d599aba2d7e2955a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/Average.scala#L66
+ let new_precision = DECIMAL128_MAX_PRECISION.min(*precision + 4); // precision + 4, capped at DECIMAL128_MAX_PRECISION
+ let new_scale = DECIMAL128_MAX_SCALE.min(*scale + 4); // scale + 4, capped at DECIMAL128_MAX_SCALE
+ Ok(Decimal128(new_precision, new_scale))
+ }
+ _ => not_impl_err!("Avg return type for {data_type}"), // any non-Decimal128 input returns a not-implemented error
+ }
+}
+
/// AVG aggregate expression
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct AvgDecimal {
diff --git a/native/spark-expr/src/math_funcs/internal/checkoverflow.rs
b/native/spark-expr/src/math_funcs/internal/checkoverflow.rs
index 2276dba3d..9773a107a 100644
--- a/native/spark-expr/src/math_funcs/internal/checkoverflow.rs
+++ b/native/spark-expr/src/math_funcs/internal/checkoverflow.rs
@@ -136,7 +136,7 @@ impl PhysicalExpr for CheckOverflow {
);
let new_v: Option<i128> = v.and_then(|v| {
- Decimal128Type::validate_decimal_precision(v, precision)
+ Decimal128Type::validate_decimal_precision(v, precision,
scale)
.map(|_| v)
.ok()
});
diff --git a/native/spark-expr/src/math_funcs/internal/make_decimal.rs
b/native/spark-expr/src/math_funcs/internal/make_decimal.rs
index 338317520..f8ed31621 100644
--- a/native/spark-expr/src/math_funcs/internal/make_decimal.rs
+++ b/native/spark-expr/src/math_funcs/internal/make_decimal.rs
@@ -34,7 +34,7 @@ pub fn spark_make_decimal(
match &args[0] {
ColumnarValue::Scalar(v) => match v {
ScalarValue::Int64(n) =>
Ok(ColumnarValue::Scalar(ScalarValue::Decimal128(
- long_to_decimal(n, precision),
+ long_to_decimal(n, precision, scale),
precision,
scale,
))),
@@ -45,7 +45,7 @@ pub fn spark_make_decimal(
let arr = a.as_primitive::<Int64Type>();
let mut result = Decimal128Builder::new();
for v in arr.into_iter() {
- result.append_option(long_to_decimal(&v, precision))
+ result.append_option(long_to_decimal(&v, precision, scale))
}
let result_type = DataType::Decimal128(precision, scale);
@@ -61,9 +61,11 @@ pub fn spark_make_decimal(
/// Convert the input long to decimal with the given maximum precision. If
overflows, returns null
/// instead.
#[inline]
-fn long_to_decimal(v: &Option<i64>, precision: u8) -> Option<i128> {
+fn long_to_decimal(v: &Option<i64>, precision: u8, scale: i8) -> Option<i128> {
match v {
- Some(v) if validate_decimal_precision(*v as i128, precision).is_ok()
=> Some(*v as i128),
+ Some(v) if validate_decimal_precision(*v as i128, precision,
scale).is_ok() => {
+ Some(*v as i128)
+ }
_ => None,
}
}
diff --git
a/spark/src/main/scala/org/apache/comet/iceberg/IcebergReflection.scala
b/spark/src/main/scala/org/apache/comet/iceberg/IcebergReflection.scala
index e545f8e39..fc9fd8e6e 100644
--- a/spark/src/main/scala/org/apache/comet/iceberg/IcebergReflection.scala
+++ b/spark/src/main/scala/org/apache/comet/iceberg/IcebergReflection.scala
@@ -732,6 +732,7 @@ case class CometIcebergNativeScanMetadata(
nameMapping: Option[String],
tasks: java.util.List[_],
scanSchema: Any,
+ tableSchema: Any,
globalFieldIdMapping: Map[String, Int],
catalogProperties: Map[String, String],
fileFormat: String)
@@ -763,6 +764,7 @@ object CometIcebergNativeScanMetadata extends Logging {
table <- getTable(scan)
tasks <- getTasks(scan)
scanSchema <- getExpectedSchema(scan)
+ tableSchema <- getSchema(table)
} yield {
// nameMapping is optional - if it fails we just use None
val nameMapping = getTableProperties(table).flatMap { properties =>
@@ -787,6 +789,7 @@ object CometIcebergNativeScanMetadata extends Logging {
nameMapping = nameMapping,
tasks = tasks,
scanSchema = scanSchema,
+ tableSchema = tableSchema,
globalFieldIdMapping = globalFieldIdMapping,
catalogProperties = catalogProperties,
fileFormat = fileFormat)
diff --git
a/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala
b/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala
index 24676bfe3..61729e92a 100644
---
a/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala
+++
b/spark/src/main/scala/org/apache/comet/serde/operator/CometIcebergNativeScan.scala
@@ -21,6 +21,10 @@ package org.apache.comet.serde.operator
import scala.jdk.CollectionConverters._
+import org.json4s._
+import org.json4s.JsonDSL._
+import org.json4s.jackson.JsonMethods._
+
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.expressions._
import org.apache.spark.sql.comet.{CometBatchScanExec, CometNativeExec}
@@ -64,6 +68,84 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
}
}
+ /**
+ * Converts an Iceberg partition value to JSON format expected by iceberg-rust.
+ *
+ * iceberg-rust's Literal::try_from_json() expects specific formats for certain types:
+ * - Timestamps: ISO string format "yyyy-MM-dd'T'HH:mm:ss.SSSSSS"
+ * - Dates: ISO string format "YYYY-MM-DD"
+ * - Decimals: String representation
+ *
+ * How each type string is handled by this method:
+ * - starts with "timestamp": the value (Long, Integer, or anything whose toString parses as a
+ *   Long) is read as microseconds since the epoch and formatted in UTC with six fractional
+ *   digits, producing a JSON string.
+ * - "date": the value is cast to java.lang.Integer, read as days since the epoch, and emitted
+ *   as the LocalDate string; a value of any other class fails the cast.
+ * - starts with "decimal(", or is "string" or "uuid": emitted as a JSON string of toString.
+ * - "int" / "long": Integer and Long become JInt; anything else is parsed from toString into a
+ *   JDecimal.
+ * - "float" / "double": NaN and infinite values become JSON strings; other Float/Double values
+ *   become JDouble; anything else is parsed from toString into a JDecimal.
+ * - "boolean": Boolean becomes JBool; anything else is parsed with toString.toBoolean.
+ * - any other type string: the JSON type is inferred from the value's runtime Java class, and
+ *   classes not listed in the fallback are emitted as a JSON string of toString.
+ *
+ * NOTE(review): the "timestamp" prefix match accepts every type string that begins with
+ * "timestamp" and treats the value as microseconds with no offset suffix in the output;
+ * confirm that this is correct for zoned or nanosecond timestamp variants.
+ *
+ * See: iceberg-rust/crates/iceberg/src/spec/values/literal.rs
+ *
+ * @param fieldTypeStr
+ *   string form of the Iceberg field type, used to pick the JSON encoding
+ * @param value
+ *   the partition value; it is dereferenced without a null check here, and the call sites in
+ *   this file substitute JNull for null before calling
+ * @return
+ *   the JSON value to place in the partition-data object
+ */
+ private def partitionValueToJson(fieldTypeStr: String, value: Any): JValue =
{
+ fieldTypeStr match {
+ case t if t.startsWith("timestamp") =>
+ val micros = value match {
+ case l: java.lang.Long => l.longValue()
+ case i: java.lang.Integer => i.longValue()
+ case _ => value.toString.toLong
+ }
+ val instant = java.time.Instant.ofEpochSecond(micros / 1000000,
(micros % 1000000) * 1000)
+ val formatted = java.time.format.DateTimeFormatter
+ .ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSSSS")
+ .withZone(java.time.ZoneOffset.UTC)
+ .format(instant)
+ JString(formatted)
+
+ case "date" =>
+ val days = value.asInstanceOf[java.lang.Integer].intValue()
+ val localDate = java.time.LocalDate.ofEpochDay(days.toLong)
+ JString(localDate.toString)
+
+ case d if d.startsWith("decimal(") =>
+ JString(value.toString)
+
+ case "string" =>
+ JString(value.toString)
+
+ case "int" | "long" =>
+ value match {
+ case i: java.lang.Integer => JInt(BigInt(i.intValue()))
+ case l: java.lang.Long => JInt(BigInt(l.longValue()))
+ case _ => JDecimal(BigDecimal(value.toString))
+ }
+
+ case "float" | "double" =>
+ value match {
+ // NaN/Infinity are not valid JSON numbers - serialize as strings
+ case f: java.lang.Float if f.isNaN || f.isInfinite =>
+ JString(f.toString)
+ case d: java.lang.Double if d.isNaN || d.isInfinite =>
+ JString(d.toString)
+ case f: java.lang.Float => JDouble(f.doubleValue())
+ case d: java.lang.Double => JDouble(d.doubleValue())
+ case _ => JDecimal(BigDecimal(value.toString))
+ }
+
+ case "boolean" =>
+ value match {
+ case b: java.lang.Boolean => JBool(b.booleanValue())
+ case _ => JBool(value.toString.toBoolean)
+ }
+
+ case "uuid" =>
+ JString(value.toString)
+
+ // Fallback: infer JSON type from Java type
+ case _ =>
+ value match {
+ case s: String => JString(s)
+ case i: java.lang.Integer => JInt(BigInt(i.intValue()))
+ case l: java.lang.Long => JInt(BigInt(l.longValue()))
+ case d: java.lang.Double => JDouble(d.doubleValue())
+ case f: java.lang.Float => JDouble(f.doubleValue())
+ case b: java.lang.Boolean => JBool(b.booleanValue())
+ case other => JString(other.toString)
+ }
+ }
+ }
+
/**
* Helper to extract a literal from an Iceberg expression and build a binary
predicate.
*/
@@ -230,8 +312,6 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
// metadata (e.g., "schema-id") that iceberg-rust's StructType
// deserializer rejects. We need pure StructType format:
// {"type":"struct","fields":[...]}
- import org.json4s.JsonDSL._
- import org.json4s.jackson.JsonMethods._
// Filter out fields with unknown types (dropped partition
fields).
// Unknown type fields represent partition columns that have
been dropped
@@ -286,8 +366,9 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
// mechanism for providing partition values to identity-transformed
// partition columns. Non-identity transforms (bucket, truncate,
days,
// etc.) read values from data files.
- import org.json4s._
- import org.json4s.jackson.JsonMethods._
+ //
+ // IMPORTANT: Use the same field IDs as partition_type_json
(partition field IDs,
+ // not source field IDs) so that JSON deserialization matches
correctly.
// Filter out fields with unknown type (same as partition type
filtering)
val partitionDataMap: Map[String, JValue] =
@@ -298,6 +379,7 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
if (fieldTypeStr == IcebergReflection.TypeNames.UNKNOWN) {
None
} else {
+ // Use the partition type's field ID (same as in
partition_type_json)
val fieldIdMethod = field.getClass.getMethod("fieldId")
val fieldId = fieldIdMethod.invoke(field).asInstanceOf[Int]
@@ -305,26 +387,11 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
partitionData.getClass.getMethod("get", classOf[Int],
classOf[Class[_]])
val value = getMethod.invoke(partitionData,
Integer.valueOf(idx), classOf[Object])
- val jsonValue: JValue = if (value == null) {
+ val jsonValue = if (value == null) {
JNull
} else {
- value match {
- case s: String => JString(s)
- // NaN/Infinity are not valid JSON number literals per the
- // JSON spec. Serialize as strings (e.g., "NaN",
"Infinity")
- // which are valid JSON and can be parsed by Rust's
- // f32/f64::from_str().
- case f: java.lang.Float if f.isNaN || f.isInfinite =>
- JString(f.toString)
- case d: java.lang.Double if d.isNaN || d.isInfinite =>
- JString(d.toString)
- case n: Number => JDecimal(BigDecimal(n.toString))
- case b: java.lang.Boolean =>
- JBool(b.booleanValue())
- case other => JString(other.toString)
- }
+ partitionValueToJson(fieldTypeStr, value)
}
-
Some(fieldId.toString -> jsonValue)
}
}.toMap
@@ -649,8 +716,6 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
val partitionSpec = specMethod.invoke(task)
// Build JSON representation of partition values
using json4s
- import org.json4s._
- import org.json4s.jackson.JsonMethods._
val partitionMap =
scala.collection.mutable.Map[String, JValue]()
@@ -682,22 +747,30 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
val sourceFieldId =
sourceIdMethod.invoke(partitionField).asInstanceOf[Int]
- // Convert value to appropriate JValue type
- val jsonValue: JValue = if (value == null) {
+ val jsonValue = if (value == null) {
JNull
} else {
- value match {
- case s: String => JString(s)
- case i: java.lang.Integer =>
JInt(BigInt(i.intValue()))
- case l: java.lang.Long =>
JInt(BigInt(l.longValue()))
- case d: java.lang.Double =>
JDouble(d.doubleValue())
- case f: java.lang.Float =>
JDouble(f.doubleValue())
- case b: java.lang.Boolean =>
JBool(b.booleanValue())
- case n: Number =>
JDecimal(BigDecimal(n.toString))
- case other => JString(other.toString)
- }
+ // Get field type from schema to serialize
correctly
+ val fieldTypeStr =
+ try {
+ val findFieldMethod =
+ metadata.tableSchema.getClass
+ .getMethod("findField", classOf[Int])
+ val field = findFieldMethod.invoke(
+ metadata.tableSchema,
+ sourceFieldId.asInstanceOf[Object])
+ if (field != null) {
+ val typeMethod =
field.getClass.getMethod("type")
+ typeMethod.invoke(field).toString
+ } else {
+ "unknown"
+ }
+ } catch {
+ case _: Exception => "unknown"
+ }
+
+ partitionValueToJson(fieldTypeStr, value)
}
-
partitionMap(sourceFieldId.toString) =
jsonValue
}
}
@@ -734,12 +807,47 @@ object CometIcebergNativeScan extends
CometOperatorSerde[CometBatchScanExec] wit
IcebergReflection.getDeleteFilesFromTask(task,
fileScanTaskClass)
val hasDeletes = !deletes.isEmpty
- // Use pre-extracted scanSchema for schema evolution
support
+ // Schema to pass to iceberg-rust's FileScanTask.
+ // This is used by RecordBatchTransformer for field type lookups (e.g., in
+ // constants_map) and default value generation. The actual projection is
+ // controlled by project_field_ids.
+ //
+ // Schema selection logic:
+ // 1. If hasDeletes=true: Use taskSchema (file-specific schema) because
+ // delete files reference specific schema versions and we need exact schema
+ // matching for MOR.
+ // 2. Else if scanSchema contains columns not in tableSchema: Use scanSchema
+ // because this is a VERSION AS OF query reading a historical snapshot with
+ // a different schema (e.g., after column drop, scanSchema has old columns
+ // that tableSchema doesn't).
+ // 3. Else: Use tableSchema because scanSchema is the query OUTPUT schema
+ // (e.g., for aggregates like "SELECT count(*)", scanSchema only has
+ // aggregate fields and doesn't contain partition columns needed by
+ // constants_map).
val schema: AnyRef =
if (hasDeletes) {
taskSchema
} else {
- metadata.scanSchema.asInstanceOf[AnyRef]
+ // Check if scanSchema has columns that tableSchema doesn't have
+ // (VERSION AS OF case)
+ val scanSchemaFieldIds = IcebergReflection
+ .buildFieldIdMapping(metadata.scanSchema)
+ .values
+ .toSet
+ val tableSchemaFieldIds = IcebergReflection
+ .buildFieldIdMapping(metadata.tableSchema)
+ .values
+ .toSet
+ val hasHistoricalColumns =
+ scanSchemaFieldIds.exists(id =>
!tableSchemaFieldIds.contains(id))
+
+ if (hasHistoricalColumns) {
+ // VERSION AS OF: scanSchema has columns that
current table doesn't have
+ metadata.scanSchema.asInstanceOf[AnyRef]
+ } else {
+ // Regular query: use tableSchema for partition
field lookups
+ metadata.tableSchema.asInstanceOf[AnyRef]
+ }
}
// scalastyle:off classforname
diff --git
a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
index d1b9197ff..8a666dc76 100644
--- a/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometIcebergNativeSuite.scala
@@ -2101,6 +2101,147 @@ class CometIcebergNativeSuite extends CometTestBase {
}
}
+ // Test to reproduce "Field X not found in schema" errors.
+ // Mimics TestAggregatePushDown.testNaN() where aggregate output schema differs from table schema.
+ test("partitioned table with aggregates - reproduces Field not found error")
{
+ assume(icebergAvailable, "Iceberg not available in classpath")
+
+ withTempIcebergDir { warehouseDir =>
+ withSQLConf(
+ "spark.sql.catalog.test_cat" ->
"org.apache.iceberg.spark.SparkCatalog",
+ "spark.sql.catalog.test_cat.type" -> "hadoop",
+ "spark.sql.catalog.test_cat.warehouse" -> warehouseDir.getAbsolutePath,
+ CometConf.COMET_ENABLED.key -> "true",
+ CometConf.COMET_EXEC_ENABLED.key -> "true",
+ CometConf.COMET_ICEBERG_NATIVE_ENABLED.key -> "true") {
+
+ // Create table partitioned by id, like TestAggregatePushDown.testNaN
+ spark.sql("""
+ CREATE TABLE test_cat.db.agg_test (
+ id INT,
+ data FLOAT
+ ) USING iceberg
+ PARTITIONED BY (id)
+ """)
+
+ spark.sql("""
+ INSERT INTO test_cat.db.agg_test VALUES
+ (1, CAST('NaN' AS FLOAT)),
+ (1, CAST('NaN' AS FLOAT)),
+ (2, 2.0),
+ (2, CAST('NaN' AS FLOAT)),
+ (3, CAST('NaN' AS FLOAT)),
+ (3, 1.0)
+ """)
+
+ // This aggregate query's output schema is completely different from table schema.
+ // When iceberg-rust tries to look up partition field 'id' (field 1 in table schema),
+ // it needs to find it in the full table schema, not the aggregate output schema.
+ checkIcebergNativeScan(
+ "SELECT count(*), max(data), min(data), count(data) FROM
test_cat.db.agg_test")
+
+ spark.sql("DROP TABLE test_cat.db.agg_test")
+ }
+ }
+ }
+
+ test("MOR partitioned table with timestamp_ntz - reproduces NULL partition
issue") {
+ assume(icebergAvailable, "Iceberg not available in classpath")
+
+ withTempIcebergDir { warehouseDir =>
+ withSQLConf(
+ "spark.sql.catalog.test_cat" ->
"org.apache.iceberg.spark.SparkCatalog",
+ "spark.sql.catalog.test_cat.type" -> "hadoop",
+ "spark.sql.catalog.test_cat.warehouse" -> warehouseDir.getAbsolutePath,
+ CometConf.COMET_ENABLED.key -> "true",
+ CometConf.COMET_EXEC_ENABLED.key -> "true",
+ CometConf.COMET_ICEBERG_NATIVE_ENABLED.key -> "true") {
+
+ // Create partitioned table like
TestRewritePositionDeleteFiles.testTimestampNtz
+ spark.sql("""
+ CREATE TABLE test_cat.db.timestamp_ntz_partition_test (
+ id LONG,
+ ts TIMESTAMP_NTZ,
+ c1 STRING,
+ c2 STRING
+ ) USING iceberg
+ PARTITIONED BY (ts)
+ TBLPROPERTIES (
+ 'format-version' = '2',
+ 'write.delete.mode' = 'merge-on-read',
+ 'write.merge.mode' = 'merge-on-read'
+ )
+ """)
+
+ // Insert data into multiple partitions
+ spark.sql("""
+ INSERT INTO test_cat.db.timestamp_ntz_partition_test
+ VALUES
+ (1, TIMESTAMP_NTZ '2023-01-01 15:30:00', 'a', 'b'),
+ (2, TIMESTAMP_NTZ '2023-01-02 15:30:00', 'c', 'd'),
+ (3, TIMESTAMP_NTZ '2023-01-03 15:30:00', 'e', 'f')
+ """)
+
+ // Delete some rows to create position delete files
+ spark.sql("DELETE FROM test_cat.db.timestamp_ntz_partition_test WHERE
id = 2")
+
+ // Query should work with NULL partition handling
+ checkIcebergNativeScan(
+ "SELECT * FROM test_cat.db.timestamp_ntz_partition_test ORDER BY id")
+
+ spark.sql("DROP TABLE test_cat.db.timestamp_ntz_partition_test")
+ }
+ }
+ }
+
+ test("MOR partitioned table with decimal - reproduces NULL partition issue")
{
+ assume(icebergAvailable, "Iceberg not available in classpath")
+
+ withTempIcebergDir { warehouseDir =>
+ withSQLConf(
+ "spark.sql.catalog.test_cat" ->
"org.apache.iceberg.spark.SparkCatalog",
+ "spark.sql.catalog.test_cat.type" -> "hadoop",
+ "spark.sql.catalog.test_cat.warehouse" -> warehouseDir.getAbsolutePath,
+ CometConf.COMET_ENABLED.key -> "true",
+ CometConf.COMET_EXEC_ENABLED.key -> "true",
+ CometConf.COMET_ICEBERG_NATIVE_ENABLED.key -> "true") {
+
+ // Create partitioned table like
TestRewritePositionDeleteFiles.testDecimalPartition
+ spark.sql("""
+ CREATE TABLE test_cat.db.decimal_partition_test (
+ id LONG,
+ dec DECIMAL(18, 10),
+ c1 STRING,
+ c2 STRING
+ ) USING iceberg
+ PARTITIONED BY (dec)
+ TBLPROPERTIES (
+ 'format-version' = '2',
+ 'write.delete.mode' = 'merge-on-read',
+ 'write.merge.mode' = 'merge-on-read'
+ )
+ """)
+
+ // Insert data into multiple partitions
+ spark.sql("""
+ INSERT INTO test_cat.db.decimal_partition_test
+ VALUES
+ (1, 1.0, 'a', 'b'),
+ (2, 2.0, 'c', 'd'),
+ (3, 3.0, 'e', 'f')
+ """)
+
+ // Delete some rows to create position delete files
+ spark.sql("DELETE FROM test_cat.db.decimal_partition_test WHERE id =
2")
+
+ // Query should work with NULL partition handling
+ checkIcebergNativeScan("SELECT * FROM
test_cat.db.decimal_partition_test ORDER BY id")
+
+ spark.sql("DROP TABLE test_cat.db.decimal_partition_test")
+ }
+ }
+ }
+
// Helper to create temp directory
def withTempIcebergDir(f: File => Unit): Unit = {
val dir = Files.createTempDirectory("comet-iceberg-test").toFile
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]