This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 23dccce40 feat: Upgrade to DataFusion 46.0.0-rc2 (#1423)
23dccce40 is described below
commit 23dccce40712306cfcfeb4e4b142dc64acf846d6
Author: Andy Grove <[email protected]>
AuthorDate: Wed Mar 5 11:43:54 2025 -0700
feat: Upgrade to DataFusion 46.0.0-rc2 (#1423)
---
native/Cargo.lock | 435 +++++++++++----------
native/Cargo.toml | 32 +-
native/core/benches/bloom_filter_agg.rs | 8 +-
native/core/benches/shuffle_writer.rs | 8 +-
.../src/execution/expressions/bloom_filter_agg.rs | 2 +-
native/core/src/execution/planner.rs | 38 +-
.../core/src/execution/shuffle/shuffle_writer.rs | 7 +-
native/core/src/parquet/mod.rs | 41 +-
native/core/src/parquet/schema_adapter.rs | 26 +-
native/spark-expr/benches/aggregate.rs | 8 +-
native/spark-expr/src/agg_funcs/stddev.rs | 5 +-
native/spark-expr/src/agg_funcs/sum_decimal.rs | 8 +-
native/spark-expr/src/agg_funcs/variance.rs | 2 +-
native/spark-expr/src/hash_funcs/sha2.rs | 9 +-
14 files changed, 346 insertions(+), 283 deletions(-)
diff --git a/native/Cargo.lock b/native/Cargo.lock
index ff339bf7a..3e5b2ea78 100644
--- a/native/Cargo.lock
+++ b/native/Cargo.lock
@@ -99,9 +99,9 @@ checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
[[package]]
name = "anyhow"
-version = "1.0.95"
+version = "1.0.96"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "34ac096ce696dc2fcabef30516bb13c0a68a11d30131d3df6f04711467681b04"
+checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4"
[[package]]
name = "arc-swap"
@@ -123,9 +123,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
[[package]]
name = "arrow"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6422e12ac345a0678d7a17e316238e3a40547ae7f92052b77bd86d5e0239f3fc"
+checksum = "dc208515aa0151028e464cc94a692156e945ce5126abd3537bb7fd6ba2143ed1"
dependencies = [
"arrow-arith",
"arrow-array",
@@ -144,9 +144,9 @@ dependencies = [
[[package]]
name = "arrow-arith"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23cf34bb1f48c41d3475927bcc7be498665b8e80b379b88f62a840337f8b8248"
+checksum = "e07e726e2b3f7816a85c6a45b6ec118eeeabf0b2a8c208122ad949437181f49a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -158,9 +158,9 @@ dependencies = [
[[package]]
name = "arrow-array"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fb4a06d507f54b70a277be22a127c8ffe0cec6cd98c0ad8a48e77779bbda8223"
+checksum = "a2262eba4f16c78496adfd559a29fe4b24df6088efc9985a873d58e92be022d5"
dependencies = [
"ahash",
"arrow-buffer",
@@ -175,9 +175,9 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d69d326d5ad1cb82dcefa9ede3fee8fdca98f9982756b16f9cb142f4aa6edc89"
+checksum = "4e899dade2c3b7f5642eb8366cfd898958bcca099cde6dfea543c7e8d3ad88d4"
dependencies = [
"bytes",
"half",
@@ -186,9 +186,9 @@ dependencies = [
[[package]]
name = "arrow-cast"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "626e65bd42636a84a238bed49d09c8777e3d825bf81f5087a70111c2831d9870"
+checksum = "4103d88c5b441525ed4ac23153be7458494c2b0c9a11115848fdb9b81f6f886a"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -207,9 +207,9 @@ dependencies = [
[[package]]
name = "arrow-csv"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "71c8f959f7a1389b1dbd883cdcd37c3ed12475329c111912f7f69dad8195d8c6"
+checksum = "43d3cb0914486a3cae19a5cad2598e44e225d53157926d0ada03c20521191a65"
dependencies = [
"arrow-array",
"arrow-cast",
@@ -223,9 +223,9 @@ dependencies = [
[[package]]
name = "arrow-data"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1858e7c7d01c44cf71c21a85534fd1a54501e8d60d1195d0d6fbcc00f4b10754"
+checksum = "0a329fb064477c9ec5f0870d2f5130966f91055c7c5bce2b3a084f116bc28c3b"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -235,9 +235,9 @@ dependencies = [
[[package]]
name = "arrow-ipc"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a6bb3f727f049884c7603f0364bc9315363f356b59e9f605ea76541847e06a1e"
+checksum = "ddecdeab02491b1ce88885986e25002a3da34dd349f682c7cfe67bab7cc17b86"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -249,9 +249,9 @@ dependencies = [
[[package]]
name = "arrow-json"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35de94f165ed8830aede72c35f238763794f0d49c69d30c44d49c9834267ff8c"
+checksum = "d03b9340013413eb84868682ace00a1098c81a5ebc96d279f7ebf9a4cac3c0fd"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -269,9 +269,9 @@ dependencies = [
[[package]]
name = "arrow-ord"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8aa06e5f267dc53efbacb933485c79b6fc1685d3ffbe870a16ce4e696fb429da"
+checksum = "f841bfcc1997ef6ac48ee0305c4dfceb1f7c786fe31e67c1186edf775e1f1160"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -282,9 +282,9 @@ dependencies = [
[[package]]
name = "arrow-row"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "66f1144bb456a2f9d82677bd3abcea019217e572fc8f07de5a7bac4b2c56eb2c"
+checksum = "1eeb55b0a0a83851aa01f2ca5ee5648f607e8506ba6802577afdda9d75cdedcd"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -295,18 +295,18 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "105f01ec0090259e9a33a9263ec18ff223ab91a0ea9fbc18042f7e38005142f6"
+checksum = "85934a9d0261e0fa5d4e2a5295107d743b543a6e0484a835d4b8db2da15306f9"
dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.0",
]
[[package]]
name = "arrow-select"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f690752fdbd2dee278b5f1636fefad8f2f7134c85e20fd59c4199e15a39a6807"
+checksum = "7e2932aece2d0c869dd2125feb9bd1709ef5c445daa3838ac4112dcfa0fda52c"
dependencies = [
"ahash",
"arrow-array",
@@ -318,9 +318,9 @@ dependencies = [
[[package]]
name = "arrow-string"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d0fff9cd745a7039b66c47ecaf5954460f9fa12eed628f65170117ea93e64ee0"
+checksum = "912e38bd6a7a7714c1d9b61df80315685553b7455e8a6045c27531d8ecd5b458"
dependencies = [
"arrow-array",
"arrow-buffer",
@@ -429,9 +429,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "bitflags"
-version = "2.8.0"
+version = "2.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
+checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd"
[[package]]
name = "blake2"
@@ -444,9 +444,9 @@ dependencies = [
[[package]]
name = "blake3"
-version = "1.5.5"
+version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8ee0c1824c4dea5b5f81736aff91bae041d2c07ee1192bec91054e10e3e601e"
+checksum = "675f87afced0413c9bb02843499dbbd3882a237645883f71a2b59644a6d2f753"
dependencies = [
"arrayref",
"arrayvec",
@@ -517,9 +517,9 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
[[package]]
name = "cc"
-version = "1.2.12"
+version = "1.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "755717a7de9ec452bf7f3f1a3099085deabd7f2962b861dae91ecd7a365903d2"
+checksum = "be714c154be609ec7f5dad223a33bf1482fff90472de28f7362806e6d4832b8c"
dependencies = [
"jobserver",
"libc",
@@ -642,18 +642,18 @@ dependencies = [
[[package]]
name = "clap"
-version = "4.5.28"
+version = "4.5.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e77c3243bd94243c03672cb5154667347c457ca271254724f9f393aee1c05ff"
+checksum = "027bb0d98429ae334a8698531da7077bdf906419543a35a55c2cb1b66437d767"
dependencies = [
"clap_builder",
]
[[package]]
name = "clap_builder"
-version = "4.5.27"
+version = "4.5.31"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b26884eb4b57140e4d2d93652abfa49498b938b3c9179f9fc487b0acc3edad7"
+checksum = "5589e0cba072e0f3d23791efac0fd8627b49c829c196a492e88168e6a669d863"
dependencies = [
"anstyle",
"clap_lex",
@@ -677,12 +677,11 @@ dependencies = [
[[package]]
name = "comfy-table"
-version = "7.1.3"
+version = "7.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24f165e7b643266ea80cb858aed492ad9280e3e05ce24d4a99d7d7b889b6a4d9"
+checksum = "4a65ebfec4fb190b6f90e944a817d60499ee0744e582530e2c9900a22e591d9a"
dependencies = [
- "strum",
- "strum_macros",
+ "unicode-segmentation",
"unicode-width",
]
@@ -838,9 +837,9 @@ dependencies = [
[[package]]
name = "csv-core"
-version = "0.1.11"
+version = "0.1.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
+checksum = "7d02f3b0da4c6504f86e9cd789d8dbafab48c2321be74e9987593de5a894d93d"
dependencies = [
"memchr",
]
@@ -861,26 +860,28 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eae420e7a5b0b7f1c39364cc76cbcd0f5fdc416b2514ae3847c2676bbd60702a"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
- "arrow-array",
"arrow-ipc",
"arrow-schema",
"async-trait",
"bytes",
"chrono",
"datafusion-catalog",
+ "datafusion-catalog-listing",
"datafusion-common",
"datafusion-common-runtime",
+ "datafusion-datasource",
"datafusion-execution",
"datafusion-expr",
+ "datafusion-expr-common",
"datafusion-functions",
"datafusion-functions-aggregate",
"datafusion-functions-table",
"datafusion-functions-window",
+ "datafusion-macros",
"datafusion-optimizer",
"datafusion-physical-expr",
"datafusion-physical-expr-common",
@@ -888,7 +889,6 @@ dependencies = [
"datafusion-physical-plan",
"datafusion-sql",
"futures",
- "glob",
"itertools 0.14.0",
"log",
"object_store",
@@ -905,9 +905,8 @@ dependencies = [
[[package]]
name = "datafusion-catalog"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f27987bc22b810939e8dfecc55571e9d50355d6ea8ec1c47af8383a76a6d0e1"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"async-trait",
@@ -921,7 +920,27 @@ dependencies = [
"itertools 0.14.0",
"log",
"parking_lot",
- "sqlparser",
+]
+
+[[package]]
+name = "datafusion-catalog-listing"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
+dependencies = [
+ "arrow",
+ "async-trait",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-datasource",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "futures",
+ "log",
+ "object_store",
+ "tokio",
]
[[package]]
@@ -1027,16 +1046,12 @@ dependencies = [
[[package]]
name = "datafusion-common"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e3f6d5b8c9408cc692f7c194b8aa0c0f9b253e065a8d960ad9cdc2a13e697602"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"ahash",
"arrow",
- "arrow-array",
- "arrow-buffer",
"arrow-ipc",
- "arrow-schema",
"base64",
"half",
"hashbrown 0.14.5",
@@ -1053,25 +1068,49 @@ dependencies = [
[[package]]
name = "datafusion-common-runtime"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d4603c8e8a4baf77660ab7074cc66fc15cc8a18f2ce9dfadb755fc6ee294e48"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
+dependencies = [
+ "log",
+ "tokio",
+]
+
+[[package]]
+name = "datafusion-datasource"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
+ "arrow",
+ "async-trait",
+ "bytes",
+ "chrono",
+ "datafusion-catalog",
+ "datafusion-common",
+ "datafusion-common-runtime",
+ "datafusion-execution",
+ "datafusion-expr",
+ "datafusion-physical-expr",
+ "datafusion-physical-expr-common",
+ "datafusion-physical-plan",
+ "futures",
+ "glob",
+ "itertools 0.14.0",
"log",
+ "object_store",
+ "rand",
"tokio",
+ "url",
]
[[package]]
name = "datafusion-doc"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5bf4bc68623a5cf231eed601ed6eb41f46a37c4d15d11a0bff24cbc8396cd66"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
[[package]]
name = "datafusion-execution"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "88b491c012cdf8e051053426013429a76f74ee3c2db68496c79c323ca1084d27"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"dashmap",
@@ -1088,9 +1127,8 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e5a181408d4fc5dc22f9252781a8f39f2d0e5d1b33ec9bde242844980a2689c1"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"chrono",
@@ -1108,21 +1146,20 @@ dependencies = [
[[package]]
name = "datafusion-expr-common"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1129b48e8534d8c03c6543bcdccef0b55c8ac0c1272a15a56c67068b6eb1885"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"datafusion-common",
+ "indexmap",
"itertools 0.14.0",
"paste",
]
[[package]]
name = "datafusion-functions"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6125874e4856dfb09b59886784fcb74cde5cfc5930b3a80a1a728ef7a010df6b"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"arrow-buffer",
@@ -1136,7 +1173,6 @@ dependencies = [
"datafusion-expr",
"datafusion-expr-common",
"datafusion-macros",
- "hashbrown 0.14.5",
"hex",
"itertools 0.14.0",
"log",
@@ -1150,14 +1186,11 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f3add7b1d3888e05e7c95f2b281af900ca69ebdcb21069ba679b33bde8b3b9d6"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"ahash",
"arrow",
- "arrow-buffer",
- "arrow-schema",
"datafusion-common",
"datafusion-doc",
"datafusion-execution",
@@ -1173,9 +1206,8 @@ dependencies = [
[[package]]
name = "datafusion-functions-aggregate-common"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6e18baa4cfc3d2f144f74148ed68a1f92337f5072b6dde204a0dbbdf3324989c"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"ahash",
"arrow",
@@ -1186,15 +1218,11 @@ dependencies = [
[[package]]
name = "datafusion-functions-nested"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ec5ee8cecb0dc370291279673097ddabec03a011f73f30d7f1096457127e03e"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
- "arrow-array",
- "arrow-buffer",
"arrow-ord",
- "arrow-schema",
"datafusion-common",
"datafusion-doc",
"datafusion-execution",
@@ -1210,9 +1238,8 @@ dependencies = [
[[package]]
name = "datafusion-functions-table"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c403ddd473bbb0952ba880008428b3c7febf0ed3ce1eec35a205db20efb2a36"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"async-trait",
@@ -1226,9 +1253,8 @@ dependencies = [
[[package]]
name = "datafusion-functions-window"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ab18c2fb835614d06a75f24a9e09136d3a8c12a92d97c95a6af316a1787a9c5"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"datafusion-common",
"datafusion-doc",
@@ -1243,9 +1269,8 @@ dependencies = [
[[package]]
name = "datafusion-functions-window-common"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a77b73bc15e7d1967121fdc7a55d819bfb9d6c03766a6c322247dce9094a53a4"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"datafusion-common",
"datafusion-physical-expr-common",
@@ -1253,9 +1278,8 @@ dependencies = [
[[package]]
name = "datafusion-macros"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09369b8d962291e808977cf94d495fd8b5b38647232d7ef562c27ac0f495b0af"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"datafusion-expr",
"quote",
@@ -1264,9 +1288,8 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2403a7e4a84637f3de7d8d4d7a9ccc0cc4be92d89b0161ba3ee5be82f0531c54"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
"chrono",
@@ -1282,15 +1305,11 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86ff72ac702b62dbf2650c4e1d715ebd3e4aab14e3885e72e8549e250307347c"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"ahash",
"arrow",
- "arrow-array",
- "arrow-buffer",
- "arrow-schema",
"datafusion-common",
"datafusion-expr",
"datafusion-expr-common",
@@ -1307,13 +1326,11 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr-common"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60982b7d684e25579ee29754b4333057ed62e2cc925383c5f0bd8cab7962f435"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"ahash",
"arrow",
- "arrow-buffer",
"datafusion-common",
"datafusion-expr-common",
"hashbrown 0.14.5",
@@ -1322,12 +1339,10 @@ dependencies = [
[[package]]
name = "datafusion-physical-optimizer"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ac5e85c189d5238a5cf181a624e450c4cd4c66ac77ca551d6f3ff9080bac90bb"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
- "arrow-schema",
"datafusion-common",
"datafusion-execution",
"datafusion-expr",
@@ -1335,22 +1350,17 @@ dependencies = [
"datafusion-physical-expr",
"datafusion-physical-expr-common",
"datafusion-physical-plan",
- "futures",
"itertools 0.14.0",
"log",
- "url",
]
[[package]]
name = "datafusion-physical-plan"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c36bf163956d7e2542657c78b3383fdc78f791317ef358a359feffcdb968106f"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"ahash",
"arrow",
- "arrow-array",
- "arrow-buffer",
"arrow-ord",
"arrow-schema",
"async-trait",
@@ -1375,13 +1385,10 @@ dependencies = [
[[package]]
name = "datafusion-sql"
-version = "45.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e13caa4daede211ecec53c78b13c503b592794d125f9a3cc3afe992edf9e7f43"
+version = "46.0.0"
+source = "git+https://github.com/apache/datafusion?rev=46.0.0-rc1#ec4862fa2d870fcab973fd1589ef99b6bf8d560f"
dependencies = [
"arrow",
- "arrow-array",
- "arrow-schema",
"bigdecimal",
"datafusion-common",
"datafusion-expr",
@@ -1441,9 +1448,9 @@ dependencies = [
[[package]]
name = "either"
-version = "1.13.0"
+version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
+checksum = "b7914353092ddf589ad78f25c5c1c21b7f80b0ff8621e7c814c3485b5306da9d"
[[package]]
name = "equator"
@@ -1467,9 +1474,9 @@ dependencies = [
[[package]]
name = "equivalent"
-version = "1.0.1"
+version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "errno"
@@ -1523,9 +1530,9 @@ dependencies = [
[[package]]
name = "flate2"
-version = "1.0.35"
+version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c936bfdafb507ebbf50b8074c54fa31c5be9a1e7e5f467dd659697041407d07c"
+checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
dependencies = [
"crc32fast",
"miniz_oxide",
@@ -2156,9 +2163,9 @@ dependencies = [
[[package]]
name = "libc"
-version = "0.2.169"
+version = "0.2.170"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
+checksum = "875b3680cb2f8f71bdcf9a30f38d48282f5d3c95cbf9b3fa57269bb5d5c06828"
[[package]]
name = "libloading"
@@ -2204,9 +2211,9 @@ checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
[[package]]
name = "litemap"
-version = "0.7.4"
+version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
+checksum = "23fb14cb19457329c82206317a5663005a4d404783dc74f4252769b0d5f42856"
[[package]]
name = "lock_api"
@@ -2220,9 +2227,9 @@ dependencies = [
[[package]]
name = "log"
-version = "0.4.25"
+version = "0.4.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
+checksum = "30bde2b3dc3671ae49d8e2e9f044c7c005836e7a023ee57cffa25ab82764bb9e"
dependencies = [
"serde",
]
@@ -2312,9 +2319,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "miniz_oxide"
-version = "0.8.3"
+version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b8402cab7aefae129c6977bb0ff1b8fd9a04eb5b51efc50a70bea51cda0c7924"
+checksum = "8e3e04debbb59698c15bacbb6d93584a8c0ca9cc3213cb423d31f760d8843ce5"
dependencies = [
"adler2",
]
@@ -2506,9 +2513,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "54.1.0"
+version = "54.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8a01a0efa30bbd601ae85b375c728efdb211ade54390281628a7b16708beb235"
+checksum = "f88838dca3b84d41444a0341b19f347e8098a3898b0f21536654b8b799e11abd"
dependencies = [
"ahash",
"arrow-array",
@@ -2537,7 +2544,7 @@ dependencies = [
"thrift",
"tokio",
"twox-hash 1.6.3",
- "zstd 0.13.2",
+ "zstd 0.13.3",
"zstd-sys",
]
@@ -2788,6 +2795,15 @@ dependencies = [
"prost 0.9.0",
]
+[[package]]
+name = "psm"
+version = "0.1.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f58e5423e24c18cc840e1c98370b3993c6649cd1678b4d24318bcf0a083cbe88"
+dependencies = [
+ "cc",
+]
+
[[package]]
name = "quick-xml"
version = "0.26.0"
@@ -2856,13 +2872,33 @@ dependencies = [
"crossbeam-utils",
]
+[[package]]
+name = "recursive"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e"
+dependencies = [
+ "recursive-proc-macro-impl",
+ "stacker",
+]
+
+[[package]]
+name = "recursive-proc-macro-impl"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b"
+dependencies = [
+ "quote",
+ "syn 2.0.98",
+]
+
[[package]]
name = "redox_syscall"
-version = "0.5.8"
+version = "0.5.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
+checksum = "82b568323e98e49e2a0899dcee453dd679fae22d69adf9b11dd508d1549b7e2f"
dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.0",
]
[[package]]
@@ -2930,7 +2966,7 @@ version = "0.38.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.0",
"errno",
"libc",
"linux-raw-sys",
@@ -2978,9 +3014,9 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]]
name = "serde"
-version = "1.0.217"
+version = "1.0.218"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70"
+checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
dependencies = [
"serde_derive",
]
@@ -2997,9 +3033,9 @@ dependencies = [
[[package]]
name = "serde_derive"
-version = "1.0.217"
+version = "1.0.218"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0"
+checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
dependencies = [
"proc-macro2",
"quote",
@@ -3008,9 +3044,9 @@ dependencies = [
[[package]]
name = "serde_json"
-version = "1.0.138"
+version = "1.0.139"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d434192e7da787e94a6ea7e9670b26a036d0ca41e0b7efb2676dd32bae872949"
+checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6"
dependencies = [
"itoa",
"memchr",
@@ -3077,9 +3113,9 @@ dependencies = [
[[package]]
name = "smallvec"
-version = "1.13.2"
+version = "1.14.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
+checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
[[package]]
name = "snafu"
@@ -3110,11 +3146,12 @@ checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b"
[[package]]
name = "sqlparser"
-version = "0.53.0"
+version = "0.54.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "05a528114c392209b3264855ad491fcce534b94a38771b0a0b97a79379275ce8"
+checksum = "c66e3b7374ad4a6af849b08b3e7a6eda0edbd82f0fd59b57e22671bf16979899"
dependencies = [
"log",
+ "recursive",
"sqlparser_derive",
]
@@ -3135,6 +3172,19 @@ version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3"
+[[package]]
+name = "stacker"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9156ebd5870ef293bfb43f91c7a74528d363ec0d424afe24160ed5a4343d08a"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "libc",
+ "psm",
+ "windows-sys 0.59.0",
+]
+
[[package]]
name = "static_assertions"
version = "1.1.0"
@@ -3147,25 +3197,6 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb"
-[[package]]
-name = "strum"
-version = "0.26.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
-
-[[package]]
-name = "strum_macros"
-version = "0.26.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be"
-dependencies = [
- "heck 0.5.0",
- "proc-macro2",
- "quote",
- "rustversion",
- "syn 2.0.98",
-]
-
[[package]]
name = "subtle"
version = "2.6.1"
@@ -3230,9 +3261,9 @@ dependencies = [
[[package]]
name = "tempfile"
-version = "3.16.0"
+version = "3.17.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "38c246215d7d24f48ae091a2902398798e05d978b24315d6efbc00ede9a8bb91"
+checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
dependencies = [
"cfg-if",
"fastrand",
@@ -3397,15 +3428,15 @@ dependencies = [
[[package]]
name = "typenum"
-version = "1.17.0"
+version = "1.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825"
+checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f"
[[package]]
name = "unicode-ident"
-version = "1.0.16"
+version = "1.0.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a210d160f08b701c8721ba1c726c11662f877ea6b7094007e1ca9a1041945034"
+checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
[[package]]
name = "unicode-segmentation"
@@ -3459,11 +3490,13 @@ checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
[[package]]
name = "uuid"
-version = "1.13.1"
+version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ced87ca4be083373936a67f8de945faa23b6b42384bd5b64434850802c6dccd0"
+checksum = "e0f540e3240398cce6128b64ba83fdbdd86129c16a3aa1a3a252efd66eb3d587"
dependencies = [
"getrandom 0.3.1",
+ "js-sys",
+ "wasm-bindgen",
]
[[package]]
@@ -3772,7 +3805,7 @@ version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.0",
]
[[package]]
@@ -3834,18 +3867,18 @@ dependencies = [
[[package]]
name = "zerofrom"
-version = "0.1.5"
+version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
+checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
dependencies = [
"zerofrom-derive",
]
[[package]]
name = "zerofrom-derive"
-version = "0.1.5"
+version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
+checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
dependencies = [
"proc-macro2",
"quote",
@@ -3886,9 +3919,9 @@ dependencies = [
[[package]]
name = "zstd"
-version = "0.13.2"
+version = "0.13.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fcf2b778a664581e31e389454a7072dab1647606d44f7feea22cd5abb9c9f3f9"
+checksum = "e91ee311a569c327171651566e07972200e76fcfe2242a4fa446149a3881c08a"
dependencies = [
"zstd-safe 7.2.1",
]
diff --git a/native/Cargo.toml b/native/Cargo.toml
index c233d4041..94115809d 100644
--- a/native/Cargo.toml
+++ b/native/Cargo.toml
@@ -31,26 +31,26 @@ license = "Apache-2.0"
edition = "2021"
# Comet uses the same minimum Rust version as DataFusion
-rust-version = "1.81"
+rust-version = "1.85"
[workspace.dependencies]
-arrow = { version = "54.1.0", features = ["prettyprint", "ffi", "chrono-tz"] }
-arrow-array = { version = "54.1.0" }
-arrow-buffer = { version = "54.1.0" }
-arrow-data = { version = "54.1.0" }
-arrow-schema = { version = "54.1.0" }
+arrow = { version = "54.2.0", features = ["prettyprint", "ffi", "chrono-tz"] }
+arrow-array = { version = "54.2.0" }
+arrow-buffer = { version = "54.2.0" }
+arrow-data = { version = "54.2.0" }
+arrow-schema = { version = "54.2.0" }
async-trait = { version = "0.1" }
bytes = { version = "1.10.0" }
-parquet = { version = "54.1.0", default-features = false, features =
["experimental"] }
-datafusion = { version = "45.0.0", default-features = false, features =
["unicode_expressions", "crypto_expressions"] }
-datafusion-common = { version = "45.0.0", default-features = false }
-datafusion-functions = { version = "45.0.0", default-features = false,
features = ["crypto_expressions"] }
-datafusion-functions-nested = { version = "45.0.0", default-features = false }
-datafusion-expr = { version = "45.0.0", default-features = false }
-datafusion-expr-common = { version = "45.0.0", default-features = false }
-datafusion-execution = { version = "45.0.0", default-features = false }
-datafusion-physical-plan = { version = "45.0.0", default-features = false }
-datafusion-physical-expr = { version = "45.0.0", default-features = false }
+parquet = { version = "54.2.0", default-features = false, features =
["experimental"] }
+datafusion = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false, features = ["unicode_expressions",
"crypto_expressions"] }
+datafusion-common = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false }
+datafusion-datasource = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false }
+datafusion-functions = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false, features = ["crypto_expressions"] }
+datafusion-functions-nested = { git = "https://github.com/apache/datafusion",
rev = "46.0.0-rc2", default-features = false }
+datafusion-expr = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false }
+datafusion-expr-common = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false }
+datafusion-execution = { git = "https://github.com/apache/datafusion", rev =
"46.0.0-rc2", default-features = false }
+datafusion-physical-expr = { git = "https://github.com/apache/datafusion", rev
= "46.0.0-rc2", default-features = false }
datafusion-comet-spark-expr = { path = "spark-expr", version = "0.7.0" }
datafusion-comet-proto = { path = "proto", version = "0.7.0" }
chrono = { version = "0.4", default-features = false, features = ["clock"] }
diff --git a/native/core/benches/bloom_filter_agg.rs
b/native/core/benches/bloom_filter_agg.rs
index b83ff3fad..8f479a618 100644
--- a/native/core/benches/bloom_filter_agg.rs
+++ b/native/core/benches/bloom_filter_agg.rs
@@ -21,9 +21,10 @@ use arrow_array::{ArrayRef, RecordBatch};
use arrow_schema::SchemaRef;
use comet::execution::expressions::bloom_filter_agg::BloomFilterAgg;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion::datasource::memory::MemorySourceConfig;
+use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_expr::PhysicalExpr;
use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode,
PhysicalGroupBy};
-use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_common::ScalarValue;
use datafusion_execution::TaskContext;
@@ -88,8 +89,9 @@ async fn agg_test(
mode: AggregateMode,
) {
let schema = &partitions[0][0].schema();
- let scan: Arc<dyn ExecutionPlan> =
- Arc::new(MemoryExec::try_new(partitions, Arc::clone(schema),
None).unwrap());
+ let scan: Arc<dyn ExecutionPlan> = Arc::new(DataSourceExec::new(Arc::new(
+ MemorySourceConfig::try_new(partitions, Arc::clone(schema),
None).unwrap(),
+ )));
let aggregate = create_aggregate(scan, c0.clone(), schema, aggregate_udf,
alias, mode);
let mut stream = aggregate
.execute(0, Arc::new(TaskContext::default()))
diff --git a/native/core/benches/shuffle_writer.rs
b/native/core/benches/shuffle_writer.rs
index 9e47949c9..944120684 100644
--- a/native/core/benches/shuffle_writer.rs
+++ b/native/core/benches/shuffle_writer.rs
@@ -20,9 +20,11 @@ use arrow_array::{builder::StringBuilder, RecordBatch};
use arrow_schema::{DataType, Field, Schema};
use comet::execution::shuffle::{CompressionCodec, ShuffleBlockWriter,
ShuffleWriterExec};
use criterion::{criterion_group, criterion_main, Criterion};
+use datafusion::datasource::memory::MemorySourceConfig;
+use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_plan::metrics::Time;
use datafusion::{
- physical_plan::{common::collect, memory::MemoryExec, ExecutionPlan},
+ physical_plan::{common::collect, ExecutionPlan},
prelude::SessionContext,
};
use datafusion_physical_expr::{expressions::Column, Partitioning};
@@ -88,7 +90,9 @@ fn create_shuffle_writer_exec(compression_codec:
CompressionCodec) -> ShuffleWri
let schema = batches[0].schema();
let partitions = &[batches];
ShuffleWriterExec::try_new(
- Arc::new(MemoryExec::try_new(partitions, schema, None).unwrap()),
+ Arc::new(DataSourceExec::new(Arc::new(
+ MemorySourceConfig::try_new(partitions, Arc::clone(&schema),
None).unwrap(),
+ ))),
Partitioning::Hash(vec![Arc::new(Column::new("a", 0))], 16),
compression_codec,
"/tmp/data.out".to_string(),
diff --git a/native/core/src/execution/expressions/bloom_filter_agg.rs
b/native/core/src/execution/expressions/bloom_filter_agg.rs
index ea8bb3647..66dbac58e 100644
--- a/native/core/src/execution/expressions/bloom_filter_agg.rs
+++ b/native/core/src/execution/expressions/bloom_filter_agg.rs
@@ -25,7 +25,7 @@ use arrow::array::ArrayRef;
use arrow_array::BinaryArray;
use datafusion::error::Result;
use datafusion::physical_expr::PhysicalExpr;
-use datafusion_common::{downcast_value, DataFusionError, ScalarValue};
+use datafusion_common::{downcast_value, ScalarValue};
use datafusion_expr::{
function::{AccumulatorArgs, StateFieldsArgs},
Accumulator, AggregateUDFImpl, Signature,
diff --git a/native/core/src/execution/planner.rs
b/native/core/src/execution/planner.rs
index 34074f2ca..bb1687607 100644
--- a/native/core/src/execution/planner.rs
+++ b/native/core/src/execution/planner.rs
@@ -77,8 +77,8 @@ use crate::execution::spark_plan::SparkPlan;
use crate::parquet::parquet_support::{register_object_store,
SparkParquetOptions};
use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
use datafusion::datasource::listing::PartitionedFile;
-use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder;
-use datafusion::datasource::physical_plan::FileScanConfig;
+use datafusion::datasource::physical_plan::{FileScanConfig, ParquetSource};
+use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_plan::coalesce_batches::CoalesceBatchesExec;
use datafusion::physical_plan::filter::FilterExec as DataFusionFilterExec;
use datafusion_comet_proto::{
@@ -1274,16 +1274,6 @@ impl PhysicalPlanner {
Field::new(field.name(), field.data_type().clone(),
field.is_nullable())
})
.collect_vec();
- let mut file_scan_config =
- FileScanConfig::new(object_store_url,
Arc::clone(&data_schema))
- .with_file_groups(file_groups)
- .with_table_partition_cols(partition_fields);
-
- assert_eq!(
- projection_vector.len(),
- required_schema.fields.len() +
partition_schema.fields.len()
- );
- file_scan_config =
file_scan_config.with_projection(Some(projection_vector));
let mut table_parquet_options = TableParquetOptions::new();
// TODO: Maybe these are configs?
@@ -1297,17 +1287,31 @@ impl PhysicalPlanner {
);
spark_parquet_options.allow_cast_unsigned_ints = true;
- let mut builder = ParquetExecBuilder::new(file_scan_config)
- .with_table_parquet_options(table_parquet_options)
+ let mut parquet_source =
ParquetSource::new(table_parquet_options)
.with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new(
spark_parquet_options,
)));
if let Some(filter) = cnf_data_filters {
- builder = builder.with_predicate(filter);
+ parquet_source =
+
parquet_source.with_predicate(Arc::clone(&data_schema), filter);
}
- let scan = builder.build();
+ let mut file_scan_config = FileScanConfig::new(
+ object_store_url,
+ Arc::clone(&data_schema),
+ Arc::new(parquet_source),
+ )
+ .with_file_groups(file_groups)
+ .with_table_partition_cols(partition_fields);
+
+ assert_eq!(
+ projection_vector.len(),
+ required_schema.fields.len() +
partition_schema.fields.len()
+ );
+ file_scan_config =
file_scan_config.with_projection(Some(projection_vector));
+
+ let scan = DataSourceExec::new(Arc::new(file_scan_config));
Ok((
vec![],
Arc::new(SparkPlan::new(spark_plan.plan_id,
Arc::new(scan), vec![])),
@@ -1604,8 +1608,8 @@ impl PhysicalPlanner {
let window_agg = Arc::new(BoundedWindowAggExec::try_new(
window_expr?,
Arc::clone(&child.native_plan),
- partition_exprs.to_vec(),
InputOrderMode::Sorted,
+ !partition_exprs.is_empty(),
)?);
Ok((
scans,
diff --git a/native/core/src/execution/shuffle/shuffle_writer.rs
b/native/core/src/execution/shuffle/shuffle_writer.rs
index 25c836a6a..8b34cd478 100644
--- a/native/core/src/execution/shuffle/shuffle_writer.rs
+++ b/native/core/src/execution/shuffle/shuffle_writer.rs
@@ -909,8 +909,9 @@ mod test {
use super::*;
use crate::execution::shuffle::read_ipc_compressed;
use arrow_schema::{DataType, Field, Schema};
+ use datafusion::datasource::memory::MemorySourceConfig;
+ use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_plan::common::collect;
- use datafusion::physical_plan::memory::MemoryExec;
use datafusion::prelude::SessionContext;
use datafusion_execution::config::SessionConfig;
use datafusion_execution::runtime_env::RuntimeEnvBuilder;
@@ -1152,7 +1153,9 @@ mod test {
let partitions = &[batches];
let exec = ShuffleWriterExec::try_new(
- Arc::new(MemoryExec::try_new(partitions, batch.schema(),
None).unwrap()),
+ Arc::new(DataSourceExec::new(Arc::new(
+ MemorySourceConfig::try_new(partitions, batch.schema(),
None).unwrap(),
+ ))),
Partitioning::Hash(vec![Arc::new(Column::new("a", 0))],
num_partitions),
CompressionCodec::Zstd(1),
"/tmp/data.out".to_string(),
diff --git a/native/core/src/parquet/mod.rs b/native/core/src/parquet/mod.rs
index b0a74864c..5f51d6273 100644
--- a/native/core/src/parquet/mod.rs
+++ b/native/core/src/parquet/mod.rs
@@ -51,8 +51,8 @@ use crate::parquet::schema_adapter::SparkSchemaAdapterFactory;
use arrow::buffer::{Buffer, MutableBuffer};
use arrow_array::{Array, RecordBatch};
use datafusion::datasource::listing::PartitionedFile;
-use datafusion::datasource::physical_plan::parquet::ParquetExecBuilder;
-use datafusion::datasource::physical_plan::FileScanConfig;
+use datafusion::datasource::physical_plan::{FileScanConfig, ParquetSource};
+use datafusion::datasource::source::DataSourceExec;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_comet_spark_expr::EvalMode;
use datafusion_common::config::TableParquetOptions;
@@ -666,18 +666,6 @@ pub unsafe extern "system" fn
Java_org_apache_comet_parquet_Native_initRecordBat
start + length,
);
partitioned_file.object_meta.location = object_store_path;
- // We build the file scan config with the *required* schema so that
the reader knows
- // the output schema we want
- let file_scan_config = FileScanConfig::new(object_store_url,
Arc::new(required_schema_arrow))
- .with_file(partitioned_file)
- // TODO: (ARROW NATIVE) - do partition columns in native
- // - will need partition schema and partition values to do so
- // .with_table_partition_cols(partition_fields)
- ;
- let mut table_parquet_options = TableParquetOptions::new();
- // TODO: Maybe these are configs?
- table_parquet_options.global.pushdown_filters = true;
- table_parquet_options.global.reorder_filters = true;
let session_timezone: String = env
.get_string(&JString::from_raw(session_timezone))
.unwrap()
@@ -687,16 +675,29 @@ pub unsafe extern "system" fn
Java_org_apache_comet_parquet_Native_initRecordBat
SparkParquetOptions::new(EvalMode::Legacy,
session_timezone.as_str(), false);
spark_parquet_options.allow_cast_unsigned_ints = true;
- let builder2 = ParquetExecBuilder::new(file_scan_config)
- .with_table_parquet_options(table_parquet_options)
-
.with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new(
- spark_parquet_options,
- )));
+ let mut table_parquet_options = TableParquetOptions::new();
+ // TODO: Maybe these are configs?
+ table_parquet_options.global.pushdown_filters = true;
+ table_parquet_options.global.reorder_filters = true;
+
+ let parquet_source =
ParquetSource::new(table_parquet_options).with_schema_adapter_factory(
+ Arc::new(SparkSchemaAdapterFactory::new(spark_parquet_options)),
+ );
+
+ // We build the file scan config with the *required* schema so that
the reader knows
+ // the output schema we want
+ let file_scan_config = FileScanConfig::new(object_store_url,
Arc::new(required_schema_arrow), Arc::new(parquet_source))
+ .with_file(partitioned_file)
+ // TODO: (ARROW NATIVE) - do partition columns in native
+ // - will need partition schema and partition values to do so
+ // .with_table_partition_cols(partition_fields)
+ ;
//TODO: (ARROW NATIVE) - predicate pushdown??
// builder = builder.with_predicate(filter);
- let scan = builder2.build();
+ let scan = Arc::new(DataSourceExec::new(Arc::new(file_scan_config)));
+
let ctx = TaskContext::default();
let partition_index: usize = 0;
batch_stream = Some(scan.execute(partition_index, Arc::new(ctx))?);
diff --git a/native/core/src/parquet/schema_adapter.rs
b/native/core/src/parquet/schema_adapter.rs
index b3150f282..879ef8670 100644
--- a/native/core/src/parquet/schema_adapter.rs
+++ b/native/core/src/parquet/schema_adapter.rs
@@ -279,12 +279,14 @@ mod test {
use arrow_array::UInt32Array;
use arrow_schema::SchemaRef;
use datafusion::datasource::listing::PartitionedFile;
- use datafusion::datasource::physical_plan::{FileScanConfig, ParquetExec};
+ use datafusion::datasource::physical_plan::{FileScanConfig, ParquetSource};
+ use datafusion::datasource::source::DataSourceExec;
use datafusion::execution::object_store::ObjectStoreUrl;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_comet_spark_expr::test_common::file_util::get_temp_filename;
use datafusion_comet_spark_expr::EvalMode;
+ use datafusion_common::config::TableParquetOptions;
use datafusion_common::DataFusionError;
use futures::StreamExt;
use parquet::arrow::ArrowWriter;
@@ -341,19 +343,23 @@ mod test {
writer.close()?;
let object_store_url = ObjectStoreUrl::local_filesystem();
- let file_scan_config = FileScanConfig::new(object_store_url,
required_schema)
- .with_file_groups(vec![vec![PartitionedFile::from_path(
- filename.to_string(),
- )?]]);
let mut spark_parquet_options =
SparkParquetOptions::new(EvalMode::Legacy, "UTC", false);
spark_parquet_options.allow_cast_unsigned_ints = true;
- let parquet_exec = ParquetExec::builder(file_scan_config)
-
.with_schema_adapter_factory(Arc::new(SparkSchemaAdapterFactory::new(
- spark_parquet_options,
- )))
- .build();
+ let parquet_source = Arc::new(
+
ParquetSource::new(TableParquetOptions::new()).with_schema_adapter_factory(Arc::new(
+ SparkSchemaAdapterFactory::new(spark_parquet_options),
+ )),
+ );
+
+ let file_scan_config =
+ FileScanConfig::new(object_store_url, required_schema,
parquet_source)
+ .with_file_groups(vec![vec![PartitionedFile::from_path(
+ filename.to_string(),
+ )?]]);
+
+ let parquet_exec = DataSourceExec::new(Arc::new(file_scan_config));
let mut stream = parquet_exec
.execute(0, Arc::new(TaskContext::default()))
diff --git a/native/spark-expr/benches/aggregate.rs
b/native/spark-expr/benches/aggregate.rs
index 051ac5eb6..e285295ed 100644
--- a/native/spark-expr/benches/aggregate.rs
+++ b/native/spark-expr/benches/aggregate.rs
@@ -20,12 +20,13 @@ use arrow_array::builder::{Decimal128Builder,
StringBuilder};
use arrow_array::{ArrayRef, RecordBatch};
use arrow_schema::SchemaRef;
use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use datafusion::datasource::memory::MemorySourceConfig;
+use datafusion::datasource::source::DataSourceExec;
use datafusion::execution::TaskContext;
use datafusion::functions_aggregate::average::avg_udaf;
use datafusion::functions_aggregate::sum::sum_udaf;
use datafusion::physical_expr::PhysicalExpr;
use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode,
PhysicalGroupBy};
-use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_comet_spark_expr::AvgDecimal;
use datafusion_comet_spark_expr::SumDecimal;
@@ -119,8 +120,9 @@ async fn agg_test(
alias: &str,
) {
let schema = &partitions[0][0].schema();
- let scan: Arc<dyn ExecutionPlan> =
- Arc::new(MemoryExec::try_new(partitions, Arc::clone(schema),
None).unwrap());
+ let scan: Arc<dyn ExecutionPlan> = Arc::new(DataSourceExec::new(Arc::new(
+ MemorySourceConfig::try_new(partitions, Arc::clone(schema),
None).unwrap(),
+ )));
let aggregate = create_aggregate(scan, c0.clone(), c1.clone(), schema,
aggregate_udf, alias);
let mut stream = aggregate
.execute(0, Arc::new(TaskContext::default()))
diff --git a/native/spark-expr/src/agg_funcs/stddev.rs
b/native/spark-expr/src/agg_funcs/stddev.rs
index 39dffa1c8..d6cea40a5 100644
--- a/native/spark-expr/src/agg_funcs/stddev.rs
+++ b/native/spark-expr/src/agg_funcs/stddev.rs
@@ -27,6 +27,7 @@ use datafusion_common::types::NativeType;
use datafusion_common::{internal_err, Result, ScalarValue};
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::{AggregateUDFImpl, Signature, Volatility};
+use datafusion_expr_common::signature::Coercion;
use datafusion_physical_expr::expressions::format_state_name;
use datafusion_physical_expr::expressions::StatsType;
@@ -56,11 +57,11 @@ impl Stddev {
Self {
name: name.into(),
signature: Signature::coercible(
- vec![
+ vec![Coercion::new_exact(
datafusion_expr_common::signature::TypeSignatureClass::Native(Arc::new(
NativeType::Float64,
)),
- ],
+ )],
Volatility::Immutable,
),
stats_type,
diff --git a/native/spark-expr/src/agg_funcs/sum_decimal.rs
b/native/spark-expr/src/agg_funcs/sum_decimal.rs
index f3f34d9bf..bba04dce2 100644
--- a/native/spark-expr/src/agg_funcs/sum_decimal.rs
+++ b/native/spark-expr/src/agg_funcs/sum_decimal.rs
@@ -465,9 +465,10 @@ mod tests {
use arrow::datatypes::*;
use arrow_array::builder::{Decimal128Builder, StringBuilder};
use arrow_array::RecordBatch;
+ use datafusion::datasource::memory::MemorySourceConfig;
+ use datafusion::datasource::source::DataSourceExec;
use datafusion::execution::TaskContext;
use datafusion::physical_plan::aggregates::{AggregateExec, AggregateMode,
PhysicalGroupBy};
- use datafusion::physical_plan::memory::MemoryExec;
use datafusion::physical_plan::ExecutionPlan;
use datafusion_common::Result;
use datafusion_expr::AggregateUDF;
@@ -495,8 +496,9 @@ mod tests {
let data_type = DataType::Decimal128(8, 2);
let schema = Arc::clone(&partitions[0][0].schema());
- let scan: Arc<dyn ExecutionPlan> =
- Arc::new(MemoryExec::try_new(partitions, Arc::clone(&schema),
None).unwrap());
+ let scan: Arc<dyn ExecutionPlan> =
Arc::new(DataSourceExec::new(Arc::new(
+ MemorySourceConfig::try_new(partitions, Arc::clone(&schema),
None).unwrap(),
+ )));
let aggregate_udf =
Arc::new(AggregateUDF::new_from_impl(SumDecimal::try_new(
data_type.clone(),
diff --git a/native/spark-expr/src/agg_funcs/variance.rs
b/native/spark-expr/src/agg_funcs/variance.rs
index 5520941b2..e897347be 100644
--- a/native/spark-expr/src/agg_funcs/variance.rs
+++ b/native/spark-expr/src/agg_funcs/variance.rs
@@ -22,7 +22,7 @@ use arrow::{
datatypes::{DataType, Field},
};
use datafusion::logical_expr::Accumulator;
-use datafusion_common::{downcast_value, DataFusionError, Result, ScalarValue};
+use datafusion_common::{downcast_value, Result, ScalarValue};
use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs};
use datafusion_expr::Volatility::Immutable;
use datafusion_expr::{AggregateUDFImpl, Signature};
diff --git a/native/spark-expr/src/hash_funcs/sha2.rs
b/native/spark-expr/src/hash_funcs/sha2.rs
index 40d8def3a..8b33dd37d 100644
--- a/native/spark-expr/src/hash_funcs/sha2.rs
+++ b/native/spark-expr/src/hash_funcs/sha2.rs
@@ -17,10 +17,11 @@
use crate::math_funcs::hex::hex_strings;
use arrow_array::{Array, StringArray};
+use arrow_schema::DataType;
use datafusion::functions::crypto::{sha224, sha256, sha384, sha512};
use datafusion_common::cast::as_binary_array;
use datafusion_common::{exec_err, DataFusionError, ScalarValue};
-use datafusion_expr::{ColumnarValue, ScalarUDF};
+use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
use std::sync::Arc;
/// `sha224` function that simulates Spark's `sha2` expression with bit width
224
@@ -53,7 +54,11 @@ fn wrap_digest_result_as_hex_string(
ColumnarValue::Array(array) => array.len(),
ColumnarValue::Scalar(_) => 1,
};
- let value = digest.invoke_batch(args, row_count)?;
+ let value = digest.invoke_with_args(ScalarFunctionArgs {
+ args: args.into(),
+ number_rows: row_count,
+ return_type: &DataType::Utf8,
+ })?;
match value {
ColumnarValue::Array(array) => {
let binary_array = as_binary_array(&array)?;
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]