This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git
The following commit(s) were added to refs/heads/master by this push:
new 72f0600 Upgrade to DataFusion 14.0.0 (#67)
72f0600 is described below
commit 72f06006ca78ab4bcfcfc10ea3d3f2c2e9e515e9
Author: Andy Grove <[email protected]>
AuthorDate: Tue Nov 8 11:44:17 2022 -0700
Upgrade to DataFusion 14.0.0 (#67)
* upgrade to DataFusion 14.0.0
* fmt
---
Cargo.lock | 133 +++++++++++++++++++++++++++++----------
Cargo.toml | 7 ++-
datafusion/tests/test_catalog.py | 6 +-
src/dataframe.rs | 4 +-
src/dataset_exec.rs | 5 +-
src/expression.rs | 15 ++---
src/functions.rs | 10 +--
src/pyarrow_filter_expression.rs | 8 +--
8 files changed, 128 insertions(+), 60 deletions(-)
diff --git a/Cargo.lock b/Cargo.lock
index fbce888..90bbf7c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -112,15 +112,16 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
[[package]]
name = "arrow"
-version = "24.0.0"
+version = "26.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d68391300d5237f6725f0f869ae7cb65d45fcf8a6d18f6ceecd328fb803bef93"
+checksum = "e24e2bcd431a4aa0ff003fdd2dc21c78cfb42f31459c89d2312c2746fe17a5ac"
dependencies = [
"ahash 0.8.0",
"arrow-array",
"arrow-buffer",
"arrow-data",
"arrow-schema",
+ "arrow-select",
"bitflags",
"chrono",
"comfy-table",
@@ -141,9 +142,9 @@ dependencies = [
[[package]]
name = "arrow-array"
-version = "24.0.0"
+version = "26.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0bb00c5862b5eea683812083c495bef01a9a5149da46ad2f4c0e4aa8800f64d"
+checksum = "c9044300874385f19e77cbf90911e239bd23630d8f23bb0f948f9067998a13b7"
dependencies = [
"ahash 0.8.0",
"arrow-buffer",
@@ -157,18 +158,19 @@ dependencies = [
[[package]]
name = "arrow-buffer"
-version = "24.0.0"
+version = "26.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e594d0fe0026a8bc2459bdc5ac9623e5fb666724a715e0acbc96ba30c5d4cc7"
+checksum = "78476cbe9e3f808dcecab86afe42d573863c63e149c62e6e379ed2522743e626"
dependencies = [
"half",
+ "num",
]
[[package]]
name = "arrow-data"
-version = "24.0.0"
+version = "26.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8500df05060d86fdc53e9b5cb32e51bfeaacc040fdeced3eb99ac0d59200ff45"
+checksum = "4d916feee158c485dad4f701cba31bc9a90a8db87d9df8e2aa8adc0c20a2bbb9"
dependencies = [
"arrow-buffer",
"arrow-schema",
@@ -178,9 +180,37 @@ dependencies = [
[[package]]
name = "arrow-schema"
-version = "24.0.0"
+version = "26.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f9406eb7834ca6bd8350d1baa515d18b9fcec487eddacfb62f5e19511f7bd37"
+
+[[package]]
+name = "arrow-select"
+version = "26.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86d1fef01f25e1452c86fa6887f078de8e0aaeeb828370feab205944cfc30e27"
+checksum = "6593a01586751c74498495d2f5a01fcd438102b52965c11dd98abf4ebcacef37"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "num",
+]
+
+[[package]]
+name = "async-compression"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
+dependencies = [
+ "bzip2",
+ "flate2",
+ "futures-core",
+ "futures-io",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
+]
[[package]]
name = "async-trait"
@@ -294,6 +324,27 @@ version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
+[[package]]
+name = "bzip2"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0"
+dependencies = [
+ "bzip2-sys",
+ "libc",
+]
+
+[[package]]
+name = "bzip2-sys"
+version = "0.1.11+1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
[[package]]
name = "cc"
version = "1.0.73"
@@ -479,15 +530,17 @@ dependencies = [
[[package]]
name = "datafusion"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2bdec06a3db088da76fc28cb0877b8b5438ca6b6025e04d975bace0fd85df19"
+checksum = "e7a8411475928479fe57af18698626f0a44f3c29153e051dce45f7455c08a6d5"
dependencies = [
"ahash 0.8.0",
"apache-avro",
"arrow",
+ "async-compression",
"async-trait",
"bytes",
+ "bzip2",
"chrono",
"datafusion-common",
"datafusion-expr",
@@ -495,6 +548,7 @@ dependencies = [
"datafusion-physical-expr",
"datafusion-row",
"datafusion-sql",
+ "flate2",
"futures",
"glob",
"hashbrown",
@@ -508,6 +562,7 @@ dependencies = [
"parking_lot",
"parquet",
"paste",
+ "percent-encoding",
"pin-project-lite",
"pyo3",
"rand 0.8.5",
@@ -516,18 +571,20 @@ dependencies = [
"tempfile",
"tokio",
"tokio-stream",
+ "tokio-util",
"url",
"uuid 1.2.1",
]
[[package]]
name = "datafusion-common"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "506eab038bf2d39ac02c22be30b019873ca01f887148b939d309a0e9523f4515"
+checksum = "15f1ffcbc1f040c9ab99f41db1c743d95aff267bb2e7286aaa010738b7402251"
dependencies = [
"apache-avro",
"arrow",
+ "chrono",
"object_store",
"ordered-float 3.2.0",
"parquet",
@@ -537,21 +594,22 @@ dependencies = [
[[package]]
name = "datafusion-expr"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3d2810e369c735d69479e27fe8410e97a76ed07484aa9b3ad7c039efa504257"
+checksum = "1883d9590d303ef38fa295567e7fdb9f8f5f511fcc167412d232844678cd295c"
dependencies = [
"ahash 0.8.0",
"arrow",
"datafusion-common",
+ "log",
"sqlparser",
]
[[package]]
name = "datafusion-optimizer"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60f3b80326243629d02e33f37e955a7114781c6c44caf9d8b254618157de7143"
+checksum = "2127d46d566ab3463d70da9675fc07b9d634be8d17e80d0e1ce79600709fe651"
dependencies = [
"arrow",
"async-trait",
@@ -565,27 +623,33 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9bf3b7ae861d351a85174fd4fddb29d249950b2f23671318971960452b4b9ab"
+checksum = "0d108b6fe8eeb317ecad1d74619e8758de49cccc8c771b56c97962fd52eaae23"
dependencies = [
"ahash 0.8.0",
"arrow",
+ "arrow-buffer",
+ "arrow-schema",
"blake2",
"blake3",
"chrono",
"datafusion-common",
"datafusion-expr",
"datafusion-row",
+ "half",
"hashbrown",
+ "itertools",
"lazy_static",
"md-5",
+ "num-traits",
"ordered-float 3.2.0",
"paste",
"rand 0.8.5",
"regex",
"sha2",
"unicode-segmentation",
+ "uuid 1.2.1",
]
[[package]]
@@ -596,6 +660,7 @@ dependencies = [
"datafusion",
"datafusion-common",
"datafusion-expr",
+ "datafusion-optimizer",
"futures",
"mimalloc",
"object_store",
@@ -607,9 +672,9 @@ dependencies = [
[[package]]
name = "datafusion-row"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f44a2a722719c569b437b3aa2108a99dc911369e8d86c44e6293225c3387147"
+checksum = "43537b6377d506e4788bf21e9ed943340e076b48ca4d077e6ea4405ca5e54a1c"
dependencies = [
"arrow",
"datafusion-common",
@@ -619,9 +684,9 @@ dependencies = [
[[package]]
name = "datafusion-sql"
-version = "13.0.0"
+version = "14.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e98493e04385c924d1d3d7ab8739c41f1ebf676a46863181103a0fb9c7168fa9"
+checksum = "244d08d4710e1088d9c0949c9b5b8d68d9cf2cde7203134a4cc389e870fe2354"
dependencies = [
"arrow",
"datafusion-common",
@@ -672,12 +737,11 @@ dependencies = [
[[package]]
name = "flatbuffers"
-version = "2.1.2"
+version = "22.9.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a"
+checksum = "8ce016b9901aef3579617931fbb2df8fc9a9f7cb95a16eb8acc8148209bb9e70"
dependencies = [
"bitflags",
- "smallvec",
"thiserror",
]
@@ -1470,9 +1534,9 @@ dependencies = [
[[package]]
name = "parquet"
-version = "24.0.0"
+version = "26.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74fd590f0672998df84503d1bcbebc69732583d03cc3495c7dd8d3e5a1d8437f"
+checksum = "3bf8fa7ab6572791325a8595f55dc532dde88b996ae10a5ca8a2db746784ecc4"
dependencies = [
"ahash 0.8.0",
"arrow",
@@ -1486,7 +1550,6 @@ dependencies = [
"lz4",
"num",
"num-bigint",
- "rand 0.8.5",
"seq-macro",
"snap",
"thrift",
@@ -1518,6 +1581,12 @@ version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
+[[package]]
+name = "pkg-config"
+version = "0.3.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+
[[package]]
name = "ppv-lite86"
version = "0.2.16"
@@ -1982,9 +2051,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
[[package]]
name = "sqlparser"
-version = "0.25.0"
+version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0781f2b6bd03e5adf065c8e772b49eaea9f640d06a1b9130330fe8bd2563f4fd"
+checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb"
dependencies = [
"log",
]
diff --git a/Cargo.toml b/Cargo.toml
index afdb9a9..b67cbc5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,9 +34,10 @@ default = ["mimalloc"]
tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
rand = "0.7"
pyo3 = { version = "~0.17.1", features = ["extension-module", "abi3", "abi3-py37"] }
-datafusion = { version = "^13.0.0", features = ["pyarrow", "avro"] }
-datafusion-expr = { version = "^13.0.0" }
-datafusion-common = { version = "^13.0.0", features = ["pyarrow"] }
+datafusion = { version = "^14.0.0", features = ["pyarrow", "avro"] }
+datafusion-expr = { version = "^14.0.0" }
+datafusion-optimizer = { version = "^14.0.0" }
+datafusion-common = { version = "^14.0.0", features = ["pyarrow"] }
uuid = { version = "0.8", features = ["v4"] }
mimalloc = { version = "*", optional = true, default-features = false }
async-trait = "0.1"
diff --git a/datafusion/tests/test_catalog.py b/datafusion/tests/test_catalog.py
index a9bdf72..214f6b1 100644
--- a/datafusion/tests/test_catalog.py
+++ b/datafusion/tests/test_catalog.py
@@ -33,8 +33,8 @@ def test_basic(ctx, database):
assert table.kind == "physical"
assert table.schema == pa.schema(
[
- pa.field("int", pa.int64(), nullable=False),
- pa.field("str", pa.string(), nullable=False),
- pa.field("float", pa.float64(), nullable=False),
+ pa.field("int", pa.int64(), nullable=True),
+ pa.field("str", pa.string(), nullable=True),
+ pa.field("float", pa.float64(), nullable=True),
]
)
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 997ba98..ab8f2c4 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -170,8 +170,8 @@ impl PyDataFrame {
"left" => JoinType::Left,
"right" => JoinType::Right,
"full" => JoinType::Full,
- "semi" => JoinType::Semi,
- "anti" => JoinType::Anti,
+ "semi" => JoinType::LeftSemi,
+ "anti" => JoinType::LeftAnti,
how => {
return Err(DataFusionError::Common(format!(
"The join type {} does not exist or is not implemented",
diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs
index 91b9942..9c41218 100644
--- a/src/dataset_exec.rs
+++ b/src/dataset_exec.rs
@@ -37,7 +37,8 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
use datafusion::physical_plan::{
DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics,
};
-use datafusion_expr::{combine_filters, Expr};
+use datafusion_expr::Expr;
+use datafusion_optimizer::utils::conjunction;
use crate::errors::DataFusionError;
use crate::pyarrow_filter_expression::PyArrowFilterExpression;
@@ -93,7 +94,7 @@ impl DatasetExec {
.collect()
});
let columns: Option<Vec<String>> = columns.transpose()?;
- let filter_expr: Option<PyObject> = combine_filters(filters)
+ let filter_expr: Option<PyObject> = conjunction(filters.to_owned())
.map(|filters| {
PyArrowFilterExpression::try_from(&filters)
.map(|filter_expr| filter_expr.inner().clone_ref(py))
diff --git a/src/expression.rs b/src/expression.rs
index 4e9eb50..2e8fb80 100644
--- a/src/expression.rs
+++ b/src/expression.rs
@@ -20,7 +20,7 @@ use std::convert::{From, Into};
use datafusion::arrow::datatypes::DataType;
use datafusion::arrow::pyarrow::PyArrowType;
-use datafusion_expr::{col, lit, Expr};
+use datafusion_expr::{col, lit, Cast, Expr, GetIndexedField};
use datafusion::scalar::ScalarValue;
@@ -94,10 +94,10 @@ impl PyExpr {
}
fn __getitem__(&self, key: &str) -> PyResult<PyExpr> {
- Ok(Expr::GetIndexedField {
- expr: Box::new(self.expr.clone()),
- key: ScalarValue::Utf8(Some(key.to_string())),
- }
+ Ok(Expr::GetIndexedField(GetIndexedField::new(
+ Box::new(self.expr.clone()),
+ ScalarValue::Utf8(Some(key.to_string())),
+ ))
.into())
}
@@ -129,10 +129,7 @@ impl PyExpr {
pub fn cast(&self, to: PyArrowType<DataType>) -> PyExpr {
// self.expr.cast_to() requires DFSchema to validate that the cast
// is supported, omit that for now
- let expr = Expr::Cast {
- expr: Box::new(self.expr.clone()),
- data_type: to.0,
- };
+ let expr = Expr::Cast(Cast::new(Box::new(self.expr.clone()), to.0));
expr.into()
}
}
diff --git a/src/functions.rs b/src/functions.rs
index ca2aca1..d7b1b9e 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -18,7 +18,7 @@
use pyo3::{prelude::*, wrap_pyfunction};
use datafusion::physical_plan::aggregates::AggregateFunction;
-use datafusion_expr::BuiltinScalarFunction;
+use datafusion_expr::{lit, BuiltinScalarFunction};
use crate::errors;
use crate::expression::PyExpr;
@@ -79,14 +79,14 @@ fn concat(args: Vec<PyExpr>) -> PyResult<PyExpr> {
#[pyfunction(sep, args = "*")]
fn concat_ws(sep: String, args: Vec<PyExpr>) -> PyResult<PyExpr> {
let args = args.into_iter().map(|e| e.expr).collect::<Vec<_>>();
- Ok(datafusion_expr::concat_ws(sep, &args).into())
+ Ok(datafusion_expr::concat_ws(lit(sep), args).into())
}
/// Creates a new Sort expression
#[pyfunction]
fn order_by(expr: PyExpr, asc: Option<bool>, nulls_first: Option<bool>) -> PyResult<PyExpr> {
Ok(PyExpr {
- expr: datafusion::logical_plan::Expr::Sort {
+ expr: datafusion_expr::Expr::Sort {
expr: Box::new(expr.expr),
asc: asc.unwrap_or(true),
nulls_first: nulls_first.unwrap_or(true),
@@ -98,7 +98,7 @@ fn order_by(expr: PyExpr, asc: Option<bool>, nulls_first: Option<bool>) -> PyRes
#[pyfunction]
fn alias(expr: PyExpr, name: &str) -> PyResult<PyExpr> {
Ok(PyExpr {
- expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)),
+ expr: datafusion_expr::Expr::Alias(Box::new(expr.expr), String::from(name)),
})
}
@@ -114,7 +114,7 @@ fn window(
let fun = datafusion_expr::window_function::WindowFunction::from_str(name)
.map_err(|e| -> errors::DataFusionError { e.into() })?;
Ok(PyExpr {
- expr: datafusion::logical_plan::Expr::WindowFunction {
+ expr: datafusion_expr::Expr::WindowFunction {
fun,
args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
partition_by: partition_by
diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs
index fc0f9b8..4ac0079 100644
--- a/src/pyarrow_filter_expression.rs
+++ b/src/pyarrow_filter_expression.rs
@@ -22,7 +22,7 @@ use std::convert::TryFrom;
use std::result::Result;
use datafusion_common::{Column, ScalarValue};
-use datafusion_expr::{Expr, Operator};
+use datafusion_expr::{Between, BinaryExpr, Expr, Operator};
use crate::errors::DataFusionError;
@@ -121,7 +121,7 @@ impl TryFrom<&Expr> for PyArrowFilterExpression {
v
))),
},
- Expr::BinaryExpr { left, op, right } => {
+ Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
let operator = operator_to_py(op, op_module)?;
let left = PyArrowFilterExpression::try_from(left.as_ref())?.0;
let right = PyArrowFilterExpression::try_from(right.as_ref())?.0;
@@ -144,12 +144,12 @@ impl TryFrom<&Expr> for PyArrowFilterExpression {
.into_ref(py);
Ok(expr.call_method1("is_null", (expr,))?)
}
- Expr::Between {
+ Expr::Between(Between {
expr,
negated,
low,
high,
- } => {
+ }) => {
let expr = PyArrowFilterExpression::try_from(expr.as_ref())?.0;
let low = PyArrowFilterExpression::try_from(low.as_ref())?.0;
let high = PyArrowFilterExpression::try_from(high.as_ref())?.0;