This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion-python.git


The following commit(s) were added to refs/heads/master by this push:
     new 72f0600  Upgrade to DataFusion 14.0.0 (#67)
72f0600 is described below

commit 72f06006ca78ab4bcfcfc10ea3d3f2c2e9e515e9
Author: Andy Grove <[email protected]>
AuthorDate: Tue Nov 8 11:44:17 2022 -0700

    Upgrade to DataFusion 14.0.0 (#67)
    
    * upgrade to DataFusion 14.0.0
    
    * fmt
---
 Cargo.lock                       | 133 +++++++++++++++++++++++++++++----------
 Cargo.toml                       |   7 ++-
 datafusion/tests/test_catalog.py |   6 +-
 src/dataframe.rs                 |   4 +-
 src/dataset_exec.rs              |   5 +-
 src/expression.rs                |  15 ++---
 src/functions.rs                 |  10 +--
 src/pyarrow_filter_expression.rs |   8 +--
 8 files changed, 128 insertions(+), 60 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index fbce888..90bbf7c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -112,15 +112,16 @@ checksum = "8da52d66c7071e2e3fa2a1e5c6d088fec47b593032b254f5e980de8ea54454d6"
 
 [[package]]
 name = "arrow"
-version = "24.0.0"
+version = "26.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d68391300d5237f6725f0f869ae7cb65d45fcf8a6d18f6ceecd328fb803bef93"
+checksum = "e24e2bcd431a4aa0ff003fdd2dc21c78cfb42f31459c89d2312c2746fe17a5ac"
 dependencies = [
  "ahash 0.8.0",
  "arrow-array",
  "arrow-buffer",
  "arrow-data",
  "arrow-schema",
+ "arrow-select",
  "bitflags",
  "chrono",
  "comfy-table",
@@ -141,9 +142,9 @@ dependencies = [
 
 [[package]]
 name = "arrow-array"
-version = "24.0.0"
+version = "26.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0bb00c5862b5eea683812083c495bef01a9a5149da46ad2f4c0e4aa8800f64d"
+checksum = "c9044300874385f19e77cbf90911e239bd23630d8f23bb0f948f9067998a13b7"
 dependencies = [
  "ahash 0.8.0",
  "arrow-buffer",
@@ -157,18 +158,19 @@ dependencies = [
 
 [[package]]
 name = "arrow-buffer"
-version = "24.0.0"
+version = "26.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3e594d0fe0026a8bc2459bdc5ac9623e5fb666724a715e0acbc96ba30c5d4cc7"
+checksum = "78476cbe9e3f808dcecab86afe42d573863c63e149c62e6e379ed2522743e626"
 dependencies = [
  "half",
+ "num",
 ]
 
 [[package]]
 name = "arrow-data"
-version = "24.0.0"
+version = "26.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8500df05060d86fdc53e9b5cb32e51bfeaacc040fdeced3eb99ac0d59200ff45"
+checksum = "4d916feee158c485dad4f701cba31bc9a90a8db87d9df8e2aa8adc0c20a2bbb9"
 dependencies = [
  "arrow-buffer",
  "arrow-schema",
@@ -178,9 +180,37 @@ dependencies = [
 
 [[package]]
 name = "arrow-schema"
-version = "24.0.0"
+version = "26.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f9406eb7834ca6bd8350d1baa515d18b9fcec487eddacfb62f5e19511f7bd37"
+
+[[package]]
+name = "arrow-select"
+version = "26.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86d1fef01f25e1452c86fa6887f078de8e0aaeeb828370feab205944cfc30e27"
+checksum = "6593a01586751c74498495d2f5a01fcd438102b52965c11dd98abf4ebcacef37"
+dependencies = [
+ "arrow-array",
+ "arrow-buffer",
+ "arrow-data",
+ "arrow-schema",
+ "num",
+]
+
+[[package]]
+name = "async-compression"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "942c7cd7ae39e91bde4820d74132e9862e62c2f386c3aa90ccf55949f5bad63a"
+dependencies = [
+ "bzip2",
+ "flate2",
+ "futures-core",
+ "futures-io",
+ "memchr",
+ "pin-project-lite",
+ "tokio",
+]
 
 [[package]]
 name = "async-trait"
@@ -294,6 +324,27 @@ version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec8a7b6a70fde80372154c65702f00a0f56f3e1c36abbc6c440484be248856db"
 
+[[package]]
+name = "bzip2"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6afcd980b5f3a45017c57e57a2fcccbb351cc43a356ce117ef760ef8052b89b0"
+dependencies = [
+ "bzip2-sys",
+ "libc",
+]
+
+[[package]]
+name = "bzip2-sys"
+version = "0.1.11+1.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
 [[package]]
 name = "cc"
 version = "1.0.73"
@@ -479,15 +530,17 @@ dependencies = [
 
 [[package]]
 name = "datafusion"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2bdec06a3db088da76fc28cb0877b8b5438ca6b6025e04d975bace0fd85df19"
+checksum = "e7a8411475928479fe57af18698626f0a44f3c29153e051dce45f7455c08a6d5"
 dependencies = [
  "ahash 0.8.0",
  "apache-avro",
  "arrow",
+ "async-compression",
  "async-trait",
  "bytes",
+ "bzip2",
  "chrono",
  "datafusion-common",
  "datafusion-expr",
@@ -495,6 +548,7 @@ dependencies = [
  "datafusion-physical-expr",
  "datafusion-row",
  "datafusion-sql",
+ "flate2",
  "futures",
  "glob",
  "hashbrown",
@@ -508,6 +562,7 @@ dependencies = [
  "parking_lot",
  "parquet",
  "paste",
+ "percent-encoding",
  "pin-project-lite",
  "pyo3",
  "rand 0.8.5",
@@ -516,18 +571,20 @@ dependencies = [
  "tempfile",
  "tokio",
  "tokio-stream",
+ "tokio-util",
  "url",
  "uuid 1.2.1",
 ]
 
 [[package]]
 name = "datafusion-common"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "506eab038bf2d39ac02c22be30b019873ca01f887148b939d309a0e9523f4515"
+checksum = "15f1ffcbc1f040c9ab99f41db1c743d95aff267bb2e7286aaa010738b7402251"
 dependencies = [
  "apache-avro",
  "arrow",
+ "chrono",
  "object_store",
  "ordered-float 3.2.0",
  "parquet",
@@ -537,21 +594,22 @@ dependencies = [
 
 [[package]]
 name = "datafusion-expr"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3d2810e369c735d69479e27fe8410e97a76ed07484aa9b3ad7c039efa504257"
+checksum = "1883d9590d303ef38fa295567e7fdb9f8f5f511fcc167412d232844678cd295c"
 dependencies = [
  "ahash 0.8.0",
  "arrow",
  "datafusion-common",
+ "log",
  "sqlparser",
 ]
 
 [[package]]
 name = "datafusion-optimizer"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60f3b80326243629d02e33f37e955a7114781c6c44caf9d8b254618157de7143"
+checksum = "2127d46d566ab3463d70da9675fc07b9d634be8d17e80d0e1ce79600709fe651"
 dependencies = [
  "arrow",
  "async-trait",
@@ -565,27 +623,33 @@ dependencies = [
 
 [[package]]
 name = "datafusion-physical-expr"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e9bf3b7ae861d351a85174fd4fddb29d249950b2f23671318971960452b4b9ab"
+checksum = "0d108b6fe8eeb317ecad1d74619e8758de49cccc8c771b56c97962fd52eaae23"
 dependencies = [
  "ahash 0.8.0",
  "arrow",
+ "arrow-buffer",
+ "arrow-schema",
  "blake2",
  "blake3",
  "chrono",
  "datafusion-common",
  "datafusion-expr",
  "datafusion-row",
+ "half",
  "hashbrown",
+ "itertools",
  "lazy_static",
  "md-5",
+ "num-traits",
  "ordered-float 3.2.0",
  "paste",
  "rand 0.8.5",
  "regex",
  "sha2",
  "unicode-segmentation",
+ "uuid 1.2.1",
 ]
 
 [[package]]
@@ -596,6 +660,7 @@ dependencies = [
  "datafusion",
  "datafusion-common",
  "datafusion-expr",
+ "datafusion-optimizer",
  "futures",
  "mimalloc",
  "object_store",
@@ -607,9 +672,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-row"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3f44a2a722719c569b437b3aa2108a99dc911369e8d86c44e6293225c3387147"
+checksum = "43537b6377d506e4788bf21e9ed943340e076b48ca4d077e6ea4405ca5e54a1c"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -619,9 +684,9 @@ dependencies = [
 
 [[package]]
 name = "datafusion-sql"
-version = "13.0.0"
+version = "14.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e98493e04385c924d1d3d7ab8739c41f1ebf676a46863181103a0fb9c7168fa9"
+checksum = "244d08d4710e1088d9c0949c9b5b8d68d9cf2cde7203134a4cc389e870fe2354"
 dependencies = [
  "arrow",
  "datafusion-common",
@@ -672,12 +737,11 @@ dependencies = [
 
 [[package]]
 name = "flatbuffers"
-version = "2.1.2"
+version = "22.9.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "86b428b715fdbdd1c364b84573b5fdc0f84f8e423661b9f398735278bc7f2b6a"
+checksum = "8ce016b9901aef3579617931fbb2df8fc9a9f7cb95a16eb8acc8148209bb9e70"
 dependencies = [
  "bitflags",
- "smallvec",
  "thiserror",
 ]
 
@@ -1470,9 +1534,9 @@ dependencies = [
 
 [[package]]
 name = "parquet"
-version = "24.0.0"
+version = "26.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74fd590f0672998df84503d1bcbebc69732583d03cc3495c7dd8d3e5a1d8437f"
+checksum = "3bf8fa7ab6572791325a8595f55dc532dde88b996ae10a5ca8a2db746784ecc4"
 dependencies = [
  "ahash 0.8.0",
  "arrow",
@@ -1486,7 +1550,6 @@ dependencies = [
  "lz4",
  "num",
  "num-bigint",
- "rand 0.8.5",
  "seq-macro",
  "snap",
  "thrift",
@@ -1518,6 +1581,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
+[[package]]
+name = "pkg-config"
+version = "0.3.26"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.16"
@@ -1982,9 +2051,9 @@ checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d"
 
 [[package]]
 name = "sqlparser"
-version = "0.25.0"
+version = "0.26.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0781f2b6bd03e5adf065c8e772b49eaea9f640d06a1b9130330fe8bd2563f4fd"
+checksum = "86be66ea0b2b22749cfa157d16e2e84bf793e626a3375f4d378dc289fa03affb"
 dependencies = [
  "log",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index afdb9a9..b67cbc5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -34,9 +34,10 @@ default = ["mimalloc"]
 tokio = { version = "1.0", features = ["macros", "rt", "rt-multi-thread", "sync"] }
 rand = "0.7"
 pyo3 = { version = "~0.17.1", features = ["extension-module", "abi3", "abi3-py37"] }
-datafusion = { version = "^13.0.0", features = ["pyarrow", "avro"] }
-datafusion-expr = { version = "^13.0.0" }
-datafusion-common = { version = "^13.0.0", features = ["pyarrow"] }
+datafusion = { version = "^14.0.0", features = ["pyarrow", "avro"] }
+datafusion-expr = { version = "^14.0.0" }
+datafusion-optimizer = { version = "^14.0.0" }
+datafusion-common = { version = "^14.0.0", features = ["pyarrow"] }
 uuid = { version = "0.8", features = ["v4"] }
 mimalloc = { version = "*", optional = true, default-features = false }
 async-trait = "0.1"
diff --git a/datafusion/tests/test_catalog.py b/datafusion/tests/test_catalog.py
index a9bdf72..214f6b1 100644
--- a/datafusion/tests/test_catalog.py
+++ b/datafusion/tests/test_catalog.py
@@ -33,8 +33,8 @@ def test_basic(ctx, database):
     assert table.kind == "physical"
     assert table.schema == pa.schema(
         [
-            pa.field("int", pa.int64(), nullable=False),
-            pa.field("str", pa.string(), nullable=False),
-            pa.field("float", pa.float64(), nullable=False),
+            pa.field("int", pa.int64(), nullable=True),
+            pa.field("str", pa.string(), nullable=True),
+            pa.field("float", pa.float64(), nullable=True),
         ]
     )
diff --git a/src/dataframe.rs b/src/dataframe.rs
index 997ba98..ab8f2c4 100644
--- a/src/dataframe.rs
+++ b/src/dataframe.rs
@@ -170,8 +170,8 @@ impl PyDataFrame {
             "left" => JoinType::Left,
             "right" => JoinType::Right,
             "full" => JoinType::Full,
-            "semi" => JoinType::Semi,
-            "anti" => JoinType::Anti,
+            "semi" => JoinType::LeftSemi,
+            "anti" => JoinType::LeftAnti,
             how => {
                 return Err(DataFusionError::Common(format!(
                     "The join type {} does not exist or is not implemented",
diff --git a/src/dataset_exec.rs b/src/dataset_exec.rs
index 91b9942..9c41218 100644
--- a/src/dataset_exec.rs
+++ b/src/dataset_exec.rs
@@ -37,7 +37,8 @@ use datafusion::physical_plan::stream::RecordBatchStreamAdapter;
 use datafusion::physical_plan::{
     DisplayFormatType, ExecutionPlan, Partitioning, SendableRecordBatchStream, Statistics,
 };
-use datafusion_expr::{combine_filters, Expr};
+use datafusion_expr::Expr;
+use datafusion_optimizer::utils::conjunction;
 
 use crate::errors::DataFusionError;
 use crate::pyarrow_filter_expression::PyArrowFilterExpression;
@@ -93,7 +94,7 @@ impl DatasetExec {
                 .collect()
         });
         let columns: Option<Vec<String>> = columns.transpose()?;
-        let filter_expr: Option<PyObject> = combine_filters(filters)
+        let filter_expr: Option<PyObject> = conjunction(filters.to_owned())
             .map(|filters| {
                 PyArrowFilterExpression::try_from(&filters)
                     .map(|filter_expr| filter_expr.inner().clone_ref(py))
diff --git a/src/expression.rs b/src/expression.rs
index 4e9eb50..2e8fb80 100644
--- a/src/expression.rs
+++ b/src/expression.rs
@@ -20,7 +20,7 @@ use std::convert::{From, Into};
 
 use datafusion::arrow::datatypes::DataType;
 use datafusion::arrow::pyarrow::PyArrowType;
-use datafusion_expr::{col, lit, Expr};
+use datafusion_expr::{col, lit, Cast, Expr, GetIndexedField};
 
 use datafusion::scalar::ScalarValue;
 
@@ -94,10 +94,10 @@ impl PyExpr {
     }
 
     fn __getitem__(&self, key: &str) -> PyResult<PyExpr> {
-        Ok(Expr::GetIndexedField {
-            expr: Box::new(self.expr.clone()),
-            key: ScalarValue::Utf8(Some(key.to_string())),
-        }
+        Ok(Expr::GetIndexedField(GetIndexedField::new(
+            Box::new(self.expr.clone()),
+            ScalarValue::Utf8(Some(key.to_string())),
+        ))
         .into())
     }
 
@@ -129,10 +129,7 @@ impl PyExpr {
     pub fn cast(&self, to: PyArrowType<DataType>) -> PyExpr {
         // self.expr.cast_to() requires DFSchema to validate that the cast
         // is supported, omit that for now
-        let expr = Expr::Cast {
-            expr: Box::new(self.expr.clone()),
-            data_type: to.0,
-        };
+        let expr = Expr::Cast(Cast::new(Box::new(self.expr.clone()), to.0));
         expr.into()
     }
 }
diff --git a/src/functions.rs b/src/functions.rs
index ca2aca1..d7b1b9e 100644
--- a/src/functions.rs
+++ b/src/functions.rs
@@ -18,7 +18,7 @@
 use pyo3::{prelude::*, wrap_pyfunction};
 
 use datafusion::physical_plan::aggregates::AggregateFunction;
-use datafusion_expr::BuiltinScalarFunction;
+use datafusion_expr::{lit, BuiltinScalarFunction};
 
 use crate::errors;
 use crate::expression::PyExpr;
@@ -79,14 +79,14 @@ fn concat(args: Vec<PyExpr>) -> PyResult<PyExpr> {
 #[pyfunction(sep, args = "*")]
 fn concat_ws(sep: String, args: Vec<PyExpr>) -> PyResult<PyExpr> {
     let args = args.into_iter().map(|e| e.expr).collect::<Vec<_>>();
-    Ok(datafusion_expr::concat_ws(sep, &args).into())
+    Ok(datafusion_expr::concat_ws(lit(sep), args).into())
 }
 
 /// Creates a new Sort expression
 #[pyfunction]
 fn order_by(expr: PyExpr, asc: Option<bool>, nulls_first: Option<bool>) -> PyResult<PyExpr> {
     Ok(PyExpr {
-        expr: datafusion::logical_plan::Expr::Sort {
+        expr: datafusion_expr::Expr::Sort {
             expr: Box::new(expr.expr),
             asc: asc.unwrap_or(true),
             nulls_first: nulls_first.unwrap_or(true),
@@ -98,7 +98,7 @@ fn order_by(expr: PyExpr, asc: Option<bool>, nulls_first: Option<bool>) -> PyRes
 #[pyfunction]
 fn alias(expr: PyExpr, name: &str) -> PyResult<PyExpr> {
     Ok(PyExpr {
-        expr: datafusion::logical_plan::Expr::Alias(Box::new(expr.expr), String::from(name)),
+        expr: datafusion_expr::Expr::Alias(Box::new(expr.expr), String::from(name)),
     })
 }
 
@@ -114,7 +114,7 @@ fn window(
     let fun = datafusion_expr::window_function::WindowFunction::from_str(name)
         .map_err(|e| -> errors::DataFusionError { e.into() })?;
     Ok(PyExpr {
-        expr: datafusion::logical_plan::Expr::WindowFunction {
+        expr: datafusion_expr::Expr::WindowFunction {
             fun,
             args: args.into_iter().map(|x| x.expr).collect::<Vec<_>>(),
             partition_by: partition_by
diff --git a/src/pyarrow_filter_expression.rs b/src/pyarrow_filter_expression.rs
index fc0f9b8..4ac0079 100644
--- a/src/pyarrow_filter_expression.rs
+++ b/src/pyarrow_filter_expression.rs
@@ -22,7 +22,7 @@ use std::convert::TryFrom;
 use std::result::Result;
 
 use datafusion_common::{Column, ScalarValue};
-use datafusion_expr::{Expr, Operator};
+use datafusion_expr::{Between, BinaryExpr, Expr, Operator};
 
 use crate::errors::DataFusionError;
 
@@ -121,7 +121,7 @@ impl TryFrom<&Expr> for PyArrowFilterExpression {
                         v
                     ))),
                 },
-                Expr::BinaryExpr { left, op, right } => {
+                Expr::BinaryExpr(BinaryExpr { left, op, right }) => {
                     let operator = operator_to_py(op, op_module)?;
                     let left = PyArrowFilterExpression::try_from(left.as_ref())?.0;
                     let right = PyArrowFilterExpression::try_from(right.as_ref())?.0;
@@ -144,12 +144,12 @@ impl TryFrom<&Expr> for PyArrowFilterExpression {
                         .into_ref(py);
                     Ok(expr.call_method1("is_null", (expr,))?)
                 }
-                Expr::Between {
+                Expr::Between(Between {
                     expr,
                     negated,
                     low,
                     high,
-                } => {
+                }) => {
                     let expr = PyArrowFilterExpression::try_from(expr.as_ref())?.0;
                     let low = PyArrowFilterExpression::try_from(low.as_ref())?.0;
                     let high = PyArrowFilterExpression::try_from(high.as_ref())?.0;

Reply via email to