This is an automated email from the ASF dual-hosted git repository.
dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 4e1b6de554 fix partition-by panic (#12297)
4e1b6de554 is described below
commit 4e1b6de5549cc7ed399766dd22cafd450701db16
Author: iamthinh <[email protected]>
AuthorDate: Tue Sep 3 00:35:56 2024 -0700
fix partition-by panic (#12297)
---
datafusion/physical-plan/src/coalesce/mod.rs | 15 +++++++++++++--
datafusion/physical-plan/src/repartition/mod.rs | 12 +++++++++---
datafusion/sqllogictest/test_files/repartition.slt | 20 ++++++++++++++++++++
3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/datafusion/physical-plan/src/coalesce/mod.rs b/datafusion/physical-plan/src/coalesce/mod.rs
index 5befa5ecda..ce5a1e53ab 100644
--- a/datafusion/physical-plan/src/coalesce/mod.rs
+++ b/datafusion/physical-plan/src/coalesce/mod.rs
@@ -18,7 +18,7 @@
use arrow::compute::concat_batches;
use arrow_array::builder::StringViewBuilder;
use arrow_array::cast::AsArray;
-use arrow_array::{Array, ArrayRef, RecordBatch};
+use arrow_array::{Array, ArrayRef, RecordBatch, RecordBatchOptions};
use arrow_schema::SchemaRef;
use std::sync::Arc;
@@ -265,7 +265,9 @@ fn gc_string_view_batch(batch: &RecordBatch) -> RecordBatch {
}
})
.collect();
- RecordBatch::try_new(batch.schema(), new_columns)
+ let mut options = RecordBatchOptions::new();
+ options = options.with_row_count(Some(batch.num_rows()));
+ RecordBatch::try_new_with_options(batch.schema(), new_columns, &options)
.expect("Failed to re-create the gc'ed record batch")
}
@@ -501,6 +503,15 @@ mod tests {
assert_eq!(array.data_buffers().len(), gc_array.data_buffers().len());
// no compaction
}
+ #[test]
+ fn test_gc_string_view_test_batch_empty() {
+ let schema = Schema::empty();
+ let batch = RecordBatch::new_empty(schema.into());
+ let output_batch = gc_string_view_batch(&batch);
+ assert_eq!(batch.num_columns(), output_batch.num_columns());
+ assert_eq!(batch.num_rows(), output_batch.num_rows());
+ }
+
#[test]
fn test_gc_string_view_batch_large_no_compact() {
// view with large strings (has buffers) but full --> no need to compact
diff --git a/datafusion/physical-plan/src/repartition/mod.rs b/datafusion/physical-plan/src/repartition/mod.rs
index 47e5192c23..093803e3c8 100644
--- a/datafusion/physical-plan/src/repartition/mod.rs
+++ b/datafusion/physical-plan/src/repartition/mod.rs
@@ -41,7 +41,7 @@ use crate::{DisplayFormatType, ExecutionPlan, Partitioning, PlanProperties, Stat
use arrow::array::ArrayRef;
use arrow::datatypes::{SchemaRef, UInt64Type};
use arrow::record_batch::RecordBatch;
-use arrow_array::PrimitiveArray;
+use arrow_array::{PrimitiveArray, RecordBatchOptions};
use datafusion_common::utils::transpose;
use datafusion_common::{arrow_datafusion_err, not_impl_err, DataFusionError, Result};
use datafusion_common_runtime::SpawnedTask;
@@ -309,8 +309,14 @@ impl BatchPartitioner {
})
.collect::<Result<Vec<ArrayRef>>>()?;
- let batch =
- RecordBatch::try_new(batch.schema(), columns).unwrap();
+ let mut options = RecordBatchOptions::new();
+ options = options.with_row_count(Some(indices.len()));
+ let batch = RecordBatch::try_new_with_options(
+ batch.schema(),
+ columns,
+ &options,
+ )
+ .unwrap();
Ok((partition, batch))
});
diff --git a/datafusion/sqllogictest/test_files/repartition.slt b/datafusion/sqllogictest/test_files/repartition.slt
index e3c204a4f4..2d59ad2b5b 100644
--- a/datafusion/sqllogictest/test_files/repartition.slt
+++ b/datafusion/sqllogictest/test_files/repartition.slt
@@ -127,3 +127,23 @@ physical_plan
04)------FilterExec: c3@2 > 0
05)--------RepartitionExec: partitioning=RoundRobinBatch(3), input_partitions=1
06)----------StreamingTableExec: partition_sizes=1, projection=[c1, c2, c3], infinite_source=true
+
+# Start repartition on empty columns test.
+# See https://github.com/apache/datafusion/issues/12057
+
+statement ok
+CREATE TABLE t1(v1 int);
+
+statement ok
+INSERT INTO t1 values(42);
+
+query I
+SELECT sum(1) OVER (PARTITION BY false=false)
+FROM t1 WHERE ((false > (v1 = v1)) IS DISTINCT FROM true);
+----
+1
+
+statement ok
+DROP TABLE t1;
+
+# End repartition on empty columns test
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]