alamb commented on code in PR #9388:
URL: https://github.com/apache/arrow-datafusion/pull/9388#discussion_r1506650248
##########
datafusion-examples/examples/expr_api.rs:
##########
@@ -113,10 +115,7 @@ fn evaluate_demo() -> Result<()> {
fn simplify_demo() -> Result<()> {
// For example, lets say you have has created an expression such
// ts = to_timestamp("2020-09-08T12:00:00+00:00")
- let expr = col("ts").eq(call_fn(
- "to_timestamp",
- vec![lit("2020-09-08T12:00:00+00:00")],
- )?);
+ let expr =
col("ts").eq(to_timestamp(vec![lit("2020-09-08T12:00:00+00:00")]));
Review Comment:
👍
##########
datafusion/functions/benches/to_timestamp.rs:
##########
@@ -0,0 +1,116 @@
+// Licensed to the Apache Software Foundation (ASF) under one
Review Comment:
❤️
##########
datafusion/core/tests/sql/explain_analyze.rs:
##########
@@ -730,7 +730,7 @@ async fn parquet_explain_analyze() {
let ctx = SessionContext::new();
register_alltypes_parquet(&ctx).await;
- let sql = "EXPLAIN ANALYZE select id, float_col, timestamp_col from
alltypes_plain where timestamp_col > to_timestamp('2009-02-01T00:00:00'); ";
+ let sql = "EXPLAIN ANALYZE select id, float_col, timestamp_col from
alltypes_plain where timestamp_col > arrow_cast('2009-02-01T00:00:00',
'Timestamp(Second, None)');";
Review Comment:
likewise here I think these can/should continue to work without any changes
as they are in the core crate (which already depends on datafusion-functions)
##########
datafusion/core/tests/simplification.rs:
##########
@@ -108,3 +152,73 @@ fn fold_and_simplify() {
let simplified = simplifier.simplify(expr).unwrap();
assert_eq!(simplified, lit(true))
}
+
+#[test]
+fn to_timestamp_expr_folded() -> Result<()> {
Review Comment:
```suggestion
/// Ensure that timestamp expressions are folded so they aren't invoked on
each row
fn to_timestamp_expr_folded() -> Result<()> {
```
##########
datafusion/core/tests/parquet/row_group_pruning.rs:
##########
@@ -112,7 +112,7 @@ impl RowGroupPruningTest {
async fn prune_timestamps_nanos() {
RowGroupPruningTest::new()
.with_scenario(Scenario::Timestamps)
- .with_query("SELECT * FROM t where nanos < to_timestamp('2020-01-02
01:01:11Z')")
+ .with_query("SELECT * FROM t where nanos <
arrow_cast('2020-01-02T01:01:11', 'Timestamp(Nanosecond, None)')")
Review Comment:
likewise here I think these tests could continue to use to_timestamp without
problem
##########
datafusion/core/tests/parquet/page_pruning.rs:
##########
@@ -267,7 +267,7 @@ async fn test_prune(
async fn prune_timestamps_nanos() {
test_prune(
Scenario::Timestamps,
- "SELECT * FROM t where nanos < to_timestamp('2020-01-02 01:01:11Z')",
+ "SELECT * FROM t where nanos < arrow_cast('2020-01-02T01:01:11',
'Timestamp(Nanosecond, None)')",
Review Comment:
Why does this need to be changed? Since these tests are in datafusion/core I
think they can continue to use to_timestamp without any problems
##########
datafusion/sqllogictest/test_files/functions.slt:
##########
@@ -483,7 +483,7 @@ statement error Did you mean 'arrow_typeof'?
SELECT arrowtypeof(v1) from test;
# Scalar function
-statement error Did you mean 'to_timestamp_seconds'?
Review Comment:
🤔 this looks like there may be special case handling for function hinting
that doesn't work for user defined functions.
I'll file a ticket to add that feature to user defined functions too
##########
datafusion/optimizer/src/simplify_expressions/simplify_exprs.rs:
##########
@@ -505,59 +488,6 @@ mod tests {
Ok(())
}
- #[test]
- fn now_less_than_timestamp() -> Result<()> {
Review Comment:
Moved to datafusion/core/tests/simplification.rs
##########
datafusion/functions/src/datetime/to_timestamp.rs:
##########
@@ -0,0 +1,1184 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use std::any::Any;
+use std::sync::Arc;
+
+use arrow::array::{
+ Array, ArrowPrimitiveType, GenericStringArray, OffsetSizeTrait,
PrimitiveArray,
+};
+use arrow::compute::kernels::cast_utils::string_to_timestamp_nanos;
+use arrow::datatypes::DataType::Timestamp;
+use arrow::datatypes::TimeUnit::{Microsecond, Millisecond, Nanosecond, Second};
+use arrow::datatypes::{
+ ArrowTimestampType, DataType, TimestampMicrosecondType,
TimestampMillisecondType,
+ TimestampNanosecondType, TimestampSecondType,
+};
+use chrono::prelude::*;
+use chrono::LocalResult::Single;
+use itertools::Either;
+
+use datafusion_common::cast::as_generic_string_array;
+use datafusion_common::{exec_err, DataFusionError, Result, ScalarType,
ScalarValue};
+use datafusion_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility};
+use datafusion_physical_expr::expressions::cast_column;
+
+/// Error message if nanosecond conversion request beyond supported interval
+const ERR_NANOSECONDS_NOT_SUPPORTED: &str = "The dates that can be represented
as nanoseconds have to be between 1677-09-21T00:12:44.0 and
2262-04-11T23:47:16.854775804";
+
+#[derive(Debug)]
+pub(super) struct ToTimestampFunc {
Review Comment:
These are all so similar, I wonder if we could make a common class to avoid
the duplication something like
```rust
struct ToTimestampImpl {
/// target datatype
data_type: DataType,
}
```
🤔
##########
datafusion/physical-expr/src/datetime_expressions.rs:
##########
@@ -42,418 +41,20 @@ use arrow_array::cast::AsArray;
use arrow_array::temporal_conversions::NANOSECONDS;
use arrow_array::timezone::Tz;
use arrow_array::types::{ArrowTimestampType, Date32Type, Int32Type};
-use arrow_array::{GenericStringArray, StringArray};
+use arrow_array::StringArray;
use chrono::prelude::*;
-use chrono::LocalResult::Single;
use chrono::{Duration, LocalResult, Months, NaiveDate};
-use itertools::Either;
use datafusion_common::cast::{
- as_date32_array, as_date64_array, as_generic_string_array,
as_primitive_array,
- as_timestamp_microsecond_array, as_timestamp_millisecond_array,
- as_timestamp_nanosecond_array, as_timestamp_second_array,
-};
-use datafusion_common::{
- exec_err, not_impl_err, DataFusionError, Result, ScalarType, ScalarValue,
+ as_date32_array, as_date64_array, as_primitive_array,
as_timestamp_microsecond_array,
+ as_timestamp_millisecond_array, as_timestamp_nanosecond_array,
+ as_timestamp_second_array,
};
+use datafusion_common::{exec_err, not_impl_err, DataFusionError, Result,
ScalarValue};
use datafusion_expr::ColumnarValue;
use crate::expressions::cast_column;
-/// Error message if nanosecond conversion request beyond supported interval
Review Comment:
this is great to extract all this code out of datafusion/physical-expr
##########
datafusion/optimizer/src/simplify_expressions/expr_simplifier.rs:
##########
@@ -1530,14 +1531,26 @@ mod tests {
// Check non string arguments
// to_timestamp("2020-09-08T12:00:00+00:00") -->
timestamp(1599566400i64)
- let expr =
- call_fn("to_timestamp",
vec![lit("2020-09-08T12:00:00+00:00")]).unwrap();
- test_evaluate(expr, lit_timestamp_nano(1599566400000000000i64));
+ //
+ // todo - determine how to migrate this
Review Comment:
I think the const evaluator is called via the simplfier
Thus I think we could move these tests to the
`datafusion/core/tests/simplification.rs` (and call the entire simplifier on
them, rather than just the constant propagator) as well as they are really
tests that ensure simplification works with these particular functions.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]