tustvold commented on code in PR #8377:
URL: https://github.com/apache/arrow-datafusion/pull/8377#discussion_r1421000040
##########
datafusion/physical-expr/src/analysis.rs:
##########
@@ -111,6 +115,22 @@ impl ExprBoundaries {
distinct_count: col_stats.distinct_count.clone(),
})
}
+
+ /// Create `ExprBoundaries` that represent no known bounds for all the
columns in `schema`
+ pub fn try_new_unknown(schema: &Schema) -> Result<Vec<Self>> {
Review Comment:
Might unbounded be more obvious a name than unknown?
##########
datafusion-examples/examples/expr_api.rs:
##########
@@ -120,6 +171,64 @@ fn simplify_demo() -> Result<()> {
col("i").lt(lit(10))
);
+ // String --> Date simplification
+ // `cast('2020-09-01' as date)` --> 18506
+ assert_eq!(
+ simplifier.simplify(lit("2020-09-01").cast_to(&DataType::Date32,
&schema)?)?,
+ lit(ScalarValue::Date32(Some(18506)))
+ );
+
+ Ok(())
+}
+
+/// DataFusion also has APIs for analyzing predicates (boolean expressions) to
+/// determine any range restrictions on the inputs required for the predicate
+/// to evaluate to true.
+fn range_analysis_demo() -> Result<()> {
+ // For example, let's say you are interested in finding data for all days
+ // in the month of September, 2020
+ let september_1 = ScalarValue::Date32(Some(18506)); // 2020-09-01
+ let october_1 = ScalarValue::Date32(Some(18536)); // 2020-10-01
+
+ // The predicate to find all such days could be
+ // `date > '2020-09-01' AND date < '2020-10-01'`
+ let expr = col("date")
+ .gt(lit(september_1.clone()))
+ .and(col("date").lt(lit(october_1.clone())));
+
+ // Using the analysis API, DataFusion can determine that the value of
`date`
+ // must be in the range `['2020-09-01', '2020-10-01']`. If your data is
+ // organized in files according to day, this information permits skipping
+ // entire files without reading them.
+ //
+ // While this simply example could be handled with a special case, the
Review Comment:
```suggestion
// While this simple example could be handled with a special case, the
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]