tustvold commented on code in PR #5408:
URL: https://github.com/apache/arrow-rs/pull/5408#discussion_r1495213832
##########
arrow-ord/src/cmp.rs:
##########
@@ -166,6 +168,122 @@ pub fn not_distinct(lhs: &dyn Datum, rhs: &dyn Datum) ->
Result<BooleanArray, Ar
compare_op(Op::NotDistinct, lhs, rhs)
}
+fn process_nested(
+ l: &dyn Array,
+ r: &dyn Array,
+ op: Op,
+ l_t: &DataType,
+ r_t: &DataType,
+ len: usize,
+) -> Result<Option<BooleanArray>, ArrowError> {
+ use arrow_schema::DataType::*;
+ if let (List(_), List(_)) = (l_t, r_t) {
+ // Process nested data types
+ match op {
Review Comment:
I think it would be better to construct the DynComparator once for
`l.values()` and `r.values()` and then use the offsets to drive the comparison?
##########
arrow-ord/src/cmp.rs:
##########
@@ -166,6 +168,122 @@ pub fn not_distinct(lhs: &dyn Datum, rhs: &dyn Datum) ->
Result<BooleanArray, Ar
compare_op(Op::NotDistinct, lhs, rhs)
}
+fn process_nested(
+ l: &dyn Array,
+ r: &dyn Array,
+ op: Op,
+ l_t: &DataType,
+ r_t: &DataType,
+ len: usize,
+) -> Result<Option<BooleanArray>, ArrowError> {
+ use arrow_schema::DataType::*;
+ if let (List(_), List(_)) = (l_t, r_t) {
+ // Process nested data types
+ match op {
+ Op::Less => {
+ let l = l.as_list::<i32>();
+ let r = r.as_list::<i32>();
+ let mut values = BooleanArray::builder(len);
+ for i in 0..l.len() {
+ let l = l.value(i);
+ let r = r.value(i);
+ let l_t = l.data_type();
+ let r_t = r.data_type();
+ let l_len = l.len();
+ let r_len = r.len();
+ let min_len = std::cmp::min(l_len, r_len);
+
+ if !l_t.is_nested() && !r_t.is_nested() {
+ let cmp = build_compare(&l, &r)?;
+
+ fn post_process(len: usize, cmp: DynComparator,
r_is_longer: bool) -> bool {
+ for j in 0..len {
+ let ord = cmp(j, j);
+ if ord == std::cmp::Ordering::Less {
+ return true;
+ }
+ if ord == std::cmp::Ordering::Greater {
+ return false;
+ }
+ }
+ r_is_longer
+ }
+ values.append_value(post_process(min_len, cmp, r_len >
l_len));
+ } else {
+ // Since `compare_op` does not support inconsistent
lengths, we compare the
+ // prefix with `compare_op` only, and compare the left
if the prefix is equal
+ let l = l.slice(0, min_len);
+ let r = r.slice(0, min_len);
+
+ let lt_res = lt(&l, &r)?;
+ let eq_res = eq(&l, &r)?;
+
+ fn post_process(
+ lt: &BooleanArray,
+ eq: &BooleanArray,
+ r_is_longer: bool,
+ ) -> bool {
+ for j in 0..lt.len() {
+ if lt.value(j) {
+ return true;
+ }
+ if !eq.value(j) {
+ return false;
+ }
+ }
+ r_is_longer
+ }
+
+ values.append_value(post_process(<_res, &eq_res,
r_len > l_len));
+ }
+ }
+
+ let values = values.finish();
+ Ok(Some(values))
+ }
+ Op::Equal => {
Review Comment:
This duplication could be eliminated by extracting out a function that maps
the Ordering result of DynComparator to the boolean result
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]