Re: [PR] feat: support `MapArray` in lexsort [arrow-rs]

2025-07-14 Thread via GitHub


mbrobbel merged PR #7882:
URL: https://github.com/apache/arrow-rs/pull/7882


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



Re: [PR] feat: support `MapArray` in lexsort [arrow-rs]

2025-07-10 Thread via GitHub


alamb commented on code in PR #7882:
URL: https://github.com/apache/arrow-rs/pull/7882#discussion_r2198286609


##
arrow-ord/src/ord.rs:
##
@@ -915,4 +947,220 @@ mod tests {
 assert_eq!(cmp(2, 0), Ordering::Equal); // (None, None) cmp (None, 
None)
 assert_eq!(cmp(3, 0), Ordering::Greater); // None cmp (None, None)
 }
+
+#[test]
+fn test_map() {
+// Create first map array demonstrating key priority over values:
+// [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
+let string_builder = StringBuilder::new();
+let int_builder = Int32Builder::new();
+let mut map1_builder = MapBuilder::new(None, string_builder, 
int_builder);
+
+// {"a": 100, "b": 1} - high value for "a", low value for "b"
+map1_builder.keys().append_value("a");
+map1_builder.values().append_value(100);
+map1_builder.keys().append_value("b");
+map1_builder.values().append_value(1);
+map1_builder.append(true).unwrap();
+
+// {"b": 999, "c": 1} - very high value for "b", low value for "c"
+map1_builder.keys().append_value("b");
+map1_builder.values().append_value(999);
+map1_builder.keys().append_value("c");
+map1_builder.values().append_value(1);
+map1_builder.append(true).unwrap();
+
+// {}
+map1_builder.append(true).unwrap();
+
+// {"x": 1}
+map1_builder.keys().append_value("x");
+map1_builder.values().append_value(1);
+map1_builder.append(true).unwrap();
+
+let map1 = map1_builder.finish();
+
+// Create second map array:
+// [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
+let string_builder = StringBuilder::new();
+let int_builder = Int32Builder::new();
+let mut map2_builder = MapBuilder::new(None, string_builder, 
int_builder);
+
+// {"a": 1, "c": 999} - low value for "a", high value for "c"
+map2_builder.keys().append_value("a");
+map2_builder.values().append_value(1);
+map2_builder.keys().append_value("c");
+map2_builder.values().append_value(999);
+map2_builder.append(true).unwrap();
+
+// {"b": 1, "d": 999} - low value for "b", high value for "d"
+map2_builder.keys().append_value("b");
+map2_builder.values().append_value(1);
+map2_builder.keys().append_value("d");
+map2_builder.values().append_value(999);
+map2_builder.append(true).unwrap();
+
+// {"a": 1}
+map2_builder.keys().append_value("a");
+map2_builder.values().append_value(1);
+map2_builder.append(true).unwrap();
+
+// None
+map2_builder.append(false).unwrap();
+
+let map2 = map2_builder.finish();
+
+let opts = SortOptions {
+descending: false,
+nulls_first: true,
+};
+let cmp = make_comparator(&map1, &map2, opts).unwrap();
+
+// Test that keys have priority over values:
+// {"a": 100, "b": 1} vs {"a": 1, "c": 999}
+// First entries match (a:100 vs a:1), but 100 > 1, so Greater
+assert_eq!(cmp(0, 0), Ordering::Greater);
+
+// {"b": 999, "c": 1} vs {"b": 1, "d": 999}
+// First entries match (b:999 vs b:1), but 999 > 1, so Greater
+assert_eq!(cmp(1, 1), Ordering::Greater);
+
+// Key comparison: "a" < "b", so {"a": 100, "b": 1} < {"b": 999, "c": 
1}
+assert_eq!(cmp(0, 1), Ordering::Less);
+
+// Empty map vs non-empty
+assert_eq!(cmp(2, 2), Ordering::Less); // {} < {"a": 1}
+
+// Non-null vs null
+assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
+
+// Key priority test: "x" > "a", regardless of values
+assert_eq!(cmp(3, 0), Ordering::Greater); // {"x": 1} > {"a": 1, "c": 
999}
+
+// Empty vs non-empty
+assert_eq!(cmp(2, 0), Ordering::Less); // {} < {"a": 1, "c": 999}
+
+let opts = SortOptions {
+descending: true,
+nulls_first: true,
+};
+let cmp = make_comparator(&map1, &map2, opts).unwrap();
+
+// With descending=true, value comparison is reversed
+assert_eq!(cmp(0, 0), Ordering::Less); // {"a": 100, "b": 1} vs {"a": 
1, "c": 999} (reversed)
+assert_eq!(cmp(1, 1), Ordering::Less); // {"b": 999, "c": 1} vs {"b": 
1, "d": 999} (reversed)
+assert_eq!(cmp(0, 1), Ordering::Greater); // {"a": 100, "b": 1} vs 
{"b": 999, "c": 1} (key order reversed)
+assert_eq!(cmp(3, 3), Ordering::Greater); // {"x": 1} > None
+assert_eq!(cmp(2, 2), Ordering::Greater); // {} > {"a": 1} (reversed)
+
+let opts = SortOptions {
+descending: false,
+nulls_first: false,
+};
+let cmp = make_comparator(&map1, &map2, opts).unwrap();
+
+// Same key priority behavior with nulls_first=false
+assert_eq!(cmp(0, 0), Ordering::Greater)

[PR] feat: support `MapArray` in lexsort [arrow-rs]

2025-07-07 Thread via GitHub


rluvaton opened a new pull request, #7882:
URL: https://github.com/apache/arrow-rs/pull/7882

   # Which issue does this PR close?
   
   - Closes #7881
   
   # Rationale for this change
   
   to be able to sort MapArray
   
   # What changes are included in this PR?
   
   copy-paste the code from sorting lists but looking at entries instead of 
`values`
   
   # Are these changes tested?
   
   manually, but I need to add tests
   
   # Are there any user-facing changes?
   yes, Map is now supported


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]