albertlockett commented on code in PR #9373:
URL: https://github.com/apache/arrow-rs/pull/9373#discussion_r2787949501


##########
arrow-select/src/concat.rs:
##########
@@ -1900,4 +1900,90 @@ mod tests {
         assert_eq!(values.values(), &[10, 20, 30]);
         assert_eq!(&[2, 3, 5], run_ends);
     }
+
+    #[test]
+    fn test_concat_u8_dictionary_256_values() {
+        // Integration test: concat should work with exactly 256 unique values
+        let values = StringArray::from((0..256).map(|i| format!("v{}", 
i)).collect::<Vec<_>>());
+        let keys = UInt8Array::from((0..256).map(|i| i as 
u8).collect::<Vec<_>>());
+        let dict = DictionaryArray::<UInt8Type>::try_new(keys, 
Arc::new(values)).unwrap();
+
+        // Concatenate with itself - should succeed
+        let result = concat(&[&dict as &dyn Array, &dict as &dyn Array]);
+        assert!(
+            result.is_ok(),
+            "Concat should succeed with 256 unique values for u8"
+        );
+
+        let concatenated = result.unwrap();
+        assert_eq!(
+            concatenated.len(),
+            512,
+            "Should have 512 total elements (256 * 2)"
+        );
+    }

Review Comment:
   If you were to rewrite this test using the similar setup as the test below 
(`test_concat_u8_dictionary_257_values_fails`), it would actually pass without 
the fix introduced by this PR.
   
   ```rs
           let values = StringArray::from((0..128).map(|i| format!("v{}", 
i)).collect::<Vec<_>>());
           let keys = UInt8Array::from((0..128).map(|i| i as 
u8).collect::<Vec<_>>());
           let dict1 = DictionaryArray::<UInt8Type>::try_new(keys, 
Arc::new(values)).unwrap();
   
           let values = StringArray::from((128..256).map(|i| format!("v{}", 
i)).collect::<Vec<_>>());
           let keys = UInt8Array::from((0..128).map(|i| i as 
u8).collect::<Vec<_>>());
           let dict2 = DictionaryArray::<UInt8Type>::try_new(keys, 
Arc::new(values)).unwrap();
   
           // Concatenate with itself - should succeed
           let result = concat(&[&dict1 as &dyn Array, &dict2 as &dyn Array]); 
// <-- will succeed
   ```
   
   The original issue https://github.com/apache/arrow-rs/issues/9366 mentions 
that concatenating dicts with some types succeed when concatenating to exactly 
256 total values (in the case where the dict key type is u8).
   
   I wonder if we should actually use one of the types for which the bug occurs 
like `FixedSizeBinary`.
   
   ```suggestion
   
       #[test]
       fn test_concat_u8_dictionary_256_values() {
           let values = FixedSizeBinaryArray::try_from_iter((0..128).map(|i| 
vec![i as u8])).unwrap();
           let keys = UInt8Array::from((0..128).map(|i| i as 
u8).collect::<Vec<_>>());
           let dict1 = DictionaryArray::<UInt8Type>::try_new(keys, 
Arc::new(values)).unwrap();
   
           let values = FixedSizeBinaryArray::try_from_iter((128..256).map(|i| 
vec![i as u8])).unwrap();
           let keys = UInt8Array::from((0..128).map(|i| i as 
u8).collect::<Vec<_>>());
           let dict2 = DictionaryArray::<UInt8Type>::try_new(keys, 
Arc::new(values)).unwrap();
   
            let result = concat(&[&dict1 as &dyn Array, &dict2 as &dyn Array]);
            assert!(
                result.is_ok(),
                "Concat should succeed with 256 unique values for u8"
            );
   
            let concatenated = result.unwrap();
            assert_eq!(
                concatenated.len(),
                256,
                "Should have total elements 256"
            );
       }
   ```
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to