This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 3e02689e3 Add time dictionary coercions (#6208)
3e02689e3 is described below
commit 3e02689e3464bc8cf929a0d116888fb6f59999fa
Author: Adrian Garcia Badaracco <[email protected]>
AuthorDate: Thu Aug 8 14:59:48 2024 -0500
Add time dictionary coercions (#6208)
* Add time dictionary coercions
* format
* Pass through primitive values
---
arrow-cast/src/cast/dictionary.rs | 59 +++++++++++++++++++++++++++++++++++++++
arrow-cast/src/cast/mod.rs | 30 ++++++++++++++++++++
2 files changed, 89 insertions(+)
diff --git a/arrow-cast/src/cast/dictionary.rs b/arrow-cast/src/cast/dictionary.rs
index ee2021d15..daaddc491 100644
--- a/arrow-cast/src/cast/dictionary.rs
+++ b/arrow-cast/src/cast/dictionary.rs
@@ -162,6 +162,26 @@ where
take(cast_dict_values.as_ref(), dict_array.keys(), None)
}
+/// Pack a data type into a dictionary array passing the values through a primitive array
+pub(crate) fn pack_array_to_dictionary_via_primitive<K: ArrowDictionaryKeyType>(
+ array: &dyn Array,
+ primitive_type: DataType,
+ dict_value_type: &DataType,
+ cast_options: &CastOptions,
+) -> Result<ArrayRef, ArrowError> {
+ let primitive = cast_with_options(array, &primitive_type, cast_options)?;
+ let dict = cast_with_options(
+ primitive.as_ref(),
+ &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(primitive_type)),
+ cast_options,
+ )?;
+ cast_with_options(
+ dict.as_ref(),
+ &DataType::Dictionary(Box::new(K::DATA_TYPE), Box::new(dict_value_type.clone())),
+ cast_options,
+ )
+}
+
/// Attempts to encode an array into an `ArrayDictionary` with index
/// type K and value (dictionary) type value_type
///
@@ -188,6 +208,45 @@ pub(crate) fn cast_to_dictionary<K: ArrowDictionaryKeyType>(
Decimal256(_, _) => {
pack_numeric_to_dictionary::<K, Decimal256Type>(array, dict_value_type, cast_options)
}
+ Float16 => {
+ pack_numeric_to_dictionary::<K, Float16Type>(array, dict_value_type, cast_options)
+ }
+ Float32 => {
+ pack_numeric_to_dictionary::<K, Float32Type>(array, dict_value_type, cast_options)
+ }
+ Float64 => {
+ pack_numeric_to_dictionary::<K, Float64Type>(array, dict_value_type, cast_options)
+ }
+ Date32 => pack_array_to_dictionary_via_primitive::<K>(
+ array,
+ DataType::Int32,
+ dict_value_type,
+ cast_options,
+ ),
+ Date64 => pack_array_to_dictionary_via_primitive::<K>(
+ array,
+ DataType::Int64,
+ dict_value_type,
+ cast_options,
+ ),
+ Time32(_) => pack_array_to_dictionary_via_primitive::<K>(
+ array,
+ DataType::Int32,
+ dict_value_type,
+ cast_options,
+ ),
+ Time64(_) => pack_array_to_dictionary_via_primitive::<K>(
+ array,
+ DataType::Int64,
+ dict_value_type,
+ cast_options,
+ ),
+ Timestamp(_, _) => pack_array_to_dictionary_via_primitive::<K>(
+ array,
+ DataType::Int64,
+ dict_value_type,
+ cast_options,
+ ),
Utf8 => {
// If the input is a view type, we can avoid casting (thus copying) the data
if array.data_type() == &DataType::Utf8View {
diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 9f552ec72..93f8a06ea 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -6768,6 +6768,36 @@ mod tests {
assert_eq!(array_to_strings(&cast_array), expected);
}
+ #[test]
+ fn test_cast_time_array_to_dict() {
+ use DataType::*;
+
+ let array = Arc::new(Date32Array::from(vec![Some(1000), None, Some(2000)])) as ArrayRef;
+
+ let expected = vec!["1972-09-27", "null", "1975-06-24"];
+
+ let cast_type = Dictionary(Box::new(UInt8), Box::new(Date32));
+ let cast_array = cast(&array, &cast_type).expect("cast failed");
+ assert_eq!(cast_array.data_type(), &cast_type);
+ assert_eq!(array_to_strings(&cast_array), expected);
+ }
+
+ #[test]
+ fn test_cast_timestamp_array_to_dict() {
+ use DataType::*;
+
+ let array = Arc::new(
+ TimestampSecondArray::from(vec![Some(1000), None, Some(2000)]).with_timezone_utc(),
+ ) as ArrayRef;
+
+ let expected = vec!["1970-01-01T00:16:40", "null", "1970-01-01T00:33:20"];
+
+ let cast_type = Dictionary(Box::new(UInt8), Box::new(Timestamp(TimeUnit::Second, None)));
+ let cast_array = cast(&array, &cast_type).expect("cast failed");
+ assert_eq!(cast_array.data_type(), &cast_type);
+ assert_eq!(array_to_strings(&cast_array), expected);
+ }
+
#[test]
fn test_cast_string_array_to_dict() {
use DataType::*;