This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new cbee739dd Format Timestamps as RFC3339 (#2939)
cbee739dd is described below

commit cbee739ddaebc3596b91754fbab9e26904d9622c
Author: Wei-Ting Kuo <[email protected]>
AuthorDate: Sat Oct 29 02:41:57 2022 +0800

    Format Timestamps as RFC3339 (#2939)
    
    * standarize-tz-display
    
    * only test named timezone while chrono-tz enabled
    
    * fix docs
    
    * fix doc
---
 arrow-array/src/array/primitive_array.rs |  33 ++++----
 arrow/src/util/display.rs                |  80 ++++++++++++++++---
 arrow/src/util/pretty.rs                 | 129 ++++++++++++++++++++++++++++++-
 3 files changed, 212 insertions(+), 30 deletions(-)

diff --git a/arrow-array/src/array/primitive_array.rs 
b/arrow-array/src/array/primitive_array.rs
index 016e5306c..eb3618f7c 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -18,7 +18,10 @@
 use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
 use crate::iterator::PrimitiveIter;
 use crate::raw_pointer::RawPtrBox;
-use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time};
+use crate::temporal_conversions::{
+    as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time,
+};
+use crate::timezone::Tz;
 use crate::trusted_len::trusted_len_unzip;
 use crate::types::*;
 use crate::{print_long_array, Array, ArrayAccessor};
@@ -26,7 +29,7 @@ use arrow_buffer::{i256, ArrowNativeType, Buffer};
 use arrow_data::bit_iterator::try_for_each_valid_idx;
 use arrow_data::ArrayData;
 use arrow_schema::{ArrowError, DataType};
-use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
+use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime};
 use half::f16;
 use std::any::Any;
 
@@ -116,40 +119,40 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
 /// # Example: UTC timestamps post epoch
 /// ```
 /// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
 /// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00
 /// let arr = TimestampSecondArray::from(vec![11111111]);
 /// // OR
 /// let arr = TimestampSecondArray::from(vec![Some(11111111)]);
-/// let utc_offset = FixedOffset::east(0);
+/// let utc_tz: Tz = "+00:00".parse().unwrap();
 ///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| 
v.to_string()).unwrap(), "1970-05-09 14:25:11")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| 
v.to_string()).unwrap(), "1970-05-09 14:25:11 +00:00")
 /// ```
 ///
 /// # Example: UTC timestamps pre epoch
 /// ```
 /// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
 /// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00
 /// let arr = TimestampSecondArray::from(vec![-11111111]);
 /// // OR
 /// let arr = TimestampSecondArray::from(vec![Some(-11111111)]);
-/// let utc_offset = FixedOffset::east(0);
+/// let utc_tz: Tz = "+00:00".parse().unwrap();
 ///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v| 
v.to_string()).unwrap(), "1969-08-25 09:34:49")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v| 
v.to_string()).unwrap(), "1969-08-25 09:34:49 +00:00")
 /// ```
 ///
 /// # Example: With timezone specified
 /// ```
 /// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
 /// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00
 /// let arr = 
TimestampSecondArray::from(vec![11111111]).with_timezone("+10:00".to_string());
 /// // OR
 /// let arr = 
TimestampSecondArray::from(vec![Some(11111111)]).with_timezone("+10:00".to_string());
-/// let sydney_offset = FixedOffset::east(10 * 60 * 60);
+/// let sydney_tz: Tz = "+10:00".parse().unwrap();
 ///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_offset).map(|v| 
v.to_string()).unwrap(), "1970-05-10 00:25:11")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v| 
v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00")
 /// ```
 ///
 pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
@@ -503,12 +506,8 @@ where
     ///
     /// functionally it is same as `value_as_datetime`, however it adds
     /// the passed tz to the to-be-returned NaiveDateTime
-    pub fn value_as_datetime_with_tz(
-        &self,
-        i: usize,
-        tz: FixedOffset,
-    ) -> Option<NaiveDateTime> {
-        as_datetime::<T>(i64::from(self.value(i))).map(|datetime| datetime + 
tz)
+    pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) -> 
Option<DateTime<Tz>> {
+        as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz)
     }
 
     /// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 7c0b5a28f..f5bef1605 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -33,6 +33,7 @@ use crate::{array, datatypes::IntervalUnit};
 use array::DictionaryArray;
 
 use crate::error::{ArrowError, Result};
+use arrow_array::timezone::Tz;
 
 macro_rules! make_string {
     ($array_type:ty, $column: ident, $row: ident) => {{
@@ -190,7 +191,7 @@ macro_rules! make_string_datetime {
         } else {
             array
                 .value_as_datetime($row)
-                .map(|d| d.to_string())
+                .map(|d| format!("{:?}", d))
                 .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
         };
 
@@ -198,6 +199,29 @@ macro_rules! make_string_datetime {
     }};
 }
 
+macro_rules! make_string_datetime_with_tz {
+    ($array_type:ty, $tz_string: ident, $column: ident, $row: ident) => {{
+        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+
+        let s = if array.is_null($row) {
+            "".to_string() // null rows render as an empty string
+        } else {
+            match $tz_string.parse::<Tz>() {
+                Ok(tz) => array // zone parsed: render the value as RFC 3339
+                    .value_as_datetime_with_tz($row, tz)
+                    .map(|d| d.to_rfc3339())
+                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
+                Err(_) => array // zone not parseable: show the naive value and name the zone
+                    .value_as_datetime($row)
+                    .map(|d| format!("{:?} (Unknown Time Zone '{}')", d, 
$tz_string))
+                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
+            }
+        };
+
+        Ok(s)
+    }};
+}
+
 // It's not possible to do array.value($row).to_string() for &[u8], let's 
format it as hex
 macro_rules! make_string_hex {
     ($array_type:ty, $column: ident, $row: ident) => {{
@@ -334,17 +358,55 @@ pub fn array_value_to_string(column: &array::ArrayRef, 
row: usize) -> Result<Str
         DataType::Float32 => make_string!(array::Float32Array, column, row),
         DataType::Float64 => make_string!(array::Float64Array, column, row),
         DataType::Decimal128(..) => make_string_from_decimal(column, row),
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
-            make_string_datetime!(array::TimestampSecondArray, column, row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Second 
=> {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampSecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => make_string_datetime!(array::TimestampSecondArray, 
column, row),
+            }
         }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
-            make_string_datetime!(array::TimestampMillisecondArray, column, 
row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == 
TimeUnit::Millisecond => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampMillisecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => {
+                    make_string_datetime!(array::TimestampMillisecondArray, 
column, row)
+                }
+            }
         }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
-            make_string_datetime!(array::TimestampMicrosecondArray, column, 
row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == 
TimeUnit::Microsecond => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampMicrosecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => {
+                    make_string_datetime!(array::TimestampMicrosecondArray, 
column, row)
+                }
+            }
         }
-        DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
-            make_string_datetime!(array::TimestampNanosecondArray, column, row)
+        DataType::Timestamp(unit, tz_string_opt) if *unit == 
TimeUnit::Nanosecond => {
+            match tz_string_opt {
+                Some(tz_string) => make_string_datetime_with_tz!(
+                    array::TimestampNanosecondArray,
+                    tz_string,
+                    column,
+                    row
+                ),
+                None => {
+                    make_string_datetime!(array::TimestampNanosecondArray, 
column, row)
+                }
+            }
         }
         DataType::Date32 => make_string_date!(array::Date32Array, column, row),
         DataType::Date64 => make_string_date!(array::Date64Array, column, row),
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index f819e389f..8d811223c 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -370,13 +370,134 @@ mod tests {
         };
     }
 
+    /// Generate an array with type $ARRAYTYPE holding $VALUE followed by a
+    /// null, attach the timezone $TZ_STRING, and compare $EXPECTED_RESULT to
+    /// the output of formatting that array with `pretty_format_batches`
+    macro_rules! check_datetime_with_timezone {
+        ($ARRAYTYPE:ident, $VALUE:expr, $TZ_STRING:expr, 
$EXPECTED_RESULT:expr) => {
+            let mut builder = $ARRAYTYPE::builder(10);
+            builder.append_value($VALUE);
+            builder.append_null();
+            let array = builder.finish();
+            let array = array.with_timezone($TZ_STRING);
+
+            let schema = Arc::new(Schema::new(vec![Field::new(
+                "f",
+                array.data_type().clone(),
+                true,
+            )]));
+            let batch = RecordBatch::try_new(schema, 
vec![Arc::new(array)]).unwrap();
+
+            let table = pretty_format_batches(&[batch])
+                .expect("formatting batches")
+                .to_string();
+
+            let expected = $EXPECTED_RESULT;
+            let actual: Vec<&str> = table.lines().collect();
+
+            assert_eq!(expected, actual, "Actual result:\n\n{:#?}\n\n", 
actual);
+        };
+    }
+
+    #[test]
+    #[cfg(feature = "chrono-tz")] // the cfg key is `feature`; `features` never matches, so the test was always skipped
+    fn test_pretty_format_timestamp_second_with_utc_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T14:25:11+00:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "UTC".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    #[cfg(feature = "chrono-tz")] // the cfg key is `feature`; `features` never matches, so the test was always skipped
+    fn test_pretty_format_timestamp_second_with_non_utc_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T22:25:11+08:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "Asia/Taipei".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_pretty_format_timestamp_second_with_fixed_offset_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T22:25:11+08:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "+08:00".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    fn 
test_pretty_format_timestamp_second_with_incorrect_fixed_offset_timezone() {
+        let expected = vec![
+            "+-------------------------------------------------+",
+            "| f                                               |",
+            "+-------------------------------------------------+",
+            "| 1970-05-09T14:25:11 (Unknown Time Zone '08:00') |",
+            "|                                                 |",
+            "+-------------------------------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "08:00".to_string(),
+            expected
+        );
+    }
+
+    #[test]
+    fn test_pretty_format_timestamp_second_with_unknown_timezone() {
+        let expected = vec![
+            "+---------------------------------------------------+",
+            "| f                                                 |",
+            "+---------------------------------------------------+",
+            "| 1970-05-09T14:25:11 (Unknown Time Zone 'Unknown') |",
+            "|                                                   |",
+            "+---------------------------------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "Unknown".to_string(),
+            expected
+        );
+    }
+
     #[test]
     fn test_pretty_format_timestamp_second() {
         let expected = vec![
             "+---------------------+",
             "| f                   |",
             "+---------------------+",
-            "| 1970-05-09 14:25:11 |",
+            "| 1970-05-09T14:25:11 |",
             "|                     |",
             "+---------------------+",
         ];
@@ -389,7 +510,7 @@ mod tests {
             "+-------------------------+",
             "| f                       |",
             "+-------------------------+",
-            "| 1970-01-01 03:05:11.111 |",
+            "| 1970-01-01T03:05:11.111 |",
             "|                         |",
             "+-------------------------+",
         ];
@@ -402,7 +523,7 @@ mod tests {
             "+----------------------------+",
             "| f                          |",
             "+----------------------------+",
-            "| 1970-01-01 00:00:11.111111 |",
+            "| 1970-01-01T00:00:11.111111 |",
             "|                            |",
             "+----------------------------+",
         ];
@@ -415,7 +536,7 @@ mod tests {
             "+-------------------------------+",
             "| f                             |",
             "+-------------------------------+",
-            "| 1970-01-01 00:00:00.011111111 |",
+            "| 1970-01-01T00:00:00.011111111 |",
             "|                               |",
             "+-------------------------------+",
         ];

Reply via email to