This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new cbee739dd Format Timestamps as RFC3339 (#2939)
cbee739dd is described below
commit cbee739ddaebc3596b91754fbab9e26904d9622c
Author: Wei-Ting Kuo <[email protected]>
AuthorDate: Sat Oct 29 02:41:57 2022 +0800
Format Timestamps as RFC3339 (#2939)
* standardize-tz-display
* only test named timezone while chrono-tz enabled
* fix docs
* fix doc
---
arrow-array/src/array/primitive_array.rs | 33 ++++----
arrow/src/util/display.rs | 80 ++++++++++++++++---
arrow/src/util/pretty.rs | 129 ++++++++++++++++++++++++++++++-
3 files changed, 212 insertions(+), 30 deletions(-)
diff --git a/arrow-array/src/array/primitive_array.rs
b/arrow-array/src/array/primitive_array.rs
index 016e5306c..eb3618f7c 100644
--- a/arrow-array/src/array/primitive_array.rs
+++ b/arrow-array/src/array/primitive_array.rs
@@ -18,7 +18,10 @@
use crate::builder::{BooleanBufferBuilder, BufferBuilder, PrimitiveBuilder};
use crate::iterator::PrimitiveIter;
use crate::raw_pointer::RawPtrBox;
-use crate::temporal_conversions::{as_date, as_datetime, as_duration, as_time};
+use crate::temporal_conversions::{
+ as_date, as_datetime, as_datetime_with_timezone, as_duration, as_time,
+};
+use crate::timezone::Tz;
use crate::trusted_len::trusted_len_unzip;
use crate::types::*;
use crate::{print_long_array, Array, ArrayAccessor};
@@ -26,7 +29,7 @@ use arrow_buffer::{i256, ArrowNativeType, Buffer};
use arrow_data::bit_iterator::try_for_each_valid_idx;
use arrow_data::ArrayData;
use arrow_schema::{ArrowError, DataType};
-use chrono::{Duration, FixedOffset, NaiveDate, NaiveDateTime, NaiveTime};
+use chrono::{DateTime, Duration, NaiveDate, NaiveDateTime, NaiveTime};
use half::f16;
use std::any::Any;
@@ -116,40 +119,40 @@ pub type Float64Array = PrimitiveArray<Float64Type>;
/// # Example: UTC timestamps post epoch
/// ```
/// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
/// // Corresponds to single element array with entry 1970-05-09T14:25:11+0:00
/// let arr = TimestampSecondArray::from(vec![11111111]);
/// // OR
/// let arr = TimestampSecondArray::from(vec![Some(11111111)]);
-/// let utc_offset = FixedOffset::east(0);
+/// let utc_tz: Tz = "+00:00".parse().unwrap();
///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v|
v.to_string()).unwrap(), "1970-05-09 14:25:11")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v|
v.to_string()).unwrap(), "1970-05-09 14:25:11 +00:00")
/// ```
///
/// # Example: UTC timestamps pre epoch
/// ```
/// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
/// // Corresponds to single element array with entry 1969-08-25T09:34:49+0:00
/// let arr = TimestampSecondArray::from(vec![-11111111]);
/// // OR
/// let arr = TimestampSecondArray::from(vec![Some(-11111111)]);
-/// let utc_offset = FixedOffset::east(0);
+/// let utc_tz: Tz = "+00:00".parse().unwrap();
///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_offset).map(|v|
v.to_string()).unwrap(), "1969-08-25 09:34:49")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, utc_tz).map(|v|
v.to_string()).unwrap(), "1969-08-25 09:34:49 +00:00")
/// ```
///
/// # Example: With timezone specified
/// ```
/// # use arrow_array::TimestampSecondArray;
-/// use chrono::FixedOffset;
+/// use arrow_array::timezone::Tz;
/// // Corresponds to single element array with entry 1970-05-10T00:25:11+10:00
/// let arr =
TimestampSecondArray::from(vec![11111111]).with_timezone("+10:00".to_string());
/// // OR
/// let arr =
TimestampSecondArray::from(vec![Some(11111111)]).with_timezone("+10:00".to_string());
-/// let sydney_offset = FixedOffset::east(10 * 60 * 60);
+/// let sydney_tz: Tz = "+10:00".parse().unwrap();
///
-/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_offset).map(|v|
v.to_string()).unwrap(), "1970-05-10 00:25:11")
+/// assert_eq!(arr.value_as_datetime_with_tz(0, sydney_tz).map(|v|
v.to_string()).unwrap(), "1970-05-10 00:25:11 +10:00")
/// ```
///
pub type TimestampSecondArray = PrimitiveArray<TimestampSecondType>;
@@ -503,12 +506,8 @@ where
///
/// functionally it is same as `value_as_datetime`, however it adds
/// the passed tz to the to-be-returned NaiveDateTime
- pub fn value_as_datetime_with_tz(
- &self,
- i: usize,
- tz: FixedOffset,
- ) -> Option<NaiveDateTime> {
- as_datetime::<T>(i64::from(self.value(i))).map(|datetime| datetime +
tz)
+ pub fn value_as_datetime_with_tz(&self, i: usize, tz: Tz) ->
Option<DateTime<Tz>> {
+ as_datetime_with_timezone::<T>(i64::from(self.value(i)), tz)
}
/// Returns value as a chrono `NaiveDate` by using `Self::datetime()`
diff --git a/arrow/src/util/display.rs b/arrow/src/util/display.rs
index 7c0b5a28f..f5bef1605 100644
--- a/arrow/src/util/display.rs
+++ b/arrow/src/util/display.rs
@@ -33,6 +33,7 @@ use crate::{array, datatypes::IntervalUnit};
use array::DictionaryArray;
use crate::error::{ArrowError, Result};
+use arrow_array::timezone::Tz;
macro_rules! make_string {
($array_type:ty, $column: ident, $row: ident) => {{
@@ -190,7 +191,7 @@ macro_rules! make_string_datetime {
} else {
array
.value_as_datetime($row)
- .map(|d| d.to_string())
+ .map(|d| format!("{:?}", d))
.unwrap_or_else(|| "ERROR CONVERTING DATE".to_string())
};
@@ -198,6 +199,29 @@ macro_rules! make_string_datetime {
}};
}
+// Formats row `$row` of `$column` (downcast to `$array_type`) as an RFC 3339
+// string in the time zone named by `$tz_string`, evaluating to `Ok(String)`.
+//
+// * a null slot yields an empty string;
+// * if `$tz_string` does not parse as a `Tz`, the value is printed via
+//   `value_as_datetime` followed by an "(Unknown Time Zone '...')" note;
+// * if the stored value cannot be converted, "ERROR CONVERTING DATE" is used.
+//
+// Panics if `$column` is not actually a `$array_type` (the downcast is unwrapped).
+macro_rules! make_string_datetime_with_tz {
+    ($array_type:ty, $tz_string: ident, $column: ident, $row: ident) => {{
+        let array = $column.as_any().downcast_ref::<$array_type>().unwrap();
+
+        let s = if array.is_null($row) {
+            "".to_string()
+        } else {
+            match $tz_string.parse::<Tz>() {
+                Ok(tz) => array
+                    .value_as_datetime_with_tz($row, tz)
+                    // `to_rfc3339` already returns a `String`; no `format!` needed.
+                    .map(|d| d.to_rfc3339())
+                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
+                // Keep the value visible instead of failing the whole render
+                // when the zone string is not recognised.
+                Err(_) => array
+                    .value_as_datetime($row)
+                    .map(|d| format!("{:?} (Unknown Time Zone '{}')", d, $tz_string))
+                    .unwrap_or_else(|| "ERROR CONVERTING DATE".to_string()),
+            }
+        };
+
+        Ok(s)
+    }};
+}
+
// It's not possible to do array.value($row).to_string() for &[u8], let's
format it as hex
macro_rules! make_string_hex {
($array_type:ty, $column: ident, $row: ident) => {{
@@ -334,17 +358,55 @@ pub fn array_value_to_string(column: &array::ArrayRef,
row: usize) -> Result<Str
DataType::Float32 => make_string!(array::Float32Array, column, row),
DataType::Float64 => make_string!(array::Float64Array, column, row),
DataType::Decimal128(..) => make_string_from_decimal(column, row),
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Second => {
- make_string_datetime!(array::TimestampSecondArray, column, row)
+ DataType::Timestamp(unit, tz_string_opt) if *unit == TimeUnit::Second
=> {
+ match tz_string_opt {
+ Some(tz_string) => make_string_datetime_with_tz!(
+ array::TimestampSecondArray,
+ tz_string,
+ column,
+ row
+ ),
+ None => make_string_datetime!(array::TimestampSecondArray,
column, row),
+ }
}
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Millisecond => {
- make_string_datetime!(array::TimestampMillisecondArray, column,
row)
+ DataType::Timestamp(unit, tz_string_opt) if *unit ==
TimeUnit::Millisecond => {
+ match tz_string_opt {
+ Some(tz_string) => make_string_datetime_with_tz!(
+ array::TimestampMillisecondArray,
+ tz_string,
+ column,
+ row
+ ),
+ None => {
+ make_string_datetime!(array::TimestampMillisecondArray,
column, row)
+ }
+ }
}
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Microsecond => {
- make_string_datetime!(array::TimestampMicrosecondArray, column,
row)
+ DataType::Timestamp(unit, tz_string_opt) if *unit ==
TimeUnit::Microsecond => {
+ match tz_string_opt {
+ Some(tz_string) => make_string_datetime_with_tz!(
+ array::TimestampMicrosecondArray,
+ tz_string,
+ column,
+ row
+ ),
+ None => {
+ make_string_datetime!(array::TimestampMicrosecondArray,
column, row)
+ }
+ }
}
- DataType::Timestamp(unit, _) if *unit == TimeUnit::Nanosecond => {
- make_string_datetime!(array::TimestampNanosecondArray, column, row)
+ DataType::Timestamp(unit, tz_string_opt) if *unit ==
TimeUnit::Nanosecond => {
+ match tz_string_opt {
+ Some(tz_string) => make_string_datetime_with_tz!(
+ array::TimestampNanosecondArray,
+ tz_string,
+ column,
+ row
+ ),
+ None => {
+ make_string_datetime!(array::TimestampNanosecondArray,
column, row)
+ }
+ }
}
DataType::Date32 => make_string_date!(array::Date32Array, column, row),
DataType::Date64 => make_string_date!(array::Date64Array, column, row),
diff --git a/arrow/src/util/pretty.rs b/arrow/src/util/pretty.rs
index f819e389f..8d811223c 100644
--- a/arrow/src/util/pretty.rs
+++ b/arrow/src/util/pretty.rs
@@ -370,13 +370,134 @@ mod tests {
};
}
+    /// Builds a two-slot `$ARRAYTYPE` (the value `$VALUE` followed by a null),
+    /// tags it with the time zone `$TZ_STRING`, renders it through
+    /// `pretty_format_batches` as a single nullable column named "f", and
+    /// asserts that the rendered lines equal `$EXPECTED_RESULT`.
+    macro_rules! check_datetime_with_timezone {
+        ($ARRAYTYPE:ident, $VALUE:expr, $TZ_STRING:expr, $EXPECTED_RESULT:expr) => {
+            let mut values = $ARRAYTYPE::builder(10);
+            values.append_value($VALUE);
+            values.append_null();
+            let column = values.finish().with_timezone($TZ_STRING);
+
+            let field = Field::new("f", column.data_type().clone(), true);
+            let schema = Arc::new(Schema::new(vec![field]));
+            let batch = RecordBatch::try_new(schema, vec![Arc::new(column)]).unwrap();
+
+            let rendered = pretty_format_batches(&[batch])
+                .expect("formatting batches")
+                .to_string();
+
+            let expected = $EXPECTED_RESULT;
+            let actual: Vec<&str> = rendered.lines().collect();
+
+            assert_eq!(expected, actual, "Actual result:\n\n{:#?}\n\n", actual);
+        };
+    }
+
+    /// A timestamp tagged with the named zone "UTC" renders as RFC 3339 with
+    /// a `+00:00` offset; the null slot renders as an empty cell.
+    #[test]
+    // The cfg key set by Cargo is `feature` (singular); `features` never
+    // matches, which silently compiled this test out.
+    #[cfg(feature = "chrono-tz")]
+    fn test_pretty_format_timestamp_second_with_utc_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T14:25:11+00:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "UTC".to_string(),
+            expected
+        );
+    }
+
+    /// A timestamp tagged with the named zone "Asia/Taipei" renders as
+    /// RFC 3339 shifted to `+08:00`; the null slot renders as an empty cell.
+    #[test]
+    // The cfg key set by Cargo is `feature` (singular); `features` never
+    // matches, which silently compiled this test out.
+    #[cfg(feature = "chrono-tz")]
+    fn test_pretty_format_timestamp_second_with_non_utc_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T22:25:11+08:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "Asia/Taipei".to_string(),
+            expected
+        );
+    }
+
+    /// A timestamp tagged with the fixed offset "+08:00" renders as RFC 3339
+    /// carrying that offset; the null slot renders as an empty cell.
+    #[test]
+    fn test_pretty_format_timestamp_second_with_fixed_offset_timezone() {
+        let expected = vec![
+            "+---------------------------+",
+            "| f                         |",
+            "+---------------------------+",
+            "| 1970-05-09T22:25:11+08:00 |",
+            "|                           |",
+            "+---------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "+08:00".to_string(),
+            expected
+        );
+    }
+
+    /// An offset string without a sign ("08:00") is not a valid time zone:
+    /// the value is printed without an offset plus an "Unknown Time Zone" note.
+    #[test]
+    fn test_pretty_format_timestamp_second_with_incorrect_fixed_offset_timezone() {
+        let expected = vec![
+            "+-------------------------------------------------+",
+            "| f                                               |",
+            "+-------------------------------------------------+",
+            "| 1970-05-09T14:25:11 (Unknown Time Zone '08:00') |",
+            "|                                                 |",
+            "+-------------------------------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "08:00".to_string(),
+            expected
+        );
+    }
+
+    /// A zone name that cannot be parsed ("Unknown") falls back to printing
+    /// the value without an offset plus an "Unknown Time Zone" note.
+    #[test]
+    fn test_pretty_format_timestamp_second_with_unknown_timezone() {
+        let expected = vec![
+            "+---------------------------------------------------+",
+            "| f                                                 |",
+            "+---------------------------------------------------+",
+            "| 1970-05-09T14:25:11 (Unknown Time Zone 'Unknown') |",
+            "|                                                   |",
+            "+---------------------------------------------------+",
+        ];
+        check_datetime_with_timezone!(
+            TimestampSecondArray,
+            11111111,
+            "Unknown".to_string(),
+            expected
+        );
+    }
+
#[test]
fn test_pretty_format_timestamp_second() {
let expected = vec![
"+---------------------+",
"| f |",
"+---------------------+",
- "| 1970-05-09 14:25:11 |",
+ "| 1970-05-09T14:25:11 |",
"| |",
"+---------------------+",
];
@@ -389,7 +510,7 @@ mod tests {
"+-------------------------+",
"| f |",
"+-------------------------+",
- "| 1970-01-01 03:05:11.111 |",
+ "| 1970-01-01T03:05:11.111 |",
"| |",
"+-------------------------+",
];
@@ -402,7 +523,7 @@ mod tests {
"+----------------------------+",
"| f |",
"+----------------------------+",
- "| 1970-01-01 00:00:11.111111 |",
+ "| 1970-01-01T00:00:11.111111 |",
"| |",
"+----------------------------+",
];
@@ -415,7 +536,7 @@ mod tests {
"+-------------------------------+",
"| f |",
"+-------------------------------+",
- "| 1970-01-01 00:00:00.011111111 |",
+ "| 1970-01-01T00:00:00.011111111 |",
"| |",
"+-------------------------------+",
];