alamb commented on a change in pull request #1112:
URL: https://github.com/apache/arrow-rs/pull/1112#discussion_r777108645
##########
File path: arrow/src/csv/reader.rs
##########
@@ -520,47 +554,60 @@ fn parse(
DataType::Decimal(precision, scale) => {
build_decimal_array(line_number, rows, i, *precision,
*scale)
}
- DataType::Int8 =>
build_primitive_array::<Int8Type>(line_number, rows, i),
+ DataType::Int8 => {
+ build_primitive_array::<Int8Type>(line_number, rows, i,
None)
+ }
DataType::Int16 => {
- build_primitive_array::<Int16Type>(line_number, rows, i)
+ build_primitive_array::<Int16Type>(line_number, rows, i,
None)
}
DataType::Int32 => {
- build_primitive_array::<Int32Type>(line_number, rows, i)
+ build_primitive_array::<Int32Type>(line_number, rows, i,
None)
}
DataType::Int64 => {
- build_primitive_array::<Int64Type>(line_number, rows, i)
+ build_primitive_array::<Int64Type>(line_number, rows, i,
None)
}
DataType::UInt8 => {
- build_primitive_array::<UInt8Type>(line_number, rows, i)
+ build_primitive_array::<UInt8Type>(line_number, rows, i,
None)
}
DataType::UInt16 => {
- build_primitive_array::<UInt16Type>(line_number, rows, i)
+ build_primitive_array::<UInt16Type>(line_number, rows, i,
None)
}
DataType::UInt32 => {
- build_primitive_array::<UInt32Type>(line_number, rows, i)
+ build_primitive_array::<UInt32Type>(line_number, rows, i,
None)
}
DataType::UInt64 => {
- build_primitive_array::<UInt64Type>(line_number, rows, i)
+ build_primitive_array::<UInt64Type>(line_number, rows, i,
None)
}
DataType::Float32 => {
- build_primitive_array::<Float32Type>(line_number, rows, i)
+ build_primitive_array::<Float32Type>(line_number, rows, i,
None)
}
DataType::Float64 => {
- build_primitive_array::<Float64Type>(line_number, rows, i)
+ build_primitive_array::<Float64Type>(line_number, rows, i,
None)
}
DataType::Date32 => {
- build_primitive_array::<Date32Type>(line_number, rows, i)
- }
- DataType::Date64 => {
- build_primitive_array::<Date64Type>(line_number, rows, i)
+ build_primitive_array::<Date32Type>(line_number, rows, i,
None)
}
- DataType::Timestamp(TimeUnit::Microsecond, _) =>
build_primitive_array::<
- TimestampMicrosecondType,
- >(
- line_number, rows, i
+ DataType::Date64 => build_primitive_array::<Date64Type>(
+ line_number,
+ rows,
+ i,
+ datetime_format.clone(),
),
+ DataType::Timestamp(TimeUnit::Microsecond, _) => {
+ build_primitive_array::<TimestampMicrosecondType>(
+ line_number,
+ rows,
+ i,
+ None,
Review comment:
`datetime_format`?
##########
File path: arrow/src/csv/reader.rs
##########
@@ -316,6 +323,8 @@ pub struct Reader<R: Read> {
batch_size: usize,
/// Vector that can hold the `StringRecord`s of the batches
batch_records: Vec<StringRecord>,
+ /// datetime format used to parse datetime values, (format understood by
chrono)
Review comment:
Can you please provide the link to the appropriate chrono documentation?
##########
File path: arrow/src/csv/reader.rs
##########
@@ -520,47 +554,60 @@ fn parse(
DataType::Decimal(precision, scale) => {
build_decimal_array(line_number, rows, i, *precision,
*scale)
}
- DataType::Int8 =>
build_primitive_array::<Int8Type>(line_number, rows, i),
+ DataType::Int8 => {
+ build_primitive_array::<Int8Type>(line_number, rows, i,
None)
+ }
DataType::Int16 => {
- build_primitive_array::<Int16Type>(line_number, rows, i)
+ build_primitive_array::<Int16Type>(line_number, rows, i,
None)
}
DataType::Int32 => {
- build_primitive_array::<Int32Type>(line_number, rows, i)
+ build_primitive_array::<Int32Type>(line_number, rows, i,
None)
}
DataType::Int64 => {
- build_primitive_array::<Int64Type>(line_number, rows, i)
+ build_primitive_array::<Int64Type>(line_number, rows, i,
None)
}
DataType::UInt8 => {
- build_primitive_array::<UInt8Type>(line_number, rows, i)
+ build_primitive_array::<UInt8Type>(line_number, rows, i,
None)
}
DataType::UInt16 => {
- build_primitive_array::<UInt16Type>(line_number, rows, i)
+ build_primitive_array::<UInt16Type>(line_number, rows, i,
None)
}
DataType::UInt32 => {
- build_primitive_array::<UInt32Type>(line_number, rows, i)
+ build_primitive_array::<UInt32Type>(line_number, rows, i,
None)
}
DataType::UInt64 => {
- build_primitive_array::<UInt64Type>(line_number, rows, i)
+ build_primitive_array::<UInt64Type>(line_number, rows, i,
None)
}
DataType::Float32 => {
- build_primitive_array::<Float32Type>(line_number, rows, i)
+ build_primitive_array::<Float32Type>(line_number, rows, i,
None)
}
DataType::Float64 => {
- build_primitive_array::<Float64Type>(line_number, rows, i)
+ build_primitive_array::<Float64Type>(line_number, rows, i,
None)
}
DataType::Date32 => {
- build_primitive_array::<Date32Type>(line_number, rows, i)
- }
- DataType::Date64 => {
- build_primitive_array::<Date64Type>(line_number, rows, i)
+ build_primitive_array::<Date32Type>(line_number, rows, i,
None)
Review comment:
Should this also have `datetime_format`?
##########
File path: arrow/src/csv/reader.rs
##########
@@ -520,47 +554,60 @@ fn parse(
DataType::Decimal(precision, scale) => {
build_decimal_array(line_number, rows, i, *precision,
*scale)
}
- DataType::Int8 =>
build_primitive_array::<Int8Type>(line_number, rows, i),
+ DataType::Int8 => {
+ build_primitive_array::<Int8Type>(line_number, rows, i,
None)
+ }
DataType::Int16 => {
- build_primitive_array::<Int16Type>(line_number, rows, i)
+ build_primitive_array::<Int16Type>(line_number, rows, i,
None)
}
DataType::Int32 => {
- build_primitive_array::<Int32Type>(line_number, rows, i)
+ build_primitive_array::<Int32Type>(line_number, rows, i,
None)
}
DataType::Int64 => {
- build_primitive_array::<Int64Type>(line_number, rows, i)
+ build_primitive_array::<Int64Type>(line_number, rows, i,
None)
}
DataType::UInt8 => {
- build_primitive_array::<UInt8Type>(line_number, rows, i)
+ build_primitive_array::<UInt8Type>(line_number, rows, i,
None)
}
DataType::UInt16 => {
- build_primitive_array::<UInt16Type>(line_number, rows, i)
+ build_primitive_array::<UInt16Type>(line_number, rows, i,
None)
}
DataType::UInt32 => {
- build_primitive_array::<UInt32Type>(line_number, rows, i)
+ build_primitive_array::<UInt32Type>(line_number, rows, i,
None)
}
DataType::UInt64 => {
- build_primitive_array::<UInt64Type>(line_number, rows, i)
+ build_primitive_array::<UInt64Type>(line_number, rows, i,
None)
}
DataType::Float32 => {
- build_primitive_array::<Float32Type>(line_number, rows, i)
+ build_primitive_array::<Float32Type>(line_number, rows, i,
None)
}
DataType::Float64 => {
- build_primitive_array::<Float64Type>(line_number, rows, i)
+ build_primitive_array::<Float64Type>(line_number, rows, i,
None)
}
DataType::Date32 => {
- build_primitive_array::<Date32Type>(line_number, rows, i)
- }
- DataType::Date64 => {
- build_primitive_array::<Date64Type>(line_number, rows, i)
+ build_primitive_array::<Date32Type>(line_number, rows, i,
None)
}
- DataType::Timestamp(TimeUnit::Microsecond, _) =>
build_primitive_array::<
- TimestampMicrosecondType,
- >(
- line_number, rows, i
+ DataType::Date64 => build_primitive_array::<Date64Type>(
+ line_number,
+ rows,
+ i,
+ datetime_format.clone(),
),
+ DataType::Timestamp(TimeUnit::Microsecond, _) => {
+ build_primitive_array::<TimestampMicrosecondType>(
+ line_number,
+ rows,
+ i,
+ None,
+ )
+ }
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
-
build_primitive_array::<TimestampNanosecondType>(line_number, rows, i)
+ build_primitive_array::<TimestampNanosecondType>(
+ line_number,
+ rows,
+ i,
+ None,
Review comment:
also here?
##########
File path: arrow/src/csv/reader.rs
##########
@@ -1041,6 +1116,20 @@ impl ReaderBuilder {
self
}
+ /// Set the datetime regex used to parse the string to Date64Type
+ /// this regex is used while infering schema
+ pub fn with_datetime_re(mut self, datetime_re: Regex) -> Self {
+ self.datetime_re = Some(datetime_re);
+ self
+ }
+
+ /// Set the datetime regex used to parse the string to Date64Type
Review comment:
Can you also add a link here to the documentation for the datetime format
strings that are understood?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]