This is an automated email from the ASF dual-hosted git repository.
uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 8d296cc ARROW-2554: [Python] fix timestamp unit detection from python
lists
8d296cc is described below
commit 8d296cce6b002f5b0a79fdc3240d5f4159d83306
Author: Marco Neumann <[email protected]>
AuthorDate: Thu Jun 14 14:21:10 2018 +0200
ARROW-2554: [Python] fix timestamp unit detection from python lists
Author: Marco Neumann <[email protected]>
Closes #2118 from crepererum/ARROW-2554 and squashes the following commits:
850765e2 <Marco Neumann> ARROW-2554: fix timestamp unit detection from
python lists
---
cpp/src/arrow/python/builtin_convert.cc | 33 ++++++++++++++++++++++++++++-----
python/pyarrow/tests/test_array.py | 11 +++++++++++
2 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index dc0ae8c..49f2b31 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -62,7 +62,10 @@ class TypeInferrer {
bool_count_(0),
int_count_(0),
date_count_(0),
- timestamp_count_(0),
+ timestamp_second_count_(0),
+ timestamp_milli_count_(0),
+ timestamp_micro_count_(0),
+ timestamp_nano_count_(0),
float_count_(0),
binary_count_(0),
unicode_count_(0),
@@ -94,7 +97,7 @@ class TypeInferrer {
} else if (PyDate_CheckExact(obj)) {
++date_count_;
} else if (PyDateTime_CheckExact(obj)) {
- ++timestamp_count_;
+ ++timestamp_micro_count_;
} else if (internal::IsPyBinary(obj)) {
++binary_count_;
} else if (PyUnicode_Check(obj)) {
@@ -107,7 +110,18 @@ class TypeInferrer {
} else if (is_floating(type->id())) {
++float_count_;
} else if (type->id() == Type::TIMESTAMP) {
- ++timestamp_count_;
+ const auto& type2 = checked_cast<TimestampType&>(*type);
+ if (type2.unit() == TimeUnit::NANO) {
+ ++timestamp_nano_count_;
+ } else if (type2.unit() == TimeUnit::MICRO) {
+ ++timestamp_micro_count_;
+ } else if (type2.unit() == TimeUnit::MILLI) {
+ ++timestamp_milli_count_;
+ } else if (type2.unit() == TimeUnit::SECOND) {
+ ++timestamp_second_count_;
+ } else {
+ throw std::runtime_error("Unknown unit of TimestampType");
+ }
} else {
std::ostringstream ss;
ss << "Found a NumPy scalar with Arrow dtype that we cannot handle: ";
@@ -168,8 +182,14 @@ class TypeInferrer {
return int64();
} else if (date_count_) {
return date64();
- } else if (timestamp_count_) {
+ } else if (timestamp_nano_count_) {
+ return timestamp(TimeUnit::NANO);
+ } else if (timestamp_micro_count_) {
return timestamp(TimeUnit::MICRO);
+ } else if (timestamp_milli_count_) {
+ return timestamp(TimeUnit::MILLI);
+ } else if (timestamp_second_count_) {
+ return timestamp(TimeUnit::SECOND);
} else if (bool_count_) {
return boolean();
} else if (binary_count_) {
@@ -236,7 +256,10 @@ class TypeInferrer {
int64_t bool_count_;
int64_t int_count_;
int64_t date_count_;
- int64_t timestamp_count_;
+ int64_t timestamp_second_count_;
+ int64_t timestamp_milli_count_;
+ int64_t timestamp_micro_count_;
+ int64_t timestamp_nano_count_;
int64_t float_count_;
int64_t binary_count_;
int64_t unicode_count_;
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index f30203c..a0b1a51 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -877,3 +877,14 @@ def test_nested_dictionary_array():
dict_arr = pa.DictionaryArray.from_arrays([0, 1, 0], ['a', 'b'])
dict_arr2 = pa.DictionaryArray.from_arrays([0, 1, 2, 1, 0], dict_arr)
assert dict_arr2.to_pylist() == ['a', 'b', 'a', 'b', 'a']
+
+
+@pytest.mark.parametrize('unit', ['ns', 'us', 'ms', 's'])
+def test_timestamp_units_from_list(unit):
+ x = np.datetime64('2017-01-01 01:01:01.111111111', unit)
+ a1 = pa.array([x])
+ a2 = pa.array([x], type=pa.timestamp(unit))
+
+ assert a1.type == a2.type
+ assert a1.type.unit == unit
+ assert a1[0] == a2[0]
--
To stop receiving notification emails like this one, please contact
[email protected].