This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 8d296cc  ARROW-2554: [Python] fix timestamp unit detection from python lists
8d296cc is described below

commit 8d296cce6b002f5b0a79fdc3240d5f4159d83306
Author: Marco Neumann <marco.neum...@blue-yonder.com>
AuthorDate: Thu Jun 14 14:21:10 2018 +0200

    ARROW-2554: [Python] fix timestamp unit detection from python lists
    
    Author: Marco Neumann <marco.neum...@blue-yonder.com>
    
    Closes #2118 from crepererum/ARROW-2554 and squashes the following commits:
    
    850765e2 <Marco Neumann> ARROW-2554: fix timestamp unit detection from python lists
---
 cpp/src/arrow/python/builtin_convert.cc | 33 ++++++++++++++++++++++++++++-----
 python/pyarrow/tests/test_array.py      | 11 +++++++++++
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index dc0ae8c..49f2b31 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -62,7 +62,10 @@ class TypeInferrer {
         bool_count_(0),
         int_count_(0),
         date_count_(0),
-        timestamp_count_(0),
+        timestamp_second_count_(0),
+        timestamp_milli_count_(0),
+        timestamp_micro_count_(0),
+        timestamp_nano_count_(0),
         float_count_(0),
         binary_count_(0),
         unicode_count_(0),
@@ -94,7 +97,7 @@ class TypeInferrer {
     } else if (PyDate_CheckExact(obj)) {
       ++date_count_;
     } else if (PyDateTime_CheckExact(obj)) {
-      ++timestamp_count_;
+      ++timestamp_micro_count_;
     } else if (internal::IsPyBinary(obj)) {
       ++binary_count_;
     } else if (PyUnicode_Check(obj)) {
@@ -107,7 +110,18 @@ class TypeInferrer {
       } else if (is_floating(type->id())) {
         ++float_count_;
       } else if (type->id() == Type::TIMESTAMP) {
-        ++timestamp_count_;
+        const auto& type2 = checked_cast<TimestampType&>(*type);
+        if (type2.unit() == TimeUnit::NANO) {
+          ++timestamp_nano_count_;
+        } else if (type2.unit() == TimeUnit::MICRO) {
+          ++timestamp_micro_count_;
+        } else if (type2.unit() == TimeUnit::MILLI) {
+          ++timestamp_milli_count_;
+        } else if (type2.unit() == TimeUnit::SECOND) {
+          ++timestamp_second_count_;
+        } else {
+          throw std::runtime_error("Unknown unit of TimestampType");
+        }
       } else {
         std::ostringstream ss;
         ss << "Found a NumPy scalar with Arrow dtype that we cannot handle: ";
@@ -168,8 +182,14 @@ class TypeInferrer {
       return int64();
     } else if (date_count_) {
       return date64();
-    } else if (timestamp_count_) {
+    } else if (timestamp_nano_count_) {
+      return timestamp(TimeUnit::NANO);
+    } else if (timestamp_micro_count_) {
       return timestamp(TimeUnit::MICRO);
+    } else if (timestamp_milli_count_) {
+      return timestamp(TimeUnit::MILLI);
+    } else if (timestamp_second_count_) {
+      return timestamp(TimeUnit::SECOND);
     } else if (bool_count_) {
       return boolean();
     } else if (binary_count_) {
@@ -236,7 +256,10 @@ class TypeInferrer {
   int64_t bool_count_;
   int64_t int_count_;
   int64_t date_count_;
-  int64_t timestamp_count_;
+  int64_t timestamp_second_count_;
+  int64_t timestamp_milli_count_;
+  int64_t timestamp_micro_count_;
+  int64_t timestamp_nano_count_;
   int64_t float_count_;
   int64_t binary_count_;
   int64_t unicode_count_;
diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py
index f30203c..a0b1a51 100644
--- a/python/pyarrow/tests/test_array.py
+++ b/python/pyarrow/tests/test_array.py
@@ -877,3 +877,14 @@ def test_nested_dictionary_array():
     dict_arr = pa.DictionaryArray.from_arrays([0, 1, 0], ['a', 'b'])
     dict_arr2 = pa.DictionaryArray.from_arrays([0, 1, 2, 1, 0], dict_arr)
     assert dict_arr2.to_pylist() == ['a', 'b', 'a', 'b', 'a']
+
+
+@pytest.mark.parametrize('unit', ['ns', 'us', 'ms', 's'])
+def test_timestamp_units_from_list(unit):
+    x = np.datetime64('2017-01-01 01:01:01.111111111', unit)
+    a1 = pa.array([x])
+    a2 = pa.array([x], type=pa.timestamp(unit))
+
+    assert a1.type == a2.type
+    assert a1.type.unit == unit
+    assert a1[0] == a2[0]

-- 
To stop receiving notification emails like this one, please contact
u...@apache.org.

Reply via email to