[ 
https://issues.apache.org/jira/browse/ARROW-1730?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16221614#comment-16221614
 ] 

ASF GitHub Bot commented on ARROW-1730:
---------------------------------------

wesm closed pull request #1256: ARROW-1730, ARROW-1738: [Python] Fix wrong datetime conversion
URL: https://github.com/apache/arrow/pull/1256
 
 
   

This pull request was merged from a forked repository. Because GitHub
hides the original diff of a foreign (fork) pull request once it is
merged, the diff is reproduced below for the sake of provenance:

diff --git a/cpp/src/arrow/python/builtin_convert.cc b/cpp/src/arrow/python/builtin_convert.cc
index f7862d151..d52627ebf 100644
--- a/cpp/src/arrow/python/builtin_convert.cc
+++ b/cpp/src/arrow/python/builtin_convert.cc
@@ -522,18 +522,51 @@ class UInt64Converter : public TypedConverterVisitor<UInt64Builder, UInt64Conver
 class DateConverter : public TypedConverterVisitor<Date64Builder, DateConverter> {
  public:
   inline Status AppendItem(const OwnedRef& item) {
-    auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
-    return typed_builder_->Append(PyDate_to_ms(pydate));
+    int64_t t;
+    if (PyDate_Check(item.obj())) {
+      auto pydate = reinterpret_cast<PyDateTime_Date*>(item.obj());
+      t = PyDate_to_ms(pydate);
+    } else {
+      t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+      RETURN_IF_PYERROR();
+    }
+    return typed_builder_->Append(t);
   }
 };
 
 class TimestampConverter
     : public TypedConverterVisitor<Date64Builder, TimestampConverter> {
  public:
+  explicit TimestampConverter(TimeUnit::type unit) : unit_(unit) {}
+
   inline Status AppendItem(const OwnedRef& item) {
-    auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
-    return typed_builder_->Append(PyDateTime_to_us(pydatetime));
+    int64_t t;
+    if (PyDateTime_Check(item.obj())) {
+      auto pydatetime = reinterpret_cast<PyDateTime_DateTime*>(item.obj());
+
+      switch (unit_) {
+        case TimeUnit::SECOND:
+          t = PyDateTime_to_s(pydatetime);
+          break;
+        case TimeUnit::MILLI:
+          t = PyDateTime_to_ms(pydatetime);
+          break;
+        case TimeUnit::MICRO:
+          t = PyDateTime_to_us(pydatetime);
+          break;
+        case TimeUnit::NANO:
+          t = PyDateTime_to_ns(pydatetime);
+          break;
+      }
+    } else {
+      t = static_cast<int64_t>(PyLong_AsLongLong(item.obj()));
+      RETURN_IF_PYERROR();
+    }
+    return typed_builder_->Append(t);
   }
+
+ private:
+  TimeUnit::type unit_;
 };
 
 class DoubleConverter : public TypedConverterVisitor<DoubleBuilder, DoubleConverter> {
@@ -687,7 +720,8 @@ std::shared_ptr<SeqConverter> GetConverter(const std::shared_ptr<DataType>& type
     case Type::DATE64:
       return std::make_shared<DateConverter>();
     case Type::TIMESTAMP:
-      return std::make_shared<TimestampConverter>();
+      return std::make_shared<TimestampConverter>(
+          static_cast<const TimestampType&>(*type).unit());
     case Type::DOUBLE:
       return std::make_shared<DoubleConverter>();
     case Type::BINARY:
diff --git a/cpp/src/arrow/python/util/datetime.h b/cpp/src/arrow/python/util/datetime.h
index 782960f62..c110bc64a 100644
--- a/cpp/src/arrow/python/util/datetime.h
+++ b/cpp/src/arrow/python/util/datetime.h
@@ -247,12 +247,26 @@ static inline int64_t PyDate_to_ms(PyDateTime_Date* pydate) {
   return total_seconds * 1000;
 }
 
+static inline int64_t PyDateTime_to_s(PyDateTime_DateTime* pydatetime) {
+  return PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime)) / 1000LL;
+}
+
+static inline int64_t PyDateTime_to_ms(PyDateTime_DateTime* pydatetime) {
+  int64_t date_ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
+  int ms = PyDateTime_DATE_GET_MICROSECOND(pydatetime) / 1000;
+  return date_ms + ms;
+}
+
 static inline int64_t PyDateTime_to_us(PyDateTime_DateTime* pydatetime) {
   int64_t ms = PyDate_to_ms(reinterpret_cast<PyDateTime_Date*>(pydatetime));
   int us = PyDateTime_DATE_GET_MICROSECOND(pydatetime);
   return ms * 1000 + us;
 }
 
+static inline int64_t PyDateTime_to_ns(PyDateTime_DateTime* pydatetime) {
+  return PyDateTime_to_us(pydatetime) * 1000;
+}
+
 static inline int32_t PyDate_to_days(PyDateTime_Date* pydate) {
   return static_cast<int32_t>(PyDate_to_ms(pydate) / 86400000LL);
 }
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index d18ed9506..414266ddb 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -197,6 +197,75 @@ def test_timestamp(self):
         assert arr[3].as_py() == datetime.datetime(2010, 8, 13, 5,
                                                    46, 57, 437699)
 
+    def test_timestamp_with_unit(self):
+        data = [
+            datetime.datetime(2007, 7, 13, 1, 23, 34, 123456),
+        ]
+
+        s = pa.timestamp('s')
+        ms = pa.timestamp('ms')
+        us = pa.timestamp('us')
+        ns = pa.timestamp('ns')
+
+        arr_s = pa.array(data, type=s)
+        assert len(arr_s) == 1
+        assert arr_s.type == s
+        assert arr_s[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                     23, 34, 0)
+
+        arr_ms = pa.array(data, type=ms)
+        assert len(arr_ms) == 1
+        assert arr_ms.type == ms
+        assert arr_ms[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                      23, 34, 123000)
+
+        arr_us = pa.array(data, type=us)
+        assert len(arr_us) == 1
+        assert arr_us.type == us
+        assert arr_us[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                      23, 34, 123456)
+
+        arr_ns = pa.array(data, type=ns)
+        assert len(arr_ns) == 1
+        assert arr_ns.type == ns
+        assert arr_ns[0].as_py() == datetime.datetime(2007, 7, 13, 1,
+                                                      23, 34, 123456)
+
+    def test_timestamp_from_int_with_unit(self):
+        data = [1]
+
+        s = pa.timestamp('s')
+        ms = pa.timestamp('ms')
+        us = pa.timestamp('us')
+        ns = pa.timestamp('ns')
+
+        arr_s = pa.array(data, type=s)
+        assert len(arr_s) == 1
+        assert arr_s.type == s
+        assert str(arr_s[0]) == "Timestamp('1970-01-01 00:00:01')"
+
+        arr_ms = pa.array(data, type=ms)
+        assert len(arr_ms) == 1
+        assert arr_ms.type == ms
+        assert str(arr_ms[0]) == "Timestamp('1970-01-01 00:00:00.001000')"
+
+        arr_us = pa.array(data, type=us)
+        assert len(arr_us) == 1
+        assert arr_us.type == us
+        assert str(arr_us[0]) == "Timestamp('1970-01-01 00:00:00.000001')"
+
+        arr_ns = pa.array(data, type=ns)
+        assert len(arr_ns) == 1
+        assert arr_ns.type == ns
+        assert str(arr_ns[0]) == "Timestamp('1970-01-01 00:00:00.000000001')"
+
+        with pytest.raises(pa.ArrowException):
+            class CustomClass():
+                pass
+            pa.array([1, CustomClass()], type=ns)
+            pa.array([1, CustomClass()], type=pa.date32())
+            pa.array([1, CustomClass()], type=pa.date64())
+
     def test_mixed_nesting_levels(self):
         pa.array([1, 2, None])
         pa.array([[1], [2], None])


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


> [Python] Incorrect result from pyarrow.array when passing timestamp type
> ------------------------------------------------------------------------
>
>                 Key: ARROW-1730
>                 URL: https://issues.apache.org/jira/browse/ARROW-1730
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Python
>            Reporter: Wes McKinney
>            Assignee: Licht Takeuchi
>              Labels: pull-request-available
>             Fix For: 0.8.0
>
>
> Even with the ARROW-1484 patch, we have:
> {code: language=python}
> In [10]: pa.array([0], type=pa.timestamp('ns'))
> Out[10]: 
> <pyarrow.lib.TimestampArray object at 0x7f9145b27098>
> [
>   Timestamp('1968-01-12 11:18:14.409378304')
> ]
> In [11]: pa.array([0], type='int64').cast(pa.timestamp('ns'))
> Out[11]: 
> <pyarrow.lib.TimestampArray object at 0x7f9145b27d18>
> [
>   Timestamp('1970-01-01 00:00:00')
> ]
> {code}



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to