This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git

commit 1ae9235458206fa6092d3dc5787b0d3ab97c05dd
Author: Vivekanand Vellanki <[email protected]>
AuthorDate: Mon Oct 1 17:34:56 2018 +0530

    [Gandiva] Added support for months_between
    
    Change-Id: I3b5553d490e95ec367cc2281c7b0abcf433c51fa
---
 cpp/src/gandiva/function_registry.cc     |  3 ++
 cpp/src/gandiva/precompiled/time.cc      | 56 ++++++++++++++++++++++++++++++
 cpp/src/gandiva/precompiled/time_test.cc | 30 ++++++++++++++++
 cpp/src/gandiva/precompiled/types.h      |  1 +
 cpp/src/gandiva/tests/date_time_test.cc  | 59 ++++++++++++++++++++++++++++++++
 5 files changed, 149 insertions(+)

diff --git a/cpp/src/gandiva/function_registry.cc 
b/cpp/src/gandiva/function_registry.cc
index 033d369..c0ead6e 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -218,6 +218,9 @@ NativeFunction FunctionRegistry::pc_registry_[] = {
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractSecond),
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch),
 
+    BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64),
+    BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, 
float64),
+
     // date_trunc operations on date/timestamp
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Millennium),
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Century),
diff --git a/cpp/src/gandiva/precompiled/time.cc 
b/cpp/src/gandiva/precompiled/time.cc
index 1206e1f..3903da1 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -27,6 +27,9 @@ extern "C" {
 
 #define MINS_IN_HOUR 60
 #define SECONDS_IN_MINUTE 60
+// Outer parentheses are required: without them `x / (double)SECONDS_IN_HOUR` expands to
+// `x / (double)(60) * (60)`, i.e. the cast and the division bind to the first factor only.
+#define SECONDS_IN_HOUR ((SECONDS_IN_MINUTE) * (MINS_IN_HOUR))
+
+#define HOURS_IN_DAY 24
 
 // Expand inner macro for all date types.
 #define DATE_TYPES(INNER) \
@@ -447,4 +450,57 @@ DATE_TRUNC_FUNCTIONS(timestamp)
 FORCE_INLINE
 date64 castDATE_int64(int64 in) { return in; }
 
+static int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};  // indexed by TmMon(); slot 1 (February) holds 28
+
+bool IsLastDayOfMonth(const EpochTimePoint &tp) {  // true when tp's day-of-month is the last day of its month
+  if (tp.TmMon() != 1) {
+    // not February. Don't worry about leap year
+    return (tp.TmMday() == days_in_month[tp.TmMon()]);
+  }
+
+  // this is February, check if the day is 28 or 29
+  if (tp.TmMday() < 28) {
+    return false;
+  }
+
+  if (tp.TmMday() == 29) {
+    // Feb 29th
+    return true;
+  }
+
+  // Feb 28th is the last day only in a non-leap year. NOTE(review): confirm TmYear() yields the year form IsLeapYear expects (a raw tm_year would be 1900-based)
+  return !IsLeapYear(tp.TmYear());
+}
+
+// MONTHS_BETWEEN returns the number of months between dates date1 and date2, where
+// date1 is the first argument (endEpoch) and date2 is the second (startEpoch).
+// If date1 is later than date2, then the result is positive.
+// If date1 is earlier than date2, then the result is negative.
+// If date1 and date2 are either the same days of the month or both last days of months,
+// then the result is always an integer. Otherwise, as in Oracle Database, the fractional
+// portion of the result is calculated based on a 31-day month and considers the
+// difference in the time components of date1 and date2.
+//
+// Each divisor is cast as a whole parenthesized expression so that the seconds term is
+// divided by the number of seconds in an hour regardless of how SECONDS_IN_HOUR expands.
+#define MONTHS_BETWEEN(TYPE)                                                             \
+  FORCE_INLINE                                                                           \
+  double months_between##_##TYPE##_##TYPE(uint64_t endEpoch, uint64_t startEpoch) {      \
+    EpochTimePoint endTime(endEpoch);                                                    \
+    EpochTimePoint startTime(startEpoch);                                                \
+    int endYear = endTime.TmYear();                                                      \
+    int endMonth = endTime.TmMon();                                                      \
+    int startYear = startTime.TmYear();                                                  \
+    int startMonth = startTime.TmMon();                                                  \
+    int monthsDiff = (endYear - startYear) * 12 + (endMonth - startMonth);               \
+    if ((endTime.TmMday() == startTime.TmMday()) ||                                      \
+        (IsLastDayOfMonth(endTime) && IsLastDayOfMonth(startTime))) {                    \
+      return static_cast<double>(monthsDiff);                                            \
+    }                                                                                    \
+    double diffDays = static_cast<double>(endTime.TmMday() - startTime.TmMday()) / 31.0; \
+    double diffHours =                                                                   \
+        static_cast<double>(endTime.TmHour() - startTime.TmHour()) +                     \
+        static_cast<double>(endTime.TmMin() - startTime.TmMin()) /                       \
+            static_cast<double>(MINS_IN_HOUR) +                                          \
+        static_cast<double>(endTime.TmSec() - startTime.TmSec()) /                       \
+            static_cast<double>(SECONDS_IN_HOUR);                                        \
+    return static_cast<double>(monthsDiff) + diffDays +                                  \
+           diffHours / static_cast<double>(HOURS_IN_DAY * 31);                           \
+  }
+
+DATE_TYPES(MONTHS_BETWEEN)
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/time_test.cc 
b/cpp/src/gandiva/precompiled/time_test.cc
index 99ee0a7..56317c2 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -497,4 +497,34 @@ TEST(TestTime, TestExtractWeek) {
   }
 }
 
+// Checks months_between_timestamp_timestamp against precomputed expectations.
+TEST(TestTime, TestMonthsBetween) {
+  // Flattened triples: end timestamp, start timestamp, expected number of months.
+  std::vector<std::string> testStrings = {
+      "1995-03-02 00:00:00", "1995-02-02 00:00:00", "1.0",
+      "1995-02-02 00:00:00", "1995-03-02 00:00:00", "-1.0",
+      "1995-03-31 00:00:00", "1995-02-28 00:00:00", "1.0",
+      "1996-03-31 00:00:00", "1996-02-28 00:00:00", "1.09677418",
+      "1996-03-31 00:00:00", "1996-02-29 00:00:00", "1.0",
+      "1996-05-31 00:00:00", "1996-04-30 00:00:00", "1.0",
+      "1996-05-31 00:00:00", "1996-03-31 00:00:00", "2.0",
+      "1996-05-31 00:00:00", "1996-03-30 00:00:00", "2.03225806",
+      "1996-03-15 00:00:00", "1996-02-14 00:00:00", "1.03225806",
+      "1995-02-02 00:00:00", "1995-01-01 00:00:00", "1.03225806",
+      "1995-02-02 10:00:00", "1995-01-01 11:00:00", "1.03091397"};
+
+  // Walk the list one triple at a time; the bound also guards against a truncated list.
+  for (size_t i = 0; i + 2 < testStrings.size(); i += 3) {
+    timestamp endTs = StringToTimestamp(testStrings[i].c_str());
+    timestamp startTs = StringToTimestamp(testStrings[i + 1].c_str());
+    double expectedResult = atof(testStrings[i + 2].c_str());
+
+    double actualResult = months_between_timestamp_timestamp(endTs, startTs);
+
+    // EXPECT_NEAR reports both values on failure; the message names the failing case.
+    EXPECT_NEAR(expectedResult, actualResult, 0.001)
+        << "end=" << testStrings[i] << " start=" << testStrings[i + 1];
+  }
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h 
b/cpp/src/gandiva/precompiled/types.h
index 168f93a..7315214 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -117,6 +117,7 @@ int64 date_trunc_Century_date64(date64);
 int64 date_trunc_Millennium_date64(date64);
 
 int64 date_trunc_Week_timestamp(timestamp);
+double months_between_timestamp_timestamp(uint64, uint64);
 
 int32 mem_compare(const char* left, int32 left_len, const char* right, int32 
right_len);
 
diff --git a/cpp/src/gandiva/tests/date_time_test.cc 
b/cpp/src/gandiva/tests/date_time_test.cc
index 859b92c..edf2a1a 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -325,4 +325,63 @@ TEST_F(TestProjector, TestTimestampDiff) {
   }
 }
 
+// End-to-end check of the "months_between" function on date64 columns via a Projector.
+TEST_F(TestProjector, TestMonthsBetween) {
+  auto f0 = field("f0", arrow::date64());
+  auto f1 = field("f1", arrow::date64());
+  auto schema = arrow::schema({f0, f1});
+
+  // output fields
+  auto output = field("out", arrow::float64());
+
+  auto months_between_expr =
+      TreeExprBuilder::MakeExpression("months_between", {f0, f1}, output);
+
+  std::shared_ptr<Projector> projector;
+  Status status = Projector::Make(schema, {months_between_expr}, &projector);
+  // Attach the status message to the assertion so it is printed only on failure.
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  struct tm y1970 = {0};
+  y1970.tm_year = 70;
+  y1970.tm_mon = 0;
+  y1970.tm_mday = 1;
+  y1970.tm_hour = 0;
+  y1970.tm_min = 0;
+  y1970.tm_sec = 0;
+  time_t epoch = mktime(&y1970);
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto validity = {true, true, true, true};
+  // f0 is the first argument of months_between, f1 the second.
+  std::vector<int64_t> f0_data = {MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 3, 31, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1996, 3, 31, 0, 0, 0, 0)};
+
+  auto array0 =
+      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data, validity);
+
+  std::vector<int64_t> f1_data = {MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 2, 28, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1996, 2, 29, 0, 0, 0, 0)};
+
+  auto array1 =
+      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f1_data, validity);
+
+  // expected output
+  auto exp_output = MakeArrowArrayFloat64({1.0, -1.0, 1.0, 1.0}, validity);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  // Stop here on failure: outputs.at(0) below must not run against an empty vector.
+  ASSERT_TRUE(status.ok()) << status.message();
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
+}
+
 }  // namespace gandiva

Reply via email to