This is an automated email from the ASF dual-hosted git repository. wesm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow.git
commit 1ae9235458206fa6092d3dc5787b0d3ab97c05dd
Author: Vivekanand Vellanki <[email protected]>
AuthorDate: Mon Oct 1 17:34:56 2018 +0530

    [Gandiva] Added support for months_between
    
    Change-Id: I3b5553d490e95ec367cc2281c7b0abcf433c51fa
---
 cpp/src/gandiva/function_registry.cc     |  3 ++
 cpp/src/gandiva/precompiled/time.cc      | 56 ++++++++++++++++++++++++++++++
 cpp/src/gandiva/precompiled/time_test.cc | 30 ++++++++++++++++
 cpp/src/gandiva/precompiled/types.h      |  1 +
 cpp/src/gandiva/tests/date_time_test.cc  | 59 ++++++++++++++++++++++++++++++++
 5 files changed, 149 insertions(+)

diff --git a/cpp/src/gandiva/function_registry.cc b/cpp/src/gandiva/function_registry.cc
index 033d369..c0ead6e 100644
--- a/cpp/src/gandiva/function_registry.cc
+++ b/cpp/src/gandiva/function_registry.cc
@@ -218,6 +218,9 @@ NativeFunction FunctionRegistry::pc_registry_[] = {
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractSecond),
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, extractEpoch),
 
+    BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, date64, date64, float64),
+    BINARY_GENERIC_SAFE_NULL_IF_NULL(months_between, timestamp, timestamp, float64),
+
     // date_trunc operations on date/timestamp
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Millennium),
     DATE_TYPES(EXTRACT_SAFE_NULL_IF_NULL, date_trunc_Century),
diff --git a/cpp/src/gandiva/precompiled/time.cc b/cpp/src/gandiva/precompiled/time.cc
index 1206e1f..3903da1 100644
--- a/cpp/src/gandiva/precompiled/time.cc
+++ b/cpp/src/gandiva/precompiled/time.cc
@@ -27,6 +27,9 @@ extern "C" {
 
 #define MINS_IN_HOUR 60
 #define SECONDS_IN_MINUTE 60
+#define SECONDS_IN_HOUR (SECONDS_IN_MINUTE * MINS_IN_HOUR)  // parens: used as divisor
+
+#define HOURS_IN_DAY 24
 
 // Expand inner macro for all date types.
 #define DATE_TYPES(INNER) \
@@ -447,4 +450,57 @@ DATE_TRUNC_FUNCTIONS(timestamp)
 FORCE_INLINE
 date64 castDATE_int64(int64 in) { return in; }
 
+static const int days_in_month[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+// Returns true when tp is the last day of its month; TmMon() is used as 0-based here.
+bool IsLastDayOfMonth(const EpochTimePoint &tp) {
+  if (tp.TmMon() != 1) {
+    // Not February: the month length is fixed, so leap years do not matter.
+    return (tp.TmMday() == days_in_month[tp.TmMon()]);
+  }
+
+  // February: only the 28th or the 29th can be the last day.
+  if (tp.TmMday() < 28) {
+    return false;
+  }
+
+  if (tp.TmMday() == 29) {
+    // Feb 29th exists only in leap years and is then always the last day.
+    return true;
+  }
+
+  // Feb 28th: last day only in non-leap years. NOTE(review): is TmYear() a full year?
+  return !IsLeapYear(tp.TmYear());
+}
+
+// MONTHS_BETWEEN(endEpoch, startEpoch) returns the number of months from start to end.
+// The result is positive when endEpoch is later than startEpoch and negative when it
+// is earlier. If both values fall on the same day of the month, or both fall on the
+// last day of their months, the result is a whole number of months and the time of
+// day is ignored. Otherwise the fractional part is computed on a 31-day month from
+// the difference in day-of-month and in the hour/minute/second components
+// (the semantics follow Oracle Database's MONTHS_BETWEEN).
+#define MONTHS_BETWEEN(TYPE)                                                        \
+  FORCE_INLINE                                                                      \
+  double months_between##_##TYPE##_##TYPE(uint64_t endEpoch, uint64_t startEpoch) { \
+    EpochTimePoint endTime(endEpoch);                                               \
+    EpochTimePoint startTime(startEpoch);                                           \
+    int endYear = endTime.TmYear();                                                 \
+    int endMonth = endTime.TmMon();                                                 \
+    int startYear = startTime.TmYear();                                             \
+    int startMonth = startTime.TmMon();                                             \
+    int monthsDiff = (endYear - startYear) * 12 + (endMonth - startMonth);          \
+    if ((endTime.TmMday() == startTime.TmMday()) ||                                 \
+        (IsLastDayOfMonth(endTime) && IsLastDayOfMonth(startTime))) {               \
+      return (double)monthsDiff;                                                    \
+    }                                                                               \
+    double diffDays = (double)(endTime.TmMday() - startTime.TmMday()) / (double)31; \
+    double diffHours =                                                              \
+        (double)(endTime.TmHour() - startTime.TmHour()) +                           \
+        (double)(endTime.TmMin() - startTime.TmMin()) / (double)MINS_IN_HOUR +      \
+        (double)(endTime.TmSec() - startTime.TmSec()) / (double)SECONDS_IN_HOUR;    \
+    return (double)monthsDiff + diffDays + diffHours / (double)(HOURS_IN_DAY * 31); \
+  }
+
+DATE_TYPES(MONTHS_BETWEEN)
+
 }  // extern "C"
diff --git a/cpp/src/gandiva/precompiled/time_test.cc b/cpp/src/gandiva/precompiled/time_test.cc
index 99ee0a7..56317c2 100644
--- a/cpp/src/gandiva/precompiled/time_test.cc
+++ b/cpp/src/gandiva/precompiled/time_test.cc
@@ -497,4 +497,34 @@ TEST(TestTime, TestExtractWeek) {
   }
 }
 
+TEST(TestTime, TestMonthsBetween) {
+  std::vector<std::string> testStrings = {
+      "1995-03-02 00:00:00", "1995-02-02 00:00:00", "1.0",
+      "1995-02-02 00:00:00", "1995-03-02 00:00:00", "-1.0",
+      "1995-03-31 00:00:00", "1995-02-28 00:00:00", "1.0",
+      "1996-03-31 00:00:00", "1996-02-28 00:00:00", "1.09677418",
+      "1996-03-31 00:00:00", "1996-02-29 00:00:00", "1.0",
+      "1996-05-31 00:00:00", "1996-04-30 00:00:00", "1.0",
+      "1996-05-31 00:00:00", "1996-03-31 00:00:00", "2.0",
+      "1996-05-31 00:00:00", "1996-03-30 00:00:00", "2.03225806",
+      "1996-03-15 00:00:00", "1996-02-14 00:00:00", "1.03225806",
+      "1995-02-02 00:00:00", "1995-01-01 00:00:00", "1.03225806",
+      "1995-02-02 10:00:00", "1995-01-01 11:00:00", "1.03091397"};
+
+  for (uint32_t i = 0; i < testStrings.size();) {
+    timestamp endTs = StringToTimestamp(testStrings[i++].c_str());
+    timestamp startTs = StringToTimestamp(testStrings[i++].c_str());
+
+    double expectedResult = atof(testStrings[i++].c_str());
+    double actualResult = months_between_timestamp_timestamp(endTs, startTs);
+
+    double diff = actualResult - expectedResult;
+    if (diff < 0) {
+      diff = expectedResult - actualResult;
+    }
+
+    EXPECT_TRUE(diff < 0.001);
+  }
+}
+
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/precompiled/types.h b/cpp/src/gandiva/precompiled/types.h
index 168f93a..7315214 100644
--- a/cpp/src/gandiva/precompiled/types.h
+++ b/cpp/src/gandiva/precompiled/types.h
@@ -117,6 +117,7 @@ int64 date_trunc_Century_date64(date64);
 int64 date_trunc_Millennium_date64(date64);
 
 int64 date_trunc_Week_timestamp(timestamp);
+double months_between_timestamp_timestamp(uint64, uint64);
 
 int32 mem_compare(const char* left, int32 left_len, const char* right, int32 right_len);
 
diff --git a/cpp/src/gandiva/tests/date_time_test.cc b/cpp/src/gandiva/tests/date_time_test.cc
index 859b92c..edf2a1a 100644
--- a/cpp/src/gandiva/tests/date_time_test.cc
+++ b/cpp/src/gandiva/tests/date_time_test.cc
@@ -325,4 +325,63 @@ TEST_F(TestProjector, TestTimestampDiff) {
   }
 }
 
+TEST_F(TestProjector, TestMonthsBetween) {
+  auto f0 = field("f0", arrow::date64());
+  auto f1 = field("f1", arrow::date64());
+  auto schema = arrow::schema({f0, f1});
+
+  // output fields
+  auto output = field("out", arrow::float64());
+
+  auto months_between_expr =
+      TreeExprBuilder::MakeExpression("months_between", {f0, f1}, output);
+
+  std::shared_ptr<Projector> projector;
+  Status status = Projector::Make(schema, {months_between_expr}, &projector);
+  std::cout << status.message();
+  ASSERT_TRUE(status.ok());
+
+  struct tm y1970 = {0};
+  y1970.tm_year = 70;
+  y1970.tm_mon = 0;
+  y1970.tm_mday = 1;
+  y1970.tm_hour = 0;
+  y1970.tm_min = 0;
+  y1970.tm_sec = 0;
+  time_t epoch = mktime(&y1970);
+
+  // Create a row-batch with some sample data
+  int num_records = 4;
+  auto validity = {true, true, true, true};
+  std::vector<int64_t> f0_data = {MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 3, 31, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1996, 3, 31, 0, 0, 0, 0)};
+
+  auto array0 =
+      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f0_data, validity);
+
+  std::vector<int64_t> f1_data = {MillisSince(epoch, 1995, 2, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 3, 2, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1995, 2, 28, 0, 0, 0, 0),
+                                  MillisSince(epoch, 1996, 2, 29, 0, 0, 0, 0)};
+
+  auto array1 =
+      MakeArrowTypeArray<arrow::Date64Type, int64_t>(date64(), f1_data, validity);
+
+  // expected output
+  auto exp_output = MakeArrowArrayFloat64({1.0, -1.0, 1.0, 1.0}, validity);
+
+  // prepare input record batch
+  auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0, array1});
+
+  // Evaluate expression
+  arrow::ArrayVector outputs;
+  status = projector->Evaluate(*in_batch, pool_, &outputs);
+  EXPECT_TRUE(status.ok());
+
+  // Validate results
+  EXPECT_ARROW_ARRAY_EQUALS(exp_output, outputs.at(0));
+}
+
 }  // namespace gandiva
