jorisvandenbossche commented on a change in pull request #10598:
URL: https://github.com/apache/arrow/pull/10598#discussion_r662078092



##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -216,16 +216,17 @@ struct ARROW_EXPORT ProjectOptions : public 
FunctionOptions {
   std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
 };
 
-struct ARROW_EXPORT TemporalComponentExtractionOptions : public 
FunctionOptions {
-  explicit TemporalComponentExtractionOptions(int64_t week_start = 0)
-      : week_start(std::move(week_start)) {}
+struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions {
+  explicit DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
+      : one_based_numbering(one_based_numbering), week_start(week_start) {}
+  explicit DayOfWeekOptions() : one_based_numbering(false), week_start(1) {}
 
-  static TemporalComponentExtractionOptions Defaults() {
-    return TemporalComponentExtractionOptions{};
-  }
+  static DayOfWeekOptions Defaults() { return DayOfWeekOptions{}; }
 
-  /// Index of the first day of the week.
-  int64_t week_start;
+  /// Number days from 1 if true and form 0 if false

Review comment:
       ```suggestion
     /// Number days from 1 if true and from 0 if false
   ```

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -136,19 +136,25 @@ struct Day {
 
 // ----------------------------------------------------------------------
 // Extract day of week from timestamp
+//
+// By default week starts on Monday represented by 0 and ends on Sunday 
represented
+// by 6. Start day of the week (Monday=1, Sunday=7) and numbering start (0 or 
1) can be
+// set using DayOfWeekOptions
 
 template <typename Duration>
 struct DayOfWeek {
-  explicit DayOfWeek(TemporalComponentExtractionOptions options) : 
options(options) {}
+  explicit DayOfWeek(DayOfWeekOptions options) : options(options) {}
 
   template <typename T, typename Arg0>
   T Call(KernelContext*, Arg0 arg, Status*) const {
-    return static_cast<T>(
-        weekday(year_month_day(floor<days>(sys_time<Duration>(Duration{arg}))))
-            .iso_encoding() -
-        1 + options.week_start);
+    auto wd = arrow_vendored::date::year_month_weekday(
+                  floor<days>(sys_time<Duration>(Duration{arg})))
+                  .weekday()
+                  .iso_encoding();
+    return (wd + 7 - options.week_start) % 7 + options.one_based_numbering;

Review comment:
       Would it be worth to have a separate if branch for the case that 
`options.week_start` is 1 (the default), in which case this is this is much 
simpler (eg you don't need the `%`)? 
   (I don't know how expensive those calculations are compared to the actual 
`year_month_weekday(..).weekday()` call, though, so maybe it doesn't matter)

##########
File path: cpp/src/arrow/compute/api_scalar.h
##########
@@ -216,6 +216,19 @@ struct ARROW_EXPORT ProjectOptions : public 
FunctionOptions {
   std::vector<std::shared_ptr<const KeyValueMetadata>> field_metadata;
 };
 
+struct ARROW_EXPORT DayOfWeekOptions : public FunctionOptions {
+  explicit DayOfWeekOptions(bool one_based_numbering, uint32_t week_start)
+      : one_based_numbering(one_based_numbering), week_start(week_start) {}

Review comment:
       Can you validate the `week_start` here? (to check that it is in the 
correct range, and eg a user is not passing 0. Since the numbering here is 
different as the default output of DayOfWeek, that seems like an easy mistake)

##########
File path: docs/source/cpp/compute.rst
##########
@@ -954,44 +954,46 @@ Temporal component extraction
 These functions extract datetime components (year, month, day, etc) from 
timestamp type.
 Note: this is currently not supported for timestamps with timezone information.
 
-+--------------------+------------+-------------------+---------------+--------+
-| Function name      | Arity      | Input types       | Output type   | Notes  
|
-+====================+============+===================+===============+========+
-| year               | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| month              | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| day                | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| day_of_week        | Unary      | Temporal          | Int64         | \(1)   
|
-+--------------------+------------+-------------------+---------------+--------+
-| day_of_year        | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| iso_year           | Unary      | Temporal          | Int64         | \(2)   
|
-+--------------------+------------+-------------------+---------------+--------+
-| iso_week           | Unary      | Temporal          | Int64         | \(2)   
|
-+--------------------+------------+-------------------+---------------+--------+
-| iso_calendar       | Unary      | Temporal          | Struct        | \(3)   
|
-+--------------------+------------+-------------------+---------------+--------+
-| quarter            | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| hour               | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| minute             | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| second             | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| millisecond        | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| microsecond        | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| nanosecond         | Unary      | Temporal          | Int64         |        
|
-+--------------------+------------+-------------------+---------------+--------+
-| subsecond          | Unary      | Temporal          | Double        |        
|
-+--------------------+------------+-------------------+---------------+--------+
-
-* \(1) Outputs the number of the day of the week. Week begins on Monday and is 
denoted
-  by 0 and ends on Sunday denoted by 6.
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| Function name      | Arity      | Input types       | Output type   | Notes  
| Options class              |
++====================+============+===================+===============+========+============================+
+| year               | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| month              | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| day                | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| day_of_week        | Unary      | Temporal          | Int64         | \(1)   
| :struct:`DayOfWeekOptions` |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| day_of_year        | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| iso_year           | Unary      | Temporal          | Int64         | \(2)   
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| iso_week           | Unary      | Temporal          | Int64         | \(2)   
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| iso_calendar       | Unary      | Temporal          | Struct        | \(3)   
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| quarter            | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| hour               | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| minute             | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| second             | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| millisecond        | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| microsecond        | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| nanosecond         | Unary      | Temporal          | Int64         |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+| subsecond          | Unary      | Temporal          | Double        |        
|                            |
++--------------------+------------+-------------------+---------------+--------+----------------------------+
+
+* \(1) Outputs the number of the day of the week. By default week begins on 
Monday
+  represented by 0 and ends on Sunday represented by 6. 
:member:`DayOfWeekOptions::week_start` can be used to set
+  the starting day of the week using ISO convention (Monday=1, Sunday=7). Day 
numbering can start with 0 or 1
+  using :member:`DayOfWeekOptions::week_start` paramter.

Review comment:
       ```suggestion
     using :member:`DayOfWeekOptions::week_start` parameter.
   ```

##########
File path: python/pyarrow/_compute.pyx
##########
@@ -1043,6 +1043,23 @@ class StrptimeOptions(_StrptimeOptions):
         self._set_options(format, unit)
 
 
+cdef class _DayOfWeekOptions(FunctionOptions):
+    cdef:
+        unique_ptr[CDayOfWeekOptions] day_of_week_options
+
+    cdef const CFunctionOptions* get_options(self) except NULL:
+        return self.day_of_week_options.get()
+
+    def _set_options(self, one_based_numbering, week_start):
+        self.day_of_week_options.reset(
+            new CDayOfWeekOptions(one_based_numbering, week_start))
+
+
+class DayOfWeekOptions(_DayOfWeekOptions):
+    def __init__(self, one_based_numbering=True, week_start=1):

Review comment:
       ```suggestion
       def __init__(self, one_based_numbering=False, week_start=1):
   ```
   
   The default is False?

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -451,9 +517,14 @@ const FunctionDoc day_doc{
 
 const FunctionDoc day_of_week_doc{
     "Extract day of the week number",
-    ("Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.\n"
-     "Returns an error if timestamp has a defined timezone. Null values return 
null."),
-    {"values"}};
+    ("By default Week starts on Monday represented by 0 and ends on Sunday 
represented "
+     "by 6.\n"
+     "Returns an error if timestamp has a defined timezone. Null values return 
null.\n"
+     "DayOfWeekOptions.week_start can be used to set another starting day 
using ISO "

Review comment:
       Can you move this up to put it directly after the "By default, the week 
starts with ..", so before the "Returns an error if ...". Since this 
explanation of the options directly adds to the default monday-sunday as 0-6, 
putting the other sentence in between breaks a bit the logical flow I think

##########
File path: cpp/src/arrow/compute/kernels/scalar_temporal.cc
##########
@@ -512,12 +517,14 @@ const FunctionDoc day_doc{
 
 const FunctionDoc day_of_week_doc{
     "Extract day of the week number",
-    ("Week starts on Monday denoted by 0 and ends on Sunday denoted by 6.\n"
+    ("By default Week starts on Monday represented by 0 and ends on Sunday 
represented "

Review comment:
       ```suggestion
       ("By default, the week starts on Monday represented by 0 and ends on 
Sunday represented "
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to