rymurr commented on a change in pull request #2908:
URL: https://github.com/apache/iceberg/pull/2908#discussion_r689464459



##########
File path: python/iceberg/api/transforms/transform_util.py
##########
@@ -15,70 +15,70 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import base64
 from datetime import datetime, timedelta
 
 import pytz
 
 
-class TransformUtil(object):
-    EPOCH = datetime.utcfromtimestamp(0)
-    EPOCH_YEAR = datetime.utcfromtimestamp(0).year
-
-    @staticmethod
-    def human_year(year_ordinal):
-        return "{0:0=4d}".format(TransformUtil.EPOCH_YEAR + year_ordinal)
-
-    @staticmethod
-    def human_month(month_ordinal):
-        return "{0:0=4d}-{1:0=2d}".format(TransformUtil.EPOCH_YEAR + 
int(month_ordinal / 12), 1 + int(month_ordinal % 12))
-
-    @staticmethod
-    def human_day(day_ordinal):
-        day = TransformUtil.EPOCH + timedelta(days=day_ordinal)
-        return "{0:0=4d}-{1:0=2d}-{2:0=2d}".format(day.year, day.month, 
day.day)
-
-    @staticmethod
-    def human_time(micros_from_midnight):
-        day = TransformUtil.EPOCH + 
timedelta(microseconds=micros_from_midnight)
-        return "{}".format(day.time())
-
-    @staticmethod
-    def human_timestamp_with_timezone(timestamp_micros):
-        day = TransformUtil.EPOCH + timedelta(microseconds=timestamp_micros)
-        return 
pytz.timezone("UTC").localize(day).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
-
-    @staticmethod
-    def human_timestamp_without_timezone(timestamp_micros):
-        day = TransformUtil.EPOCH + timedelta(microseconds=timestamp_micros)
-        return day.isoformat()
-
-    @staticmethod
-    def human_hour(hour_ordinal):
-        time = TransformUtil.EPOCH + timedelta(hours=hour_ordinal)
-        return "{0:0=4d}-{1:0=2d}-{2:0=2d}-{3:0=2d}".format(time.year, 
time.month, time.day, time.hour)
-
-    @staticmethod
-    def base_64_encode(buffer):
-        raise NotImplementedError()
-
-    @staticmethod
-    def diff_hour(date1, date2):
-        return int((date1 - date2).total_seconds() / 3600)
-
-    @staticmethod
-    def diff_day(date1, date2):
-        return (date1 - date2).days
-
-    @staticmethod
-    def diff_month(date1, date2):
-        return (date1.year - date2.year) * 12 + (date1.month - date2.month) - 
(1 if date1.day < date2.day else 0)
-
-    @staticmethod
-    def diff_year(date1, date2):
-        return (date1.year - date2.year) - \
-               (1 if date1.month < date2.month or (date1.month == date2.month 
and date1.day < date2.day) else 0)
-
-    @staticmethod
-    def unscale_decimal(decimal_value):
-        value_tuple = decimal_value.as_tuple()
-        return int(("-" if value_tuple.sign else "") + "".join([str(d) for d 
in value_tuple.digits]))
+EPOCH = datetime.utcfromtimestamp(0)
+EPOCH_YEAR = datetime.utcfromtimestamp(0).year
+
+
+def human_year(year_ordinal):
+    return "{0:0=4d}".format(EPOCH_YEAR + year_ordinal)
+
+
+def human_month(month_ordinal):
+    return "{0:0=4d}-{1:0=2d}".format(EPOCH_YEAR + int(month_ordinal / 12), 1 
+ int(month_ordinal % 12))
+
+
+def human_day(day_ordinal):
+    day = EPOCH + timedelta(days=day_ordinal)
+    return "{0:0=4d}-{1:0=2d}-{2:0=2d}".format(day.year, day.month, day.day)
+
+
+def human_time(micros_from_midnight):
+    day = EPOCH + timedelta(microseconds=micros_from_midnight)
+    return "{}".format(day.time())
+
+
+def human_timestamp_with_timezone(timestamp_micros):
+    day = EPOCH + timedelta(microseconds=timestamp_micros)
+    return pytz.timezone("UTC").localize(day).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+
+
+def human_timestamp_without_timezone(timestamp_micros):
+    day = EPOCH + timedelta(microseconds=timestamp_micros)
+    return day.isoformat()
+
+
+def human_hour(hour_ordinal):
+    time = EPOCH + timedelta(hours=hour_ordinal)
+    return "{0:0=4d}-{1:0=2d}-{2:0=2d}-{3:0=2d}".format(time.year, time.month, 
time.day, time.hour)
+
+
+def base_64_encode(buffer):
+    return base64.b64encode(buffer).decode('utf-8')
+
+
+def diff_hour(date1, date2):

Review comment:
       same with these, you could probably collapse this and changes to 
`timestamps.py` into simpler methods if we stuck to existing time libraries (I 
note they are already being created as timestamps anyways)

##########
File path: python/iceberg/api/transforms/transform_util.py
##########
@@ -15,70 +15,70 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import base64
 from datetime import datetime, timedelta
 
 import pytz
 
 
-class TransformUtil(object):
-    EPOCH = datetime.utcfromtimestamp(0)
-    EPOCH_YEAR = datetime.utcfromtimestamp(0).year
-
-    @staticmethod
-    def human_year(year_ordinal):
-        return "{0:0=4d}".format(TransformUtil.EPOCH_YEAR + year_ordinal)
-
-    @staticmethod
-    def human_month(month_ordinal):
-        return "{0:0=4d}-{1:0=2d}".format(TransformUtil.EPOCH_YEAR + 
int(month_ordinal / 12), 1 + int(month_ordinal % 12))
-
-    @staticmethod
-    def human_day(day_ordinal):
-        day = TransformUtil.EPOCH + timedelta(days=day_ordinal)
-        return "{0:0=4d}-{1:0=2d}-{2:0=2d}".format(day.year, day.month, 
day.day)
-
-    @staticmethod
-    def human_time(micros_from_midnight):
-        day = TransformUtil.EPOCH + 
timedelta(microseconds=micros_from_midnight)
-        return "{}".format(day.time())
-
-    @staticmethod
-    def human_timestamp_with_timezone(timestamp_micros):
-        day = TransformUtil.EPOCH + timedelta(microseconds=timestamp_micros)
-        return 
pytz.timezone("UTC").localize(day).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
-
-    @staticmethod
-    def human_timestamp_without_timezone(timestamp_micros):
-        day = TransformUtil.EPOCH + timedelta(microseconds=timestamp_micros)
-        return day.isoformat()
-
-    @staticmethod
-    def human_hour(hour_ordinal):
-        time = TransformUtil.EPOCH + timedelta(hours=hour_ordinal)
-        return "{0:0=4d}-{1:0=2d}-{2:0=2d}-{3:0=2d}".format(time.year, 
time.month, time.day, time.hour)
-
-    @staticmethod
-    def base_64_encode(buffer):
-        raise NotImplementedError()
-
-    @staticmethod
-    def diff_hour(date1, date2):
-        return int((date1 - date2).total_seconds() / 3600)
-
-    @staticmethod
-    def diff_day(date1, date2):
-        return (date1 - date2).days
-
-    @staticmethod
-    def diff_month(date1, date2):
-        return (date1.year - date2.year) * 12 + (date1.month - date2.month) - 
(1 if date1.day < date2.day else 0)
-
-    @staticmethod
-    def diff_year(date1, date2):
-        return (date1.year - date2.year) - \
-               (1 if date1.month < date2.month or (date1.month == date2.month 
and date1.day < date2.day) else 0)
-
-    @staticmethod
-    def unscale_decimal(decimal_value):
-        value_tuple = decimal_value.as_tuple()
-        return int(("-" if value_tuple.sign else "") + "".join([str(d) for d 
in value_tuple.digits]))
+EPOCH = datetime.utcfromtimestamp(0)
+EPOCH_YEAR = datetime.utcfromtimestamp(0).year
+
+
+def human_year(year_ordinal):
+    return "{0:0=4d}".format(EPOCH_YEAR + year_ordinal)
+
+
+def human_month(month_ordinal):
+    return "{0:0=4d}-{1:0=2d}".format(EPOCH_YEAR + int(month_ordinal / 12), 1 
+ int(month_ordinal % 12))
+
+
+def human_day(day_ordinal):
+    day = EPOCH + timedelta(days=day_ordinal)
+    return "{0:0=4d}-{1:0=2d}-{2:0=2d}".format(day.year, day.month, day.day)
+
+
+def human_time(micros_from_midnight):
+    day = EPOCH + timedelta(microseconds=micros_from_midnight)
+    return "{}".format(day.time())
+
+
+def human_timestamp_with_timezone(timestamp_micros):
+    day = EPOCH + timedelta(microseconds=timestamp_micros)
+    return pytz.timezone("UTC").localize(day).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
+
+
+def human_timestamp_without_timezone(timestamp_micros):
+    day = EPOCH + timedelta(microseconds=timestamp_micros)
+    return day.isoformat()
+
+
+def human_hour(hour_ordinal):
+    time = EPOCH + timedelta(hours=hour_ordinal)
+    return "{0:0=4d}-{1:0=2d}-{2:0=2d}-{3:0=2d}".format(time.year, time.month, 
time.day, time.hour)
+
+
+def base_64_encode(buffer):
+    return base64.b64encode(buffer).decode('utf-8')
+
+
+def diff_hour(date1, date2):
+    return int((date1 - date2).total_seconds() / 3600)
+
+
+def diff_day(date1, date2):
+    return (date1 - date2).days
+
+
+def diff_month(date1, date2):
+    return (date1.year - date2.year) * 12 + (date1.month - date2.month) - (1 
if date1.day < date2.day else 0)
+
+
+def diff_year(date1, date2):
+    return (date1.year - date2.year) - \
+           (1 if date1.month < date2.month or (date1.month == date2.month and 
date1.day < date2.day) else 0)
+
+
+def unscale_decimal(decimal_value):

Review comment:
       similar to time manipulation it feels like we could do this w/ standard 
library methods rather than doing our own decimal arithmetic

##########
File path: python/iceberg/api/transforms/transform_util.py
##########
@@ -15,70 +15,70 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import base64
 from datetime import datetime, timedelta
 
 import pytz
 
 
-class TransformUtil(object):
-    EPOCH = datetime.utcfromtimestamp(0)
-    EPOCH_YEAR = datetime.utcfromtimestamp(0).year
-
-    @staticmethod
-    def human_year(year_ordinal):
-        return "{0:0=4d}".format(TransformUtil.EPOCH_YEAR + year_ordinal)
-
-    @staticmethod
-    def human_month(month_ordinal):
-        return "{0:0=4d}-{1:0=2d}".format(TransformUtil.EPOCH_YEAR + 
int(month_ordinal / 12), 1 + int(month_ordinal % 12))
-
-    @staticmethod
-    def human_day(day_ordinal):
-        day = TransformUtil.EPOCH + timedelta(days=day_ordinal)
-        return "{0:0=4d}-{1:0=2d}-{2:0=2d}".format(day.year, day.month, 
day.day)
-
-    @staticmethod
-    def human_time(micros_from_midnight):
-        day = TransformUtil.EPOCH + 
timedelta(microseconds=micros_from_midnight)
-        return "{}".format(day.time())
-
-    @staticmethod
-    def human_timestamp_with_timezone(timestamp_micros):
-        day = TransformUtil.EPOCH + timedelta(microseconds=timestamp_micros)
-        return 
pytz.timezone("UTC").localize(day).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
-
-    @staticmethod
-    def human_timestamp_without_timezone(timestamp_micros):
-        day = TransformUtil.EPOCH + timedelta(microseconds=timestamp_micros)
-        return day.isoformat()
-
-    @staticmethod
-    def human_hour(hour_ordinal):
-        time = TransformUtil.EPOCH + timedelta(hours=hour_ordinal)
-        return "{0:0=4d}-{1:0=2d}-{2:0=2d}-{3:0=2d}".format(time.year, 
time.month, time.day, time.hour)
-
-    @staticmethod
-    def base_64_encode(buffer):
-        raise NotImplementedError()
-
-    @staticmethod
-    def diff_hour(date1, date2):
-        return int((date1 - date2).total_seconds() / 3600)
-
-    @staticmethod
-    def diff_day(date1, date2):
-        return (date1 - date2).days
-
-    @staticmethod
-    def diff_month(date1, date2):
-        return (date1.year - date2.year) * 12 + (date1.month - date2.month) - 
(1 if date1.day < date2.day else 0)
-
-    @staticmethod
-    def diff_year(date1, date2):
-        return (date1.year - date2.year) - \
-               (1 if date1.month < date2.month or (date1.month == date2.month 
and date1.day < date2.day) else 0)
-
-    @staticmethod
-    def unscale_decimal(decimal_value):
-        value_tuple = decimal_value.as_tuple()
-        return int(("-" if value_tuple.sign else "") + "".join([str(d) for d 
in value_tuple.digits]))
+EPOCH = datetime.utcfromtimestamp(0)
+EPOCH_YEAR = datetime.utcfromtimestamp(0).year
+
+
+def human_year(year_ordinal):

Review comment:
       Why not use `strftime` everywhere? eg 
   
   ```
   from dateutil.relativedelta import relativedelta
   (EPOCH + relativedelta(years=year_ordinal)).strftime('%Y')
   ```
   
   I know it is a bit slower than your impl but I find date math so easy to 
screw up. I woudl rather not have to worry about it.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to