From Peeyush Gupta <[email protected]>: Peeyush Gupta has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20793?usp=email )
( 2 is the latest approved patch-set. No files were changed between the latest approved patch-set and the submitted one. )Change subject: [ASTERIXDB-3634][EXT] Support TIMESTAMP and TIMESTAMP_NANO iceberg data type ...................................................................... [ASTERIXDB-3634][EXT] Support TIMESTAMP and TIMESTAMP_NANO iceberg data type - user model changes: no - storage format changes: no - interface changes: no Ext-ref: MB-63115 Change-Id: Id24d0ba9a90d4d9039ca966f435c45a733427585 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20793 Integration-Tests: Jenkins <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> Tested-by: Peeyush Gupta <[email protected]> --- M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java 3 files changed, 64 insertions(+), 11 deletions(-) Approvals: Peeyush Gupta: Verified Hussain Towaileb: Looks good to me, approved Jenkins: Verified diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java index aee5616..c778b69 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java @@ -19,6 +19,8 @@ package org.apache.asterix.external.input.record.reader.aws.iceberg.converter; import 
java.io.DataOutput; +import java.time.ZoneId; +import java.time.ZoneOffset; import java.util.List; import java.util.Map; import java.util.TimeZone; @@ -45,8 +47,9 @@ private final boolean decimalToDouble; private final boolean timestampAsLong; private final boolean dateAsInt; + private final boolean timeAsInt; - private final int timeZoneOffset; + private final ZoneId timeZoneId; private final AMutableDate mutableDate = new AMutableDate(0); private final AMutableDateTime mutableDateTime = new AMutableDateTime(0); private final List<Warning> warnings; @@ -57,13 +60,15 @@ .getOrDefault(ExternalDataConstants.IcebergOptions.DECIMAL_TO_DOUBLE, ExternalDataConstants.FALSE)); timestampAsLong = Boolean.parseBoolean(configuration .getOrDefault(ExternalDataConstants.IcebergOptions.TIMESTAMP_AS_LONG, ExternalDataConstants.TRUE)); + timeAsInt = Boolean.parseBoolean(configuration.getOrDefault(ExternalDataConstants.IcebergOptions.TIME_AS_INT, + ExternalDataConstants.TRUE)); dateAsInt = Boolean.parseBoolean(configuration.getOrDefault(ExternalDataConstants.IcebergOptions.DATE_AS_INT, ExternalDataConstants.TRUE)); String configuredTimeZoneId = configuration.get(ExternalDataConstants.IcebergOptions.TIMEZONE); if (configuredTimeZoneId != null && !configuredTimeZoneId.isEmpty()) { - timeZoneOffset = TimeZone.getTimeZone(configuredTimeZoneId).getRawOffset(); + timeZoneId = TimeZone.getTimeZone(configuredTimeZoneId).toZoneId(); } else { - timeZoneOffset = 0; + timeZoneId = ZoneOffset.UTC; } } @@ -89,14 +94,18 @@ return decimalToDouble; } - public int getTimeZoneOffset() { - return timeZoneOffset; + public ZoneId getTimeZoneId() { + return timeZoneId; } public boolean isTimestampAsLong() { return timestampAsLong; } + public boolean isTimeAsInt() { + return timeAsInt; + } + public boolean isDateAsInt() { return dateAsInt; } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java index 0fc9e35..91974a8 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java @@ -26,11 +26,14 @@ import java.math.BigDecimal; import java.nio.ByteBuffer; import java.time.LocalDate; +import java.time.LocalDateTime; import java.time.LocalTime; +import java.time.ZoneId; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.UUID; +import java.util.concurrent.TimeUnit; import org.apache.asterix.builders.IARecordBuilder; import org.apache.asterix.builders.IAsterixListBuilder; @@ -170,6 +173,7 @@ return; case TIMESTAMP: case TIMESTAMP_NANO: + serializeTimestamp(value, out); case GEOMETRY: case GEOGRAPHY: case VARIANT: @@ -312,14 +316,34 @@ public void serializeDate(Object value, DataOutput output) throws HyracksDataException { LocalDate localDate = (LocalDate) value; - aDate.setValue((int) localDate.toEpochDay()); - dateSerde.serialize(aDate, output); + if (parserContext.isDateAsInt()) { + serializeInteger((int) localDate.toEpochDay(), output); + } else { + aDate.setValue((int) localDate.toEpochDay()); + dateSerde.serialize(aDate, output); + } } public void serializeTime(Object value, DataOutput output) throws HyracksDataException { LocalTime localTime = (LocalTime) value; - aTime.setValue((int) (localTime.toNanoOfDay() / 1_000_000)); - timeSerde.serialize(aTime, output); + int timeInMillis = (int) TimeUnit.NANOSECONDS.toMillis(localTime.toNanoOfDay()); + if (parserContext.isTimeAsInt()) { + serializeInteger(timeInMillis, output); + } else { + aTime.setValue(timeInMillis); + timeSerde.serialize(aTime, output); + } + } + + public void serializeTimestamp(Object value, DataOutput output) throws HyracksDataException { + LocalDateTime 
localDateTime = (LocalDateTime) value; + ZoneId zoneId = parserContext.getTimeZoneId(); + long timeStampInMillis = localDateTime.atZone(zoneId).toInstant().toEpochMilli(); + if (parserContext.isTimestampAsLong()) { + serializeLong(timeStampInMillis, output); + } else { + parserContext.serializeDateTime(timeStampInMillis, output); + } } private static HyracksDataException createUnsupportedException(Type type) { @@ -353,8 +377,27 @@ } case STRUCT -> ATypeTag.OBJECT; case LIST, MAP -> ATypeTag.ARRAY; - case DATE -> ATypeTag.DATE; - case TIME -> ATypeTag.TIME; + case DATE -> { + if (parserContext.isDateAsInt()) { + yield ATypeTag.INTEGER; + } else { + yield ATypeTag.DATE; + } + } + case TIME -> { + if (parserContext.isTimeAsInt()) { + yield ATypeTag.INTEGER; + } else { + yield ATypeTag.TIME; + } + } + case TIMESTAMP, TIMESTAMP_NANO -> { + if (parserContext.isTimestampAsLong()) { + yield ATypeTag.BIGINT; + } else { + yield ATypeTag.DATETIME; + } + } default -> throw createUnsupportedException(type); }; } diff --git a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java index f99c424..4fe3f6b 100644 --- a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java +++ b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java @@ -435,6 +435,7 @@ public static final String DECIMAL_TO_DOUBLE = "decimal-to-double"; public static final String TIMESTAMP_AS_LONG = "timestamp-to-long"; + public static final String TIME_AS_INT = "time-to-int"; public static final String DATE_AS_INT = "date-to-int"; public static final String TIMEZONE = "timezone"; } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20793?usp=email To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: Id24d0ba9a90d4d9039ca966f435c45a733427585 Gerrit-Change-Number: 20793 Gerrit-PatchSet: 4 Gerrit-Owner: Peeyush Gupta <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Peeyush Gupta <[email protected]>
