From Peeyush Gupta <[email protected]>:
Peeyush Gupta has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20793?usp=email )
Change subject: [ASTERIXDB-3634][EXT] Support TIMESTAMP and TIMESTAMP_NANO
iceberg data type
......................................................................
[ASTERIXDB-3634][EXT] Support TIMESTAMP and TIMESTAMP_NANO iceberg data type
- user model changes: no
- storage format changes: no
- interface changes: no
Ext-ref: MB-63115
Change-Id: Id24d0ba9a90d4d9039ca966f435c45a733427585
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
3 files changed, 64 insertions(+), 11 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/93/20793/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java
index aee5616..c778b69 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/record/reader/aws/iceberg/converter/IcebergConverterContext.java
@@ -19,6 +19,8 @@
package org.apache.asterix.external.input.record.reader.aws.iceberg.converter;
import java.io.DataOutput;
+import java.time.ZoneId;
+import java.time.ZoneOffset;
import java.util.List;
import java.util.Map;
import java.util.TimeZone;
@@ -45,8 +47,9 @@
private final boolean decimalToDouble;
private final boolean timestampAsLong;
private final boolean dateAsInt;
+ private final boolean timeAsInt;
- private final int timeZoneOffset;
+ private final ZoneId timeZoneId;
private final AMutableDate mutableDate = new AMutableDate(0);
private final AMutableDateTime mutableDateTime = new AMutableDateTime(0);
private final List<Warning> warnings;
@@ -57,13 +60,15 @@
.getOrDefault(ExternalDataConstants.IcebergOptions.DECIMAL_TO_DOUBLE,
ExternalDataConstants.FALSE));
timestampAsLong = Boolean.parseBoolean(configuration
.getOrDefault(ExternalDataConstants.IcebergOptions.TIMESTAMP_AS_LONG,
ExternalDataConstants.TRUE));
+ timeAsInt =
Boolean.parseBoolean(configuration.getOrDefault(ExternalDataConstants.IcebergOptions.TIME_AS_INT,
+ ExternalDataConstants.TRUE));
dateAsInt =
Boolean.parseBoolean(configuration.getOrDefault(ExternalDataConstants.IcebergOptions.DATE_AS_INT,
ExternalDataConstants.TRUE));
String configuredTimeZoneId =
configuration.get(ExternalDataConstants.IcebergOptions.TIMEZONE);
if (configuredTimeZoneId != null && !configuredTimeZoneId.isEmpty()) {
- timeZoneOffset =
TimeZone.getTimeZone(configuredTimeZoneId).getRawOffset();
+ timeZoneId = TimeZone.getTimeZone(configuredTimeZoneId).toZoneId();
} else {
- timeZoneOffset = 0;
+ timeZoneId = ZoneOffset.UTC;
}
}
@@ -89,14 +94,18 @@
return decimalToDouble;
}
- public int getTimeZoneOffset() {
- return timeZoneOffset;
+ public ZoneId getTimeZoneId() {
+ return timeZoneId;
}
public boolean isTimestampAsLong() {
return timestampAsLong;
}
+ public boolean isTimeAsInt() {
+ return timeAsInt;
+ }
+
public boolean isDateAsInt() {
return dateAsInt;
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
index 0fc9e35..7f14f0e 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
@@ -26,11 +26,14 @@
import java.math.BigDecimal;
import java.nio.ByteBuffer;
import java.time.LocalDate;
+import java.time.LocalDateTime;
import java.time.LocalTime;
+import java.time.ZoneId;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;
+import java.util.concurrent.TimeUnit;
import org.apache.asterix.builders.IARecordBuilder;
import org.apache.asterix.builders.IAsterixListBuilder;
@@ -170,6 +173,7 @@
return;
case TIMESTAMP:
case TIMESTAMP_NANO:
+ serializeTimestamp(value, out);
case GEOMETRY:
case GEOGRAPHY:
case VARIANT:
@@ -312,14 +316,34 @@
public void serializeDate(Object value, DataOutput output) throws
HyracksDataException {
LocalDate localDate = (LocalDate) value;
- aDate.setValue((int) localDate.toEpochDay());
- dateSerde.serialize(aDate, output);
+ if (parserContext.isDateAsInt()) {
+ serializeInteger((int) localDate.toEpochDay(), output);
+ } else {
+ aDate.setValue((int) localDate.toEpochDay());
+ dateSerde.serialize(aDate, output);
+ }
}
public void serializeTime(Object value, DataOutput output) throws
HyracksDataException {
LocalTime localTime = (LocalTime) value;
- aTime.setValue((int) (localTime.toNanoOfDay() / 1_000_000));
- timeSerde.serialize(aTime, output);
+ int timeInMillis = (int)
TimeUnit.NANOSECONDS.toMillis(localTime.toNanoOfDay());
+ if (parserContext.isTimeAsInt()) {
+ serializeLong(timeInMillis, output);
+ } else {
+ aTime.setValue(timeInMillis);
+ timeSerde.serialize(aTime, output);
+ }
+ }
+
+ public void serializeTimestamp(Object value, DataOutput output) throws
HyracksDataException {
+ LocalDateTime localDateTime = (LocalDateTime) value;
+ ZoneId zoneId = parserContext.getTimeZoneId();
+ long timeStampInMillis =
localDateTime.atZone(zoneId).toInstant().toEpochMilli();
+ if (parserContext.isTimestampAsLong()) {
+ serializeLong(timeStampInMillis, output);
+ } else {
+ parserContext.serializeDateTime(timeStampInMillis, output);
+ }
}
private static HyracksDataException createUnsupportedException(Type type) {
@@ -353,8 +377,27 @@
}
case STRUCT -> ATypeTag.OBJECT;
case LIST, MAP -> ATypeTag.ARRAY;
- case DATE -> ATypeTag.DATE;
- case TIME -> ATypeTag.TIME;
+ case DATE -> {
+ if (parserContext.isDateAsInt()) {
+ yield ATypeTag.INTEGER;
+ } else {
+ yield ATypeTag.DATE;
+ }
+ }
+ case TIME -> {
+ if (parserContext.isTimeAsInt()) {
+ yield ATypeTag.INTEGER;
+ } else {
+ yield ATypeTag.TIME;
+ }
+ }
+ case TIMESTAMP, TIMESTAMP_NANO -> {
+ if (parserContext.isTimestampAsLong()) {
+ yield ATypeTag.BIGINT;
+ } else {
+ yield ATypeTag.DATETIME;
+ }
+ }
default -> throw createUnsupportedException(type);
};
}
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 923bd30..932ae77 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -434,6 +434,7 @@
public static final String DECIMAL_TO_DOUBLE = "decimal-to-double";
public static final String TIMESTAMP_AS_LONG = "timestamp-to-long";
+ public static final String TIME_AS_INT = "time-to-long";
public static final String DATE_AS_INT = "date-to-int";
public static final String TIMEZONE = "timezone";
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20793?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Id24d0ba9a90d4d9039ca966f435c45a733427585
Gerrit-Change-Number: 20793
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <[email protected]>