From Hussain Towaileb <[email protected]>:
Hussain Towaileb has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21178?usp=email )
Change subject: [ASTERIXDB-3634][EXT]: correctly parse float and map types (pt. 1)
......................................................................
[ASTERIXDB-3634][EXT]: correctly parse float and map types (pt. 1)
Details:
- Parse float type as float instead of promoting to double.
- Parse Map type (with String keys) as a JSON object; maps with non-string keys are rejected as unsupported.
Ext-ref: MB-70462
Change-Id: I4aed8bb2a77741b7f2da2436ef113da8ada1d524
---
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
1 file changed, 39 insertions(+), 36 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/78/21178/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
index 7266f2c..99523a8 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
@@ -212,21 +212,8 @@
NestedField field = schema.fields().get(i);
String fieldName = field.name();
Type fieldType = field.type();
- Object sourceValue = structLike.get(i, Object.class);
- ATypeTag typeTag = getTypeTag(fieldType, sourceValue == null,
parserContext);
- IValueReference value;
- if (valueEmbedder.shouldEmbed(fieldName, typeTag)) {
- value = valueEmbedder.getEmbeddedValue();
- } else {
- valueBuffer.reset();
- parseValue(fieldType, sourceValue,
valueBuffer.getDataOutput());
- value = valueBuffer;
- }
-
- if (value != null) {
- // Ignore missing values
-
objectBuilder.addField(parserContext.getSerializedFieldName(fieldName), value);
- }
+ Object fieldValue = structLike.get(i, Object.class);
+ parseValueAndAddObjectField(valueBuffer, objectBuilder, fieldType,
fieldName, fieldValue);
}
embedMissingValues(objectBuilder, parserContext, valueEmbedder);
@@ -236,31 +223,46 @@
}
private void parseMap(Types.MapType mapSchema, Map<?, ?> map, DataOutput
out) throws IOException {
- final IMutableValueStorage item = parserContext.enterCollection();
- final IMutableValueStorage valueBuffer = parserContext.enterObject();
+ IMutableValueStorage valueBuffer = parserContext.enterObject();
IARecordBuilder objectBuilder =
parserContext.getObjectBuilder(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
- IAsterixListBuilder listBuilder =
-
parserContext.getCollectionBuilder(DefaultOpenFieldType.NESTED_OPEN_AORDERED_LIST_TYPE);
+ valueEmbedder.enterObject();
Type keyType = mapSchema.keyType();
Type valueType = mapSchema.valueType();
- for (Map.Entry<?, ?> entry : map.entrySet()) {
- objectBuilder.reset(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
- valueBuffer.reset();
- parseValue(keyType, entry.getKey(), valueBuffer.getDataOutput());
-
objectBuilder.addField(parserContext.getSerializedFieldName("key"),
valueBuffer);
- valueBuffer.reset();
- parseValue(valueType, entry.getValue(),
valueBuffer.getDataOutput());
-
objectBuilder.addField(parserContext.getSerializedFieldName("value"),
valueBuffer);
- item.reset();
- objectBuilder.write(item.getDataOutput(), true);
- listBuilder.addItem(item);
+ // TODO: we can't support non-string keys since we map MAP-TYPE to
OBJECT-TYPE in AsterixDB
+ if (keyType != Types.StringType.get()) {
+ throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED,
"Iceberg Parser", "MAP with non-string keys");
}
- listBuilder.write(out, true);
+ for (Map.Entry<?, ?> entry : map.entrySet()) {
+ String fieldName = (String) entry.getKey();
+ Object fieldValue = entry.getValue();
+ parseValueAndAddObjectField(valueBuffer, objectBuilder, valueType,
fieldName, fieldValue);
+ }
+
+ embedMissingValues(objectBuilder, parserContext, valueEmbedder);
+ objectBuilder.write(out, true);
+ valueEmbedder.exitObject();
parserContext.exitObject(valueBuffer, null, objectBuilder);
- parserContext.exitCollection(item, listBuilder);
+ }
+
+ private void parseValueAndAddObjectField(IMutableValueStorage valueBuffer,
IARecordBuilder objectBuilder,
+ Type valueType, String fieldName, Object fieldValue) throws
IOException {
+ ATypeTag typeTag = getTypeTag(valueType, fieldValue == null,
parserContext);
+ IValueReference value;
+ if (valueEmbedder.shouldEmbed(fieldName, typeTag)) {
+ value = valueEmbedder.getEmbeddedValue();
+ } else {
+ valueBuffer.reset();
+ parseValue(valueType, fieldValue, valueBuffer.getDataOutput());
+ value = valueBuffer;
+ }
+
+ if (value != null) {
+ // Ignore missing values
+
objectBuilder.addField(parserContext.getSerializedFieldName(fieldName), value);
+ }
}
private void serializeInteger(Object value, DataOutput out) throws
HyracksDataException {
@@ -276,9 +278,9 @@
}
private void serializeFloat(Object value, DataOutput out) throws
HyracksDataException {
- float floatValue = (Float) value;
- aDouble.setValue(floatValue);
- doubleSerde.serialize(aDouble, out);
+ Float floatValue = (Float) value;
+ aFloat.setValue(floatValue);
+ floatSerde.serialize(aFloat, out);
}
private void serializeDouble(Object value, DataOutput out) throws
HyracksDataException {
@@ -376,7 +378,8 @@
return switch (type.typeId()) {
case BOOLEAN -> ATypeTag.BOOLEAN;
case INTEGER, LONG -> ATypeTag.BIGINT;
- case FLOAT, DOUBLE -> ATypeTag.DOUBLE;
+ case FLOAT -> ATypeTag.FLOAT;
+ case DOUBLE -> ATypeTag.DOUBLE;
case STRING -> ATypeTag.STRING;
case UUID -> ATypeTag.UUID;
case FIXED, BINARY -> ATypeTag.BINARY;
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21178?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: I4aed8bb2a77741b7f2da2436ef113da8ada1d524
Gerrit-Change-Number: 21178
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>