From Hussain Towaileb <[email protected]>:

Hussain Towaileb has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21178?usp=email )


Change subject: [ASTERIXDB-3634][EXT]: correctly parse float and map types (pt. 1)
......................................................................

[ASTERIXDB-3634][EXT]: correctly parse float and map types (pt. 1)

Details:
- Parse float type as float instead of promoting to double.
- Parse Map type (with String key) to JSON object type.

Ext-ref: MB-70462
Change-Id: I4aed8bb2a77741b7f2da2436ef113da8ada1d524
---
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
1 file changed, 39 insertions(+), 36 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/78/21178/1

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
index 7266f2c..99523a8 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/IcebergParquetDataParser.java
@@ -212,21 +212,8 @@
             NestedField field = schema.fields().get(i);
             String fieldName = field.name();
             Type fieldType = field.type();
-            Object sourceValue = structLike.get(i, Object.class);
-            ATypeTag typeTag = getTypeTag(fieldType, sourceValue == null, 
parserContext);
-            IValueReference value;
-            if (valueEmbedder.shouldEmbed(fieldName, typeTag)) {
-                value = valueEmbedder.getEmbeddedValue();
-            } else {
-                valueBuffer.reset();
-                parseValue(fieldType, sourceValue, 
valueBuffer.getDataOutput());
-                value = valueBuffer;
-            }
-
-            if (value != null) {
-                // Ignore missing values
-                
objectBuilder.addField(parserContext.getSerializedFieldName(fieldName), value);
-            }
+            Object fieldValue = structLike.get(i, Object.class);
+            parseValueAndAddObjectField(valueBuffer, objectBuilder, fieldType, 
fieldName, fieldValue);
         }

         embedMissingValues(objectBuilder, parserContext, valueEmbedder);
@@ -236,31 +223,46 @@
     }

     private void parseMap(Types.MapType mapSchema, Map<?, ?> map, DataOutput 
out) throws IOException {
-        final IMutableValueStorage item = parserContext.enterCollection();
-        final IMutableValueStorage valueBuffer = parserContext.enterObject();
+        IMutableValueStorage valueBuffer = parserContext.enterObject();
         IARecordBuilder objectBuilder = 
parserContext.getObjectBuilder(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
-        IAsterixListBuilder listBuilder =
-                
parserContext.getCollectionBuilder(DefaultOpenFieldType.NESTED_OPEN_AORDERED_LIST_TYPE);
+        valueEmbedder.enterObject();

         Type keyType = mapSchema.keyType();
         Type valueType = mapSchema.valueType();

-        for (Map.Entry<?, ?> entry : map.entrySet()) {
-            objectBuilder.reset(DefaultOpenFieldType.NESTED_OPEN_RECORD_TYPE);
-            valueBuffer.reset();
-            parseValue(keyType, entry.getKey(), valueBuffer.getDataOutput());
-            
objectBuilder.addField(parserContext.getSerializedFieldName("key"), 
valueBuffer);
-            valueBuffer.reset();
-            parseValue(valueType, entry.getValue(), 
valueBuffer.getDataOutput());
-            
objectBuilder.addField(parserContext.getSerializedFieldName("value"), 
valueBuffer);
-            item.reset();
-            objectBuilder.write(item.getDataOutput(), true);
-            listBuilder.addItem(item);
+        // TODO: we can't support non-string keys since we map MAP-TYPE to 
OBJECT-TYPE in AsterixDB
+        if (keyType != Types.StringType.get()) {
+            throw new RuntimeDataException(ErrorCode.TYPE_UNSUPPORTED, 
"Iceberg Parser", "MAP with non-string keys");
         }

-        listBuilder.write(out, true);
+        for (Map.Entry<?, ?> entry : map.entrySet()) {
+            String fieldName = (String) entry.getKey();
+            Object fieldValue = entry.getValue();
+            parseValueAndAddObjectField(valueBuffer, objectBuilder, valueType, 
fieldName, fieldValue);
+        }
+
+        embedMissingValues(objectBuilder, parserContext, valueEmbedder);
+        objectBuilder.write(out, true);
+        valueEmbedder.exitObject();
         parserContext.exitObject(valueBuffer, null, objectBuilder);
-        parserContext.exitCollection(item, listBuilder);
+    }
+
+    private void parseValueAndAddObjectField(IMutableValueStorage valueBuffer, 
IARecordBuilder objectBuilder,
+            Type valueType, String fieldName, Object fieldValue) throws 
IOException {
+        ATypeTag typeTag = getTypeTag(valueType, fieldValue == null, 
parserContext);
+        IValueReference value;
+        if (valueEmbedder.shouldEmbed(fieldName, typeTag)) {
+            value = valueEmbedder.getEmbeddedValue();
+        } else {
+            valueBuffer.reset();
+            parseValue(valueType, fieldValue, valueBuffer.getDataOutput());
+            value = valueBuffer;
+        }
+
+        if (value != null) {
+            // Ignore missing values
+            
objectBuilder.addField(parserContext.getSerializedFieldName(fieldName), value);
+        }
     }

     private void serializeInteger(Object value, DataOutput out) throws 
HyracksDataException {
@@ -276,9 +278,9 @@
     }

     private void serializeFloat(Object value, DataOutput out) throws 
HyracksDataException {
-        float floatValue = (Float) value;
-        aDouble.setValue(floatValue);
-        doubleSerde.serialize(aDouble, out);
+        Float floatValue = (Float) value;
+        aFloat.setValue(floatValue);
+        floatSerde.serialize(aFloat, out);
     }

     private void serializeDouble(Object value, DataOutput out) throws 
HyracksDataException {
@@ -376,7 +378,8 @@
         return switch (type.typeId()) {
             case BOOLEAN -> ATypeTag.BOOLEAN;
             case INTEGER, LONG -> ATypeTag.BIGINT;
-            case FLOAT, DOUBLE -> ATypeTag.DOUBLE;
+            case FLOAT -> ATypeTag.FLOAT;
+            case DOUBLE -> ATypeTag.DOUBLE;
             case STRING -> ATypeTag.STRING;
             case UUID -> ATypeTag.UUID;
             case FIXED, BINARY -> ATypeTag.BINARY;

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21178?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: I4aed8bb2a77741b7f2da2436ef113da8ada1d524
Gerrit-Change-Number: 21178
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>

Reply via email to