From Peeyush Gupta <[email protected]>:

Peeyush Gupta has uploaded this change for review. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17739 )


Change subject: WIP: support copy statement with csv files
......................................................................

WIP: support copy statement with csv files

Change-Id: I7ac452559ab02e35f5b5fa84fbd0853a08b2bc86
---
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
3 files changed, 40 insertions(+), 9 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/39/17739/1

diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 09f9697..4e52321 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -46,13 +46,20 @@
     }

     private DelimitedDataParser createParser(IHyracksTaskContext ctx) throws 
HyracksDataException {
-        IValueParserFactory[] valueParserFactories = 
ExternalDataUtils.getValueParserFactories(recordType);
         char delimiter = ExternalDataUtils.validateGetDelimiter(configuration);
         char quote = ExternalDataUtils.validateGetQuote(configuration, 
delimiter);
         boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
         String nullString = 
configuration.get(ExternalDataConstants.KEY_NULL_STR);
-        return new DelimitedDataParser(ctx, valueParserFactories, delimiter, 
quote, hasHeader, recordType,
-                
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM),
 nullString);
+        if (configuration.containsKey(ExternalDataConstants.KEY_FIELDS)) {
+            ARecordType recordTypeFromConfiguration = 
ExternalDataUtils.getRecordType(configuration);
+            return new DelimitedDataParser(ctx, 
ExternalDataUtils.getValueParserFactories(recordTypeFromConfiguration),
+                    delimiter, quote, hasHeader, recordTypeFromConfiguration,
+                    
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM),
 nullString);
+        } else {
+            IValueParserFactory[] valueParserFactories = 
ExternalDataUtils.getValueParserFactories(recordType);
+            return new DelimitedDataParser(ctx, valueParserFactories, 
delimiter, quote, hasHeader, recordType,
+                    
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM),
 nullString);
+        }
     }

     @Override
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index bc2ce63..a43af0f 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -133,6 +133,7 @@
     public static final String KEY_REDACT_WARNINGS = "redact-warnings";
     public static final String KEY_REQUESTED_FIELDS = "requested-fields";
     public static final String KEY_EXTERNAL_SCAN_BUFFER_SIZE = 
"external-scan-buffer-size";
+    public static final String KEY_FIELDS = "fields";

     /**
      * Keys for adapter name
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 2605fe7..22e51ee 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -70,12 +70,7 @@
 import org.apache.asterix.external.util.ExternalDataConstants.ParquetOptions;
 import org.apache.asterix.external.util.aws.s3.S3Constants;
 import org.apache.asterix.external.util.aws.s3.S3Utils;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.types.TypeTagUtil;
+import org.apache.asterix.om.types.*;
 import org.apache.asterix.runtime.evaluators.common.NumberUtils;
 import 
org.apache.asterix.runtime.projection.ExternalDatasetProjectionFiltrationInfo;
 import org.apache.asterix.runtime.projection.FunctionCallInformation;
@@ -106,6 +101,9 @@
     private static final Map<ATypeTag, IValueParserFactory> 
valueParserFactoryMap = new EnumMap<>(ATypeTag.class);
     private static final int DEFAULT_MAX_ARGUMENT_SZ = 1024 * 1024;
     private static final int HEADER_FUDGE = 64;
+    private static final String COMMA = ",";
+    private static final String DOT = ".";
+    private static final String COLON = ":";

     static {
         valueParserFactoryMap.put(ATypeTag.INTEGER, 
IntegerParserFactory.INSTANCE);
@@ -1001,4 +999,20 @@
             ExternalDataPrefix externalDataPrefix, IExternalFilterEvaluator 
evaluator) throws HyracksDataException {
         return !key.endsWith("/") && predicate.test(matchers, key) && 
externalDataPrefix.evaluate(key, evaluator);
     }
+
+    public static ARecordType getRecordType(Map<String, String> configuration) 
throws HyracksDataException {
+        String fieldsString = 
configuration.get(ExternalDataConstants.KEY_FIELDS);
+        if (fieldsString == null) {
+            throw new HyracksDataException("fields key missing");
+        } else {
+            List<String> fields = new ArrayList<>();
+            List<IAType> fieldTypes = new ArrayList<>();
+            
Arrays.stream(fieldsString.strip().split(COMMA)).sequential().forEach(e -> {
+                String[] entry = e.split(COLON);
+                fields.add(entry[0]);
+                fieldTypes.add(BuiltinTypeMap.getBuiltinType(entry[1]));
+            });
+            return new ARecordType("root", fields.toArray(new String[0]), 
fieldTypes.toArray(new IAType[0]), true);
+        }
+    }
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17739
To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I7ac452559ab02e35f5b5fa84fbd0853a08b2bc86
Gerrit-Change-Number: 17739
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <[email protected]>
Gerrit-MessageType: newchange

Reply via email to