From Peeyush Gupta <[email protected]>:
Peeyush Gupta has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17739 )
Change subject: WIP: support copy statement with csv files
......................................................................
WIP: support copy statement with csv files
Change-Id: I7ac452559ab02e35f5b5fa84fbd0853a08b2bc86
---
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
M asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
3 files changed, 40 insertions(+), 9 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/39/17739/1
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
index 09f9697..4e52321 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/parser/factory/DelimitedDataParserFactory.java
@@ -46,13 +46,20 @@
}
private DelimitedDataParser createParser(IHyracksTaskContext ctx) throws
HyracksDataException {
- IValueParserFactory[] valueParserFactories =
ExternalDataUtils.getValueParserFactories(recordType);
char delimiter = ExternalDataUtils.validateGetDelimiter(configuration);
char quote = ExternalDataUtils.validateGetQuote(configuration,
delimiter);
boolean hasHeader = ExternalDataUtils.hasHeader(configuration);
String nullString =
configuration.get(ExternalDataConstants.KEY_NULL_STR);
- return new DelimitedDataParser(ctx, valueParserFactories, delimiter,
quote, hasHeader, recordType,
-
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM),
nullString);
+ if (configuration.containsKey(ExternalDataConstants.KEY_FIELDS)) {
+ ARecordType recordTypeFromConfiguration =
ExternalDataUtils.getRecordType(configuration);
+ return new DelimitedDataParser(ctx,
ExternalDataUtils.getValueParserFactories(recordTypeFromConfiguration),
+ delimiter, quote, hasHeader, recordTypeFromConfiguration,
+
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM),
nullString);
+ } else {
+ IValueParserFactory[] valueParserFactories =
ExternalDataUtils.getValueParserFactories(recordType);
+ return new DelimitedDataParser(ctx, valueParserFactories,
delimiter, quote, hasHeader, recordType,
+
ExternalDataUtils.getDataSourceType(configuration).equals(DataSourceType.STREAM),
nullString);
+ }
}
@Override
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index bc2ce63..a43af0f 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -133,6 +133,7 @@
public static final String KEY_REDACT_WARNINGS = "redact-warnings";
public static final String KEY_REQUESTED_FIELDS = "requested-fields";
public static final String KEY_EXTERNAL_SCAN_BUFFER_SIZE =
"external-scan-buffer-size";
+ public static final String KEY_FIELDS = "fields";
/**
* Keys for adapter name
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
index 2605fe7..22e51ee 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataUtils.java
@@ -70,12 +70,7 @@
import org.apache.asterix.external.util.ExternalDataConstants.ParquetOptions;
import org.apache.asterix.external.util.aws.s3.S3Constants;
import org.apache.asterix.external.util.aws.s3.S3Utils;
-import org.apache.asterix.om.types.ARecordType;
-import org.apache.asterix.om.types.ATypeTag;
-import org.apache.asterix.om.types.AUnionType;
-import org.apache.asterix.om.types.EnumDeserializer;
-import org.apache.asterix.om.types.IAType;
-import org.apache.asterix.om.types.TypeTagUtil;
+import org.apache.asterix.om.types.*;
import org.apache.asterix.runtime.evaluators.common.NumberUtils;
import
org.apache.asterix.runtime.projection.ExternalDatasetProjectionFiltrationInfo;
import org.apache.asterix.runtime.projection.FunctionCallInformation;
@@ -106,6 +101,9 @@
private static final Map<ATypeTag, IValueParserFactory>
valueParserFactoryMap = new EnumMap<>(ATypeTag.class);
private static final int DEFAULT_MAX_ARGUMENT_SZ = 1024 * 1024;
private static final int HEADER_FUDGE = 64;
+ private static final String COMMA = ",";
+ private static final String DOT = ".";
+ private static final String COLON = ":";
static {
valueParserFactoryMap.put(ATypeTag.INTEGER,
IntegerParserFactory.INSTANCE);
@@ -1001,4 +999,20 @@
ExternalDataPrefix externalDataPrefix, IExternalFilterEvaluator
evaluator) throws HyracksDataException {
return !key.endsWith("/") && predicate.test(matchers, key) &&
externalDataPrefix.evaluate(key, evaluator);
}
+
+ public static ARecordType getRecordType(Map<String, String> configuration)
throws HyracksDataException {
+ String fieldsString =
configuration.get(ExternalDataConstants.KEY_FIELDS);
+ if (fieldsString == null) {
+ throw new HyracksDataException("fields key missing");
+ } else {
+ List<String> fields = new ArrayList<>();
+ List<IAType> fieldTypes = new ArrayList<>();
+
Arrays.stream(fieldsString.strip().split(COMMA)).sequential().forEach(e -> {
+ String[] entry = e.split(COLON);
+ fields.add(entry[0]);
+ fieldTypes.add(BuiltinTypeMap.getBuiltinType(entry[1]));
+ });
+ return new ARecordType("root", fields.toArray(new String[0]),
fieldTypes.toArray(new IAType[0]), true);
+ }
+ }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17739
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I7ac452559ab02e35f5b5fa84fbd0853a08b2bc86
Gerrit-Change-Number: 17739
Gerrit-PatchSet: 1
Gerrit-Owner: Peeyush Gupta <[email protected]>
Gerrit-MessageType: newchange