From Savyasach Reddy <[email protected]>:
Savyasach Reddy has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19135 )
Change subject: [NO ISSUE]: Support reading from .gzip files on HDFS
......................................................................
[NO ISSUE]: Support reading from .gzip files on HDFS
- user model changes: no
- storage format changes: no
- interface changes: no
details:
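- Add a GzipCodec subclass (AliasGzipCodec) and register it under io.compression.codecs so that files with the .gzip extension on HDFS are read as gzip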
Change-Id: Id7998b0a6fa367ed20f45bf38a0987333259e292
---
M
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
7 files changed, 53 insertions(+), 21 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/35/19135/1
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
index b68c38b..a384802 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
@@ -24,13 +24,10 @@
CREATE TYPE OpenType AS {
};
-CREATE EXTERNAL DATASET Customer(OpenType) USING S3 (
- ("accessKeyId"="dummyAccessKey"),
- ("secretAccessKey"="dummySecretKey"),
- ("region"="us-west-2"),
- ("serviceEndpoint"="http://127.0.0.1:8001"),
- ("container"="playground"),
-
("definition"="external-filter/car/{company:string}/customer/{customer_id:int}"),
+CREATE EXTERNAL DATASET Customer(OpenType) USING %adapter% (
+ %template%,
+ %additional_Properties%,
+
("definition"="%path_prefix%external-filter/car/{company:string}/customer/{customer_id:int}"),
("embed-filter-values" = "false"),
("format"="json")
);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
index f1a22d0..a9a7a8f 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
@@ -20,18 +20,15 @@
USE test;
COPY Customer c
-TO S3
-PATH ("copy-to-result", "car", company, "customer", customer_id)
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "car", company, "customer", customer_id)
OVER (
PARTITION BY c.company company,
c.customer_id customer_id
)
WITH {
- "accessKeyId":"dummyAccessKey",
- "secretAccessKey":"dummySecretKey",
- "region":"us-west-2",
- "serviceEndpoint":"http://127.0.0.1:8001",
- "container":"playground",
+ %template_colons%,
+ %additionalProperties%
"format":"json",
"compression":"gzip"
}
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
index 14d1d92..f46ddf9 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
@@ -19,13 +19,10 @@
USE test;
-CREATE EXTERNAL DATASET CustomerCopy(OpenType) USING S3 (
- ("accessKeyId"="dummyAccessKey"),
- ("secretAccessKey"="dummySecretKey"),
- ("region"="us-west-2"),
- ("serviceEndpoint"="http://127.0.0.1:8001"),
- ("container"="playground"),
-
("definition"="copy-to-result/car/{company:string}/customer/{customer_id:int}"),
+CREATE EXTERNAL DATASET CustomerCopy(OpenType) USING %adapter% (
+ %template%,
+ %additional_Properties%,
+
("definition"="%path_prefix%copy-to-result/car/{company:string}/customer/{customer_id:int}"),
("embed-filter-values" = "false"),
("format"="json")
);
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 5ae2fc5..c8a6785 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -21,6 +21,11 @@
<test-group name="copy-to">
<test-case FilePath="copy-to">
<compilation-unit name="partition">
+ <placeholder name="adapter" value="S3" />
+ <placeholder name="pathprefix" value="" />
+ <placeholder name="path_prefix" value="" />
+ <placeholder name="additionalProperties"
value='"container":"playground",' />
+ <placeholder name="additional_Properties"
value='("container"="playground")' />
<output-dir compare="Text">partition</output-dir>
</compilation-unit>
</test-case>
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index a5af248..6851433 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -93,6 +93,16 @@
</compilation-unit>
</test-case>
<test-case FilePath="copy-to">
+ <compilation-unit name="partition">
+ <placeholder name="adapter" value="HDFS" />
+ <placeholder name="pathprefix" value='"/playground", ' />
+ <placeholder name="path_prefix" value="/playground/" />
+ <placeholder name="additionalProperties" value="" />
+ <placeholder name="additional_Properties" value='("input-format" =
"text-input-format")' />
+ <output-dir compare="Text">partition</output-dir>
+ </compilation-unit>
+ </test-case>
+ <test-case FilePath="copy-to">
<compilation-unit name="simple-write">
<placeholder name="adapter" value="HDFS" />
<placeholder name="pathprefix" value='"/playground", ' />
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 1de2cd2..d487e68 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -179,6 +179,7 @@
public static final String CLASS_NAME_HDFS_FILESYSTEM =
"org.apache.hadoop.hdfs.DistributedFileSystem";
public static final String S3A_CHANGE_DETECTION_REQUIRED =
"requireVersionChangeDetection";
public static final String S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY =
"fs.s3a.change.detection.version.required";
+ public static final String HDFS_IO_COMPRESSION_CODECS_KEY =
"io.compression.codecs";
/**
* input formats aliases
*/
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index b08c507..35f2a94 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -70,6 +70,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
@@ -262,6 +263,7 @@
if (useDatanodeHostname != null) {
conf.set(ExternalDataConstants.KEY_HDFS_USE_DATANODE_HOSTNAME,
useDatanodeHostname);
}
+ conf.set(ExternalDataConstants.HDFS_IO_COMPRESSION_CODECS_KEY,
AliasGzipCodec.class.getName());
return conf;
}
@@ -593,4 +595,11 @@
return ExternalDataConstants.KEY_ADAPTER_NAME_HDFS
.equalsIgnoreCase(configuration.get(ExternalDataConstants.KEY_EXTERNAL_SOURCE_TYPE));
}
+
+ public static class AliasGzipCodec extends GzipCodec {
+ @Override
+ public String getDefaultExtension() {
+ return "." + ExternalDataConstants.KEY_COMPRESSION_GZIP;
+ }
+ }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19135
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Id7998b0a6fa367ed20f45bf38a0987333259e292
Gerrit-Change-Number: 19135
Gerrit-PatchSet: 1
Gerrit-Owner: Savyasach Reddy <[email protected]>
Gerrit-MessageType: newchange