>From Savyasach Reddy <[email protected]>:

Savyasach Reddy has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19135 )


Change subject: [NO ISSUE]: Support reading from .gzip files on HDFS
......................................................................

[NO ISSUE]: Support reading from .gzip files on HDFS

- user model changes: no
- storage format changes: no
- interface changes: no

details:
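- Register a GzipCodec subclass (AliasGzipCodec) under io.compression.codecs
  so that HDFS files with the .gzip extension are read through GzipCodec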

Change-Id: Id7998b0a6fa367ed20f45bf38a0987333259e292
---
M 
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
M 
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
7 files changed, 53 insertions(+), 21 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/35/19135/1

diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
index b68c38b..a384802 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.01.ddl.sqlpp
@@ -24,13 +24,10 @@
 CREATE TYPE OpenType AS {
 };

-CREATE EXTERNAL DATASET Customer(OpenType) USING S3 (
-    ("accessKeyId"="dummyAccessKey"),
-    ("secretAccessKey"="dummySecretKey"),
-    ("region"="us-west-2"),
-    ("serviceEndpoint"="http://127.0.0.1:8001"),
-    ("container"="playground"),
-    
("definition"="external-filter/car/{company:string}/customer/{customer_id:int}"),
+CREATE EXTERNAL DATASET Customer(OpenType) USING %adapter% (
+    %template%,
+    %additional_Properties%,
+    
("definition"="%path_prefix%external-filter/car/{company:string}/customer/{customer_id:int}"),
     ("embed-filter-values" = "false"),
     ("format"="json")
 );
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
index f1a22d0..a9a7a8f 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.02.update.sqlpp
@@ -20,18 +20,15 @@
 USE test;

 COPY Customer c
-TO S3
-PATH ("copy-to-result", "car", company, "customer", customer_id)
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "car", company, "customer", customer_id)
 OVER (
    PARTITION BY c.company company,
                 c.customer_id customer_id
 )
 WITH {
-    "accessKeyId":"dummyAccessKey",
-    "secretAccessKey":"dummySecretKey",
-    "region":"us-west-2",
-    "serviceEndpoint":"http://127.0.0.1:8001",
-    "container":"playground",
+    %template_colons%,
+    %additionalProperties%
     "format":"json",
     "compression":"gzip"
 }
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
index 14d1d92..f46ddf9 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/partition/partition.03.ddl.sqlpp
@@ -19,13 +19,10 @@

 USE test;

-CREATE EXTERNAL DATASET CustomerCopy(OpenType) USING S3 (
-    ("accessKeyId"="dummyAccessKey"),
-    ("secretAccessKey"="dummySecretKey"),
-    ("region"="us-west-2"),
-    ("serviceEndpoint"="http://127.0.0.1:8001"),
-    ("container"="playground"),
-    
("definition"="copy-to-result/car/{company:string}/customer/{customer_id:int}"),
+CREATE EXTERNAL DATASET CustomerCopy(OpenType) USING %adapter% (
+    %template%,
+    %additional_Properties%,
+    
("definition"="%path_prefix%copy-to-result/car/{company:string}/customer/{customer_id:int}"),
     ("embed-filter-values" = "false"),
     ("format"="json")
 );
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index 5ae2fc5..c8a6785 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -21,6 +21,11 @@
   <test-group name="copy-to">
     <test-case FilePath="copy-to">
       <compilation-unit name="partition">
+        <placeholder name="adapter" value="S3" />
+        <placeholder name="pathprefix" value="" />
+        <placeholder name="path_prefix" value="" />
+        <placeholder name="additionalProperties" 
value='"container":"playground",' />
+        <placeholder name="additional_Properties" 
value='("container"="playground")' />
         <output-dir compare="Text">partition</output-dir>
       </compilation-unit>
     </test-case>
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
index a5af248..6851433 100644
--- 
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp_hdfs.xml
@@ -93,6 +93,16 @@
       </compilation-unit>
     </test-case>
     <test-case FilePath="copy-to">
+      <compilation-unit name="partition">
+        <placeholder name="adapter" value="HDFS" />
+        <placeholder name="pathprefix" value='"/playground", ' />
+        <placeholder name="path_prefix" value="/playground/" />
+        <placeholder name="additionalProperties" value="" />
+        <placeholder name="additional_Properties" value='("input-format" = 
"text-input-format")' />
+        <output-dir compare="Text">partition</output-dir>
+      </compilation-unit>
+    </test-case>
+    <test-case FilePath="copy-to">
       <compilation-unit name="simple-write">
         <placeholder name="adapter" value="HDFS" />
         <placeholder name="pathprefix" value='"/playground", ' />
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
index 1de2cd2..d487e68 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/ExternalDataConstants.java
@@ -179,6 +179,7 @@
     public static final String CLASS_NAME_HDFS_FILESYSTEM = 
"org.apache.hadoop.hdfs.DistributedFileSystem";
     public static final String S3A_CHANGE_DETECTION_REQUIRED = 
"requireVersionChangeDetection";
     public static final String S3A_CHANGE_DETECTION_REQUIRED_CONFIG_KEY = 
"fs.s3a.change.detection.version.required";
+    public static final String HDFS_IO_COMPRESSION_CODECS_KEY = 
"io.compression.codecs";
     /**
      * input formats aliases
      */
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
index b08c507..35f2a94 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/util/HDFSUtils.java
@@ -70,6 +70,7 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.compress.GzipCodec;
 import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -262,6 +263,7 @@
         if (useDatanodeHostname != null) {
             conf.set(ExternalDataConstants.KEY_HDFS_USE_DATANODE_HOSTNAME, 
useDatanodeHostname);
         }
+        conf.set(ExternalDataConstants.HDFS_IO_COMPRESSION_CODECS_KEY, 
AliasGzipCodec.class.getName());
         return conf;
     }

@@ -593,4 +595,11 @@
         return ExternalDataConstants.KEY_ADAPTER_NAME_HDFS
                 
.equalsIgnoreCase(configuration.get(ExternalDataConstants.KEY_EXTERNAL_SOURCE_TYPE));
     }
+
+    public static class AliasGzipCodec extends GzipCodec {
+        @Override
+        public String getDefaultExtension() {
+            return "." + ExternalDataConstants.KEY_COMPRESSION_GZIP;
+        }
+    }
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19135
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: Id7998b0a6fa367ed20f45bf38a0987333259e292
Gerrit-Change-Number: 19135
Gerrit-PatchSet: 1
Gerrit-Owner: Savyasach Reddy <[email protected]>
Gerrit-MessageType: newchange

Reply via email to