From Preetham Poluparthi <[email protected]>:
Preetham Poluparthi has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20952?usp=email )
Change subject: [NO ISSUE][EXT] Throw ExternalSourceError when colons are
present in HDFS path
......................................................................
[NO ISSUE][EXT] Throw ExternalSourceError when colons are present in HDFS path
Ext-ref: MB-70480
Change-Id: Ifdb04b9df814dce7528d94339099f072265c8724
---
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.26.update.sqlpp
A
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.27.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
M
asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
M
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
7 files changed, 90 insertions(+), 8 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/52/20952/1
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
index 0bfc4ee..7004c83 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.01.ddl.sqlpp
@@ -23,8 +23,10 @@
CREATE TYPE ColumnType1 AS {
- id: int
-};
+ id: int
+ };
+
+CREATE TYPE ColumnType2 AS {};
CREATE COLLECTION TestCollection(ColumnType1) PRIMARY KEY id;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.26.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.26.update.sqlpp
new file mode 100644
index 0000000..9d3dc07
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.26.update.sqlpp
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+
+
+
+
+COPY (
+select "123" as id
+ ) toWriter
+TO %adapter%
+PATH (%pathprefix% "copy-to-result", "parquet:errorchecks25")
+WITH {
+ %template_colons%,
+ %additionalProperties%
+ "format":"parquet"
+ }
+
+
+
+
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.27.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.27.update.sqlpp
new file mode 100644
index 0000000..c55bb96
--- /dev/null
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/negative/parquet-error-checks/parquet-error-checks.27.update.sqlpp
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+USE test;
+
+
+CREATE EXTERNAL DATASET DatasetCopy27(ColumnType2) USING %adapter%
+(
+ %template%,
+ %additional_Properties%
+ ("definition"="%path_prefix%copy-to-result/parquet:errorchecks25"),
+ ("format" = "parquet"),
+ ("requireVersionChangeDetection"="false"),
+ ("include"="*.parquet")
+);
+
+SELECT * from DatasetCopy27;
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
index d598e52..b7592f6 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.02.update.sqlpp
@@ -24,7 +24,7 @@
select "123" as id
) toWriter
TO %adapter%
-PATH (%pathprefix% "copy-to-result", "parquet-simple")
+PATH (%pathprefix% "copy-to-result", "parquet:simple")
TYPE ( {id:string} )
WITH {
%template_colons%,
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
index b3e94c1..e2eaa0a 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/copy-to/parquet-simple/parquet-simple.03.ddl.sqlpp
@@ -24,7 +24,7 @@
(
%template%,
%additional_Properties%
- ("definition"="%path_prefix%copy-to-result/parquet-simple"),
+ ("definition"="%path_prefix%copy-to-result/parquet:simple"),
("format" = "parquet"),
("requireVersionChangeDetection"="false"),
("include"="*.parquet")
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
index ae4154e..7d1102b 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_external_dataset_s3.xml
@@ -324,6 +324,7 @@
<expected-error>ASX0072: Parquet does not support arrays containing
mixed data types</expected-error>
<expected-error>HYR0132: Extra field in the result, field 'c' does not
exist at 'root' in the schema</expected-error>
<expected-error>ASX0072: Parquet does not support arrays containing
mixed data types</expected-error>
+ <expected-error>ASX1108: External source error.
java.lang.IllegalArgumentException: java.net.URISyntaxException: Relative path
in absolute URI: parquet:errorchecks25</expected-error>
</compilation-unit>
</test-case>
<test-case FilePath="copy-to/negative">
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
index 82653c2..2dbeb3b 100644
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/HDFSDataSourceFactory.java
@@ -225,11 +225,17 @@
}
}
- private InputSplit[] getInputSplits(JobConf conf, int numPartitions)
throws IOException {
- if (HDFSUtils.isEmpty(conf)) {
- return Scheduler.EMPTY_INPUT_SPLITS;
+ private InputSplit[] getInputSplits(JobConf conf, int numPartitions)
throws IOException, CompilationException {
+ try {
+ if (HDFSUtils.isEmpty(conf)) {
+ return Scheduler.EMPTY_INPUT_SPLITS;
+ }
+ return conf.getInputFormat().getSplits(conf, numPartitions);
+ } catch (IllegalArgumentException e) {
+ LOGGER.info("This error can be due to presence of colons in the
path "
+ + "of the files to be read. HDFS does not support colons
in file paths. Please check the paths of the files.");
+ throw CompilationException.create(ErrorCode.EXTERNAL_SOURCE_ERROR,
e, getMessageOrToString(e));
}
- return conf.getInputFormat().getSplits(conf, numPartitions);
}
/*
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20952?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: Ifdb04b9df814dce7528d94339099f072265c8724
Gerrit-Change-Number: 20952
Gerrit-PatchSet: 1
Gerrit-Owner: Preetham Poluparthi <[email protected]>