Repository: hive
Updated Branches:
  refs/heads/branch-1.0 06f10fe41 -> b71f6aaa9
HIVE-11438: Join a ACID table with non-ACID table fail with MR on 1.0.0 (Daniel Dai reviewed by Jason Dere) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b71f6aaa Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b71f6aaa Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b71f6aaa Branch: refs/heads/branch-1.0 Commit: b71f6aaa9a7d669130618c9adce1b820570b50fb Parents: 06f10fe Author: Daniel Dai <da...@hortonworks.com> Authored: Mon Aug 10 14:30:56 2015 -0700 Committer: Daniel Dai <da...@hortonworks.com> Committed: Mon Aug 10 14:31:39 2015 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 1 + .../apache/hadoop/hive/ql/exec/Utilities.java | 10 +++- .../hadoop/hive/ql/io/orc/OrcInputFormat.java | 2 +- .../queries/clientpositive/join_acid_non_acid.q | 24 ++++++++ .../clientpositive/join_acid_non_acid.q.out | 58 ++++++++++++++++++++ 5 files changed, 93 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 3690e5c..18289f7 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -25,6 +25,7 @@ minimr.query.files=auto_sortmerge_join_16.q,\ infer_bucket_sort_reducers_power_two.q,\ input16_cc.q,\ join1.q,\ + join_acid_non_acid.q,\ leftsemijoin_mr.q,\ list_bucket_dml_10.q,\ load_fs2.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java ---------------------------------------------------------------------- diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java index 913288f..b2db584 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java @@ -382,7 +382,15 @@ public final class Utilities { in = new ByteArrayInputStream(planBytes); in = new InflaterInputStream(in); } else { - in = new FileInputStream(localPath.toUri().getPath()); + try { + in = new FileInputStream(localPath.toUri().getPath()); + } catch (FileNotFoundException fnf) { + } + // If it is on frontend, localPath does not exist, try + // to fetch it on hdfs + if (in == null) { + in = path.getFileSystem(conf).open(path); + } } if(MAP_PLAN_NAME.equals(name)){ http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java index 200daa5..be0c947 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java @@ -132,7 +132,7 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>, @Override public boolean shouldSkipCombine(Path path, Configuration conf) throws IOException { - return (conf.get(AcidUtils.CONF_ACID_KEY) != null) || AcidUtils.isAcid(path, conf); + return (conf.getBoolean(AcidUtils.CONF_ACID_KEY, false)) || AcidUtils.isAcid(path, conf); } private static class OrcRecordReader http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/test/queries/clientpositive/join_acid_non_acid.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/join_acid_non_acid.q b/ql/src/test/queries/clientpositive/join_acid_non_acid.q 
new file mode 100644
index 0000000..43d768f
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/join_acid_non_acid.q
@@ -0,0 +1,24 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true");
+
+INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I');
+
+CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC;
+
+INSERT OVERWRITE TABLE orc_table VALUES (1, 'x');
+
+set hive.cbo.enable=true;
+SET hive.execution.engine=mr;
+SET hive.auto.convert.join=false;
+SET hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+SET hive.conf.validation=false;
+SET hive.doing.acid=false;
+
+SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1;
http://git-wip-us.apache.org/repos/asf/hive/blob/b71f6aaa/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/join_acid_non_acid.q.out b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
new file mode 100644
index 0000000..4905351
--- /dev/null
+++ b/ql/src/test/results/clientpositive/join_acid_non_acid.q.out
@@ -0,0 +1,58 @@
+PREHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true")
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_update_table
+POSTHOOK: query: CREATE TABLE orc_update_table (k1 INT, f1 STRING, op_code STRING)
+CLUSTERED BY (k1) INTO 2 BUCKETS
+STORED AS ORC TBLPROPERTIES("transactional"="true")
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_update_table
+PREHOOK: query: INSERT
INTO TABLE orc_update_table VALUES (1, 'a', 'I')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@orc_update_table
+POSTHOOK: query: INSERT INTO TABLE orc_update_table VALUES (1, 'a', 'I')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@orc_update_table
+POSTHOOK: Lineage: orc_update_table.f1 SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_update_table.k1 EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: orc_update_table.op_code SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ]
+PREHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orc_table
+POSTHOOK: query: CREATE TABLE orc_table (k1 INT, f1 STRING)
+CLUSTERED BY (k1) SORTED BY (k1) INTO 2 BUCKETS
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orc_table
+PREHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__2
+PREHOOK: Output: default@orc_table
+POSTHOOK: query: INSERT OVERWRITE TABLE orc_table VALUES (1, 'x')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__2
+POSTHOOK: Output: default@orc_table
+POSTHOOK: Lineage: orc_table.f1 SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+POSTHOOK: Lineage: orc_table.k1 EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+PREHOOK: query: SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON
t1.k1=t2.k1 ORDER BY t1.k1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orc_table
+PREHOOK: Input: default@orc_update_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t1.*, t2.* FROM orc_table t1
+JOIN orc_update_table t2 ON t1.k1=t2.k1 ORDER BY t1.k1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orc_table
+POSTHOOK: Input: default@orc_update_table
+#### A masked pattern was here ####
+1 x 1 a I