This is an automated email from the ASF dual-hosted git repository.

vgarg pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

The following commit(s) were added to refs/heads/master by this push:
     new 1dfa2d8 HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)
1dfa2d8 is described below

commit 1dfa2d8c666617cf148c9e5831390d1635649ff0
Author: Ramesh Kumar Thangarajan <rameshku...@cloudera.com>
AuthorDate: Mon Sep 9 22:02:54 2019 -0700

    HIVE-22164: Vectorized Limit operator returns wrong number of results with offset (Ramesh Kumar Thangarajan, reviewed by Vineet Garg)
---
 .../queries/clientpositive/vector_offset_limit.q | 34 ++++++++
 .../clientpositive/llap/vector_offset_limit.q.out | 95 ++++++++++++++++++++++
 2 files changed, 129 insertions(+)

diff --git a/ql/src/test/queries/clientpositive/vector_offset_limit.q b/ql/src/test/queries/clientpositive/vector_offset_limit.q
new file mode 100644
index 0000000..dd6a3e7
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_offset_limit.q
@@ -0,0 +1,34 @@
+set hive.auto.convert.join=true;
+set hive.auto.convert.sortmerge.join=true;
+set hive.default.rcfile.serde=org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
+set hive.enforce.sortmergebucketmapjoin=true;
+set hive.exec.reducers.bytes.per.reducer=67108864;
+set hive.fetch.output.serde=org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+set hive.limit.optimize.enable=true;
+set hive.limit.pushdown.memory.usage=0.04;
+set hive.llap.io.enabled=true;
+set hive.map.aggr.hash.min.reduction=0.99;
+set hive.mapjoin.bucket.cache.size=10000;
+set hive.mapjoin.hybridgrace.hashtable=false;
+set hive.merge.mapfiles=false;
+set hive.merge.nway.joins=false;
+set hive.optimize.bucketmapjoin=true;
+set hive.optimize.index.filter=true;
+set hive.stats.fetch.bitvector=false;
+set hive.stats.fetch.column.stats=true;
+set hive.support.quoted.identifiers=none;
+set hive.tez.auto.reducer.parallelism=true;
+set hive.tez.bucket.pruning=true;
+set hive.vectorized.execution.enabled=true;
+set hive.vectorized.execution.mapjoin.minmax.enabled=true;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true;
+set hive.vectorized.groupby.checkinterval=4096;
+
+drop table if exists TLIMITOFFSET;
+create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc;
+create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ;
+LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE;
+INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE;
+
+SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20;
+SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10;
diff --git a/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out b/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out
new file mode 100644
index 0000000..06c9682
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_offset_limit.q.out
@@ -0,0 +1,95 @@
+PREHOOK: query: drop table if exists TLIMITOFFSET
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists TLIMITOFFSET
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TLIMITOFFSET
+POSTHOOK: query: create table if not exists TLIMITOFFSET (name string, id int, flag string) STORED AS orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TLIMITOFFSET
+PREHOOK: query: create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@TLIMITOFFSETSTAGE
+POSTHOOK: query: create table if not exists TLIMITOFFSETSTAGE (name string, id int, flag string) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@TLIMITOFFSETSTAGE
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@tlimitoffsetstage
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin3.txt' OVERWRITE INTO TABLE TLIMITOFFSETSTAGE
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@tlimitoffsetstage
+PREHOOK: query: INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlimitoffsetstage
+PREHOOK: Output: default@tlimitoffset
+POSTHOOK: query: INSERT INTO TABLE TLIMITOFFSET SELECT * from TLIMITOFFSETSTAGE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlimitoffsetstage
+POSTHOOK: Output: default@tlimitoffset
+POSTHOOK: Lineage: tlimitoffset.flag SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:flag, type:string, comment:null), ]
+POSTHOOK: Lineage: tlimitoffset.id SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:id, type:int, comment:null), ]
+POSTHOOK: Lineage: tlimitoffset.name SIMPLE [(tlimitoffsetstage)tlimitoffsetstage.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlimitoffset
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlimitoffset
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+testname1
+testname2
+testname3
+testname4
+testname5
+testname6
+testname7
+testname8
+testname9
+testname10
+testname11
+testname12
+testname13
+testname14
+testname15
+testname16
+testname17
+testname18
+testname19
+testname20
+PREHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tlimitoffset
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: SELECT name,id FROM TLIMITOFFSET where name='testname' ORDER BY id LIMIT 20 OFFSET 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tlimitoffset
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+testname11
+testname12
+testname13
+testname14
+testname15
+testname16
+testname17
+testname18
+testname19
+testname20
+testname21
+testname22
+testname23
+testname24
+testname25
+testname26
+testname27
+testname28
+testname29
+testname30