Repository: hive
Updated Branches:
  refs/heads/branch-1 b6f6c4acb -> 4ac966cd8


HIVE-13330: ORC vectorized string dictionary reader does not differentiate null vs empty string dictionary (Prasanth Jayachandran reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/4ac966cd
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/4ac966cd
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/4ac966cd

Branch: refs/heads/branch-1
Commit: 4ac966cd8ea069f0935919a108acf15d6ec7799b
Parents: b6f6c4a
Author: Prasanth Jayachandran <prasan...@apache.org>
Authored: Mon Apr 4 19:39:01 2016 -0500
Committer: Prasanth Jayachandran <prasan...@apache.org>
Committed: Mon Apr 4 19:39:01 2016 -0500

----------------------------------------------------------------------
 .../hive/ql/io/orc/TreeReaderFactory.java       | 20 +++++--
 .../vector_orc_string_reader_empty_dict.q       | 20 +++++++
 .../vector_orc_string_reader_empty_dict.q.out   | 62 ++++++++++++++++++++
 3 files changed, 97 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/4ac966cd/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index c8f9595..96df394 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -1657,6 +1657,7 @@ public class TreeReaderFactory {
    * stripe.
    */
   protected static class StringDictionaryTreeReader extends TreeReader {
+    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
     private DynamicByteArray dictionaryBuffer;
     private int[] dictionaryOffsets;
     protected IntegerReader reader;
@@ -1836,11 +1837,20 @@ public class TreeReaderFactory {
         }
         result.isRepeating = scratchlcv.isRepeating;
       } else {
-        // Entire stripe contains null strings.
-        result.isRepeating = true;
-        result.noNulls = false;
-        result.isNull[0] = true;
-        result.setRef(0, "".getBytes(), 0, 0);
+        if (dictionaryOffsets == null) {
+          // Entire stripe contains null strings.
+          result.isRepeating = true;
+          result.noNulls = false;
+          result.isNull[0] = true;
+          result.setRef(0, EMPTY_BYTE_ARRAY, 0, 0);
+        } else {
+          // stripe contains nulls and empty strings
+          for (int i = 0; i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.setRef(i, EMPTY_BYTE_ARRAY, 0, 0);
+            }
+          }
+        }
       }
       return result;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/4ac966cd/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q b/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q
new file mode 100644
index 0000000..0e8a743
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q
@@ -0,0 +1,20 @@
+create table orcstr (vcol varchar(20)) stored as orc;
+
+insert overwrite table orcstr select null from src;
+
+SET hive.fetch.task.conversion=none;
+
+SET hive.vectorized.execution.enabled=false;
+select vcol from orcstr limit 1;
+
+SET hive.vectorized.execution.enabled=true;
+select vcol from orcstr limit 1;
+
+insert overwrite table orcstr select "" from src;
+
+SET hive.vectorized.execution.enabled=false;
+select vcol from orcstr limit 1;
+
+SET hive.vectorized.execution.enabled=true;
+select vcol from orcstr limit 1;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/4ac966cd/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out b/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out
new file mode 100644
index 0000000..4f00bed
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcstr
+PREHOOK: query: insert overwrite table orcstr select null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: insert overwrite table orcstr select null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcstr
+POSTHOOK: Lineage: orcstr.vcol EXPRESSION []
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: insert overwrite table orcstr select "" from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: insert overwrite table orcstr select "" from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcstr
+POSTHOOK: Lineage: orcstr.vcol EXPRESSION []
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+

Reply via email to