Author: namit Date: Tue Mar 19 23:39:54 2013 New Revision: 1458570 URL: http://svn.apache.org/r1458570 Log: HIVE-4154 NPE reading column of empty string from ORC file (Kevin Wilfong via namit)
Added: hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1458570&r1=1458569&r2=1458570&view=diff ============================================================================== --- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original) +++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Mar 19 23:39:54 2013 @@ -17,6 +17,15 @@ */ package org.apache.hadoop.hive.ql.io.orc; +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.sql.Timestamp; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -30,15 +39,6 @@ import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import java.io.EOFException; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - class RecordReaderImpl implements RecordReader { private final FSDataInputStream file; private final long firstRow; @@ -686,7 +686,13 @@ class RecordReaderImpl implements Record } else { length = dictionaryBuffer.size() - offset; } - dictionaryBuffer.setText(result, offset, length); + // If the column is just empty strings, the size will be zero, so the buffer will be null, + // in that case just return result as it will default to empty + if (dictionaryBuffer != null) { + dictionaryBuffer.setText(result, offset, length); + } else { + result.clear(); + } } return result; } Added: hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q?rev=1458570&view=auto ============================================================================== --- hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q (added) +++ hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q Tue Mar 19 23:39:54 2013 @@ -0,0 +1,16 @@ +CREATE TABLE test_orc (key STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; + +INSERT OVERWRITE TABLE test_orc SELECT '' FROM src limit 10; + +-- Test reading a column which is just empty strings + +SELECT * FROM test_orc; + +INSERT OVERWRITE TABLE test_orc SELECT IF (key % 3 = 0, key, '') FROM src limit 10; + +-- Test reading a column which has some empty strings + +SELECT * FROM test_orc; Added: hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out?rev=1458570&view=auto ============================================================================== --- hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out (added) +++ hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out Tue Mar 19 23:39:54 2013 @@ -0,0 +1,77 @@ +PREHOOK: query: CREATE TABLE test_orc (key STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE test_orc (key STRING) +ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' +STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' +OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@test_orc +PREHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT '' FROM src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_orc +POSTHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT '' FROM src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_orc +POSTHOOK: Lineage: test_orc.key SIMPLE [] +PREHOOK: query: -- Test reading a column which is just empty strings + +SELECT * FROM test_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@test_orc +#### A masked pattern was here #### +POSTHOOK: query: -- Test reading a column which is just empty strings + +SELECT * FROM test_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: test_orc.key SIMPLE [] + + + + + + + + + + +PREHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT IF (key % 3 = 0, key, '') FROM src limit 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@test_orc +POSTHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT IF (key % 3 = 0, key, '') FROM src limit 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@test_orc +POSTHOOK: Lineage: test_orc.key SIMPLE [] +POSTHOOK: Lineage: test_orc.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- Test reading a column which has some empty strings + +SELECT * FROM test_orc +PREHOOK: type: QUERY +PREHOOK: Input: default@test_orc +#### A masked pattern was here #### +POSTHOOK: query: -- Test reading a column which has some empty strings + +SELECT * FROM test_orc +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test_orc +#### A masked pattern was here #### +POSTHOOK: Lineage: test_orc.key SIMPLE [] +POSTHOOK: Lineage: test_orc.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] + + + +27 +165 + +255 + + +