HIVE-11095: SerDeUtils another bug, when Text is reused (Xiaowei via Xuefu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9c64f937 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9c64f937 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9c64f937 Branch: refs/heads/llap Commit: 9c64f9378143b6804c384f0333014b8f1cbd8edd Parents: 97b4750 Author: Xuefu Zhang <xzh...@cloudera.com> Authored: Tue Jun 30 05:23:32 2015 -0700 Committer: Xuefu Zhang <xzh...@cloudera.com> Committed: Tue Jun 30 05:24:03 2015 -0700 ---------------------------------------------------------------------- data/files/encoding-utf8.txt | 12 +++ .../insert_non_utf8_encoding_table.q | 20 +++++ .../insert_non_utf8_encoding_table.q.out | 89 ++++++++++++++++++++ .../apache/hadoop/hive/serde2/SerDeUtils.java | 2 +- 4 files changed, 122 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/data/files/encoding-utf8.txt ---------------------------------------------------------------------- diff --git a/data/files/encoding-utf8.txt b/data/files/encoding-utf8.txt new file mode 100644 index 0000000..88bd256 --- /dev/null +++ b/data/files/encoding-utf8.txt @@ -0,0 +1,12 @@ +Tao,Li +Wisgood +Benguo,Me +Xianqiang,Shen +Wensheng,Wang +Haijun,Qiao +Shilong,Zhang +Xiaoqing,You +Aiqing,Song +Zhenhua,Han +Weiqi,Peng +Hua,Li http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q b/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q new file mode 100644 index 0000000..0f9db02 --- /dev/null +++ b/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q @@ -0,0 +1,20 @@ +drop table if exists table_with_utf8_encoding; + +create table table_with_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='utf-8'); + +load data local inpath '../../data/files/encoding-utf8.txt' overwrite into table table_with_utf8_encoding; + +select * from table_with_utf8_encoding; + +drop table if exists table_with_non_utf8_encoding; + +create table table_with_non_utf8_encoding (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1'); + +insert overwrite table table_with_non_utf8_encoding select name from table_with_utf8_encoding; + +select * from table_with_non_utf8_encoding; + http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out b/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out new file mode 100644 index 0000000..4d97d87 --- /dev/null +++ b/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out @@ -0,0 +1,89 @@ +PREHOOK: query: drop table if exists table_with_utf8_encoding +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists table_with_utf8_encoding +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_with_utf8_encoding (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='utf-8') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_with_utf8_encoding +POSTHOOK: query: create table table_with_utf8_encoding (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='utf-8') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@table_with_utf8_encoding +PREHOOK: query: load data local inpath '../../data/files/encoding-utf8.txt' overwrite into table table_with_utf8_encoding +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@table_with_utf8_encoding +POSTHOOK: query: load data local inpath '../../data/files/encoding-utf8.txt' overwrite into table table_with_utf8_encoding +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@table_with_utf8_encoding +PREHOOK: query: select * from table_with_utf8_encoding +PREHOOK: type: QUERY +PREHOOK: Input: default@table_with_utf8_encoding +#### A masked pattern was here #### +POSTHOOK: query: select * from table_with_utf8_encoding +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_with_utf8_encoding +#### A masked pattern was here #### +Tao,Li +Wisgood +Benguo,Me +Xianqiang,Shen +Wensheng,Wang +Haijun,Qiao +Shilong,Zhang +Xiaoqing,You +Aiqing,Song +Zhenhua,Han +Weiqi,Peng +Hua,Li +PREHOOK: query: drop table if exists table_with_non_utf8_encoding +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists table_with_non_utf8_encoding +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table table_with_non_utf8_encoding (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@table_with_non_utf8_encoding +POSTHOOK: query: create table table_with_non_utf8_encoding (name STRING) + ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' + WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@table_with_non_utf8_encoding +PREHOOK: query: insert overwrite table table_with_non_utf8_encoding select name from table_with_utf8_encoding +PREHOOK: type: QUERY +PREHOOK: Input: 
default@table_with_utf8_encoding +PREHOOK: Output: default@table_with_non_utf8_encoding +POSTHOOK: query: insert overwrite table table_with_non_utf8_encoding select name from table_with_utf8_encoding +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_with_utf8_encoding +POSTHOOK: Output: default@table_with_non_utf8_encoding +POSTHOOK: Lineage: table_with_non_utf8_encoding.name SIMPLE [(table_with_utf8_encoding)table_with_utf8_encoding.FieldSchema(name:name, type:string, comment:null), ] +PREHOOK: query: select * from table_with_non_utf8_encoding +PREHOOK: type: QUERY +PREHOOK: Input: default@table_with_non_utf8_encoding +#### A masked pattern was here #### +POSTHOOK: query: select * from table_with_non_utf8_encoding +POSTHOOK: type: QUERY +POSTHOOK: Input: default@table_with_non_utf8_encoding +#### A masked pattern was here #### +Tao,Li +Wisgood +Benguo,Me +Xianqiang,Shen +Wensheng,Wang +Haijun,Qiao +Shilong,Zhang +Xiaoqing,You +Aiqing,Song +Zhenhua,Han +Weiqi,Peng +Hua,Li http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java ---------------------------------------------------------------------- diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java index 40ede1a..c65174e 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java @@ -555,6 +555,6 @@ public final class SerDeUtils { } public static Text transformTextFromUTF8(Text text, Charset targetCharset) { - return new Text(new String(text.getBytes()).getBytes(targetCharset)); + return new Text(new String(text.getBytes(), 0, text.getLength()).getBytes(targetCharset)); } }