HIVE-11095: SerDeUtils another bug, when Text is reused (Xiaowei via Xuefu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9c64f937
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9c64f937
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9c64f937

Branch: refs/heads/llap
Commit: 9c64f9378143b6804c384f0333014b8f1cbd8edd
Parents: 97b4750
Author: Xuefu Zhang <xzh...@cloudera.com>
Authored: Tue Jun 30 05:23:32 2015 -0700
Committer: Xuefu Zhang <xzh...@cloudera.com>
Committed: Tue Jun 30 05:24:03 2015 -0700

----------------------------------------------------------------------
 data/files/encoding-utf8.txt                    | 12 +++
 .../insert_non_utf8_encoding_table.q            | 20 +++++
 .../insert_non_utf8_encoding_table.q.out        | 89 ++++++++++++++++++++
 .../apache/hadoop/hive/serde2/SerDeUtils.java   |  2 +-
 4 files changed, 122 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/data/files/encoding-utf8.txt
----------------------------------------------------------------------
diff --git a/data/files/encoding-utf8.txt b/data/files/encoding-utf8.txt
new file mode 100644
index 0000000..88bd256
--- /dev/null
+++ b/data/files/encoding-utf8.txt
@@ -0,0 +1,12 @@
+Tao,Li
+Wisgood
+Benguo,Me
+Xianqiang,Shen
+Wensheng,Wang
+Haijun,Qiao
+Shilong,Zhang
+Xiaoqing,You
+Aiqing,Song
+Zhenhua,Han
+Weiqi,Peng
+Hua,Li

http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q b/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q
new file mode 100644
index 0000000..0f9db02
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/insert_non_utf8_encoding_table.q
@@ -0,0 +1,20 @@
+drop table if exists table_with_utf8_encoding;
+
+create table table_with_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='utf-8');
+
+load data local inpath '../../data/files/encoding-utf8.txt' overwrite into table table_with_utf8_encoding;
+
+select * from table_with_utf8_encoding;
+
+drop table if exists table_with_non_utf8_encoding;
+
+create table table_with_non_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1');
+
+insert overwrite table table_with_non_utf8_encoding  select name  from table_with_utf8_encoding;
+
+select * from table_with_non_utf8_encoding;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out b/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out
new file mode 100644
index 0000000..4d97d87
--- /dev/null
+++ b/ql/src/test/results/clientpositive/insert_non_utf8_encoding_table.q.out
@@ -0,0 +1,89 @@
+PREHOOK: query: drop table if exists table_with_utf8_encoding
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists table_with_utf8_encoding
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table table_with_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='utf-8')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_with_utf8_encoding
+POSTHOOK: query: create table table_with_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='utf-8')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_with_utf8_encoding
+PREHOOK: query: load data local inpath '../../data/files/encoding-utf8.txt' overwrite into table table_with_utf8_encoding
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@table_with_utf8_encoding
+POSTHOOK: query: load data local inpath '../../data/files/encoding-utf8.txt' overwrite into table table_with_utf8_encoding
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@table_with_utf8_encoding
+PREHOOK: query: select * from table_with_utf8_encoding
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_with_utf8_encoding
+#### A masked pattern was here ####
+POSTHOOK: query: select * from table_with_utf8_encoding
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_with_utf8_encoding
+#### A masked pattern was here ####
+Tao,Li
+Wisgood
+Benguo,Me
+Xianqiang,Shen
+Wensheng,Wang
+Haijun,Qiao
+Shilong,Zhang
+Xiaoqing,You
+Aiqing,Song
+Zhenhua,Han
+Weiqi,Peng
+Hua,Li
+PREHOOK: query: drop table if exists table_with_non_utf8_encoding
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists table_with_non_utf8_encoding
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table table_with_non_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@table_with_non_utf8_encoding
+POSTHOOK: query: create table table_with_non_utf8_encoding (name STRING) 
+ ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
+ WITH SERDEPROPERTIES ('serialization.encoding'='ISO8859_1')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@table_with_non_utf8_encoding
+PREHOOK: query: insert overwrite table table_with_non_utf8_encoding  select name  from table_with_utf8_encoding
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_with_utf8_encoding
+PREHOOK: Output: default@table_with_non_utf8_encoding
+POSTHOOK: query: insert overwrite table table_with_non_utf8_encoding  select name  from table_with_utf8_encoding
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_with_utf8_encoding
+POSTHOOK: Output: default@table_with_non_utf8_encoding
+POSTHOOK: Lineage: table_with_non_utf8_encoding.name SIMPLE [(table_with_utf8_encoding)table_with_utf8_encoding.FieldSchema(name:name, type:string, comment:null), ]
+PREHOOK: query: select * from table_with_non_utf8_encoding
+PREHOOK: type: QUERY
+PREHOOK: Input: default@table_with_non_utf8_encoding
+#### A masked pattern was here ####
+POSTHOOK: query: select * from table_with_non_utf8_encoding
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@table_with_non_utf8_encoding
+#### A masked pattern was here ####
+Tao,Li
+Wisgood
+Benguo,Me
+Xianqiang,Shen
+Wensheng,Wang
+Haijun,Qiao
+Shilong,Zhang
+Xiaoqing,You
+Aiqing,Song
+Zhenhua,Han
+Weiqi,Peng
+Hua,Li

http://git-wip-us.apache.org/repos/asf/hive/blob/9c64f937/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
index 40ede1a..c65174e 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/SerDeUtils.java
@@ -555,6 +555,6 @@ public final class SerDeUtils {
   }
 
   public static Text transformTextFromUTF8(Text text, Charset targetCharset) {
-    return new Text(new String(text.getBytes()).getBytes(targetCharset));
+    return new Text(new String(text.getBytes(), 0, text.getLength()).getBytes(targetCharset));
   }
 }

Reply via email to