This is an automated email from the ASF dual-hosted git repository.
szita pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 95e76f0 HIVE-24151: MultiDelimitSerDe shifts data if strings contain
non-ASCII characters (Adam Szita, reviewed by Peter Vary)
95e76f0 is described below
commit 95e76f0bd65e9c7f48252d5c4777ca5ab2a66c3b
Author: Adam Szita <40628386+sz...@users.noreply.github.com>
AuthorDate: Thu Sep 17 10:06:46 2020 +0200
HIVE-24151: MultiDelimitSerDe shifts data if strings contain non-ASCII
characters (Adam Szita, reviewed by Peter Vary)
---
data/files/t4_multi_delimit.csv| 5 ++
.../queries/clientpositive/serde_multi_delimit.q | 12 -
.../clientpositive/llap/serde_multi_delimit.q.out | 45 ++
.../hadoop/hive/serde2/MultiDelimitSerDe.java | 14 +++---
.../apache/hadoop/hive/serde2/lazy/LazyStruct.java | 55 +-
5 files changed, 101 insertions(+), 30 deletions(-)
diff --git a/data/files/t4_multi_delimit.csv b/data/files/t4_multi_delimit.csv
new file mode 100644
index 000..d1d
--- /dev/null
+++ b/data/files/t4_multi_delimit.csv
@@ -0,0 +1,5 @@
+Сок^,dsadsa
+ááé^,^,üóüóüóüóüó
+^,^,^,^,
+áűáűáűáű^,^,^,^,
+űűű^,ááá^,óóó
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/serde_multi_delimit.q b/ql/src/test/queries/clientpositive/serde_multi_delimit.q
index 0d85175..e9e7f78 100644
--- a/ql/src/test/queries/clientpositive/serde_multi_delimit.q
+++ b/ql/src/test/queries/clientpositive/serde_multi_delimit.q
@@ -58,8 +58,18 @@ LOAD DATA LOCAL INPATH "../../data/files/t3_multi_delimit.csv" INTO TABLE t3_mul
SELECT * FROM t3_multi_delimit;
+CREATE TABLE t4_multi_delimit(colA string,
+ colB string,
+ colC string)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
+WITH SERDEPROPERTIES ("field.delim"="^,")STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH "../../data/files/t4_multi_delimit.csv" INTO TABLE t4_multi_delimit;
+
+SELECT * FROM t4_multi_delimit;
+
DROP TABLE t1_multi_delimit;
DROP TABLE t11_csv_serde;
DROP TABLE t2_multi_delimit;
-DROP TABLE t3_multi_delimit;
\ No newline at end of file
+DROP TABLE t3_multi_delimit;
+DROP TABLE t4_multi_delimit;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/serde_multi_delimit.q.out b/ql/src/test/results/clientpositive/llap/serde_multi_delimit.q.out
index 3437744..837f620 100644
--- a/ql/src/test/results/clientpositive/llap/serde_multi_delimit.q.out
+++ b/ql/src/test/results/clientpositive/llap/serde_multi_delimit.q.out
@@ -198,6 +198,43 @@ NULL NULLNULLNULLNULL
8 8 NULL8 8
9 9 NULL9 9
10101010 NULLNULLNULLNULL
+PREHOOK: query: CREATE TABLE t4_multi_delimit(colA string,
+ colB string,
+ colC string)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
+WITH SERDEPROPERTIES ("field.delim"="^,")STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t4_multi_delimit
+POSTHOOK: query: CREATE TABLE t4_multi_delimit(colA string,
+ colB string,
+ colC string)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.MultiDelimitSerDe'
+WITH SERDEPROPERTIES ("field.delim"="^,")STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t4_multi_delimit
+PREHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/t4_multi_delimit.csv" INTO TABLE t4_multi_delimit
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@t4_multi_delimit
+POSTHOOK: query: LOAD DATA LOCAL INPATH "../../data/files/t4_multi_delimit.csv" INTO TABLE t4_multi_delimit
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@t4_multi_delimit
+PREHOOK: query: SELECT * FROM t4_multi_delimit
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t4_multi_delimit
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM t4_multi_delimit
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t4_multi_delimit
+#### A masked pattern was here ####
+Сок dsadsa NULL
+ááéüóüóüóüóüó
+
+áűáűáűáű
+űűűááá óóó
PREHOOK: query: DROP TABLE t1_multi_delimit
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@t1_multi_delimit
@@ -230,3 +267,11 @@ POSTHOOK: query: DROP TABLE t3_multi_delimit
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@t3_multi_delimit
POSTHOOK: Output: default@t3_multi_delimit
+PREHOOK: query: DROP TABLE t4_multi_delimit
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@t4_multi_delimit
+PREHOOK: Output: default@t4_multi_delimit
+POSTHOOK: query: DROP TABLE t4_multi_delimit
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@t4_multi_delimit
+POSTHOOK: Output: default@t4_multi_delimit
diff --git
a/serde/src/ja