This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 53d13442af3 HIVE-28632: Fix issues in JSON SerDe implementations
related to Boolean, Binary data types (Araika Singh, reviewed by Denys Kuzmenko)
53d13442af3 is described below
commit 53d13442af33e272cf768549c8e4d4fc53e295f9
Author: NZEC <[email protected]>
AuthorDate: Thu Nov 28 18:49:55 2024 +0530
HIVE-28632: Fix issues in JSON SerDe implementations related to Boolean,
Binary data types (Araika Singh, reviewed by Denys Kuzmenko)
Closes #5550
---
data/files/jsonserde.txt | 1 +
ql/src/test/queries/clientpositive/json_serde3.q | 70 +++++
.../results/clientpositive/llap/json_serde3.q.out | 294 +++++++++++++++++++++
.../org/apache/hadoop/hive/serde2/JsonSerDe.java | 1 -
.../hadoop/hive/serde2/json/HiveJsonReader.java | 43 +--
5 files changed, 391 insertions(+), 18 deletions(-)
diff --git a/data/files/jsonserde.txt b/data/files/jsonserde.txt
new file mode 100644
index 00000000000..08963f2897b
--- /dev/null
+++ b/data/files/jsonserde.txt
@@ -0,0 +1 @@
+{"binarycolumn1" : -2, "binarycolumn2" : false, "binarycolumn3" : null, "binarycolumn4" : true, "binarycolumn5" : 1.23e45, "binarycolumn6" : "value", "booleancaseinsensitive" : "TrUE", "booleanstring" : "true", "booleanboolean" : true, "stringfalse" : "FaLSE", "somestring" : "somestringhere", "booleannull" : null, "booleannumfalse" : 0, "booleannumtrue" : -1}
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/json_serde3.q b/ql/src/test/queries/clientpositive/json_serde3.q
new file mode 100644
index 00000000000..48e45bfa769
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/json_serde3.q
@@ -0,0 +1,70 @@
+drop table if exists json_serde3_1;
+drop table if exists json_serde3_2;
+drop table if exists json_serde3_3;
+drop table if exists json_serde3_4;
+
+create table json_serde3_1 (
+ binarycolumn1 binary,
+ binarycolumn2 binary,
+ binarycolumn3 binary,
+ binarycolumn4 binary,
+ binarycolumn5 binary,
+ binarycolumn6 binary
+ )
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_1;
+INSERT INTO TABLE json_serde3_1 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value");
+
+select * from json_serde3_1;
+
+create table json_serde3_2 (
+ binarycolumn1 binary,
+ binarycolumn2 binary,
+ binarycolumn3 binary,
+ binarycolumn4 binary,
+ binarycolumn5 binary,
+ binarycolumn6 binary)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_2;
+INSERT INTO TABLE json_serde3_2 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value");
+
+select * from json_serde3_2;
+
+create table json_serde3_3 (
+ booleancaseinsensitive boolean,
+ booleanstring boolean,
+ booleanboolean boolean,
+ stringfalse boolean,
+ somestring boolean,
+ booleannull boolean,
+ booleannumfalse boolean,
+ booleannumtrue boolean)
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_3;
+INSERT INTO TABLE json_serde3_3 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1);
+
+select * from json_serde3_3;
+
+create table json_serde3_4 (
+ booleancaseinsensitive boolean,
+ booleanstring boolean,
+ booleanboolean boolean,
+ stringfalse boolean,
+ somestring boolean,
+ booleannull boolean,
+ booleannumfalse boolean,
+ booleannumtrue boolean)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_4;
+INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1);
+
+select * from json_serde3_4;
+
+drop table json_serde3_1;
+drop table json_serde3_2;
+drop table json_serde3_3;
+drop table json_serde3_4;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/json_serde3.q.out b/ql/src/test/results/clientpositive/llap/json_serde3.q.out
new file mode 100644
index 00000000000..0b33db51d62
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/json_serde3.q.out
@@ -0,0 +1,294 @@
+PREHOOK: query: drop table if exists json_serde3_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_4
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table json_serde3_1 (
+ binarycolumn1 binary,
+ binarycolumn2 binary,
+ binarycolumn3 binary,
+ binarycolumn4 binary,
+ binarycolumn5 binary,
+ binarycolumn6 binary
+ )
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: create table json_serde3_1 (
+ binarycolumn1 binary,
+ binarycolumn2 binary,
+ binarycolumn3 binary,
+ binarycolumn4 binary,
+ binarycolumn5 binary,
+ binarycolumn6 binary
+ )
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_1
+PREHOOK: query: INSERT INTO TABLE json_serde3_1 VALUES (BINARY(CAST(-2 AS
STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)),
BINARY(CAST(1.23e45 AS STRING)), "value")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: INSERT INTO TABLE json_serde3_1 VALUES (BINARY(CAST(-2 AS
STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)),
BINARY(CAST(1.23e45 AS STRING)), "value")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_1
+POSTHOOK: Lineage: json_serde3_1.binarycolumn1 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn2 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn3 EXPRESSION []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn4 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn5 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn6 SCRIPT []
+PREHOOK: query: select * from json_serde3_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_1
+#### A masked pattern was here ####
+-2 FALSE NULL TRUE 1.23E45 value
+-2 false NULL true 1.23E45 value
+PREHOOK: query: create table json_serde3_2 (
+ binarycolumn1 binary,
+ binarycolumn2 binary,
+ binarycolumn3 binary,
+ binarycolumn4 binary,
+ binarycolumn5 binary,
+ binarycolumn6 binary)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: create table json_serde3_2 (
+ binarycolumn1 binary,
+ binarycolumn2 binary,
+ binarycolumn3 binary,
+ binarycolumn4 binary,
+ binarycolumn5 binary,
+ binarycolumn6 binary)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_2
+PREHOOK: query: INSERT INTO TABLE json_serde3_2 VALUES (BINARY(CAST(-2 AS
STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)),
BINARY(CAST(1.23e45 AS STRING)), "value")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: INSERT INTO TABLE json_serde3_2 VALUES (BINARY(CAST(-2 AS
STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)),
BINARY(CAST(1.23e45 AS STRING)), "value")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_2
+POSTHOOK: Lineage: json_serde3_2.binarycolumn1 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn2 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn3 EXPRESSION []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn4 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn5 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn6 SCRIPT []
+PREHOOK: query: select * from json_serde3_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_2
+#### A masked pattern was here ####
+-2 FALSE NULL TRUE 1.23E45 value
+-2 false NULL true 1.23E45 value
+PREHOOK: query: create table json_serde3_3 (
+ booleancaseinsensitive boolean,
+ booleanstring boolean,
+ booleanboolean boolean,
+ stringfalse boolean,
+ somestring boolean,
+ booleannull boolean,
+ booleannumfalse boolean,
+ booleannumtrue boolean)
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: create table json_serde3_3 (
+ booleancaseinsensitive boolean,
+ booleanstring boolean,
+ booleanboolean boolean,
+ stringfalse boolean,
+ somestring boolean,
+ booleannull boolean,
+ booleannumfalse boolean,
+ booleannumtrue boolean)
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_3
+PREHOOK: query: INSERT INTO TABLE json_serde3_3 VALUES ("TrUE", "true", true,
"FaLSE", "somestringhere", null, 0, -1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: INSERT INTO TABLE json_serde3_3 VALUES ("TrUE", "true", true,
"FaLSE", "somestringhere", null, 0, -1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_3
+POSTHOOK: Lineage: json_serde3_3.booleanboolean SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleancaseinsensitive SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleannull EXPRESSION []
+POSTHOOK: Lineage: json_serde3_3.booleannumfalse SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleannumtrue SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleanstring SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.somestring SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.stringfalse SCRIPT []
+PREHOOK: query: select * from json_serde3_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_3
+#### A masked pattern was here ####
+true true true false true NULL false true
+true true true false true NULL false true
+PREHOOK: query: create table json_serde3_4 (
+ booleancaseinsensitive boolean,
+ booleanstring boolean,
+ booleanboolean boolean,
+ stringfalse boolean,
+ somestring boolean,
+ booleannull boolean,
+ booleannumfalse boolean,
+ booleannumtrue boolean)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: create table json_serde3_4 (
+ booleancaseinsensitive boolean,
+ booleanstring boolean,
+ booleanboolean boolean,
+ stringfalse boolean,
+ somestring boolean,
+ booleannull boolean,
+ booleannumfalse boolean,
+ booleannumtrue boolean)
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_4
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_4
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_4
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_4
+PREHOOK: query: INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true,
"FaLSE", "somestringhere", null, 0, -1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true,
"FaLSE", "somestringhere", null, 0, -1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_4
+POSTHOOK: Lineage: json_serde3_4.booleanboolean SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleancaseinsensitive SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleannull EXPRESSION []
+POSTHOOK: Lineage: json_serde3_4.booleannumfalse SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleannumtrue SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleanstring SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.somestring SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.stringfalse SCRIPT []
+PREHOOK: query: select * from json_serde3_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_4
+#### A masked pattern was here ####
+true true true false true NULL false true
+true true true false true NULL false true
+PREHOOK: query: drop table json_serde3_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: drop table json_serde3_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_1
+PREHOOK: query: drop table json_serde3_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: drop table json_serde3_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_2
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_2
+PREHOOK: query: drop table json_serde3_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_3
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: drop table json_serde3_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_3
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_3
+PREHOOK: query: drop table json_serde3_4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_4
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: drop table json_serde3_4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_4
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_4
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
index 534246e5f36..f7b4e541a6f 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
@@ -139,7 +139,6 @@ public class JsonSerDe extends AbstractSerDe {
this.jsonReader = new HiveJsonReader(this.soi, tsParser);
this.jsonWriter = new HiveJsonWriter(this.binaryEncoding, getColumnNames());
- this.jsonReader.setBinaryEncoding(binaryEncoding);
this.jsonReader.enable(HiveJsonReader.Feature.COL_INDEX_PARSING);
if (writeablePrimitivesDeserialize) {
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java b/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
index 7ade47b6e5a..c504b9ec4ff 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
@@ -112,8 +112,10 @@ public class HiveJsonReader {
private final ObjectMapper objectMapper;
private final TimestampParser tsParser;
- private BinaryEncoding binaryEncoding;
private final ObjectInspector oi;
+ private static final Pattern BASE64_PATTERN = Pattern.compile(
+ "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"
+ );
/**
* Enumeration that defines all on/off features for this reader.
@@ -168,7 +170,6 @@ public class HiveJsonReader {
* @param tsParser Custom timestamp parser
*/
public HiveJsonReader(ObjectInspector oi, TimestampParser tsParser) {
- this.binaryEncoding = BinaryEncoding.BASE64;
this.tsParser = tsParser;
this.oi = oi;
this.objectMapper = new ObjectMapper();
@@ -397,7 +398,11 @@ public class HiveJsonReader {
case LONG:
return Long.valueOf(leafNode.asLong());
case BOOLEAN:
- return Boolean.valueOf(leafNode.asBoolean());
+ if ("false".equalsIgnoreCase(leafNode.asText())) {
+ return Boolean.FALSE;
+ } else {
+ return Boolean.valueOf(leafNode.asBoolean(true));
+ }
case FLOAT:
return Float.valueOf((float) leafNode.asDouble());
case DOUBLE:
@@ -450,15 +455,19 @@ public class HiveJsonReader {
*/
private byte[] getByteValue(final JsonNode binaryNode) throws SerDeException {
try {
- switch (this.binaryEncoding) {
+ BinaryEncoding binaryEncoding = getBinaryEncodingForNode(binaryNode);
+ switch (binaryEncoding) {
case RAWSTRING:
- final String byteText = binaryNode.textValue();
+ final String byteText = binaryNode.asText();
+ if (byteText == null) {
+ return null;
+ }
return byteText.getBytes(StandardCharsets.UTF_8);
case BASE64:
return binaryNode.binaryValue();
default:
throw new SerDeException(
- "No such binary encoding: " + this.binaryEncoding);
+ "No such binary encoding: " + binaryEncoding);
}
} catch (IOException e) {
throw new SerDeException("Error generating JSON binary type from record.",
@@ -466,6 +475,16 @@ public class HiveJsonReader {
}
}
+ private BinaryEncoding getBinaryEncodingForNode(JsonNode binaryNode) {
+ String jsonValue = binaryNode.textValue();
+
+ if (jsonValue == null || jsonValue.length() % 4 != 0 || !BASE64_PATTERN.matcher(jsonValue).matches()) {
+ return BinaryEncoding.RAWSTRING;
+ }
+
+ return BinaryEncoding.BASE64;
+ }
+
/**
* Matches the JSON object's field name with the Hive data type.
*
@@ -560,18 +579,8 @@ public class HiveJsonReader {
return oi;
}
- public BinaryEncoding getBinaryEncodingType() {
- return binaryEncoding;
- }
-
- public void setBinaryEncoding(BinaryEncoding encoding) {
- this.binaryEncoding = encoding;
- }
-
@Override
public String toString() {
- return "HiveJsonReader [features=" + features + ", tsParser=" + tsParser
- + ", binaryEncoding=" + binaryEncoding + "]";
+ return "HiveJsonReader [features=" + features + ", tsParser=" + tsParser;
}
-
}