This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 53d13442af3 HIVE-28632: Fix issues in JSON SerDe implementations related to Boolean, Binary data types (Araika Singh, reviewed by Denys Kuzmenko)
53d13442af3 is described below

commit 53d13442af33e272cf768549c8e4d4fc53e295f9
Author: NZEC <[email protected]>
AuthorDate: Thu Nov 28 18:49:55 2024 +0530

    HIVE-28632: Fix issues in JSON SerDe implementations related to Boolean, Binary data types (Araika Singh, reviewed by Denys Kuzmenko)
    
    Closes #5550
---
 data/files/jsonserde.txt                           |   1 +
 ql/src/test/queries/clientpositive/json_serde3.q   |  70 +++++
 .../results/clientpositive/llap/json_serde3.q.out  | 294 +++++++++++++++++++++
 .../org/apache/hadoop/hive/serde2/JsonSerDe.java   |   1 -
 .../hadoop/hive/serde2/json/HiveJsonReader.java    |  43 +--
 5 files changed, 391 insertions(+), 18 deletions(-)

diff --git a/data/files/jsonserde.txt b/data/files/jsonserde.txt
new file mode 100644
index 00000000000..08963f2897b
--- /dev/null
+++ b/data/files/jsonserde.txt
@@ -0,0 +1 @@
+{"binarycolumn1" : -2, "binarycolumn2" : false, "binarycolumn3" : null, "binarycolumn4" : true, "binarycolumn5" : 1.23e45, "binarycolumn6" : "value", "booleancaseinsensitive" : "TrUE", "booleanstring" : "true", "booleanboolean" : true, "stringfalse" : "FaLSE", "somestring" : "somestringhere", "booleannull" : null, "booleannumfalse" : 0, "booleannumtrue" : -1}
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/json_serde3.q b/ql/src/test/queries/clientpositive/json_serde3.q
new file mode 100644
index 00000000000..48e45bfa769
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/json_serde3.q
@@ -0,0 +1,70 @@
+drop table if exists json_serde3_1;
+drop table if exists json_serde3_2;
+drop table if exists json_serde3_3;
+drop table if exists json_serde3_4;
+
+create table json_serde3_1 (
+    binarycolumn1 binary,
+    binarycolumn2 binary,
+    binarycolumn3 binary,
+    binarycolumn4 binary,
+    binarycolumn5 binary,
+    binarycolumn6 binary
+    )
+  row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_1;
+INSERT INTO TABLE json_serde3_1 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value");
+
+select * from json_serde3_1;
+
+create table json_serde3_2 (
+    binarycolumn1 binary,
+    binarycolumn2 binary,
+    binarycolumn3 binary,
+    binarycolumn4 binary,
+    binarycolumn5 binary,
+    binarycolumn6 binary)
+  row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_2;
+INSERT INTO TABLE json_serde3_2 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value");
+
+select * from json_serde3_2;
+
+create table json_serde3_3 (
+    booleancaseinsensitive boolean,
+    booleanstring boolean,
+    booleanboolean boolean,
+    stringfalse boolean,
+    somestring boolean,
+    booleannull boolean,
+    booleannumfalse boolean,
+    booleannumtrue boolean)
+  row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_3;
+INSERT INTO TABLE json_serde3_3 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1);
+
+select * from json_serde3_3;
+
+create table json_serde3_4 (
+    booleancaseinsensitive boolean,
+    booleanstring boolean,
+    booleanboolean boolean,
+    stringfalse boolean,
+    somestring boolean,
+    booleannull boolean,
+    booleannumfalse boolean,
+    booleannumtrue boolean)
+  row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_4;
+INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1);
+
+select * from json_serde3_4;
+
+drop table json_serde3_1;
+drop table json_serde3_2;
+drop table json_serde3_3;
+drop table json_serde3_4;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/json_serde3.q.out b/ql/src/test/results/clientpositive/llap/json_serde3.q.out
new file mode 100644
index 00000000000..0b33db51d62
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/json_serde3.q.out
@@ -0,0 +1,294 @@
+PREHOOK: query: drop table if exists json_serde3_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_4
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create table json_serde3_1 (
+    binarycolumn1 binary,
+    binarycolumn2 binary,
+    binarycolumn3 binary,
+    binarycolumn4 binary,
+    binarycolumn5 binary,
+    binarycolumn6 binary
+    )
+  row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: create table json_serde3_1 (
+    binarycolumn1 binary,
+    binarycolumn2 binary,
+    binarycolumn3 binary,
+    binarycolumn4 binary,
+    binarycolumn5 binary,
+    binarycolumn6 binary
+    )
+  row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_1
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_1
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_1
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_1
+PREHOOK: query: INSERT INTO TABLE json_serde3_1 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: INSERT INTO TABLE json_serde3_1 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_1
+POSTHOOK: Lineage: json_serde3_1.binarycolumn1 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn2 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn3 EXPRESSION []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn4 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn5 SCRIPT []
+POSTHOOK: Lineage: json_serde3_1.binarycolumn6 SCRIPT []
+PREHOOK: query: select * from json_serde3_1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_1
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_1
+#### A masked pattern was here ####
+-2     FALSE   NULL    TRUE    1.23E45 value
+-2     false   NULL    true    1.23E45 value
+PREHOOK: query: create table json_serde3_2 (
+    binarycolumn1 binary,
+    binarycolumn2 binary,
+    binarycolumn3 binary,
+    binarycolumn4 binary,
+    binarycolumn5 binary,
+    binarycolumn6 binary)
+  row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: create table json_serde3_2 (
+    binarycolumn1 binary,
+    binarycolumn2 binary,
+    binarycolumn3 binary,
+    binarycolumn4 binary,
+    binarycolumn5 binary,
+    binarycolumn6 binary)
+  row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_2
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_2
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_2
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_2
+PREHOOK: query: INSERT INTO TABLE json_serde3_2 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value")
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: INSERT INTO TABLE json_serde3_2 VALUES (BINARY(CAST(-2 AS STRING)), BINARY(CAST(false AS STRING)), null, BINARY(CAST(true AS STRING)), BINARY(CAST(1.23e45 AS STRING)), "value")
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_2
+POSTHOOK: Lineage: json_serde3_2.binarycolumn1 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn2 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn3 EXPRESSION []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn4 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn5 SCRIPT []
+POSTHOOK: Lineage: json_serde3_2.binarycolumn6 SCRIPT []
+PREHOOK: query: select * from json_serde3_2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_2
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_2
+#### A masked pattern was here ####
+-2     FALSE   NULL    TRUE    1.23E45 value
+-2     false   NULL    true    1.23E45 value
+PREHOOK: query: create table json_serde3_3 (
+    booleancaseinsensitive boolean,
+    booleanstring boolean,
+    booleanboolean boolean,
+    stringfalse boolean,
+    somestring boolean,
+    booleannull boolean,
+    booleannumfalse boolean,
+    booleannumtrue boolean)
+  row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: create table json_serde3_3 (
+    booleancaseinsensitive boolean,
+    booleanstring boolean,
+    booleanboolean boolean,
+    stringfalse boolean,
+    somestring boolean,
+    booleannull boolean,
+    booleannumfalse boolean,
+    booleannumtrue boolean)
+  row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_3
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_3
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_3
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_3
+PREHOOK: query: INSERT INTO TABLE json_serde3_3 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: INSERT INTO TABLE json_serde3_3 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_3
+POSTHOOK: Lineage: json_serde3_3.booleanboolean SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleancaseinsensitive SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleannull EXPRESSION []
+POSTHOOK: Lineage: json_serde3_3.booleannumfalse SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleannumtrue SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.booleanstring SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.somestring SCRIPT []
+POSTHOOK: Lineage: json_serde3_3.stringfalse SCRIPT []
+PREHOOK: query: select * from json_serde3_3
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_3
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_3
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_3
+#### A masked pattern was here ####
+true   true    true    false   true    NULL    false   true
+true   true    true    false   true    NULL    false   true
+PREHOOK: query: create table json_serde3_4 (
+    booleancaseinsensitive boolean,
+    booleanstring boolean,
+    booleanboolean boolean,
+    stringfalse boolean,
+    somestring boolean,
+    booleannull boolean,
+    booleannumfalse boolean,
+    booleannumtrue boolean)
+  row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: create table json_serde3_4 (
+    booleancaseinsensitive boolean,
+    booleanstring boolean,
+    booleanboolean boolean,
+    stringfalse boolean,
+    somestring boolean,
+    booleannull boolean,
+    booleannumfalse boolean,
+    booleannumtrue boolean)
+  row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_4
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_4
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table json_serde3_4
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_4
+PREHOOK: query: INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true", true, "FaLSE", "somestringhere", null, 0, -1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_4
+POSTHOOK: Lineage: json_serde3_4.booleanboolean SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleancaseinsensitive SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleannull EXPRESSION []
+POSTHOOK: Lineage: json_serde3_4.booleannumfalse SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleannumtrue SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.booleanstring SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.somestring SCRIPT []
+POSTHOOK: Lineage: json_serde3_4.stringfalse SCRIPT []
+PREHOOK: query: select * from json_serde3_4
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_4
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_4
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_4
+#### A masked pattern was here ####
+true   true    true    false   true    NULL    false   true
+true   true    true    false   true    NULL    false   true
+PREHOOK: query: drop table json_serde3_1
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_1
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_1
+POSTHOOK: query: drop table json_serde3_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_1
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_1
+PREHOOK: query: drop table json_serde3_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_2
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_2
+POSTHOOK: query: drop table json_serde3_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_2
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_2
+PREHOOK: query: drop table json_serde3_3
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_3
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_3
+POSTHOOK: query: drop table json_serde3_3
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_3
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_3
+PREHOOK: query: drop table json_serde3_4
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_4
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_4
+POSTHOOK: query: drop table json_serde3_4
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_4
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_4
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java b/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
index 534246e5f36..f7b4e541a6f 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/JsonSerDe.java
@@ -139,7 +139,6 @@ public class JsonSerDe extends AbstractSerDe {
     this.jsonReader = new HiveJsonReader(this.soi, tsParser);
     this.jsonWriter = new HiveJsonWriter(this.binaryEncoding, getColumnNames());
 
-    this.jsonReader.setBinaryEncoding(binaryEncoding);
     this.jsonReader.enable(HiveJsonReader.Feature.COL_INDEX_PARSING);
 
     if (writeablePrimitivesDeserialize) {
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java b/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
index 7ade47b6e5a..c504b9ec4ff 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
@@ -112,8 +112,10 @@ public class HiveJsonReader {
   private final ObjectMapper objectMapper;
 
   private final TimestampParser tsParser;
-  private BinaryEncoding binaryEncoding;
   private final ObjectInspector oi;
+  private static final Pattern BASE64_PATTERN = Pattern.compile(
+          "^(?:[A-Za-z0-9+/]{4})*(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?$"
+  );
 
   /**
    * Enumeration that defines all on/off features for this reader.
@@ -168,7 +170,6 @@ public class HiveJsonReader {
    * @param tsParser Custom timestamp parser
    */
   public HiveJsonReader(ObjectInspector oi, TimestampParser tsParser) {
-    this.binaryEncoding = BinaryEncoding.BASE64;
     this.tsParser = tsParser;
     this.oi = oi;
     this.objectMapper = new ObjectMapper();
@@ -397,7 +398,11 @@ public class HiveJsonReader {
     case LONG:
       return Long.valueOf(leafNode.asLong());
     case BOOLEAN:
-      return Boolean.valueOf(leafNode.asBoolean());
+      if ("false".equalsIgnoreCase(leafNode.asText())) {
+        return Boolean.FALSE;
+      } else {
+        return Boolean.valueOf(leafNode.asBoolean(true));
+      }
     case FLOAT:
       return Float.valueOf((float) leafNode.asDouble());
     case DOUBLE:
@@ -450,15 +455,19 @@ public class HiveJsonReader {
    */
   private byte[] getByteValue(final JsonNode binaryNode) throws SerDeException {
     try {
-      switch (this.binaryEncoding) {
+      BinaryEncoding binaryEncoding = getBinaryEncodingForNode(binaryNode);
+      switch (binaryEncoding) {
       case RAWSTRING:
-        final String byteText = binaryNode.textValue();
+        final String byteText = binaryNode.asText();
+        if (byteText == null) {
+          return null;
+        }
         return byteText.getBytes(StandardCharsets.UTF_8);
       case BASE64:
         return binaryNode.binaryValue();
       default:
         throw new SerDeException(
-            "No such binary encoding: " + this.binaryEncoding);
+            "No such binary encoding: " + binaryEncoding);
       }
     } catch (IOException e) {
       throw new SerDeException("Error generating JSON binary type from record.",
@@ -466,6 +475,16 @@ public class HiveJsonReader {
     }
   }
 
+  private BinaryEncoding getBinaryEncodingForNode(JsonNode binaryNode) {
+    String jsonValue = binaryNode.textValue();
+
+    if (jsonValue == null || jsonValue.length() % 4 != 0 || !BASE64_PATTERN.matcher(jsonValue).matches()) {
+      return BinaryEncoding.RAWSTRING;
+    }
+
+    return BinaryEncoding.BASE64;
+  }
+
   /**
    * Matches the JSON object's field name with the Hive data type.
    *
@@ -560,18 +579,8 @@ public class HiveJsonReader {
     return oi;
   }
 
-  public BinaryEncoding getBinaryEncodingType() {
-    return binaryEncoding;
-  }
-
-  public void setBinaryEncoding(BinaryEncoding encoding) {
-    this.binaryEncoding = encoding;
-  }
-
   @Override
   public String toString() {
-    return "HiveJsonReader [features=" + features + ", tsParser=" + tsParser
-        + ", binaryEncoding=" + binaryEncoding + "]";
+    return "HiveJsonReader [features=" + features + ", tsParser=" + tsParser;
   }
-
 }

Reply via email to