This is an automated email from the ASF dual-hosted git repository.
okumin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new fcb59c8bc98 HIVE-28673: Fix issues in JSON SerDe implementations
related to Decimal (#5584) (Araika Singh, reviewed by Indhumathi Muthumurugesh,
Shohei Okumiya)
fcb59c8bc98 is described below
commit fcb59c8bc98d7767d432029e808dc8788f882eec
Author: NZEC <[email protected]>
AuthorDate: Thu Feb 6 18:23:56 2025 +0530
HIVE-28673: Fix issues in JSON SerDe implementations related to Decimal
(#5584) (Araika Singh, reviewed by Indhumathi Muthumurugesh, Shohei Okumiya)
---
data/files/jsonserde.txt | 2 +-
ql/src/test/queries/clientpositive/json_serde3.q | 32 ++++-
.../results/clientpositive/llap/json_serde3.q.out | 138 ++++++++++++++++++++-
.../hadoop/hive/serde2/json/HiveJsonReader.java | 8 +-
4 files changed, 174 insertions(+), 6 deletions(-)
diff --git a/data/files/jsonserde.txt b/data/files/jsonserde.txt
index 08963f2897b..3fa8560cd57 100644
--- a/data/files/jsonserde.txt
+++ b/data/files/jsonserde.txt
@@ -1 +1 @@
-{"binarycolumn1" : -2, "binarycolumn2" : false, "binarycolumn3" : null,
"binarycolumn4" : true, "binarycolumn5" : 1.23e45, "binarycolumn6" : "value",
"booleancaseinsensitive" : "TrUE", "booleanstring" : "true", "booleanboolean" :
true, "stringfalse" : "FaLSE", "somestring" : "somestringhere", "booleannull" :
null, "booleannumfalse" : 0, "booleannumtrue" : -1}
\ No newline at end of file
+{"binarycolumn1" : -2, "binarycolumn2" : false, "binarycolumn3" : null,
"binarycolumn4" : true, "binarycolumn5" : 1.23e45, "binarycolumn6" : "value",
"booleancaseinsensitive" : "TrUE", "booleanstring" : "true", "booleanboolean" :
true, "stringfalse" : "FaLSE", "somestring" : "somestringhere", "booleannull" :
null, "booleannumfalse" : 0, "booleannumtrue" : -1, "decimalcol1" :
-9999999999999999.99, "decimalcol2" : 9999999999999999.99, "decimalcol3" :
1000000000000000000000000000000000000.0 [...]
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/json_serde3.q
b/ql/src/test/queries/clientpositive/json_serde3.q
index 48e45bfa769..4cb64e1d959 100644
--- a/ql/src/test/queries/clientpositive/json_serde3.q
+++ b/ql/src/test/queries/clientpositive/json_serde3.q
@@ -2,6 +2,8 @@ drop table if exists json_serde3_1;
drop table if exists json_serde3_2;
drop table if exists json_serde3_3;
drop table if exists json_serde3_4;
+drop table if exists json_serde3_5;
+drop table if exists json_serde3_6;
create table json_serde3_1 (
binarycolumn1 binary,
@@ -64,7 +66,35 @@ INSERT INTO TABLE json_serde3_4 VALUES ("TrUE", "true",
true, "FaLSE", "somestri
select * from json_serde3_4;
+create table json_serde3_5 (
+ decimalcol1 decimal(18,2),
+ decimalcol2 decimal(38,2),
+ decimalcol3 decimal(38,2),
+ decimalcol4 decimal(18,2),
+ decimalcol5 decimal(38,2))
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table
json_serde3_5;
+INSERT INTO TABLE json_serde3_5 VALUES (-9999999999999999.99,
9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
+
+select * from json_serde3_5;
+
+create table json_serde3_6 (
+ decimalcol1 decimal(18,2),
+ decimalcol2 decimal(38,2),
+ decimalcol3 decimal(38,2),
+ decimalcol4 decimal(18,2),
+ decimalcol5 decimal(38,2))
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe';
+
+LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into table
json_serde3_6;
+INSERT INTO TABLE json_serde3_6 VALUES (-9999999999999999.99,
9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39);
+
+select * from json_serde3_6;
+
drop table json_serde3_1;
drop table json_serde3_2;
drop table json_serde3_3;
-drop table json_serde3_4;
\ No newline at end of file
+drop table json_serde3_4;
+drop table json_serde3_5;
+drop table json_serde3_6;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/llap/json_serde3.q.out
b/ql/src/test/results/clientpositive/llap/json_serde3.q.out
index 0b33db51d62..b91ee954a6e 100644
--- a/ql/src/test/results/clientpositive/llap/json_serde3.q.out
+++ b/ql/src/test/results/clientpositive/llap/json_serde3.q.out
@@ -22,6 +22,18 @@ PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists json_serde3_4
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_5
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: drop table if exists json_serde3_6
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists json_serde3_6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
PREHOOK: query: create table json_serde3_1 (
binarycolumn1 binary,
binarycolumn2 binary,
@@ -77,7 +89,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde3_1
#### A masked pattern was here ####
-2 FALSE NULL TRUE 1.23E45 value
--2 false NULL true 1.23E45 value
+-2 false NULL true 1.23E+45 value
PREHOOK: query: create table json_serde3_2 (
binarycolumn1 binary,
binarycolumn2 binary,
@@ -131,7 +143,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@json_serde3_2
#### A masked pattern was here ####
-2 FALSE NULL TRUE 1.23E45 value
--2 false NULL true 1.23E45 value
+-2 false NULL true 1.23E+45 value
PREHOOK: query: create table json_serde3_3 (
booleancaseinsensitive boolean,
booleanstring boolean,
@@ -252,6 +264,108 @@ POSTHOOK: Input: default@json_serde3_4
#### A masked pattern was here ####
true true true false true NULL false true
true true true false true NULL false true
+PREHOOK: query: create table json_serde3_5 (
+ decimalcol1 decimal(18,2),
+ decimalcol2 decimal(38,2),
+ decimalcol3 decimal(38,2),
+ decimalcol4 decimal(18,2),
+ decimalcol5 decimal(38,2))
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_5
+POSTHOOK: query: create table json_serde3_5 (
+ decimalcol1 decimal(18,2),
+ decimalcol2 decimal(38,2),
+ decimalcol3 decimal(38,2),
+ decimalcol4 decimal(18,2),
+ decimalcol5 decimal(38,2))
+ row format serde 'org.apache.hadoop.hive.serde2.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_5
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_5
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_5
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_5
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_5
+PREHOOK: query: INSERT INTO TABLE json_serde3_5 VALUES (-9999999999999999.99,
9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_5
+POSTHOOK: query: INSERT INTO TABLE json_serde3_5 VALUES (-9999999999999999.99,
9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_5
+POSTHOOK: Lineage: json_serde3_5.decimalcol1 SCRIPT []
+POSTHOOK: Lineage: json_serde3_5.decimalcol2 SCRIPT []
+POSTHOOK: Lineage: json_serde3_5.decimalcol3 SCRIPT []
+POSTHOOK: Lineage: json_serde3_5.decimalcol4 SCRIPT []
+POSTHOOK: Lineage: json_serde3_5.decimalcol5 SCRIPT []
+PREHOOK: query: select * from json_serde3_5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_5
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_5
+#### A masked pattern was here ####
+-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
+-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
+PREHOOK: query: create table json_serde3_6 (
+ decimalcol1 decimal(18,2),
+ decimalcol2 decimal(38,2),
+ decimalcol3 decimal(38,2),
+ decimalcol4 decimal(18,2),
+ decimalcol5 decimal(38,2))
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_6
+POSTHOOK: query: create table json_serde3_6 (
+ decimalcol1 decimal(18,2),
+ decimalcol2 decimal(38,2),
+ decimalcol3 decimal(38,2),
+ decimalcol4 decimal(18,2),
+ decimalcol5 decimal(38,2))
+ row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_6
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_6
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@json_serde3_6
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/jsonserde.txt' into
table json_serde3_6
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@json_serde3_6
+PREHOOK: query: INSERT INTO TABLE json_serde3_6 VALUES (-9999999999999999.99,
9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@json_serde3_6
+POSTHOOK: query: INSERT INTO TABLE json_serde3_6 VALUES (-9999999999999999.99,
9999999999999999.99, 1000000000000000000000000000000000000.00, 99.999, 1e39)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@json_serde3_6
+POSTHOOK: Lineage: json_serde3_6.decimalcol1 SCRIPT []
+POSTHOOK: Lineage: json_serde3_6.decimalcol2 SCRIPT []
+POSTHOOK: Lineage: json_serde3_6.decimalcol3 SCRIPT []
+POSTHOOK: Lineage: json_serde3_6.decimalcol4 SCRIPT []
+POSTHOOK: Lineage: json_serde3_6.decimalcol5 SCRIPT []
+PREHOOK: query: select * from json_serde3_6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@json_serde3_6
+#### A masked pattern was here ####
+POSTHOOK: query: select * from json_serde3_6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@json_serde3_6
+#### A masked pattern was here ####
+-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
+-9999999999999999.99 9999999999999999.99 NULL 100.00 NULL
PREHOOK: query: drop table json_serde3_1
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@json_serde3_1
@@ -292,3 +406,23 @@ POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@json_serde3_4
POSTHOOK: Output: database:default
POSTHOOK: Output: default@json_serde3_4
+PREHOOK: query: drop table json_serde3_5
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_5
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_5
+POSTHOOK: query: drop table json_serde3_5
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_5
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_5
+PREHOOK: query: drop table json_serde3_6
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@json_serde3_6
+PREHOOK: Output: database:default
+PREHOOK: Output: default@json_serde3_6
+POSTHOOK: query: drop table json_serde3_6
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@json_serde3_6
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@json_serde3_6
diff --git
a/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
b/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
index c504b9ec4ff..da9721cc766 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/json/HiveJsonReader.java
@@ -37,6 +37,7 @@
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import com.fasterxml.jackson.databind.DeserializationFeature;
import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hive.common.type.Date;
@@ -56,6 +57,8 @@
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TimestampLocalTZTypeInfo;
import org.apache.hive.common.util.TimestampParser;
@@ -172,7 +175,7 @@ public HiveJsonReader(ObjectInspector oi) {
public HiveJsonReader(ObjectInspector oi, TimestampParser tsParser) {
this.tsParser = tsParser;
this.oi = oi;
- this.objectMapper = new ObjectMapper();
+ this.objectMapper = new
ObjectMapper().enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS);
}
/**
@@ -426,7 +429,8 @@ private Object visitLeafNode(final JsonNode leafNode,
case TIMESTAMP:
return tsParser.parseTimestamp(leafNode.asText());
case DECIMAL:
- return HiveDecimal.create(leafNode.asText());
+ HiveDecimal decimal = HiveDecimal.create(leafNode.asText());
+ return HiveDecimalUtils.enforcePrecisionScale(decimal, (DecimalTypeInfo)
typeInfo);
case TIMESTAMPLOCALTZ:
final Timestamp ts = tsParser.parseTimestamp(leafNode.asText());
final ZoneId zid = ((TimestampLocalTZTypeInfo) typeInfo).timeZone();