This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch branch-2 in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/branch-2 by this push: new 70bcaa8 HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234) 70bcaa8 is described below commit 70bcaa85eda16d3876bb4e3781c2ac030b821faa Author: Vineet G <vg...@apache.org> AuthorDate: Tue Jul 14 14:03:47 2020 -0700 HIVE-23807 Wrong results with vectorization enabled (Vineet Garg, reviewed by Jesus Camacho Rodriguez) (#1234) --- .../test/resources/testconfiguration.properties | 1 + .../exec/vector/expressions/CastStringToDate.java | 14 ++- .../vectorization_cast_string_to_date.q | 33 ++++++ .../llap/vectorization_cast_string_to_date.q.out | 118 +++++++++++++++++++++ 4 files changed, 161 insertions(+), 5 deletions(-) diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index deffd85..a1c7825 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -411,6 +411,7 @@ minillap.query.files=acid_bucket_pruning.q,\ bucket5.q,\ bucket6.q,\ except_distinct.q,\ + vectorization_cast_string_to_date.q,\ explainuser_2.q,\ empty_dir_in_table.q,\ intersect_all.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java index 4b176ae..2667708 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/CastStringToDate.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hive.common.util.DateParser; +import java.sql.Date; + import java.nio.charset.StandardCharsets; /** @@ -116,14 +118,16 @@ public class CastStringToDate extends VectorExpression { private void evaluate(LongColumnVector outV, BytesColumnVector inV, int i) { String dateString = new String(inV.vector[i], inV.start[i], inV.length[i], StandardCharsets.UTF_8); - if (dateParser.parseDate(dateString, sqlDate)) { + try { + Date utilDate = Date.valueOf(dateString); + sqlDate.setTime(utilDate.getTime()); outV.vector[i] = DateWritable.dateToDays(sqlDate); return; + } catch (IllegalArgumentException e) { + outV.vector[i] = 1; + outV.isNull[i] = true; + outV.noNulls = false; } - - outV.vector[i] = 1; - outV.isNull[i] = true; - outV.noNulls = false; } @Override diff --git a/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q b/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q new file mode 100644 index 0000000..0bc39dd --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorization_cast_string_to_date.q @@ -0,0 +1,33 @@ +CREATE TABLE `test13`( + `portfolio_valuation_date` string, + `price_cut_off_datetime` string, + `portfolio_id_valuation_source` string, + `contributor_full_path` string, + `position_market_value` double, + `mandate_name` string) +STORED AS ORC; + +INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.26, "foo"); + +INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.33, "foo"); + +INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.03, "foo"); + +INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.16, "foo"); + +INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.08, "foo"); + +set hive.fetch.task.conversion=none; +set hive.explain.user=false; + +set hive.vectorized.execution.enabled=false; +select Cast(`test13`.`price_cut_off_datetime` AS date) from test13; + + +set hive.vectorized.execution.enabled=true; +select Cast(`test13`.`price_cut_off_datetime` AS date) from test13; diff --git a/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out b/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out new file mode 100644 index 0000000..4a5bd63 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/vectorization_cast_string_to_date.q.out @@ -0,0 +1,118 @@ +PREHOOK: query: CREATE TABLE `test13`( + `portfolio_valuation_date` string, + `price_cut_off_datetime` string, + `portfolio_id_valuation_source` string, + `contributor_full_path` string, + `position_market_value` double, + `mandate_name` string) +STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test13 +POSTHOOK: query: CREATE TABLE `test13`( + `portfolio_valuation_date` string, + `price_cut_off_datetime` string, + `portfolio_id_valuation_source` string, + `contributor_full_path` string, + `position_market_value` double, + `mandate_name` string) +STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test13 +PREHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.26, "foo") +PREHOOK: type: QUERY +PREHOOK: Output: default@test13 +POSTHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.26, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test13 +POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.33, "foo") +PREHOOK: type: QUERY +PREHOOK: Output: default@test13 +POSTHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.33, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test13 +POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.03, "foo") +PREHOOK: type: QUERY +PREHOOK: Output: default@test13 +POSTHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, -0.03, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test13 +POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.16, "foo") +PREHOOK: type: QUERY +PREHOOK: Output: default@test13 +POSTHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.16, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test13 +POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.08, "foo") +PREHOOK: type: QUERY +PREHOOK: Output: default@test13 +POSTHOOK: query: INSERT INTO test13 values ( +"2020-01-31", "2020-02-07T03:14:48.007Z", "37", NULL, 0.08, "foo") +POSTHOOK: type: QUERY +POSTHOOK: Output: default@test13 +POSTHOOK: Lineage: test13.contributor_full_path SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col4, type:string, comment:), ] +POSTHOOK: Lineage: test13.mandate_name SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col6, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_id_valuation_source SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ] +POSTHOOK: Lineage: test13.portfolio_valuation_date SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] +POSTHOOK: Lineage: test13.position_market_value EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col5, type:string, comment:), ] +POSTHOOK: Lineage: test13.price_cut_off_datetime SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col2, type:string, comment:), ] +PREHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13 +PREHOOK: type: QUERY +PREHOOK: Input: default@test13 +#### A masked pattern was here #### +POSTHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test13 +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13 +PREHOOK: type: QUERY +PREHOOK: Input: default@test13 +#### A masked pattern was here #### +POSTHOOK: query: select Cast(`test13`.`price_cut_off_datetime` AS date) from test13 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test13 +#### A masked pattern was here #### +NULL +NULL +NULL +NULL +NULL