Repository: hive Updated Branches: refs/heads/master d06b69f57 -> a8eb4aef4
HIVE-12207 : Query fails when non-ascii characters are used in string literals (Aleksei Statkevich via Ashutosh Chauhan) Signed-off-by: Ashutosh Chauhan <hashut...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a8eb4aef Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a8eb4aef Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a8eb4aef Branch: refs/heads/master Commit: a8eb4aef496568fccbde4898d42c2c14875f7c03 Parents: d06b69f Author: Aleksei Statkevich <me.alek...@gmail.com> Authored: Sat Oct 17 23:37:00 2015 -0800 Committer: Ashutosh Chauhan <hashut...@apache.org> Committed: Thu Nov 5 14:16:31 2015 -0800 ---------------------------------------------------------------------- .../calcite/translator/RexNodeConverter.java | 13 ++++++++--- .../queries/clientpositive/non_ascii_literal1.q | 1 + .../queries/clientpositive/non_ascii_literal2.q | 5 +++++ .../clientpositive/non_ascii_literal1.q.out | 9 ++++++++ .../clientpositive/non_ascii_literal2.q.out | 23 ++++++++++++++++++++ 5 files changed, 48 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java index d315497..631a4ca 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java @@ -38,11 +38,14 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexNode; import 
org.apache.calcite.rex.RexUtil; +import org.apache.calcite.sql.SqlCollation; import org.apache.calcite.sql.SqlIntervalQualifier; import org.apache.calcite.sql.SqlOperator; import org.apache.calcite.sql.fun.SqlCastFunction; import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.util.ConversionUtil; +import org.apache.calcite.util.NlsString; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.common.type.Decimal128; @@ -301,6 +304,10 @@ public class RexNodeConverter { private static final BigInteger MIN_LONG_BI = BigInteger.valueOf(Long.MIN_VALUE), MAX_LONG_BI = BigInteger.valueOf(Long.MAX_VALUE); + private static NlsString asUnicodeString(String text) { + return new NlsString(text, ConversionUtil.NATIVE_UTF16_CHARSET_NAME, SqlCollation.IMPLICIT); + } + protected RexNode convert(ExprNodeConstantDesc literal) throws CalciteSemanticException { RexBuilder rexBuilder = cluster.getRexBuilder(); RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory(); @@ -377,16 +384,16 @@ public class RexNodeConverter { if (value instanceof HiveChar) { value = ((HiveChar) value).getValue(); } - calciteLiteral = rexBuilder.makeLiteral((String) value); + calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value)); break; case VARCHAR: if (value instanceof HiveVarchar) { value = ((HiveVarchar) value).getValue(); } - calciteLiteral = rexBuilder.makeLiteral((String) value); + calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value)); break; case STRING: - calciteLiteral = rexBuilder.makeLiteral((String) value); + calciteLiteral = rexBuilder.makeCharLiteral(asUnicodeString((String) value)); break; case DATE: Calendar cal = new GregorianCalendar(); http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/queries/clientpositive/non_ascii_literal1.q ---------------------------------------------------------------------- diff --git 
a/ql/src/test/queries/clientpositive/non_ascii_literal1.q b/ql/src/test/queries/clientpositive/non_ascii_literal1.q new file mode 100644 index 0000000..9573653 --- /dev/null +++ b/ql/src/test/queries/clientpositive/non_ascii_literal1.q @@ -0,0 +1 @@ +select concat("Абвгде", "谢谢") from src limit 1; http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/queries/clientpositive/non_ascii_literal2.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/non_ascii_literal2.q b/ql/src/test/queries/clientpositive/non_ascii_literal2.q new file mode 100644 index 0000000..6b25273 --- /dev/null +++ b/ql/src/test/queries/clientpositive/non_ascii_literal2.q @@ -0,0 +1,5 @@ +create table non_ascii_literal2 as +select "谢谢" as col1, "Абвгде" as col2; + +select * from non_ascii_literal2 +where col2 = "Абвгде"; http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/results/clientpositive/non_ascii_literal1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/non_ascii_literal1.q.out b/ql/src/test/results/clientpositive/non_ascii_literal1.q.out new file mode 100644 index 0000000..5b28f4e --- /dev/null +++ b/ql/src/test/results/clientpositive/non_ascii_literal1.q.out @@ -0,0 +1,9 @@ +PREHOOK: query: select concat("Абвгде", "谢谢") from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: select concat("Абвгде", "谢谢") from src limit 1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +Абвгде谢谢 http://git-wip-us.apache.org/repos/asf/hive/blob/a8eb4aef/ql/src/test/results/clientpositive/non_ascii_literal2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/non_ascii_literal2.q.out b/ql/src/test/results/clientpositive/non_ascii_literal2.q.out new file 
mode 100644 index 0000000..7e19143 --- /dev/null +++ b/ql/src/test/results/clientpositive/non_ascii_literal2.q.out @@ -0,0 +1,23 @@ +PREHOOK: query: create table non_ascii_literal2 as +select "谢谢" as col1, "Абвгде" as col2 +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: database:default +PREHOOK: Output: default@non_ascii_literal2 +POSTHOOK: query: create table non_ascii_literal2 as +select "谢谢" as col1, "Абвгде" as col2 +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_ascii_literal2 +PREHOOK: query: select * from non_ascii_literal2 +where col2 = "Абвгде" +PREHOOK: type: QUERY +PREHOOK: Input: default@non_ascii_literal2 +#### A masked pattern was here #### +POSTHOOK: query: select * from non_ascii_literal2 +where col2 = "Абвгде" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_ascii_literal2 +#### A masked pattern was here #### +谢谢 Абвгде