Repository: drill Updated Branches: refs/heads/master e4938c094 -> 5a215144d
DRILL-5772: Enable UTF-8 support in query string by default 1. Bump up Drill Calcite version to include CALCITE-2014 changes. 2. Add saffron.properties file to the Drill conf folder. 3. Add appropriate unit tests. closes #936 Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/8eda4d77 Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/8eda4d77 Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/8eda4d77 Branch: refs/heads/master Commit: 8eda4d7749c129c692f9e57db4c2a755a9139052 Parents: e4938c0 Author: Arina Ielchiieva <arina.yelchiy...@gmail.com> Authored: Thu Sep 7 18:15:14 2017 +0300 Committer: Paul Rogers <prog...@maprtech.com> Committed: Mon Oct 30 11:42:43 2017 -0700 ---------------------------------------------------------------------- .../apache/drill/exec/hive/TestHiveStorage.java | 16 +--- distribution/src/assemble/bin.xml | 4 + distribution/src/resources/saffron.properties | 23 ++++++ .../drill/TestUtf8SupportInQueryString.java | 77 ++++++++++++++++++++ .../exec/expr/fn/impl/TestStringFunctions.java | 15 ---- .../src/test/resources/saffron.properties | 23 ++++++ pom.xml | 2 +- 7 files changed, 129 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java ---------------------------------------------------------------------- diff --git a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java index e465019..efd26ff 100644 --- a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java +++ b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java @@ -17,13 +17,9 @@ */ package org.apache.drill.exec.hive; -import 
mockit.Mock; -import mockit.MockUp; import mockit.integration.junit4.JMockit; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -import org.apache.calcite.util.Util; -import org.apache.calcite.util.ConversionUtil; import org.apache.drill.categories.HiveStorageTest; import org.apache.drill.categories.SlowTest; import org.apache.drill.common.exceptions.UserRemoteException; @@ -41,7 +37,6 @@ import org.junit.experimental.categories.Category; import org.junit.runner.RunWith; import java.math.BigDecimal; -import java.nio.charset.Charset; import java.sql.Date; import java.sql.Timestamp; import java.util.List; @@ -75,7 +70,7 @@ public class TestHiveStorage extends HiveTestBase { .sqlQuery(query) .unOrdered() .baselineColumns("col") - .baselineValues(200l) + .baselineValues(200L) .go(); } finally { final OperatorFixture.TestOptionSet testOptionSet = new OperatorFixture.TestOptionSet(); @@ -554,15 +549,6 @@ public class TestHiveStorage extends HiveTestBase { @Test // DRILL-3250 public void testNonAsciiStringLiterals() throws Exception { - // mock calcite util method to return utf charset - // instead of setting saffron.default.charset at system level - new MockUp<Util>() { - @Mock - Charset getDefaultCharset() { - return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME); - } - }; - testBuilder() .sqlQuery("select * from hive.empty_table where b = 'Абвгде谢谢'") .expectsEmptyResultSet() http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/distribution/src/assemble/bin.xml ---------------------------------------------------------------------- diff --git a/distribution/src/assemble/bin.xml b/distribution/src/assemble/bin.xml index b0119d2..faa2e72 100644 --- a/distribution/src/assemble/bin.xml +++ b/distribution/src/assemble/bin.xml @@ -352,5 +352,9 @@ <source>src/resources/core-site-example.xml</source> <outputDirectory>conf</outputDirectory> </file> + <file> + <source>src/resources/saffron.properties</source> + 
<outputDirectory>conf</outputDirectory> + </file> </files> </assembly> http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/distribution/src/resources/saffron.properties ---------------------------------------------------------------------- diff --git a/distribution/src/resources/saffron.properties b/distribution/src/resources/saffron.properties new file mode 100644 index 0000000..9a91343 --- /dev/null +++ b/distribution/src/resources/saffron.properties @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This properties file is used by Apache Calcite to define allowed charset in string literals, +# which is by default ISO-8859-1. +# Current configuration allows parsing UTF-8 by default, i.e. queries that contain utf-8 string literal. +# To take affect this file should be present in classpath. 
+ +saffron.default.charset=UTF-16LE +saffron.default.nationalcharset=UTF-16LE +saffron.default.collation.name=UTF-16LE$en_US \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java b/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java new file mode 100644 index 0000000..e8573f8 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.drill; + +import mockit.Deencapsulation; +import mockit.Mock; +import mockit.MockUp; +import mockit.integration.junit4.JMockit; +import org.apache.calcite.util.SaffronProperties; +import org.apache.drill.common.exceptions.UserRemoteException; +import org.junit.Test; +import org.junit.runner.RunWith; + +import java.util.Properties; + +import static org.hamcrest.CoreMatchers.containsString; +import static org.junit.Assert.assertThat; + +@RunWith(JMockit.class) +public class TestUtf8SupportInQueryString extends BaseTestQuery { + + @Test + public void testUtf8SupportInQueryStringByDefault() throws Exception { + // can be defined in saffron.properties file present in classpath or system property + testBuilder() + .sqlQuery("select 'привет' as hello from (values(1))") + .unOrdered() + .baselineColumns("hello") + .baselineValues("привет") + .go(); + } + + @Test(expected = UserRemoteException.class) + public void testDisableUtf8SupportInQueryString() throws Exception { + Deencapsulation.setField(SaffronProperties.class, "properties", null); + final Properties properties = System.getProperties(); + final String charset = "ISO-8859-1"; + new MockUp<System>() + { + @Mock + Properties getProperties() { + Properties newProperties = new Properties(); + newProperties.putAll(properties); + newProperties.put("saffron.default.charset", charset); + newProperties.put("saffron.default.nationalcharset", charset); + newProperties.put("saffron.default.collation.name", charset + "$en_US"); + return newProperties; + } + }; + + final String hello = "привет"; + try { + test("values('%s')", hello); + } catch (UserRemoteException e) { + assertThat(e.getMessage(), containsString( + String.format("Failed to encode '%s' in character set '%s'", hello, charset))); + throw e; + } finally { + Deencapsulation.setField(SaffronProperties.class, "properties", null); + } + } + +} 
http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java index e0ebf0c..4249af6 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java @@ -19,11 +19,7 @@ package org.apache.drill.exec.expr.fn.impl; import static org.junit.Assert.assertTrue; -import mockit.Mock; -import mockit.MockUp; import mockit.integration.junit4.JMockit; -import org.apache.calcite.util.ConversionUtil; -import org.apache.calcite.util.Util; import org.apache.commons.io.FileUtils; import org.apache.drill.BaseTestQuery; import org.apache.drill.categories.SqlFunctionTest; @@ -38,7 +34,6 @@ import org.junit.runner.RunWith; import java.io.BufferedWriter; import java.io.File; import java.io.FileWriter; -import java.nio.charset.Charset; @RunWith(JMockit.class) @Category(SqlFunctionTest.class) @@ -1339,16 +1334,6 @@ public class TestStringFunctions extends BaseTestQuery { @Ignore("DRILL-5477") @Test public void testMultiByteEncoding() throws Exception { - // mock calcite util method to return utf charset - // instead of setting saffron.default.charset at system level - new MockUp<Util>() - { - @Mock - Charset getDefaultCharset() { - return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME); - } - }; - testBuilder() .sqlQuery("select\n" + "upper('привет')as col_upper,\n" + http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/exec/java-exec/src/test/resources/saffron.properties ---------------------------------------------------------------------- diff --git 
a/exec/java-exec/src/test/resources/saffron.properties b/exec/java-exec/src/test/resources/saffron.properties new file mode 100644 index 0000000..9a91343 --- /dev/null +++ b/exec/java-exec/src/test/resources/saffron.properties @@ -0,0 +1,23 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to you under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This properties file is used by Apache Calcite to define allowed charset in string literals, +# which is by default ISO-8859-1. +# Current configuration allows parsing UTF-8 by default, i.e. queries that contain utf-8 string literal. +# To take affect this file should be present in classpath. 
+ +saffron.default.charset=UTF-16LE +saffron.default.nationalcharset=UTF-16LE +saffron.default.collation.name=UTF-16LE$en_US \ No newline at end of file http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 2c0d16c..2d3a0ac 100644 --- a/pom.xml +++ b/pom.xml @@ -36,7 +36,7 @@ <dep.guava.version>18.0</dep.guava.version> <forkCount>2</forkCount> <parquet.version>1.8.1-drill-r0</parquet.version> - <calcite.version>1.4.0-drill-r22</calcite.version> + <calcite.version>1.4.0-drill-r23</calcite.version> <janino.version>2.7.6</janino.version> <sqlline.version>1.1.9-drill-r7</sqlline.version> <jackson.version>2.7.8</jackson.version>