Repository: drill
Updated Branches:
  refs/heads/master e4938c094 -> 5a215144d


DRILL-5772: Enable UTF-8 support in query string by default

1. Bump up Drill Calcite version to include CALCITE-2014 changes.
2. Add saffron.properties file to the Drill conf folder.
3. Add appropriate unit tests.

closes #936


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/8eda4d77
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/8eda4d77
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/8eda4d77

Branch: refs/heads/master
Commit: 8eda4d7749c129c692f9e57db4c2a755a9139052
Parents: e4938c0
Author: Arina Ielchiieva <arina.yelchiy...@gmail.com>
Authored: Thu Sep 7 18:15:14 2017 +0300
Committer: Paul Rogers <prog...@maprtech.com>
Committed: Mon Oct 30 11:42:43 2017 -0700

----------------------------------------------------------------------
 .../apache/drill/exec/hive/TestHiveStorage.java | 16 +---
 distribution/src/assemble/bin.xml               |  4 +
 distribution/src/resources/saffron.properties   | 23 ++++++
 .../drill/TestUtf8SupportInQueryString.java     | 77 ++++++++++++++++++++
 .../exec/expr/fn/impl/TestStringFunctions.java  | 15 ----
 .../src/test/resources/saffron.properties       | 23 ++++++
 pom.xml                                         |  2 +-
 7 files changed, 129 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
----------------------------------------------------------------------
diff --git 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
index e465019..efd26ff 100644
--- 
a/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
+++ 
b/contrib/storage-hive/core/src/test/java/org/apache/drill/exec/hive/TestHiveStorage.java
@@ -17,13 +17,9 @@
  */
 package org.apache.drill.exec.hive;
 
-import mockit.Mock;
-import mockit.MockUp;
 import mockit.integration.junit4.JMockit;
 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
-import org.apache.calcite.util.Util;
-import org.apache.calcite.util.ConversionUtil;
 import org.apache.drill.categories.HiveStorageTest;
 import org.apache.drill.categories.SlowTest;
 import org.apache.drill.common.exceptions.UserRemoteException;
@@ -41,7 +37,6 @@ import org.junit.experimental.categories.Category;
 import org.junit.runner.RunWith;
 
 import java.math.BigDecimal;
-import java.nio.charset.Charset;
 import java.sql.Date;
 import java.sql.Timestamp;
 import java.util.List;
@@ -75,7 +70,7 @@ public class TestHiveStorage extends HiveTestBase {
           .sqlQuery(query)
           .unOrdered()
           .baselineColumns("col")
-          .baselineValues(200l)
+          .baselineValues(200L)
           .go();
     } finally {
       final OperatorFixture.TestOptionSet testOptionSet = new 
OperatorFixture.TestOptionSet();
@@ -554,15 +549,6 @@ public class TestHiveStorage extends HiveTestBase {
 
   @Test // DRILL-3250
   public void testNonAsciiStringLiterals() throws Exception {
-    // mock calcite util method to return utf charset
-    // instead of setting saffron.default.charset at system level
-    new MockUp<Util>() {
-      @Mock
-      Charset getDefaultCharset() {
-        return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME);
-      }
-    };
-
     testBuilder()
         .sqlQuery("select * from hive.empty_table where b = 
'Абвгде谢谢'")
         .expectsEmptyResultSet()

http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/distribution/src/assemble/bin.xml
----------------------------------------------------------------------
diff --git a/distribution/src/assemble/bin.xml 
b/distribution/src/assemble/bin.xml
index b0119d2..faa2e72 100644
--- a/distribution/src/assemble/bin.xml
+++ b/distribution/src/assemble/bin.xml
@@ -352,5 +352,9 @@
       <source>src/resources/core-site-example.xml</source>
       <outputDirectory>conf</outputDirectory>
     </file>
+    <file>
+      <source>src/resources/saffron.properties</source>
+      <outputDirectory>conf</outputDirectory>
+    </file>
   </files>
 </assembly>

http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/distribution/src/resources/saffron.properties
----------------------------------------------------------------------
diff --git a/distribution/src/resources/saffron.properties 
b/distribution/src/resources/saffron.properties
new file mode 100644
index 0000000..9a91343
--- /dev/null
+++ b/distribution/src/resources/saffron.properties
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This properties file is used by Apache Calcite to define allowed charset in 
string literals,
+# which is by default ISO-8859-1.
+# Current configuration allows parsing UTF-8 by default, i.e. queries that 
contain utf-8 string literal.
+# To take effect this file should be present in classpath.
+
+saffron.default.charset=UTF-16LE
+saffron.default.nationalcharset=UTF-16LE
+saffron.default.collation.name=UTF-16LE$en_US
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java
 
b/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java
new file mode 100644
index 0000000..e8573f8
--- /dev/null
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/TestUtf8SupportInQueryString.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill;
+
+import mockit.Deencapsulation;
+import mockit.Mock;
+import mockit.MockUp;
+import mockit.integration.junit4.JMockit;
+import org.apache.calcite.util.SaffronProperties;
+import org.apache.drill.common.exceptions.UserRemoteException;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+
+import java.util.Properties;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertThat;
+
+@RunWith(JMockit.class)
+public class TestUtf8SupportInQueryString extends BaseTestQuery {
+
+  @Test
+  public void testUtf8SupportInQueryStringByDefault() throws Exception {
+    // can be defined in saffron.properties file present in classpath or 
system property
+    testBuilder()
+        .sqlQuery("select 'привет' as hello from (values(1))")
+        .unOrdered()
+        .baselineColumns("hello")
+        .baselineValues("привет")
+        .go();
+  }
+
+  @Test(expected = UserRemoteException.class)
+  public void testDisableUtf8SupportInQueryString() throws Exception {
+    Deencapsulation.setField(SaffronProperties.class, "properties", null);
+    final Properties properties = System.getProperties();
+    final String charset = "ISO-8859-1";
+    new MockUp<System>()
+    {
+      @Mock
+      Properties getProperties() {
+        Properties newProperties = new Properties();
+        newProperties.putAll(properties);
+        newProperties.put("saffron.default.charset", charset);
+        newProperties.put("saffron.default.nationalcharset", charset);
+        newProperties.put("saffron.default.collation.name", charset + 
"$en_US");
+        return newProperties;
+      }
+    };
+
+    final String hello = "привет";
+    try {
+      test("values('%s')", hello);
+    } catch (UserRemoteException e) {
+      assertThat(e.getMessage(), containsString(
+          String.format("Failed to encode '%s' in character set '%s'", hello, 
charset)));
+      throw e;
+    } finally {
+      Deencapsulation.setField(SaffronProperties.class, "properties", null);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index e0ebf0c..4249af6 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -19,11 +19,7 @@ package org.apache.drill.exec.expr.fn.impl;
 
 import static org.junit.Assert.assertTrue;
 
-import mockit.Mock;
-import mockit.MockUp;
 import mockit.integration.junit4.JMockit;
-import org.apache.calcite.util.ConversionUtil;
-import org.apache.calcite.util.Util;
 import org.apache.commons.io.FileUtils;
 import org.apache.drill.BaseTestQuery;
 import org.apache.drill.categories.SqlFunctionTest;
@@ -38,7 +34,6 @@ import org.junit.runner.RunWith;
 import java.io.BufferedWriter;
 import java.io.File;
 import java.io.FileWriter;
-import java.nio.charset.Charset;
 
 @RunWith(JMockit.class)
 @Category(SqlFunctionTest.class)
@@ -1339,16 +1334,6 @@ public class TestStringFunctions extends BaseTestQuery {
   @Ignore("DRILL-5477")
   @Test
   public void testMultiByteEncoding() throws Exception {
-    // mock calcite util method to return utf charset
-    // instead of setting saffron.default.charset at system level
-    new MockUp<Util>()
-    {
-      @Mock
-      Charset getDefaultCharset() {
-        return Charset.forName(ConversionUtil.NATIVE_UTF16_CHARSET_NAME);
-      }
-    };
-
     testBuilder()
         .sqlQuery("select\n" +
             "upper('привет')as col_upper,\n" +

http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/exec/java-exec/src/test/resources/saffron.properties
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/resources/saffron.properties 
b/exec/java-exec/src/test/resources/saffron.properties
new file mode 100644
index 0000000..9a91343
--- /dev/null
+++ b/exec/java-exec/src/test/resources/saffron.properties
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This properties file is used by Apache Calcite to define allowed charset in 
string literals,
+# which is by default ISO-8859-1.
+# Current configuration allows parsing UTF-8 by default, i.e. queries that 
contain utf-8 string literal.
+# To take effect this file should be present in classpath.
+
+saffron.default.charset=UTF-16LE
+saffron.default.nationalcharset=UTF-16LE
+saffron.default.collation.name=UTF-16LE$en_US
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/8eda4d77/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 2c0d16c..2d3a0ac 100644
--- a/pom.xml
+++ b/pom.xml
@@ -36,7 +36,7 @@
     <dep.guava.version>18.0</dep.guava.version>
     <forkCount>2</forkCount>
     <parquet.version>1.8.1-drill-r0</parquet.version>
-    <calcite.version>1.4.0-drill-r22</calcite.version>
+    <calcite.version>1.4.0-drill-r23</calcite.version>
     <janino.version>2.7.6</janino.version>
     <sqlline.version>1.1.9-drill-r7</sqlline.version>
     <jackson.version>2.7.8</jackson.version>

Reply via email to