Repository: asterixdb Updated Branches: refs/heads/master 49a8a3aca -> 503c62200
[ASTERIXDB-2303][API] Fix Supplementary Chars Printing - user model changes: no - storage format changes: no - interface changes: no Details: - Properly print supplementary chars as utf8 by converting their java surrogates to a string. - Add test case. Change-Id: I59e825c11ff750d5b651fb86712023c52e98367e Reviewed-on: https://asterix-gerrit.ics.uci.edu/2429 Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Contrib: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Integration-Tests: Jenkins <jenk...@fulliautomatix.ics.uci.edu> Reviewed-by: Michael Blow <mb...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/503c6220 Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/503c6220 Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/503c6220 Branch: refs/heads/master Commit: 503c622005800f47c2182a258c8aa0ee42d3b936 Parents: 49a8a3a Author: Murtadha Hubail <mhub...@apache.org> Authored: Thu Mar 1 08:04:45 2018 +0300 Committer: Murtadha Hubail <mhub...@apache.org> Committed: Thu Mar 1 11:24:21 2018 -0800 ---------------------------------------------------------------------- .../string/utf8/utf8.1.query.sqlpp | 19 +++++++++++++++ .../runtimets/results/string/utf8/utf8.1.adm | 1 + .../resources/runtimets/testsuite_sqlpp.xml | 5 ++++ .../data/nontagged/printers/PrintTools.java | 25 ++++++++++++++++++++ 4 files changed, 50 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp new file mode 100644 index 
0000000..88909ef --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/queries_sqlpp/string/utf8/utf8.1.query.sqlpp @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +select value "\uD83D\uDE22\uD83D\uDE22\uD83D\uDC89\uD83D\uDC89 = 😢😢💉💉. Coffee ☕‼️😃. حسنا"; \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm new file mode 100644 index 0000000..89c6334 --- /dev/null +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/string/utf8/utf8.1.adm @@ -0,0 +1 @@ +"😢😢💉💉 = 😢😢💉💉. Coffee ☕‼️😃. 
حسنا" \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml index 4265163..9fc0b4b 100644 --- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml +++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite_sqlpp.xml @@ -6445,6 +6445,11 @@ <output-dir compare="Text">varlen-encoding</output-dir> </compilation-unit> </test-case> + <test-case FilePath="string"> + <compilation-unit name="utf8"> + <output-dir compare="Text">utf8</output-dir> + </compilation-unit> + </test-case> </test-group> <test-group name="subquery"> <test-case FilePath="subquery"> http://git-wip-us.apache.org/repos/asf/asterixdb/blob/503c6220/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java index b1039a5..8d05f0f 100644 --- a/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java +++ b/asterixdb/asterix-om/src/main/java/org/apache/asterix/dataflow/data/nontagged/printers/PrintTools.java @@ -357,6 +357,13 @@ public class PrintTools { break; } break; + case 3: + // special treatment for surrogates + if (Character.isHighSurrogate(c)) { + position += writeSupplementaryChar(os, b, maxPosition, position, c, sz); + sz = 0; + } + break; } while (sz > 0) { os.write(b[position]); @@ -378,4 +385,22 @@ public class PrintTools { os.write(HexPrinter.hex(c & 0x0f, 
HexPrinter.CASE.LOWER_CASE)); } + /** + * Writes a supplementary char consisting of high and low surrogates + * + * @return The length of the surrogates + * @throws IOException + */ + private static int writeSupplementaryChar(OutputStream os, byte[] src, int limit, int highSurrogatePos, + char highSurrogate, int highSurrogateSize) throws IOException { + final int lowSurrogatePos = highSurrogatePos + highSurrogateSize; + if (lowSurrogatePos >= limit) { + throw new IllegalStateException("malformed utf8 input"); + } + final char lowSurrogate = UTF8StringUtil.charAt(src, lowSurrogatePos); + final int lowSurrogateSize = UTF8StringUtil.charSize(src, lowSurrogatePos); + os.write(new String(new char[] { highSurrogate, lowSurrogate }).getBytes()); + return highSurrogateSize + lowSurrogateSize; + } + }