Yingyi Bu has submitted this change and it was merged.

Change subject: ASTERIXDB-1478: fix the utf8 reader.
......................................................................


ASTERIXDB-1478: fix the utf8 reader.

 1. Fix ASTERIXDB-1478.
 2. Add UTF-8 test cases.

Change-Id: Idb302dc604fcd71811de550d3d4bd727c81a13ee
Reviewed-on: https://asterix-gerrit.ics.uci.edu/1077
Sonar-Qube: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Tested-by: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Reviewed-by: Yingyi Bu <buyin...@gmail.com>
---
A asterixdb/asterix-app/data/adm-load/utf8.adm
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql
A 
asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm
M asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
M 
asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
M asterixdb/asterix-external-data/src/test/resources/results/beer.txt
8 files changed, 198 insertions(+), 2 deletions(-)

Approvals:
  Yingyi Bu: Looks good to me, approved
  Jenkins: Verified; No violations found



diff --git a/asterixdb/asterix-app/data/adm-load/utf8.adm 
b/asterixdb/asterix-app/data/adm-load/utf8.adm
new file mode 100644
index 0000000..2621bc4
--- /dev/null
+++ b/asterixdb/asterix-app/data/adm-load/utf8.adm
@@ -0,0 +1,100 @@
+{"id":"1","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"2","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"3","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"4","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"5","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"6","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"7","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"8","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"9","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"10","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"11","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"12","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"13","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"14","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"15","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"16","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"17","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"18","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"19","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"20","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"21","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"22","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"23","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"24","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"25","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"26","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"27","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"28","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"29","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"30","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"31","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"32","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"33","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"34","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"35","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"36","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"37","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"38","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"39","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"40","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"41","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"42","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"43","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"44","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"45","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"46","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"47","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"48","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"49","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"50","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"51","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"52","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"53","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"54","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"55","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"56","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"57","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"58","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"59","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"60","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"61","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"62","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"63","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"64","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"65","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"66","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"67","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"68","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"69","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"70","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"71","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"72","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"73","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"74","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"75","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"76","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"77","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"78","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"79","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"80","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"81","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"82","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"83","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"84","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"85","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"86","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"87","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"88","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"89","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"90","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"91","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"92","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"93","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"94","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"95","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"96","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"97","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"98","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"99","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
+{"id":"100","description":"随着人们信用活动的繁荣、社会对信用服务需求的激增,构建一个完整"}
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql
new file mode 100644
index 0000000..bcd3d46
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.1.ddl.aql
@@ -0,0 +1,29 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+drop dataverse test if exists;
+create dataverse test
+use dataverse test;
+
+create type DocType as open {
+  id: string,
+  description: string?
+};
+
+create dataset Doc (DocType)
+primary key id;
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql
new file mode 100644
index 0000000..4d4f4e5
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.2.update.aql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/**
+ *
+ * ADM file loading utf8
+ * Expected result: success
+ *
+ */
+
+use dataverse test;
+
+load dataset Doc
+using localfs
+(("path"="asterix_nc1://data/adm-load/utf8.adm"),("format"="adm"));
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql
 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql
new file mode 100644
index 0000000..95507bf
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/queries/load/utf8/utf8.3.query.aql
@@ -0,0 +1,24 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+use dataverse test;
+
+let $s := count(
+for $i in dataset Doc
+return $i)
+return $s
diff --git 
a/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm
 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm
new file mode 100644
index 0000000..29d6383
--- /dev/null
+++ 
b/asterixdb/asterix-app/src/test/resources/runtimets/results/load/utf8/utf8.1.adm
@@ -0,0 +1 @@
+100
diff --git a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml 
b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
index cf5bda3..749965e 100644
--- a/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/testsuite.xml
@@ -6560,6 +6560,11 @@
         <output-dir compare="Text">adm_binary</output-dir>
       </compilation-unit>
     </test-case>
+    <test-case FilePath="load">
+      <compilation-unit name="utf8">
+        <output-dir compare="Text">utf8</output-dir>
+      </compilation-unit>
+    </test-case>
   </test-group>
   <test-group name="hints">
     <test-case FilePath="hints">
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
index 94333d1..8e166c0 100644
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/input/stream/AsterixInputStreamReader.java
@@ -38,6 +38,7 @@
     private CharBuffer charBuffer = 
CharBuffer.allocate(ExternalDataConstants.DEFAULT_BUFFER_SIZE);
     private CharsetDecoder decoder;
     private boolean done = false;
+    private boolean remaining = false;
 
     public AsterixInputStreamReader(AsterixInputStream in) {
         this.in = in;
@@ -75,6 +76,7 @@
         charBuffer.clear();
         while (charBuffer.position() == 0) {
             if (byteBuffer.hasRemaining()) {
+                remaining = true;
                 decoder.decode(byteBuffer, charBuffer, false);
                 System.arraycopy(charBuffer.array(), 0, cbuf, offset, 
charBuffer.position());
                 if (charBuffer.position() > 0) {
@@ -97,8 +99,13 @@
                 done = true;
                 return len;
             }
-            byteBuffer.position(len);
+            if (remaining) {
+                byteBuffer.position(len + byteBuffer.position());
+            } else {
+                byteBuffer.position(len);
+            }
             byteBuffer.flip();
+            remaining = false;
             decoder.decode(byteBuffer, charBuffer, false);
             System.arraycopy(charBuffer.array(), 0, cbuf, offset, 
charBuffer.position());
         }
diff --git 
a/asterixdb/asterix-external-data/src/test/resources/results/beer.txt 
b/asterixdb/asterix-external-data/src/test/resources/results/beer.txt
index bcb3631..5a7983d 100644
--- a/asterixdb/asterix-external-data/src/test/resources/results/beer.txt
+++ b/asterixdb/asterix-external-data/src/test/resources/results/beer.txt
@@ -1450,7 +1450,7 @@
 { "name": "Baron Helles Bock", "abv": 6.4, "ibu": 0.0, "srm": 0.0, "upc": 0, 
"type": "beer", "brewery_id": "baron_brewing_company", "updated": "2010-07-22 
20:00:20", "description": "The Helles-Bock is similar to a traditional Maibock. 
Bocks are traditionally brewed in the winter / early spring months and are 
served during the spring / early summer months. The Helles Bock has a copper 
golden color with a brilliant white head. The body showcases a clean sweet 
maltiness that is offset by just enough hops to balance it. Very smooth and 
easy, drinkable yet deceptive at 6.4%.\r\n\r\nAll ingredients for the beer are 
imported from Germany. Brewed in accordance to the German Beer Purity Law 
(Reinheitsgebot) of 1516.", "style": "German-Style Heller Bock/Maibock", 
"category": "German Lager" }
 { "id": "baron_brewing_company-baron_helles_bock", "flags": 0, "expiration": 
0, "cas": 244367687683, "rev": 1, "vbid": 27, "dtype": 1 }
 "baron_brewing_company-baron_helles_bock"
-{ "name": "Basil T's Brew Pub and Italian Grill", "city": "Toms River", 
"state": "New Jersey", "code": "8753", "country": "United States", "phone": 
"1-732-244-7566", "website": "", "type": "rewery", "updated": "2010-07-22 
20:00:20", "description": "", "address": [ "1171 Hooper Avenue" ], "geo": { 
"accuracy": "RANGE_INTERPOLATED", "lat": 39.9767, "lon": -74.1829 } }
+{ "name": "Basil T's Brew Pub and Italian Grill", "city": "Toms River", 
"state": "New Jersey", "code": "8753", "country": "United States", "phone": 
"1-732-244-7566", "website": "", "type": "brewery", "updated": "2010-07-22 
20:00:20", "description": "", "address": [ "1171 Hooper Avenue" ], "geo": { 
"accuracy": "RANGE_INTERPOLATED", "lat": 39.9767, "lon": -74.1829 } }
 { "id": "basil_t_s_brew_pub_and_italian_grill", "flags": 0, "expiration": 0, 
"cas": 244364410882, "rev": 1, "vbid": 20, "dtype": 1 }
 "basil_t_s_brew_pub_and_italian_grill"
 { "name": "Nieuw Ligt Grand Cru 2006", "abv": 12.0, "ibu": 0.0, "srm": 0.0, 
"upc": 0, "type": "beer", "brewery_id": "stadsbrouwerij_de_hemel", "updated": 
"2010-07-22 20:00:20", "description": "" }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/1077
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: Idb302dc604fcd71811de550d3d4bd727c81a13ee
Gerrit-PatchSet: 12
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Wenhai Li <lwhaym...@yahoo.com>
Gerrit-Reviewer: Chen Li <che...@gmail.com>
Gerrit-Reviewer: Jenkins <jenk...@fulliautomatix.ics.uci.edu>
Gerrit-Reviewer: Jianfeng Jia <jianfeng....@gmail.com>
Gerrit-Reviewer: Michael Blow <mb...@apache.org>
Gerrit-Reviewer: Till Westmann <ti...@apache.org>
Gerrit-Reviewer: Wail Alkowaileet <wael....@gmail.com>
Gerrit-Reviewer: Yingyi Bu <buyin...@gmail.com>
Gerrit-Reviewer: abdullah alamoudi <bamou...@gmail.com>

Reply via email to