This is an automated email from the ASF dual-hosted git repository.

djwang pushed a commit to branch merge-with-upstream
in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git

commit 42dddc466f9e10ba355ffbad90ffe70e14e31519
Author: Bradford D. Boyle <[email protected]>
AuthorDate: Mon Sep 19 13:27:48 2022 -0700

    Relax the requirement that C string length matches Java string length (#870)
    
    Commit 94f8ccad added a check that the length of received strings (as
    determined by strnlen) matched the length of the string as determined by
    the Java-based PXF service; if they did not, this was treated as a fatal
    error. Some users have reported that they have ORC/Parquet files with
    strings that contain ASCII NUL bytes. For these strings, the length
    calculated by strnlen does not match the Java string length.
    
    This commit removes the requirement that the lengths be equal and
    instead logs a debug message when they do not match.
    
    Authored-by: Bradford D. Boyle <[email protected]>
---
 .../greenplum/pxf/automation/features/orc/OrcReadTest.java  |  6 ++++++
 .../main/pxf/features/orc/read/null_in_string/__init__.py   |  0
 .../features/orc/read/null_in_string/expected/query01.ans   | 13 +++++++++++++
 .../main/pxf/features/orc/read/null_in_string/runTest.py    | 11 +++++++++++
 .../pxf/features/orc/read/null_in_string/sql/query01.sql    |  3 +++
 5 files changed, 33 insertions(+)

diff --git 
a/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
 
b/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
index ec6ef820..480d19db 100644
--- 
a/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
+++ 
b/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java
@@ -175,6 +175,12 @@ public class OrcReadTest extends BaseFeature {
         runSqlTest("features/orc/read/null_in_string");
     }
 
+    @Test(groups = {"features", "gpdb", "security", "hcfs"})
+    public void orcReadStringsContainingNullByte() throws Exception {
+        prepareReadableExternalTable("pxf_orc_null_in_string", 
ORC_NULL_IN_STRING_COLUMNS, hdfsPath + ORC_NULL_IN_STRING);
+        runTincTest("pxf.features.orc.read.null_in_string.runTest");
+    }
+
     private void prepareReadableExternalTable(String name, String[] fields, 
String path) throws Exception {
         prepareReadableExternalTable(name, fields, path, false);
     }
diff --git 
a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/__init__.py 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git 
a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans
 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans
new file mode 100644
index 00000000..bbfd641a
--- /dev/null
+++ 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans
@@ -0,0 +1,13 @@
+-- start_ignore
+-- end_ignore
+-- @description query01 for reading strings contain NUL-byte from ORC files
+SELECT * FROM pxf_orc_null_in_string ORDER BY id;
+ id |               context               |     value
+----+-------------------------------------+---------------
+  1 | simple string                       | hello
+  2 | simple string with space            | hello world
+  3 | simple string with double quote     | hello "world"
+  4 | NUL-byte in middle of string        | hello
+  5 | NUL-byte at the beginning of string |
+  6 | NUL-byte at the end of string       | hello world
+(6 rows)
\ No newline at end of file
diff --git 
a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py
new file mode 100644
index 00000000..916deefe
--- /dev/null
+++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py
@@ -0,0 +1,11 @@
+from mpp.models import SQLConcurrencyTestCase
+
+class OrcNullInString(SQLConcurrencyTestCase):
+    """
+    @db_name pxfautomation
+    @concurrency 1
+    @gpdiff True
+    """
+    sql_dir = 'sql'
+    ans_dir = 'expected'
+    out_dir = 'output'
diff --git 
a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql
new file mode 100644
index 00000000..52e65cb3
--- /dev/null
+++ 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql
@@ -0,0 +1,3 @@
+-- @description query01 for reading strings contain NUL-byte from ORC files
+
+SELECT * FROM pxf_orc_null_in_string ORDER BY id;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to