This is an automated email from the ASF dual-hosted git repository. djwang pushed a commit to branch merge-with-upstream in repository https://gitbox.apache.org/repos/asf/cloudberry-pxf.git
commit 42dddc466f9e10ba355ffbad90ffe70e14e31519 Author: Bradford D. Boyle <[email protected]> AuthorDate: Mon Sep 19 13:27:48 2022 -0700 Relax the requirement that C string length matches Java string length (#870) Commit 94f8ccad added a check that the length of received strings (as determined by strnlen) matched the length of the string as determined by the Java-based PXF service; if they did not, this was treated as a fatal error. Some users have reported that they have ORC/Parquet files with strings that contain ASCII NUL-bytes. These strings would not have a strnlen-calculated length that matches the Java string length. This commit removes the requirement that the lengths be equal and instead logs a debug message when they do not match. Authored-by: Bradford D. Boyle <[email protected]> --- .../greenplum/pxf/automation/features/orc/OrcReadTest.java | 6 ++++++ .../main/pxf/features/orc/read/null_in_string/__init__.py | 0 .../features/orc/read/null_in_string/expected/query01.ans | 13 +++++++++++++ .../main/pxf/features/orc/read/null_in_string/runTest.py | 11 +++++++++++ .../pxf/features/orc/read/null_in_string/sql/query01.sql | 3 +++ 5 files changed, 33 insertions(+) diff --git a/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java b/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java index ec6ef820..480d19db 100644 --- a/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java +++ b/automation/src/test/java/org/greenplum/pxf/automation/features/orc/OrcReadTest.java @@ -175,6 +175,12 @@ public class OrcReadTest extends BaseFeature { runSqlTest("features/orc/read/null_in_string"); } + @Test(groups = {"features", "gpdb", "security", "hcfs"}) + public void orcReadStringsContainingNullByte() throws Exception { + prepareReadableExternalTable("pxf_orc_null_in_string", ORC_NULL_IN_STRING_COLUMNS, hdfsPath + ORC_NULL_IN_STRING); + 
runTincTest("pxf.features.orc.read.null_in_string.runTest"); + } + private void prepareReadableExternalTable(String name, String[] fields, String path) throws Exception { prepareReadableExternalTable(name, fields, path, false); } diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/__init__.py b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans new file mode 100644 index 00000000..bbfd641a --- /dev/null +++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/expected/query01.ans @@ -0,0 +1,13 @@ +-- start_ignore +-- end_ignore +-- @description query01 for reading strings contain NUL-byte from ORC files +SELECT * FROM pxf_orc_null_in_string ORDER BY id; + id | context | value +----+-------------------------------------+--------------- + 1 | simple string | hello + 2 | simple string with space | hello world + 3 | simple string with double quote | hello "world" + 4 | NUL-byte in middle of string | hello + 5 | NUL-byte at the beginning of string | + 6 | NUL-byte at the end of string | hello world +(6 rows) \ No newline at end of file diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py new file mode 100644 index 00000000..916deefe --- /dev/null +++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/runTest.py @@ -0,0 +1,11 @@ +from mpp.models import SQLConcurrencyTestCase + +class OrcNullInString(SQLConcurrencyTestCase): + """ + @db_name pxfautomation + @concurrency 1 + @gpdiff True + """ + sql_dir = 'sql' + ans_dir = 'expected' + out_dir = 'output' diff --git a/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql 
b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql new file mode 100644 index 00000000..52e65cb3 --- /dev/null +++ b/automation/tincrepo/main/pxf/features/orc/read/null_in_string/sql/query01.sql @@ -0,0 +1,3 @@ +-- @description query01 for reading strings contain NUL-byte from ORC files + +SELECT * FROM pxf_orc_null_in_string ORDER BY id; --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
