This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git
The following commit(s) were added to refs/heads/branch-1.6 by this push:
new 5ca422c ORC-1034: Fix the indexOf algorithm in `FileDump` (#943)
5ca422c is described below
commit 5ca422cf727d91add67bb2645109db0da0f6d001
Author: Guiyanakaung <[email protected]>
AuthorDate: Thu Oct 21 04:21:20 2021 +0800
ORC-1034: Fix the indexOf algorithm in `FileDump` (#943)
The previous matching algorithm is wrong because `i` does not backtrack after a
partial match fails in the middle. As a simple example, with data = OOORC,
pattern = ORC, index = 1, the old algorithm returns -1 instead of 2.
This PR fixes the indexOf algorithm.
indexOf is used to find the ORC file ending identifier when recovering a file,
so it is important to ensure that the method is correct.
Added a unit test.
(cherry picked from commit 65ce439fc7cce84f12657e73659165d20a306bd9)
Signed-off-by: Dongjoon Hyun <[email protected]>
(cherry picked from commit 118d110aefbb13f76cb8adeb8b44c9a9f871eced)
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../src/java/org/apache/orc/tools/FileDump.java | 21 +++++++++------------
.../src/test/org/apache/orc/tools/TestFileDump.java | 8 ++++++++
2 files changed, 17 insertions(+), 12 deletions(-)
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java
b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index 129c8c0..823ea18 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -631,25 +631,22 @@ public final class FileDump {
}
// search for byte pattern in another byte array
- private static int indexOf(final byte[] data, final byte[] pattern, final
int index) {
+ public static int indexOf(final byte[] data, final byte[] pattern, final int
index) {
if (data == null || data.length == 0 || pattern == null || pattern.length
== 0 ||
index > data.length || index < 0) {
return -1;
}
- int j = 0;
- for (int i = index; i < data.length; i++) {
- if (pattern[j] == data[i]) {
- j++;
- } else {
- j = 0;
- }
-
- if (j == pattern.length) {
- return i - pattern.length + 1;
+ for (int i = index; i < data.length - pattern.length + 1; i++) {
+ boolean found = true;
+ for (int j = 0; j < pattern.length; j++) {
+ if (data[i + j] != pattern[j]) {
+ found = false;
+ break;
+ }
}
+ if (found) return i;
}
-
return -1;
}
diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
index 9cedd31..cadb8bc 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
@@ -698,4 +698,12 @@ public class TestFileDump {
assumeTrue(!System.getProperty("os.name").startsWith("Windows"));
TestFileDump.checkOutput(outputFilename, workDir + File.separator +
outputFilename);
}
+
+ @Test
+ public void testIndexOf() {
+ byte[] bytes = ("OO" + OrcFile.MAGIC).getBytes(StandardCharsets.UTF_8);
+ byte[] pattern = OrcFile.MAGIC.getBytes(StandardCharsets.UTF_8);
+
+ assertEquals(2, FileDump.indexOf(bytes, pattern, 1));
+ }
}