This is an automated email from the ASF dual-hosted git repository. weiz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
     new dbe2a323351 HIVE-26685: Improve path name escaping/unescaping (#3721)
dbe2a323351 is described below

commit dbe2a323351b7a0196fc7834023b9bc28cd3244e
Author: James Petty <pettyjam...@users.noreply.github.com>
AuthorDate: Mon Dec 5 13:04:54 2022 -0500

    HIVE-26685: Improve path name escaping/unescaping (#3721)
---
 .../org/apache/hadoop/hive/common/FileUtils.java   | 38 +++++++++++++++++++---
 .../apache/hadoop/hive/common/TestFileUtils.java   |  8 +++++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 37ff2c04dc2..17169d6e184 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -258,6 +258,11 @@ public final class FileUtils {
     }
   }
 
+  /**
+   * Hex encoding characters indexed by integer value
+   */
+  private static final char[] HEX_UPPER_CHARS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
   static boolean needsEscaping(char c) {
     return c < charToEscape.size() && charToEscape.get(c);
   }
@@ -287,12 +292,28 @@ public final class FileUtils {
       }
     }
 
-    StringBuilder sb = new StringBuilder();
+    // Fast-path detection, no escaping and therefore no copying necessary
+    int firstEscapeIndex = -1;
     for (int i = 0; i < path.length(); i++) {
+      if (needsEscaping(path.charAt(i))) {
+        firstEscapeIndex = i;
+        break;
+      }
+    }
+    if (firstEscapeIndex == -1) {
+      return path;
+    }
+
+    // slow path, escape beyond the first required escape character into a new string
+    StringBuilder sb = new StringBuilder();
+    if (firstEscapeIndex > 0) {
+      sb.append(path, 0, firstEscapeIndex);
+    }
+
+    for (int i = firstEscapeIndex; i < path.length(); i++) {
       char c = path.charAt(i);
       if (needsEscaping(c)) {
-        sb.append('%');
-        sb.append(String.format("%1$02X", (int) c));
+        sb.append('%').append(HEX_UPPER_CHARS[(0xF0 & c) >>> 4]).append(HEX_UPPER_CHARS[(0x0F & c)]);
       } else {
         sb.append(c);
       }
@@ -301,8 +322,17 @@ public final class FileUtils {
   }
 
   public static String unescapePathName(String path) {
+    int firstUnescapeIndex = path.indexOf('%');
+    if (firstUnescapeIndex == -1) {
+      return path;
+    }
+
     StringBuilder sb = new StringBuilder();
-    for (int i = 0; i < path.length(); i++) {
+    if (firstUnescapeIndex > 0) {
+      sb.append(path, 0, firstUnescapeIndex);
+    }
+
+    for (int i = firstUnescapeIndex; i < path.length(); i++) {
       char c = path.charAt(i);
       if (c == '%' && i + 2 < path.length()) {
         int code = -1;
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
index 2721deb7a03..9ffb52ba5f9 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -303,6 +303,14 @@ public class TestFileUtils {
     assertEquals(1, assertExpectedFilePaths(itr, Collections.singletonList("mock:/tmp/dummy")));
   }
 
+  @Test
+  public void testPathEscapeChars() {
+    StringBuilder sb = new StringBuilder();
+    FileUtils.charToEscape.stream().forEach(integer -> sb.append((char) integer));
+    String path = sb.toString();
+    assertEquals(path, FileUtils.unescapePathName(FileUtils.escapePathName(path)));
+  }
+
   private int assertExpectedFilePaths(RemoteIterator<? extends FileStatus> lfs, List<String> expectedPaths)
       throws Exception {
     int count = 0;