This is an automated email from the ASF dual-hosted git repository.

weiz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new dbe2a323351 HIVE-26685: Improve path name escaping/unescaping (#3721)
dbe2a323351 is described below

commit dbe2a323351b7a0196fc7834023b9bc28cd3244e
Author: James Petty <pettyjam...@users.noreply.github.com>
AuthorDate: Mon Dec 5 13:04:54 2022 -0500

    HIVE-26685: Improve path name escaping/unescaping (#3721)
---
 .../org/apache/hadoop/hive/common/FileUtils.java   | 38 +++++++++++++++++++---
 .../apache/hadoop/hive/common/TestFileUtils.java   |  8 +++++
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 37ff2c04dc2..17169d6e184 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -258,6 +258,11 @@ public final class FileUtils {
     }
   }
 
+  /**
+   * Hex encoding characters indexed by integer value
+   */
+  private static final char[] HEX_UPPER_CHARS = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
+
   static boolean needsEscaping(char c) {
     return c < charToEscape.size() && charToEscape.get(c);
   }
@@ -287,12 +292,28 @@ public final class FileUtils {
       }
     }
 
-    StringBuilder sb = new StringBuilder();
+    //  Fast-path detection, no escaping and therefore no copying necessary
+    int firstEscapeIndex = -1;
     for (int i = 0; i < path.length(); i++) {
+      if (needsEscaping(path.charAt(i))) {
+        firstEscapeIndex = i;
+        break;
+      }
+    }
+    if (firstEscapeIndex == -1) {
+      return path;
+    }
+
+    // slow path, escape beyond the first required escape character into a new string
+    StringBuilder sb = new StringBuilder();
+    if (firstEscapeIndex > 0) {
+      sb.append(path, 0, firstEscapeIndex);
+    }
+
+    for (int i = firstEscapeIndex; i < path.length(); i++) {
       char c = path.charAt(i);
       if (needsEscaping(c)) {
-        sb.append('%');
-        sb.append(String.format("%1$02X", (int) c));
+        sb.append('%').append(HEX_UPPER_CHARS[(0xF0 & c) >>> 4]).append(HEX_UPPER_CHARS[(0x0F & c)]);
       } else {
         sb.append(c);
       }
@@ -301,8 +322,17 @@ public final class FileUtils {
   }
 
   public static String unescapePathName(String path) {
+    int firstUnescapeIndex = path.indexOf('%');
+    if (firstUnescapeIndex == -1) {
+      return path;
+    }
+
     StringBuilder sb = new StringBuilder();
-    for (int i = 0; i < path.length(); i++) {
+    if (firstUnescapeIndex > 0) {
+      sb.append(path, 0, firstUnescapeIndex);
+    }
+
+    for (int i = firstUnescapeIndex; i < path.length(); i++) {
       char c = path.charAt(i);
       if (c == '%' && i + 2 < path.length()) {
         int code = -1;
diff --git a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
index 2721deb7a03..9ffb52ba5f9 100644
--- a/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
+++ b/common/src/test/org/apache/hadoop/hive/common/TestFileUtils.java
@@ -303,6 +303,14 @@ public class TestFileUtils {
     assertEquals(1, assertExpectedFilePaths(itr, Collections.singletonList("mock:/tmp/dummy")));
   }
 
+  @Test
+  public void testPathEscapeChars() {
+    StringBuilder sb = new StringBuilder();
+    FileUtils.charToEscape.stream().forEach(integer -> sb.append((char) integer));
+    String path = sb.toString();
+    assertEquals(path, FileUtils.unescapePathName(FileUtils.escapePathName(path)));
+  }
+
   private int assertExpectedFilePaths(RemoteIterator<? extends FileStatus> lfs, List<String> expectedPaths)
       throws Exception {
     int count = 0;

Reply via email to