Repository: hadoop
Updated Branches:
  refs/heads/branch-2 92ff52418 -> 3d36d4737


HADOOP-11506. Configuration variable expansion regex expensive for long values. 
(Gera Shegalov via gera)

(cherry picked from commit 644548f201743408904dfe24b9f5b515b2c96713)

Conflicts:
        
hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
        
hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java


Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3d36d473
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3d36d473
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3d36d473

Branch: refs/heads/branch-2
Commit: 3d36d4737c160d7dc8829e9dd6b801ef6726c0c0
Parents: 92ff524
Author: Gera Shegalov <g...@apache.org>
Authored: Fri Feb 6 01:08:32 2015 -0800
Committer: Gera Shegalov <g...@apache.org>
Committed: Fri Feb 6 01:44:14 2015 -0800

----------------------------------------------------------------------
 hadoop-common-project/hadoop-common/CHANGES.txt |   3 +
 .../org/apache/hadoop/conf/Configuration.java   | 100 +++++++++++++++++--
 .../apache/hadoop/conf/TestConfiguration.java   |  66 ++++++++++++
 3 files changed, 159 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d36d473/hadoop-common-project/hadoop-common/CHANGES.txt
----------------------------------------------------------------------
diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt 
b/hadoop-common-project/hadoop-common/CHANGES.txt
index 4b0faf4..41efde0 100644
--- a/hadoop-common-project/hadoop-common/CHANGES.txt
+++ b/hadoop-common-project/hadoop-common/CHANGES.txt
@@ -192,6 +192,9 @@ Release 2.7.0 - UNRELEASED
     HADOOP-11188. hadoop-azure: automatically expand page blobs when they 
become
     full. (Eric Hanson via cnauroth)
 
+    HADOOP-11506. Configuration variable expansion regex expensive for long
+    values. (Gera Shegalov via gera)
+
   BUG FIXES
 
     HADOOP-11488. Difference in default connection timeout for S3A FS

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d36d473/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
----------------------------------------------------------------------
diff --git 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 927d4b6..4271c11 100644
--- 
a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ 
b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -844,25 +844,101 @@ public class Configuration implements 
Iterable<Map.Entry<String,String>>,
     resources.add(resource);                      // add to resources
     reloadConfiguration();
   }
-  
-  private static final Pattern VAR_PATTERN =
-      Pattern.compile("\\$\\{[^\\}\\$\u0020]+\\}");
 
   private static final int MAX_SUBST = 20;
 
+  private static final int SUB_START_IDX = 0;
+  private static final int SUB_END_IDX = SUB_START_IDX + 1;
+
+  /**
+   * This is a manual implementation of the following regex
+   * "\\$\\{[^\\}\\$\u0020]+\\}". It can be 15x more efficient than
+   * a regex matcher as demonstrated by HADOOP-11506. This is noticeable with
+   * Hadoop apps building on the assumption Configuration#get is an O(1)
+   * hash table lookup, especially when the eval is a long string.
+   *
+   * @param eval a string that may contain variables requiring expansion.
+   * @return a 2-element int array res such that
+   * eval.substring(res[0], res[1]) is "var" for the left-most occurrence of
+   * ${var} in eval. If no variable is found -1, -1 is returned.
+   */
+  private static int[] findSubVariable(String eval) {
+    int[] result = {-1, -1};
+
+    int matchStart;
+    int leftBrace;
+
+    // scanning for a brace first because it's less frequent than $
+    // that can occur in nested class names
+    //
+    match_loop:
+    for (matchStart = 1, leftBrace = eval.indexOf('{', matchStart);
+         // minimum left brace position (follows '$')
+         leftBrace > 0
+         // right brace of a smallest valid expression "${c}"
+         && leftBrace + "{c".length() < eval.length();
+         leftBrace = eval.indexOf('{', matchStart)) {
+      int matchedLen = 0;
+      if (eval.charAt(leftBrace - 1) == '$') {
+        int subStart = leftBrace + 1; // after '{'
+        for (int i = subStart; i < eval.length(); i++) {
+          switch (eval.charAt(i)) {
+            case '}':
+              if (matchedLen > 0) { // match
+                result[SUB_START_IDX] = subStart;
+                result[SUB_END_IDX] = subStart + matchedLen;
+                break match_loop;
+              }
+              // fall through to skip 1 char
+            case ' ':
+            case '$':
+              matchStart = i + 1;
+              continue match_loop;
+            default:
+              matchedLen++;
+          }
+        }
+        // scanned from "${"  to the end of eval, and no reset via ' ', '$':
+        //    no match!
+        break match_loop;
+      } else {
+        // not a start of a variable
+        //
+        matchStart = leftBrace + 1;
+      }
+    }
+    return result;
+  }
+
+  /**
+   * Attempts to repeatedly expand the value {@code expr} by replacing the
+   * left-most substring of the form "${var}" in the following precedence order
+   * <ol>
+   *   <li>by the value of the Java system property "var" if defined</li>
+   *   <li>by the value of the configuration key "var" if defined</li>
+   * </ol>
+   *
+   * If var is unbounded the current state of expansion "prefix${var}suffix" is
+   * returned.
+   *
+   * @param expr the literal value of a config key
+   * @return null if expr is null, otherwise the value resulting from expanding
+   * expr using the algorithm above.
+   * @throws IllegalArgumentException when more than
+   * {@link Configuration#MAX_SUBST} replacements are required
+   */
   private String substituteVars(String expr) {
     if (expr == null) {
       return null;
     }
-    Matcher match = VAR_PATTERN.matcher("");
     String eval = expr;
-    for(int s=0; s<MAX_SUBST; s++) {
-      match.reset(eval);
-      if (!match.find()) {
+    for (int s = 0; s < MAX_SUBST; s++) {
+      final int[] varBounds = findSubVariable(eval);
+      if (varBounds[SUB_START_IDX] == -1) {
         return eval;
       }
-      String var = match.group();
-      var = var.substring(2, var.length()-1); // remove ${ .. }
+      final String var = eval.substring(varBounds[SUB_START_IDX],
+          varBounds[SUB_END_IDX]);
       String val = null;
       try {
         val = System.getProperty(var);
@@ -875,8 +951,12 @@ public class Configuration implements 
Iterable<Map.Entry<String,String>>,
       if (val == null) {
         return eval; // return literal ${var}: var is unbound
       }
+      final int dollar = varBounds[SUB_START_IDX] - "${".length();
+      final int afterRightBrace = varBounds[SUB_END_IDX] + "}".length();
       // substitute
-      eval = eval.substring(0, match.start())+val+eval.substring(match.end());
+      eval = eval.substring(0, dollar)
+             + val
+             + eval.substring(afterRightBrace);
     }
     throw new IllegalStateException("Variable substitution depth too large: " 
                                     + MAX_SUBST + " " + expr);

http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d36d473/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
----------------------------------------------------------------------
diff --git 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
index 5aa8350..a4020c7 100644
--- 
a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
+++ 
b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/conf/TestConfiguration.java
@@ -49,6 +49,8 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.net.NetUtils;
 import static org.apache.hadoop.util.PlatformName.IBM_JAVA;
+import static org.junit.Assert.fail;
+
 import org.codehaus.jackson.map.ObjectMapper;
 
 public class TestConfiguration extends TestCase {
@@ -1194,6 +1196,70 @@ public class TestConfiguration extends TestCase {
     }
   }
 
+  public void testInvalidSubstitutation() {
+    final Configuration configuration = new Configuration(false);
+
+    // 2-var loops
+    //
+    final String key = "test.random.key";
+    for (String keyExpression : Arrays.asList(
+    "${" + key + "}",
+    "foo${" + key + "}",
+    "foo${" + key + "}bar",
+    "${" + key + "}bar")) {
+      configuration.set(key, keyExpression);
+      checkSubDepthException(configuration, key);
+    }
+
+    //
+    // 3-variable loops
+    //
+
+    final String expVal1 = "${test.var2}";
+    String testVar1 = "test.var1";
+    configuration.set(testVar1, expVal1);
+    configuration.set("test.var2", "${test.var3}");
+    configuration.set("test.var3", "${test.var1}");
+    checkSubDepthException(configuration, testVar1);
+
+    // 3-variable loop with non-empty value prefix/suffix
+    //
+    final String expVal2 = "foo2${test.var2}bar2";
+    configuration.set(testVar1, expVal2);
+    configuration.set("test.var2", "foo3${test.var3}bar3");
+    configuration.set("test.var3", "foo1${test.var1}bar1");
+    checkSubDepthException(configuration, testVar1);
+  }
+
+  private static void checkSubDepthException(Configuration configuration,
+      String key) {
+    try {
+      configuration.get(key);
+      fail("IllegalStateException depth too large not thrown");
+    } catch (IllegalStateException e) {
+      assertTrue("Unexpected exception text: " + e,
+          e.getMessage().contains("substitution depth"));
+    }
+  }
+
+  public void testIncompleteSubbing() {
+    Configuration configuration = new Configuration(false);
+    String key = "test.random.key";
+    for (String keyExpression : Arrays.asList(
+        "{}",
+        "${}",
+        "{" + key,
+        "${" + key,
+        "foo${" + key,
+        "foo${" + key + "bar",
+        "foo{" + key + "}bar",
+        "${" + key + "bar")) {
+      configuration.set(key, keyExpression);
+      String value = configuration.get(key);
+      assertTrue("Unexpected value " + value, value.equals(keyExpression));
+    }
+  }
+
   public void testGetClassByNameOrNull() throws Exception {
    Configuration config = new Configuration();
    Class<?> clazz = config.getClassByNameOrNull("java.lang.Object");

Reply via email to