pjfanning commented on code in PR #4949:
URL: https://github.com/apache/zeppelin/pull/4949#discussion_r2202779428


##########
jdbc/src/main/java/org/apache/zeppelin/jdbc/JDBCInterpreter.java:
##########
@@ -589,17 +597,123 @@ public Connection getConnection(InterpreterContext 
context)
   }
 
   private void validateConnectionUrl(String url) {
-    String decodedUrl;
-    decodedUrl = URLDecoder.decode(url, StandardCharsets.UTF_8);
+    final String decodedUrl = urlDecode(url, url, 0);
+    final Map<String, String> params = parseUrlParameters(decodedUrl);
+
+    if (containsKeyIgnoreCase(params, ALLOW_LOAD_LOCAL) ||
+            containsKeyIgnoreCase(params, ALLOW_LOAD_LOCAL_IN_FILE_NAME) ||
+            containsKeyIgnoreCase(params, ALLOW_LOCAL_IN_FILE_NAME) ||
+            containsKeyIgnoreCase(params, ALLOW_URL_IN_LOCAL_IN_FILE_NAME) ||
+            containsKeyIgnoreCase(params, ALLOW_LOAD_LOCAL_IN_FILE_IN_PATH) ||
+            containsKeyIgnoreCase(params, AUTO_DESERIALIZE) ||
+            containsKeyIgnoreCase(params, SOCKET_FACTORY)) {
+      throw new IllegalArgumentException("Connection URL contains sensitive 
configuration");
+    }
 
-    if (containsIgnoreCase(decodedUrl, ALLOW_LOAD_LOCAL_IN_FILE_NAME) ||
-            containsIgnoreCase(decodedUrl, AUTO_DESERIALIZE) ||
-            containsIgnoreCase(decodedUrl, ALLOW_LOCAL_IN_FILE_NAME) ||
-            containsIgnoreCase(decodedUrl, ALLOW_URL_IN_LOCAL_IN_FILE_NAME)) {
+    // the INIT parameter name is a bit generic so we check it only for H2
+    if (containsIgnoreCase(decodedUrl, "jdbc:h2") && 
containsKeyIgnoreCase(params, INIT)) {
       throw new IllegalArgumentException("Connection URL contains sensitive 
configuration");
     }
   }
 
+  /**
+   * Decode the URL encoded string recursively until no more decoding is 
needed.
+   * This is to handle cases where the URL might be double-encoded.
+   *
+   * @param url the original URL (for logging purposes)
+   * @param encoded the URL encoded string
+   * @param recurseCount the current recursion depth
+   * @return the decoded string
+   * @throws IllegalArgumentException if the recursion depth exceeds 100
+   */
+  private static String urlDecode(final String url,
+                                  final String encoded,
+                                  final int recurseCount) {
+    if (recurseCount > 10) {
+      throw new IllegalArgumentException("illegal URL encoding detected: " + 
url);
+    }
+    final String decoded = URLDecoder.decode(encoded, StandardCharsets.UTF_8);
+    if (decoded.equals(encoded)) {
+      return decoded; // No more decoding needed or max recursion reached
+    }
+    return urlDecode(url, decoded, recurseCount + 1);
+  }
+
+  private static Map<String, String> parseUrlParameters(final String url) {
+    final Map<String, String> parameters = new HashMap<>();
+
+    // MySQL supports parentheses in the URL
+    // 
https://dev.mysql.com/doc/connectors/en/connector-j-reference-jdbc-url-format.html
+    // eg jdbc:mysql://(host=myhost,port=1111,allowLoadLocalInfile=true)/db
+    int parensIndex = extractFromParens(url, 0, parameters);
+    while (parensIndex > 0) {
+      parensIndex = extractFromParens(url, parensIndex, parameters);
+    }
+
+    // Split the URL into the base part and the parameters part
+    String[] parts = url.split("[?&;]");

Review Comment:
   the cost of this regex will be tiny compared to the I/O cost of using JDBC 
commands so I'm not very interested in optimising every line of code here



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: reviews-unsubscr...@zeppelin.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to