This is an automated email from the ASF dual-hosted git repository.

gnodet pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git


The following commit(s) were added to refs/heads/main by this push:
     new 6efd31852ab9 chore: improve PomSanitizer detection and robustness
6efd31852ab9 is described below

commit 6efd31852ab964b30ddf8cac71e8c59642eebba5
Author: Guillaume Nodet <[email protected]>
AuthorDate: Wed May 20 08:50:54 2026 +0200

    chore: improve PomSanitizer detection and robustness
    
    Extend PomSanitizer in camel-jbang-mcp to close detection gaps:
    - Handle CDATA-wrapped values that previously bypassed detection
    - Recognize Camel property placeholders ({{...}}) alongside Maven ${...}
    - Detect URL-embedded credentials (://user:password@host) in any element
    - Add connection-string/connectionstring/connection_string keywords
    - Merge detection and masking into a single regex pass
    - 12 new test cases covering all additions
    
    Closes #23332
---
 .../dsl/jbang/core/commands/mcp/PomSanitizer.java  | 83 +++++++++++--------
 .../jbang/core/commands/mcp/PomSanitizerTest.java  | 93 ++++++++++++++++++++++
 2 files changed, 142 insertions(+), 34 deletions(-)

diff --git a/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java b/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
index ecb50eddb2d4..710566a99db8 100644
--- a/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
+++ b/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
@@ -22,7 +22,6 @@ import java.util.List;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
-import java.util.stream.Collectors;
 
 import org.jboss.logging.Logger;
 
@@ -30,16 +29,18 @@ import org.jboss.logging.Logger;
  * Utility to detect and sanitize sensitive data in POM content before 
processing.
  * <p>
  * Scans for common credential patterns (passwords, tokens, API keys, secrets) 
in XML element values and masks them.
- * Property placeholders (e.g., {@code ${db.password}}) are preserved since 
they reference external values and do not
- * contain actual secrets.
+ * Also detects credentials embedded in URLs ({@code ://user:password@host}). 
Property placeholders (e.g.,
+ * {@code ${db.password}}, {@code {{vault:password}}}) are preserved since 
they reference external values and do not
+ * contain actual secrets. CDATA-wrapped values are inspected and masked when 
appropriate.
  * <p>
  * <b>Limitations:</b> Detection is tag-name-based using keyword matching. 
This means:
  * <ul>
  * <li><b>False positives</b> — non-secret values in elements whose names 
happen to contain a keyword (e.g.,
  * {@code <password-policy>strict</password-policy>},
  * {@code <token-refresh-interval>300</token-refresh-interval>}).</li>
- * <li><b>False negatives</b> — actual secrets in elements with non-obvious 
names (e.g., credentials embedded in JDBC
- * URLs, or elements named {@code <my.credential>} where the singular form is 
not in the keyword list).</li>
+ * <li><b>False negatives</b> — actual secrets in elements with non-obvious 
names (e.g., elements named
+ * {@code <my.credential>} where the singular form is not in the keyword 
list). URL credential detection is limited to
+ * the {@code ://user:password@host} pattern.</li>
  * </ul>
  * This heuristic is a best-effort safety net, not a guarantee. Users should 
still avoid passing sensitive data.
  */
@@ -49,17 +50,25 @@ final class PomSanitizer {
 
     private static final String SENSITIVE_KEYWORDS
             = 
"password|passwd|token|apikey|api-key|api_key|secret|secretkey|secret-key|secret_key"
-              + 
"|accesskey|access-key|access_key|passphrase|privatekey|private-key|private_key|credentials";
+              + 
"|accesskey|access-key|access_key|passphrase|privatekey|private-key|private_key|credentials"
+              + "|connection-string|connectionstring|connection_string";
 
     /**
      * Pattern matching XML elements whose tag names contain sensitive 
keywords. Captures: group(1) = element name,
-     * group(2) = element value.
+     * group(2) = full content between tags (including whitespace and optional 
CDATA wrapper).
      */
     private static final Pattern SENSITIVE_ELEMENT_PATTERN = Pattern.compile(
             "<([a-zA-Z0-9_.:-]*(?:" + SENSITIVE_KEYWORDS + 
")[a-zA-Z0-9_.:-]*)>"
-                                                                             + 
"\\s*([^<]+?)\\s*"
+                                                                             + 
"(\\s*(?:<!\\[CDATA\\[.*?\\]\\]>|[^<]+?)\\s*)"
                                                                              + 
"</\\1>",
-            Pattern.CASE_INSENSITIVE);
+            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
+
+    /**
+     * Pattern matching URL-embedded credentials ({@code 
://user:password@host}). Captures: group(1) = scheme through
+     * username ({@code ://user}), group(2) = password.
+     */
+    private static final Pattern URL_CREDENTIAL_PATTERN = Pattern.compile(
+            "(://[^/@\\s:]+):([^/@\\s]+)@");
 
     private PomSanitizer() {
     }
@@ -70,48 +79,41 @@ final class PomSanitizer {
      * @return list of element names that contain sensitive values
      */
     static List<String> detectSensitiveContent(String pomContent) {
-        Set<String> findings = new LinkedHashSet<>();
-
-        Matcher matcher = SENSITIVE_ELEMENT_PATTERN.matcher(pomContent);
-        while (matcher.find()) {
-            String value = matcher.group(2).trim();
-            // Property placeholders like ${my.password} are not actual secrets
-            if (!value.startsWith("${")) {
-                findings.add(matcher.group(1));
-            }
-        }
-
-        return new ArrayList<>(findings);
+        return sanitize(pomContent).detectedPatterns();
     }
 
     /**
-     * Sanitize POM content by masking sensitive element values.
+     * Sanitize POM content by masking sensitive element values and 
URL-embedded credentials.
      * <p>
-     * Property placeholders (e.g., {@code ${db.password}}) are preserved 
since they do not contain actual secret
-     * values.
+     * Property placeholders (e.g., {@code ${db.password}}, {@code 
{{vault:password}}}) are preserved since they do not
+     * contain actual secret values. CDATA-wrapped values are inspected and 
masked when they contain plain-text secrets.
      *
      * @return sanitization result with the processed POM content and detected 
patterns
      */
     static SanitizationResult sanitize(String pomContent) {
-        List<String> detected = detectSensitiveContent(pomContent);
-
-        String sanitized = pomContent;
+        Set<String> detected = new LinkedHashSet<>();
 
-        // Mask sensitive element values (preserve property placeholders)
-        sanitized = SENSITIVE_ELEMENT_PATTERN.matcher(sanitized).replaceAll(mr 
-> {
-            String value = mr.group(2).trim();
-            if (value.startsWith("${")) {
+        String sanitized = 
SENSITIVE_ELEMENT_PATTERN.matcher(pomContent).replaceAll(mr -> {
+            String elementName = mr.group(1);
+            String value = extractValue(mr.group(2));
+            if (isPlaceholder(value)) {
                 return Matcher.quoteReplacement(mr.group());
             }
+            detected.add(elementName);
             return Matcher.quoteReplacement(
-                    "<" + mr.group(1) + ">***MASKED***</" + mr.group(1) + ">");
+                    "<" + elementName + ">***MASKED***</" + elementName + ">");
+        });
+
+        sanitized = URL_CREDENTIAL_PATTERN.matcher(sanitized).replaceAll(mr -> {
+            detected.add("(URL credential)");
+            return Matcher.quoteReplacement(mr.group(1) + ":***MASKED***@");
         });
 
         if (!detected.isEmpty()) {
             LOG.warnf("Sensitive data detected in pomContent: %s. Content was sanitized before processing.", detected);
         }
 
-        return new SanitizationResult(sanitized, detected);
+        return new SanitizationResult(sanitized, new ArrayList<>(detected));
     }
 
     /**
@@ -129,11 +131,24 @@ final class PomSanitizer {
         List<String> warnings = new ArrayList<>();
         if (!sr.detectedPatterns().isEmpty()) {
             warnings.add("Sensitive data detected and masked: "
-                         + 
sr.detectedPatterns().stream().collect(Collectors.joining(", ")));
+                         + String.join(", ", sr.detectedPatterns()));
         }
         return new ProcessedPom(sr.pomContent(), warnings);
     }
 
+    private static String extractValue(String content) {
+        String trimmed = content.trim();
+        if (trimmed.startsWith("<![CDATA[") && trimmed.endsWith("]]>")) {
+            return trimmed.substring(9, trimmed.length() - 3).trim();
+        }
+        return trimmed;
+    }
+
+    private static boolean isPlaceholder(String value) {
+        return value.startsWith("${")
+                || (value.startsWith("{{") && value.endsWith("}}"));
+    }
+
     record SanitizationResult(
             String pomContent,
             List<String> detectedPatterns) {
diff --git a/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java b/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
index d77e5a3d8e05..3f155fe9b732 100644
--- a/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
+++ b/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
@@ -215,6 +215,99 @@ class PomSanitizerTest {
         assertThat(findings).anyMatch(f -> f.contains("passphrase"));
     }
 
+    @Test
+    void detectsConnectionStringElement() {
+        String pom
+                = 
"<project><properties><connectionString>Server=myserver;Password=secret123</connectionString></properties></project>";
+        List<String> findings = PomSanitizer.detectSensitiveContent(pom);
+        assertThat(findings).anyMatch(f -> f.contains("connectionString"));
+    }
+
+    // ---- CDATA tests ----
+
+    @Test
+    void detectsCdataWrappedSecrets() {
+        String pom = 
"<project><properties><db.password><![CDATA[superSecret123]]></db.password></properties></project>";
+        List<String> findings = PomSanitizer.detectSensitiveContent(pom);
+        assertThat(findings).anyMatch(f -> f.contains("password"));
+    }
+
+    @Test
+    void masksCdataWrappedSecrets() {
+        String pom = 
"<project><properties><db.password><![CDATA[superSecret123]]></db.password></properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        assertThat(result.pomContent()).doesNotContain("superSecret123");
+        
assertThat(result.pomContent()).contains("<db.password>***MASKED***</db.password>");
+    }
+
+    @Test
+    void preservesPlaceholderInsideCdata() {
+        String pom = 
"<project><properties><db.password><![CDATA[${env.DB_PASSWORD}]]></db.password></properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        assertThat(result.pomContent()).contains("${env.DB_PASSWORD}");
+        assertThat(result.detectedPatterns()).isEmpty();
+    }
+
+    // ---- Camel property placeholder tests ----
+
+    @Test
+    void ignoresCamelPropertyPlaceholders() {
+        String pom = "<project><properties>"
+                     + "<db.password>{{vault:db/password}}</db.password>"
+                     + "<api.token>{{my.token}}</api.token>"
+                     + "</properties></project>";
+        List<String> findings = PomSanitizer.detectSensitiveContent(pom);
+        assertThat(findings).isEmpty();
+    }
+
+    @Test
+    void preservesCamelPropertyPlaceholders() {
+        String pom = 
"<project><properties><db.password>{{vault:db/password}}</db.password></properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        assertThat(result.pomContent()).contains("{{vault:db/password}}");
+        assertThat(result.detectedPatterns()).isEmpty();
+    }
+
+    @Test
+    void masksPartialCamelPlaceholder() {
+        String pom = 
"<project><properties><db.password>{{notClosed</db.password></properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        assertThat(result.pomContent()).contains("***MASKED***");
+    }
+
+    // ---- URL credential tests ----
+
+    @Test
+    void detectsUrlEmbeddedCredentials() {
+        String pom = "<project><properties>"
+                     + 
"<db.url>jdbc:mysql://admin:[email protected]:3306/mydb</db.url>"
+                     + "</properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        assertThat(result.pomContent()).doesNotContain("s3cret");
+        assertThat(result.pomContent()).contains("://admin:***MASKED***@");
+        assertThat(result.detectedPatterns()).anyMatch(f -> f.contains("URL credential"));
+    }
+
+    @Test
+    void elementPatternTakesPrecedenceOverUrlPattern() {
+        String pom = "<project><properties>"
+                     + 
"<db.password>jdbc:mysql://admin:s3cret@host/db</db.password>"
+                     + "</properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        
assertThat(result.pomContent()).contains("<db.password>***MASKED***</db.password>");
+        assertThat(result.pomContent()).doesNotContain("s3cret");
+    }
+
+    @Test
+    void noFalsePositiveOnPortNumbers() {
+        String pom = "<project><properties>"
+                     + "<db.url>http://localhost:8080/api</db.url>"
+                     + "</properties></project>";
+        PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+        assertThat(result.pomContent()).contains("http://localhost:8080/api");
+        assertThat(result.detectedPatterns()).isEmpty();
+    }
+
     // ---- Process helper tests ----
 
     @Test

Reply via email to