This is an automated email from the ASF dual-hosted git repository.
gnodet pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/camel.git
The following commit(s) were added to refs/heads/main by this push:
new 6efd31852ab9 chore: improve PomSanitizer detection and robustness
6efd31852ab9 is described below
commit 6efd31852ab964b30ddf8cac71e8c59642eebba5
Author: Guillaume Nodet <[email protected]>
AuthorDate: Wed May 20 08:50:54 2026 +0200
chore: improve PomSanitizer detection and robustness
Extend PomSanitizer in camel-jbang-mcp to close detection gaps:
- Handle CDATA-wrapped values that previously bypassed detection
- Recognize Camel property placeholders ({{...}}) alongside Maven ${...}
- Detect URL-embedded credentials (://user:password@host) in any element
- Add connection-string/connectionstring/connection_string keywords
- Merge detection and masking into a single regex pass
- 10 new test cases covering all additions
Closes #23332
---
.../dsl/jbang/core/commands/mcp/PomSanitizer.java | 83 +++++++++++--------
.../jbang/core/commands/mcp/PomSanitizerTest.java | 93 ++++++++++++++++++++++
2 files changed, 142 insertions(+), 34 deletions(-)
diff --git
a/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
b/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
index ecb50eddb2d4..710566a99db8 100644
---
a/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
+++
b/dsl/camel-jbang/camel-jbang-mcp/src/main/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizer.java
@@ -22,7 +22,6 @@ import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import java.util.stream.Collectors;
import org.jboss.logging.Logger;
@@ -30,16 +29,18 @@ import org.jboss.logging.Logger;
* Utility to detect and sanitize sensitive data in POM content before
processing.
* <p>
* Scans for common credential patterns (passwords, tokens, API keys, secrets)
in XML element values and masks them.
- * Property placeholders (e.g., {@code ${db.password}}) are preserved since
they reference external values and do not
- * contain actual secrets.
+ * Also detects credentials embedded in URLs ({@code ://user:password@host}).
Property placeholders (e.g.,
+ * {@code ${db.password}}, {@code {{vault:password}}}) are preserved since
they reference external values and do not
+ * contain actual secrets. CDATA-wrapped values are inspected and masked when
appropriate.
* <p>
* <b>Limitations:</b> Detection is tag-name-based using keyword matching.
This means:
* <ul>
* <li><b>False positives</b> — non-secret values in elements whose names
happen to contain a keyword (e.g.,
* {@code <password-policy>strict</password-policy>},
* {@code <token-refresh-interval>300</token-refresh-interval>}).</li>
- * <li><b>False negatives</b> — actual secrets in elements with non-obvious
names (e.g., credentials embedded in JDBC
- * URLs, or elements named {@code <my.credential>} where the singular form is
not in the keyword list).</li>
+ * <li><b>False negatives</b> — actual secrets in elements with non-obvious
names (e.g., elements named
+ * {@code <my.credential>} where the singular form is not in the keyword
list). URL credential detection is limited to
+ * the {@code ://user:password@host} pattern.</li>
* </ul>
* This heuristic is a best-effort safety net, not a guarantee. Users should
still avoid passing sensitive data.
*/
@@ -49,17 +50,25 @@ final class PomSanitizer {
private static final String SENSITIVE_KEYWORDS
=
"password|passwd|token|apikey|api-key|api_key|secret|secretkey|secret-key|secret_key"
- +
"|accesskey|access-key|access_key|passphrase|privatekey|private-key|private_key|credentials";
+ +
"|accesskey|access-key|access_key|passphrase|privatekey|private-key|private_key|credentials"
+ + "|connection-string|connectionstring|connection_string";
/**
* Pattern matching XML elements whose tag names contain sensitive
keywords. Captures: group(1) = element name,
- * group(2) = element value.
+ * group(2) = full content between tags (including whitespace and optional
CDATA wrapper).
*/
private static final Pattern SENSITIVE_ELEMENT_PATTERN = Pattern.compile(
"<([a-zA-Z0-9_.:-]*(?:" + SENSITIVE_KEYWORDS +
")[a-zA-Z0-9_.:-]*)>"
- +
"\\s*([^<]+?)\\s*"
+ +
"(\\s*(?:<!\\[CDATA\\[.*?\\]\\]>|[^<]+?)\\s*)"
+
"</\\1>",
- Pattern.CASE_INSENSITIVE);
+ Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
+
+ /**
+ * Pattern matching URL-embedded credentials ({@code
://user:password@host}). Captures: group(1) = scheme through
+ * username ({@code ://user}), group(2) = password.
+ */
+ private static final Pattern URL_CREDENTIAL_PATTERN = Pattern.compile(
+ "(://[^/@\\s:]+):([^/@\\s]+)@");
private PomSanitizer() {
}
@@ -70,48 +79,41 @@ final class PomSanitizer {
* @return list of element names that contain sensitive values
*/
static List<String> detectSensitiveContent(String pomContent) {
- Set<String> findings = new LinkedHashSet<>();
-
- Matcher matcher = SENSITIVE_ELEMENT_PATTERN.matcher(pomContent);
- while (matcher.find()) {
- String value = matcher.group(2).trim();
- // Property placeholders like ${my.password} are not actual secrets
- if (!value.startsWith("${")) {
- findings.add(matcher.group(1));
- }
- }
-
- return new ArrayList<>(findings);
+ return sanitize(pomContent).detectedPatterns();
}
/**
- * Sanitize POM content by masking sensitive element values.
+ * Sanitize POM content by masking sensitive element values and
URL-embedded credentials.
* <p>
- * Property placeholders (e.g., {@code ${db.password}}) are preserved
since they do not contain actual secret
- * values.
+ * Property placeholders (e.g., {@code ${db.password}}, {@code
{{vault:password}}}) are preserved since they do not
+ * contain actual secret values. CDATA-wrapped values are inspected and
masked when they contain plain-text secrets.
*
* @return sanitization result with the processed POM content and detected
patterns
*/
static SanitizationResult sanitize(String pomContent) {
- List<String> detected = detectSensitiveContent(pomContent);
-
- String sanitized = pomContent;
+ Set<String> detected = new LinkedHashSet<>();
- // Mask sensitive element values (preserve property placeholders)
- sanitized = SENSITIVE_ELEMENT_PATTERN.matcher(sanitized).replaceAll(mr
-> {
- String value = mr.group(2).trim();
- if (value.startsWith("${")) {
+ String sanitized =
SENSITIVE_ELEMENT_PATTERN.matcher(pomContent).replaceAll(mr -> {
+ String elementName = mr.group(1);
+ String value = extractValue(mr.group(2));
+ if (isPlaceholder(value)) {
return Matcher.quoteReplacement(mr.group());
}
+ detected.add(elementName);
return Matcher.quoteReplacement(
- "<" + mr.group(1) + ">***MASKED***</" + mr.group(1) + ">");
+ "<" + elementName + ">***MASKED***</" + elementName + ">");
+ });
+
+ sanitized = URL_CREDENTIAL_PATTERN.matcher(sanitized).replaceAll(mr ->
{
+ detected.add("(URL credential)");
+ return Matcher.quoteReplacement(mr.group(1) + ":***MASKED***@");
});
if (!detected.isEmpty()) {
LOG.warnf("Sensitive data detected in pomContent: %s. Content was sanitized before processing.", detected);
}
- return new SanitizationResult(sanitized, detected);
+ return new SanitizationResult(sanitized, new ArrayList<>(detected));
}
/**
@@ -129,11 +131,24 @@ final class PomSanitizer {
List<String> warnings = new ArrayList<>();
if (!sr.detectedPatterns().isEmpty()) {
warnings.add("Sensitive data detected and masked: "
- +
sr.detectedPatterns().stream().collect(Collectors.joining(", ")));
+ + String.join(", ", sr.detectedPatterns()));
}
return new ProcessedPom(sr.pomContent(), warnings);
}
+ private static String extractValue(String content) {
+ String trimmed = content.trim();
+ if (trimmed.startsWith("<![CDATA[") && trimmed.endsWith("]]>")) {
+ return trimmed.substring(9, trimmed.length() - 3).trim();
+ }
+ return trimmed;
+ }
+
+ private static boolean isPlaceholder(String value) {
+ return value.startsWith("${")
+ || (value.startsWith("{{") && value.endsWith("}}"));
+ }
+
record SanitizationResult(
String pomContent,
List<String> detectedPatterns) {
diff --git
a/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
b/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
index d77e5a3d8e05..3f155fe9b732 100644
---
a/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
+++
b/dsl/camel-jbang/camel-jbang-mcp/src/test/java/org/apache/camel/dsl/jbang/core/commands/mcp/PomSanitizerTest.java
@@ -215,6 +215,99 @@ class PomSanitizerTest {
assertThat(findings).anyMatch(f -> f.contains("passphrase"));
}
+ @Test
+ void detectsConnectionStringElement() {
+ String pom
+ =
"<project><properties><connectionString>Server=myserver;Password=secret123</connectionString></properties></project>";
+ List<String> findings = PomSanitizer.detectSensitiveContent(pom);
+ assertThat(findings).anyMatch(f -> f.contains("connectionString"));
+ }
+
+ // ---- CDATA tests ----
+
+ @Test
+ void detectsCdataWrappedSecrets() {
+ String pom =
"<project><properties><db.password><![CDATA[superSecret123]]></db.password></properties></project>";
+ List<String> findings = PomSanitizer.detectSensitiveContent(pom);
+ assertThat(findings).anyMatch(f -> f.contains("password"));
+ }
+
+ @Test
+ void masksCdataWrappedSecrets() {
+ String pom =
"<project><properties><db.password><![CDATA[superSecret123]]></db.password></properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+ assertThat(result.pomContent()).doesNotContain("superSecret123");
+
assertThat(result.pomContent()).contains("<db.password>***MASKED***</db.password>");
+ }
+
+ @Test
+ void preservesPlaceholderInsideCdata() {
+ String pom =
"<project><properties><db.password><![CDATA[${env.DB_PASSWORD}]]></db.password></properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+ assertThat(result.pomContent()).contains("${env.DB_PASSWORD}");
+ assertThat(result.detectedPatterns()).isEmpty();
+ }
+
+ // ---- Camel property placeholder tests ----
+
+ @Test
+ void ignoresCamelPropertyPlaceholders() {
+ String pom = "<project><properties>"
+ + "<db.password>{{vault:db/password}}</db.password>"
+ + "<api.token>{{my.token}}</api.token>"
+ + "</properties></project>";
+ List<String> findings = PomSanitizer.detectSensitiveContent(pom);
+ assertThat(findings).isEmpty();
+ }
+
+ @Test
+ void preservesCamelPropertyPlaceholders() {
+ String pom =
"<project><properties><db.password>{{vault:db/password}}</db.password></properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+ assertThat(result.pomContent()).contains("{{vault:db/password}}");
+ assertThat(result.detectedPatterns()).isEmpty();
+ }
+
+ @Test
+ void masksPartialCamelPlaceholder() {
+ String pom =
"<project><properties><db.password>{{notClosed</db.password></properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+ assertThat(result.pomContent()).contains("***MASKED***");
+ }
+
+ // ---- URL credential tests ----
+
+ @Test
+ void detectsUrlEmbeddedCredentials() {
+ String pom = "<project><properties>"
+ +
"<db.url>jdbc:mysql://admin:s3cret@db.example.com:3306/mydb</db.url>"
+ + "</properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+ assertThat(result.pomContent()).doesNotContain("s3cret");
+ assertThat(result.pomContent()).contains("://admin:***MASKED***@");
+ assertThat(result.detectedPatterns()).anyMatch(f -> f.contains("URL credential"));
+ }
+
+ @Test
+ void elementPatternTakesPrecedenceOverUrlPattern() {
+ String pom = "<project><properties>"
+ +
"<db.password>jdbc:mysql://admin:s3cret@host/db</db.password>"
+ + "</properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+
assertThat(result.pomContent()).contains("<db.password>***MASKED***</db.password>");
+ assertThat(result.pomContent()).doesNotContain("s3cret");
+ }
+
+ @Test
+ void noFalsePositiveOnPortNumbers() {
+ String pom = "<project><properties>"
+ + "<db.url>http://localhost:8080/api</db.url>"
+ + "</properties></project>";
+ PomSanitizer.SanitizationResult result = PomSanitizer.sanitize(pom);
+ assertThat(result.pomContent()).contains("http://localhost:8080/api");
+ assertThat(result.detectedPatterns()).isEmpty();
+ }
+
// ---- Process helper tests ----
@Test