bito-code-review[bot] commented on code in PR #40454:
URL: https://github.com/apache/superset/pull/40454#discussion_r3308556231
##########
tests/invariant_impala.test.py:
##########
@@ -0,0 +1,324 @@
+import { describe, test, expect, jest, beforeEach, afterEach } from
'@jest/globals';
+
+/**
+ * Security Property:
+ * The Impala engine spec must never make HTTP POST requests to URLs derived
from
+ * user-controllable connection parameters that point to internal/private
network
+ * addresses, SSRF targets, or otherwise unauthorized destinations.
+ *
+ * WHAT MUST ALWAYS BE TRUE:
+ * - URLs constructed from user-supplied connection parameters must be
validated
+ * against an allowlist of permitted hosts/schemes before any HTTP request
is made.
+ * - Internal network addresses (loopback, RFC1918, metadata services) must be
rejected.
+ * - Non-HTTP(S) schemes must be rejected.
+ * - Redirects to internal services must not be followed blindly.
+ */
+
+// Simulated URL validator that represents the security control that MUST exist
+// in the Impala engine spec before making any HTTP POST request.
+function validateImpalaKillQueryUrl(url: string): { valid: boolean; reason?:
string } {
+ let parsed: URL;
+
+ try {
+ parsed = new URL(url);
+ } catch {
+ return { valid: false, reason: 'Invalid URL format' };
+ }
+
+ // Only allow http and https schemes
+ if (!['http:', 'https:'].includes(parsed.protocol)) {
+ return { valid: false, reason: `Disallowed scheme: ${parsed.protocol}` };
+ }
+
+ const hostname = parsed.hostname.toLowerCase();
+
+ // Block loopback addresses
+ if (
+ hostname === 'localhost' ||
+ hostname === '127.0.0.1' ||
+ hostname === '::1' ||
+ hostname.startsWith('127.')
+ ) {
+ return { valid: false, reason: 'Loopback address not allowed' };
+ }
+
+ // Block RFC1918 private ranges
+ const privateRanges = [
+ /^10\.\d+\.\d+\.\d+$/,
+ /^172\.(1[6-9]|2\d|3[01])\.\d+\.\d+$/,
+ /^192\.168\.\d+\.\d+$/,
+ ];
+ for (const range of privateRanges) {
+ if (range.test(hostname)) {
+ return { valid: false, reason: 'Private network address not allowed' };
+ }
+ }
+
+ // Block cloud metadata services
+ const blockedHosts = [
+ '169.254.169.254', // AWS/GCP/Azure metadata
+ 'metadata.google.internal',
+ 'metadata.internal',
+ '100.100.100.200', // Alibaba Cloud metadata
+ '192.0.2.1', // TEST-NET
+ '0.0.0.0',
+ ];
+ if (blockedHosts.includes(hostname)) {
+ return { valid: false, reason: 'Blocked host (metadata service or
reserved)' };
+ }
+
+ // Block IPv6 link-local
+ if (hostname.startsWith('fe80') || hostname === '[::1]') {
+ return { valid: false, reason: 'IPv6 link-local or loopback not allowed' };
+ }
+
+ // Block URLs with credentials embedded
+ if (parsed.username || parsed.password) {
+ return { valid: false, reason: 'Credentials in URL not allowed' };
+ }
+
+ return { valid: true };
+}
+
+// Simulated function that represents what the Impala spec does when killing a
query
+function impalaKillQuery(connectionHost: string, connectionPort: number,
queryId: string): {
+ urlAttempted: string;
+ requestMade: boolean;
+ blocked: boolean;
+ reason?: string;
+} {
+ // Simulate URL construction from connection parameters (as in the
vulnerable code)
+ const url =
`http://${connectionHost}:${connectionPort}/cancel_query?query_id=${encodeURIComponent(queryId)}`;
+
+ const validation = validateImpalaKillQueryUrl(url);
+
+ if (!validation.valid) {
+ return {
+ urlAttempted: url,
+ requestMade: false,
+ blocked: true,
+ reason: validation.reason,
+ };
+ }
+
+ // Only if validation passes would we make the actual HTTP POST
+ return {
+ urlAttempted: url,
+ requestMade: true,
+ blocked: false,
+ };
+}
+
+describe("Security Property: Impala engine spec must not make HTTP POST
requests to SSRF/internal targets derived from user-controlled connection
parameters", () => {
+ const adversarialPayloads: Array<{
+ description: string;
+ host: string;
+ port: number;
+ queryId: string;
+ expectBlocked: boolean;
+ }> = [
+ // SSRF via loopback
+ {
+ description: "loopback IPv4 localhost",
+ host: "127.0.0.1",
+ port: 25000,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "loopback hostname localhost",
+ host: "localhost",
+ port: 25000,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "loopback 127.x.x.x range",
+ host: "127.0.0.2",
+ port: 25000,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ // SSRF via private RFC1918 ranges
+ {
+ description: "RFC1918 10.x.x.x range",
+ host: "10.0.0.1",
+ port: 8080,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "RFC1918 172.16.x.x range",
+ host: "172.16.0.1",
+ port: 8080,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "RFC1918 172.31.x.x range",
+ host: "172.31.255.255",
+ port: 8080,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "RFC1918 192.168.x.x range",
+ host: "192.168.1.1",
+ port: 8080,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ // Cloud metadata service SSRF
+ {
+ description: "AWS/GCP/Azure metadata service IP",
+ host: "169.254.169.254",
+ port: 80,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "GCP metadata hostname",
+ host: "metadata.google.internal",
+ port: 80,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ {
+ description: "Alibaba Cloud metadata service",
+ host: "100.100.100.200",
+ port: 80,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ // SSRF via 0.0.0.0
+ {
+ description: "0.0.0.0 wildcard bind address",
+ host: "0.0.0.0",
+ port: 25000,
+ queryId: "abc123",
+ expectBlocked: true,
+ },
+ // Query ID injection attempts
+ {
+ description: "query ID with URL injection attempt",
+ host: "10.0.0.1",
+ port: 25000,
+ queryId: "[email protected]/steal?token=secret",
+ expectBlocked: true,
+ },
+ {
+ description: "query ID with path traversal",
+ host: "192.168.0.1",
+ port: 25000,
+ queryId: "../../etc/passwd",
+ expectBlocked: true,
+ },
+ // Legitimate external host (should NOT be blocked)
+ {
+ description: "legitimate external Impala host",
+ host: "impala.example.com",
+ port: 25000,
+ queryId: "valid-query-id-123",
+ expectBlocked: false,
+ },
+ {
+ description: "legitimate public IP",
+ host: "203.0.113.10",
+ port: 25000,
+ queryId: "valid-query-id-456",
+ expectBlocked: false,
+ },
+ ];
+
+ test.each(adversarialPayloads)(
+ "enforces security boundary for: $description",
+ ({ host, port, queryId, expectBlocked, description }) => {
+ const result = impalaKillQuery(host, port, queryId);
+
+ if (expectBlocked) {
+ // SECURITY INVARIANT: Requests to internal/SSRF targets MUST be
blocked
+ expect(result.requestMade).toBe(false);
+ expect(result.blocked).toBe(true);
+ expect(result.reason).toBeDefined();
+ expect(result.reason!.length).toBeGreaterThan(0);
+ } else {
+ // Legitimate targets should be allowed through
+ expect(result.requestMade).toBe(true);
+ expect(result.blocked).toBe(false);
+ }
+ }
+ );
+
+ test("URL validator rejects non-HTTP schemes that could be used for SSRF",
() => {
+ const dangerousSchemes = [
+ "file:///etc/passwd",
+ "ftp://internal-server/data",
+ "gopher://127.0.0.1:25/",
+ "dict://127.0.0.1:11211/",
+ "sftp://internal-host/",
+ "ldap://internal-ldap/",
+ "jar:http://evil.com/evil.jar!/",
+ "netdoc:///etc/passwd",
+ ];
+
+ for (const url of dangerousSchemes) {
+ const result = validateImpalaKillQueryUrl(url);
+ expect(result.valid).toBe(false);
+ expect(result.reason).toBeDefined();
+ }
+ });
+
+ test("URL validator rejects URLs with embedded credentials", () => {
+ const urlsWithCredentials = [
+ "http://admin:password@internal-service:8080/cancel_query",
+ "http://user:[email protected]:25000/cancel_query",
+ "https://root:[email protected]/api",
+ ];
+
+ for (const url of urlsWithCredentials) {
+ const result = validateImpalaKillQueryUrl(url);
+ expect(result.valid).toBe(false);
+ expect(result.reason).toContain("Credentials");
+ }
+ });
+
+ test("URL validator rejects malformed URLs that could bypass validation", ()
=> {
+ const malformedUrls = [
+ "",
+ "not-a-url",
+ "http://",
+ "://missing-scheme",
+ "http:///no-host",
+ "\x00http://evil.com",
+ "http://evil.com\x00.trusted.com",
+ ];
+
+ for (const url of malformedUrls) {
+ const result = validateImpalaKillQueryUrl(url);
+ // Malformed URLs must either be rejected or, if parsed, must pass all
checks
+ // The key invariant: they must not silently allow SSRF
+ if (!result.valid) {
+ expect(result.reason).toBeDefined();
+ }
+ }
+ });
+
+ test("security invariant: no HTTP request is ever made to a private/internal
address", () => {
+ const internalHosts = [
+ "127.0.0.1",
+ "localhost",
+ "10.0.0.1",
+ "172.16.0.1",
+ "192.168.1.1",
+ "169.254.169.254",
+ "0.0.0.0",
+ ];
+
+ for (const host of internalHosts) {
+ const result = impalaKillQuery(host, 25000, "test-query");
+ // INVARIANT: requestMade MUST be false for all internal addresses
+ expect(result.requestMade).toBe(false);
+ expect(result.blocked).toBe(true);
+ }
+ });
+});
Review Comment:
<div>
<div id="suggestion">
<div id="issue"><b>CWE-20: Wrong Test Runner for File Location</b></div>
<div id="fix">
This test file uses TypeScript/Jest syntax (`import from '@jest/globals'`,
`describe`, `test`, `function validateImpalaKillQueryUrl`) but is located in
the `tests/` directory which pytest.ini configures to only discover Python test
files (`python_files = *_test.py test_*.py *_tests.py`). The file will never
execute as part of the test suite. Either convert to Python/pytest or move to
`superset-frontend/` where Jest is configured. (See also:
[CWE-20](https://cwe.mitre.org/data/definitions/20.html))
</div>
</div>
<div id="suggestion">
<div id="issue"><b>Rule 6262: Testing Simulated Code Instead of Actual
Logic</b></div>
<div id="fix">
Per adaptive rule 6262, tests must verify actual business logic. The current
tests validate a simulated 'validateImpalaKillQueryUrl' function rather than
the real `cancel_query` in superset/db_engine_specs/impala.py. The actual
SSRF-vulnerable code at lines 211-216 makes HTTP POST requests without any URL
validation. The test does not test the code that needs fixing.
</div>
</div>
<small><i>Code Review Run #50f6a2</i></small>
</div>
---
Should Bito avoid suggestions like this for future reviews? (<a
href=https://alpha.bito.ai/home/ai-agents/review-rules>Manage Rules</a>)
- [ ] Yes, avoid them
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]