This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/main by this push:
new 66e7823 Use Hyperscan for ignore patterns to avoid backtracking attacks
66e7823 is described below
commit 66e7823b0cee7638e6cb13c1162acc88b75c96cb
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Jan 28 20:30:10 2026 +0000
Use Hyperscan for ignore patterns to avoid backtracking attacks
---
atr/models/__init__.py | 4 +-
atr/models/api.py | 16 +++++++-
atr/models/validation.py | 77 ++++++++++++++++++++++++++++++++++++++
atr/shared/ignores.py | 22 +++++++++++
atr/storage/readers/checks.py | 21 +----------
atr/storage/writers/checks.py | 22 +++++++++++
atr/util.py | 22 +++++++++++
scripts/check_models_imports.py | 9 +++--
tests/unit/test_ignore_patterns.py | 62 ++++++++++++++++++++++++++++++
9 files changed, 229 insertions(+), 26 deletions(-)
diff --git a/atr/models/__init__.py b/atr/models/__init__.py
index a9cb122..52c3cad 100644
--- a/atr/models/__init__.py
+++ b/atr/models/__init__.py
@@ -15,7 +15,7 @@
# specific language governing permissions and limitations
# under the License.
-from . import api, distribution, helpers, results, schema, sql, tabulate
+from . import api, distribution, helpers, results, schema, sql, tabulate, validation
# If we use .__name__, pyright gives a warning
-__all__ = ["api", "distribution", "helpers", "results", "schema", "sql", "tabulate"]
+__all__ = ["api", "distribution", "helpers", "results", "schema", "sql", "tabulate", "validation"]
diff --git a/atr/models/api.py b/atr/models/api.py
index ad9842e..436f002 100644
--- a/atr/models/api.py
+++ b/atr/models/api.py
@@ -21,7 +21,7 @@ from typing import Annotated, Any, Literal, TypeVar
import pydantic
-from . import schema, sql, tabulate
+from . import schema, sql, tabulate, validation
T = TypeVar("T")
@@ -173,6 +173,20 @@ class IgnoreAddArgs(schema.Strict):
status: sql.CheckResultStatusIgnore | None = schema.default_example(None, sql.CheckResultStatusIgnore.FAILURE)
message_glob: str | None = schema.default_example(None, "sha512 matches for apache-example-0.0.1/*.xml")
+ @pydantic.model_validator(mode="after")
+ def validate_patterns(self) -> "IgnoreAddArgs":
+ for pattern in [
+ self.release_glob,
+ self.checker_glob,
+ self.primary_rel_path_glob,
+ self.member_rel_path_glob,
+ self.message_glob,
+ ]:
+ if pattern is None:
+ continue
+ validation.validate_ignore_pattern(pattern)
+ return self
+
class IgnoreAddResults(schema.Strict):
endpoint: Literal["/ignore/add"] = schema.alias("endpoint")
diff --git a/atr/models/validation.py b/atr/models/validation.py
new file mode 100644
index 0000000..00ff352
--- /dev/null
+++ b/atr/models/validation.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+from typing import Final
+
+import hyperscan
+
+MAX_IGNORE_PATTERN_LENGTH: Final[int] = 128
+
+
+class HyperscanPattern:
+ __slots__ = ("_db",)
+
+ def __init__(self, db: hyperscan.Database) -> None:
+ self._db = db
+
+ def search(self, value: str):
+ matched = False
+
+ def on_match(_id: int, _start: int, _end: int, _flags: int, _context: object) -> bool:
+ nonlocal matched
+ matched = True
+ return True
+
+ try:
+ self._db.scan(value.encode("utf-8"), on_match)
+ except hyperscan.ScanTerminated:
+ return True
+ except hyperscan.HyperscanError:
+ return None
+
+ return True if matched else None
+
+
+def compile_ignore_pattern(pattern: str):
+ # TODO: This requires importing Hyperscan in atr/models
+ # We want to avoid such dependencies
+ # But if we move this out, we can't do full validation in the models
+ if len(pattern) > MAX_IGNORE_PATTERN_LENGTH:
+ raise ValueError(f"Pattern exceeds {MAX_IGNORE_PATTERN_LENGTH} characters")
+ if pattern.startswith("^") or pattern.endswith("$"):
+ regex_pattern = pattern
+ else:
+ regex_pattern = re.escape(pattern).replace(r"\*", ".*")
+ # Should maybe add .replace(r"\?", ".?")
+ # We must turn off Chimera mode to avoid backtracking
+ db = hyperscan.Database(mode=hyperscan.HS_MODE_BLOCK, chimera=False)
+ try:
+ db.compile([regex_pattern])
+ except hyperscan.HyperscanError as exc:
+ raise ValueError(f"Invalid ignore pattern: {exc}") from exc
+ return HyperscanPattern(db)
+
+
+def validate_ignore_pattern(pattern: str) -> None:
+ """Raise an exception if the pattern is invalid."""
+ if pattern == "!":
+ return
+ raw_pattern = pattern
+ if raw_pattern.startswith("!"):
+ raw_pattern = raw_pattern[1:]
+ compile_ignore_pattern(raw_pattern)
diff --git a/atr/shared/ignores.py b/atr/shared/ignores.py
index 8583466..347a133 100644
--- a/atr/shared/ignores.py
+++ b/atr/shared/ignores.py
@@ -24,6 +24,7 @@ import pydantic
import atr.form as form
import atr.models.sql as sql
+import atr.models.validation as validation
type ADD = Literal["add"]
type DELETE = Literal["delete"]
@@ -93,6 +94,13 @@ class AddIgnoreForm(form.Form):
]
):
raise ValueError("At least one field must be set")
+ _validate_ignore_form_patterns(
+ self.release_glob,
+ self.checker_glob,
+ self.primary_rel_path_glob,
+ self.member_rel_path_glob,
+ self.message_glob,
+ )
return self
@@ -130,6 +138,13 @@ class UpdateIgnoreForm(form.Form):
]
):
raise ValueError("At least one field must be set")
+ _validate_ignore_form_patterns(
+ self.release_glob,
+ self.checker_glob,
+ self.primary_rel_path_glob,
+ self.member_rel_path_glob,
+ self.message_glob,
+ )
return self
@@ -137,3 +152,10 @@ type IgnoreForm = Annotated[
AddIgnoreForm | DeleteIgnoreForm | UpdateIgnoreForm,
form.DISCRIMINATOR,
]
+
+
+def _validate_ignore_form_patterns(*patterns: str) -> None:
+ for pattern in patterns:
+ if not pattern:
+ continue
+ validation.validate_ignore_pattern(pattern)
diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py
index 3b8de1b..15131bb 100644
--- a/atr/storage/readers/checks.py
+++ b/atr/storage/readers/checks.py
@@ -18,13 +18,13 @@
# Removing this will cause circular imports
from __future__ import annotations
-import re
from typing import TYPE_CHECKING
import atr.db as db
import atr.models.sql as sql
import atr.storage as storage
import atr.storage.types as types
+import atr.util as util
if TYPE_CHECKING:
import pathlib
@@ -142,21 +142,4 @@ class GeneralPublic:
return True
def __check_ignore_match_pattern(self, pattern: str | None, value: str | None) -> bool:
- if pattern == "!":
- # Special case, "!" matches None
- return True if (value is None) else False
- if (pattern is None) or (value is None):
- return False
- negate = False
- if pattern.startswith("!"):
- pattern = pattern[1:]
- negate = True
- if pattern.startswith("^") or pattern.endswith("$"):
- regex = re.compile(pattern)
- else:
- regex = re.compile(re.escape(pattern).replace(r"\*", ".*"))
- # Should maybe add .replace(r"\?", ".?")
- matched = regex.search(value) is not None
- if negate:
- return not matched
- return matched
+ return util.match_ignore_pattern(pattern, value)
diff --git a/atr/storage/writers/checks.py b/atr/storage/writers/checks.py
index 1665119..ca18f2c 100644
--- a/atr/storage/writers/checks.py
+++ b/atr/storage/writers/checks.py
@@ -24,9 +24,17 @@ import sqlmodel
import atr.db as db
import atr.models.sql as sql
+import atr.models.validation as validation
import atr.storage as storage
+def _validate_ignore_patterns(*patterns: str | None) -> None:
+ for pattern in patterns:
+ if pattern is None:
+ continue
+ validation.validate_ignore_pattern(pattern)
+
+
class GeneralPublic:
def __init__(
self,
@@ -99,6 +107,13 @@ class CommitteeMember(CommitteeParticipant):
status: sql.CheckResultStatusIgnore | None = None,
message_glob: str | None = None,
) -> None:
+ _validate_ignore_patterns(
+ release_glob,
+ checker_glob,
+ primary_rel_path_glob,
+ member_rel_path_glob,
+ message_glob,
+ )
cri = sql.CheckResultIgnore(
asf_uid=self.__asf_uid,
created=datetime.datetime.now(datetime.UTC),
@@ -138,6 +153,13 @@ class CommitteeMember(CommitteeParticipant):
status: sql.CheckResultStatusIgnore | None = None,
message_glob: str | None = None,
) -> None:
+ _validate_ignore_patterns(
+ release_glob,
+ checker_glob,
+ primary_rel_path_glob,
+ member_rel_path_glob,
+ message_glob,
+ )
cri = await self.__data.get(sql.CheckResultIgnore, id)
if cri is None:
raise storage.AccessError(f"Ignore {id} not found")
diff --git a/atr/util.py b/atr/util.py
index b736abb..64a2318 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -50,6 +50,7 @@ import atr.config as config
import atr.ldap as ldap
import atr.log as log
import atr.models.sql as sql
+import atr.models.validation as validation
import atr.registry as registry
import atr.tarzip as tarzip
import atr.user as user
@@ -628,6 +629,27 @@ def key_ssh_fingerprint_core(ssh_key_string: str) -> str:
raise ValueError("Invalid SSH key format")
+def match_ignore_pattern(pattern: str | None, value: str | None) -> bool:
+ if pattern == "!":
+ # Special case, "!" matches None
+ return value is None
+ if (pattern is None) or (value is None):
+ return False
+ negate = False
+ raw_pattern = pattern
+ if raw_pattern.startswith("!"):
+ raw_pattern = raw_pattern[1:]
+ negate = True
+ try:
+ regex = validation.compile_ignore_pattern(raw_pattern)
+ except ValueError:
+ return False
+ matched = regex.search(value) is not None
+ if negate:
+ return not matched
+ return matched
+
+
async def number_of_release_files(release: sql.Release) -> int:
"""Return the number of files in a release."""
if (path := release_directory_revision(release)) is None:
diff --git a/scripts/check_models_imports.py b/scripts/check_models_imports.py
index 80ae8e3..1045be8 100755
--- a/scripts/check_models_imports.py
+++ b/scripts/check_models_imports.py
@@ -23,6 +23,7 @@ from typing import Final
_ALLOWED_PACKAGES: Final = frozenset(
{
+ "hyperscan",
"pydantic",
"pydantic_core",
"sqlalchemy",
@@ -31,6 +32,10 @@ _ALLOWED_PACKAGES: Final = frozenset(
)
+def main() -> None:
+ sys.exit(_run())
+
+
def _check_file(path: pathlib.Path) -> list[str]:
errors = []
tree = ast.parse(path.read_text(), filename=str(path))
@@ -74,9 +79,5 @@ def _run() -> int:
return 1 if errors else 0
-def main() -> None:
- sys.exit(_run())
-
-
if __name__ == "__main__":
main()
diff --git a/tests/unit/test_ignore_patterns.py b/tests/unit/test_ignore_patterns.py
new file mode 100644
index 0000000..47bd213
--- /dev/null
+++ b/tests/unit/test_ignore_patterns.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Final
+
+import pytest
+
+import atr.models.validation as validation
+
+REDOS_PATTERN: Final[str] = "^(a+)+$"
+
+
+def test_match_ignore_pattern_avoids_redos_regression() -> None:
+ value = ("a" * 4096) + "X"
+ regex = validation.compile_ignore_pattern(REDOS_PATTERN)
+ assert regex.search(value) is None
+
+
+def test_validate_ignore_pattern_allows_literal_lookaround_tokens() -> None:
+ validation.validate_ignore_pattern("(?=a)")
+
+
+def test_validate_ignore_pattern_hyperscan_supported_constructs() -> None:
+ pattern = r"^(?i)apple(?-i)banana[[:digit:]]{2}\b(?#fruit)|^cherry\s+date$"
+ regex = validation.compile_ignore_pattern(pattern)
+ assert regex.search("APPLEbanana12 ") is True
+ assert regex.search("applebanana99-") is True
+ assert regex.search("cherry date") is True
+ assert regex.search("cherry\tdate") is True
+
+ assert regex.search("APPLEBANANA12 ") is None
+ assert regex.search("applebanana123 ") is None
+ assert regex.search("applebanana12x") is None
+ assert regex.search("applebanana12_") is None
+ assert regex.search("cherrydate") is None
+ assert regex.search("xcherry date") is None
+ assert regex.search("cherry datex") is None
+
+
+def test_validate_ignore_pattern_rejects_regex_lookaround() -> None:
+ with pytest.raises(ValueError, match="Invalid ignore pattern"):
+ validation.validate_ignore_pattern("^(?=a)$")
+
+
+def test_validate_ignore_pattern_rejects_too_long() -> None:
+ pattern = "a" * (validation.MAX_IGNORE_PATTERN_LENGTH + 1)
+ with pytest.raises(ValueError, match="Pattern exceeds"):
+ validation.validate_ignore_pattern(pattern)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]