This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-release.git


The following commit(s) were added to refs/heads/main by this push:
     new fbe5580  Allow public keys without an ASF UID to be uploaded
fbe5580 is described below

commit fbe5580858fc8591f1bf8474cf7582f6bb542256
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue May 27 14:53:58 2025 +0100

    Allow public keys without an ASF UID to be uploaded
    
    - Make the ASF UID optional on public keys
    - Do not consider files signed if the signing key ASF UID is missing
    - Refactor extracting email addresses from UIDs
    - Log tasks held by unmanaged PIDs in the task manager
    - Fix a problem with displaying emails for committee public keys
    - Add an Alembic migration for the optional ASF UID on public keys
    - Ensure that Alembic migration autogeneration is sqlite3 compatible
---
 atr/db/__init__.py                              |  2 +-
 atr/db/interaction.py                           | 27 ++++++++++---------
 atr/db/models.py                                |  4 +--
 atr/manager.py                                  | 35 +++++++++++++++++++++++--
 atr/routes/keys.py                              |  7 +++--
 atr/tasks/checks/signature.py                   | 25 +++++++++++++-----
 atr/templates/keys-review.html                  |  4 +--
 atr/util.py                                     |  6 +++++
 migrations/env.py                               |  2 ++
 migrations/versions/0004_2025.05.27_52cbd2b5.py | 27 +++++++++++++++++++
 10 files changed, 108 insertions(+), 31 deletions(-)

diff --git a/atr/db/__init__.py b/atr/db/__init__.py
index daa1aeb..5d31707 100644
--- a/atr/db/__init__.py
+++ b/atr/db/__init__.py
@@ -317,7 +317,7 @@ class Session(sqlalchemy.ext.asyncio.AsyncSession):
         expires: Opt[datetime.datetime | None] = NOT_SET,
         primary_declared_uid: Opt[str | None] = NOT_SET,
         secondary_declared_uids: Opt[list[str]] = NOT_SET,
-        apache_uid: Opt[str] = NOT_SET,
+        apache_uid: Opt[str | None] = NOT_SET,
         ascii_armored_key: Opt[str] = NOT_SET,
         _committees: bool = False,
     ) -> Query[models.PublicSigningKey]:
diff --git a/atr/db/interaction.py b/atr/db/interaction.py
index 1e5aa3d..9365cf9 100644
--- a/atr/db/interaction.py
+++ b/atr/db/interaction.py
@@ -79,15 +79,16 @@ async def key_user_add(asf_uid: str | None, public_key: str, selected_committees
             for uid_str in key.get("uids", []):
                 if asf_uid := await asyncio.to_thread(_asf_uid_from_uid_str, 
uid_str):
                     break
-    if asf_uid is None:
-        # We place this here to make it easier on the type checkers
-        non_asf_uids = key.get("uids", [])
-        first_non_asf_uid = non_asf_uids[0] if non_asf_uids else "None"
-        raise ApacheUserMissingError(
-            f"No Apache UID found. Fingerprint: {key.get('fingerprint', 
'Unknown')}. Primary UID: {first_non_asf_uid}",
-            fingerprint=key.get("fingerprint"),
-            primary_uid=first_non_asf_uid,
-        )
+    # if asf_uid is None:
+    #     # We place this here to make it easier on the type checkers
+    #     non_asf_uids = key.get("uids", [])
+    #     first_non_asf_uid = non_asf_uids[0] if non_asf_uids else "None"
+    #     raise ApacheUserMissingError(
+    #         f"No Apache UID found. Fingerprint: {key.get('fingerprint', 'Unknown')}."
+    #         f" Primary UID: {first_non_asf_uid}",
+    #         fingerprint=key.get("fingerprint"),
+    #         primary_uid=first_non_asf_uid,
+    #     )
 
     # Store key in database
     async with db.session() as data:
@@ -95,7 +96,7 @@ async def key_user_add(asf_uid: str | None, public_key: str, 
selected_committees
 
 
 async def key_user_session_add(
-    asf_uid: str,
+    asf_uid: str | None,
     public_key: str,
     key: dict,
     selected_committees: list[str],
@@ -126,6 +127,7 @@ async def key_user_session_add(
     async with data.begin():
         existing = await data.public_signing_key(fingerprint=fingerprint, 
apache_uid=asf_uid).get()
 
+        # TODO: This can race
         if existing:
             logging.info(f"Found existing key {fingerprint}, updating 
associations")
             key_record = existing
@@ -181,10 +183,9 @@ async def key_user_session_add(
                 logging.warning(f"Could not find committee {committee_name} to 
link key {fingerprint}")
                 continue
 
-    # Extract email for sorting
+    # TODO: What if there is no email?
     user_id_str = key_record.primary_declared_uid or ""
-    email_match = re.search(r"<([^>]+)>", user_id_str)
-    email = email_match.group(1) if email_match else user_id_str
+    email = util.email_from_uid(user_id_str) or ""
 
     return {
         "key_id": key_record.fingerprint[:16],
diff --git a/atr/db/models.py b/atr/db/models.py
index 1789475..5b17550 100644
--- a/atr/db/models.py
+++ b/atr/db/models.py
@@ -108,8 +108,8 @@ class PublicSigningKey(sqlmodel.SQLModel, table=True):
     secondary_declared_uids: list[str] = sqlmodel.Field(
         default_factory=list, sa_column=sqlalchemy.Column(sqlalchemy.JSON)
     )
-    # The UID used by Apache
-    apache_uid: str
+    # The UID used by Apache, if available
+    apache_uid: str | None
     # The ASCII armored key
     ascii_armored_key: str
     # The committees that use this key
diff --git a/atr/manager.py b/atr/manager.py
index d3b1494..9f2f9e4 100644
--- a/atr/manager.py
+++ b/atr/manager.py
@@ -290,12 +290,43 @@ class WorkerManager:
                 await self.spawn_worker()
             _LOGGER.info(f"Worker pool restored to {len(self.workers)} 
workers")
 
+    async def _log_tasks_held_by_unmanaged_pids(self, data: db.Session, 
active_worker_pids: list[int]) -> None:
+        """Log tasks that are active and held by PIDs not managed by this 
worker manager."""
+        foreign_tasks_stmt = sqlmodel.select(models.Task.pid, 
models.Task.id).where(
+            sqlmodel.and_(
+                
models.validate_instrumented_attribute(models.Task.pid).notin_(active_worker_pids),
+                models.Task.status == models.TaskStatus.ACTIVE,
+                
models.validate_instrumented_attribute(models.Task.pid).isnot(None),
+            )
+        )
+        foreign_tasks_result = await data.execute(foreign_tasks_stmt)
+        foreign_pids_with_tasks: dict[int, int] = {
+            row.pid: row.id for row in foreign_tasks_result if row.pid is not 
None
+        }
+
+        if not foreign_pids_with_tasks:
+            return
+
+        _LOGGER.debug(f"Found tasks potentially claimed by non-managed PIDs: 
{foreign_pids_with_tasks}")
+        for foreign_pid, task_id_held in foreign_pids_with_tasks.items():
+            try:
+                os.kill(foreign_pid, 0)
+                _LOGGER.warning(f"Task {task_id_held} is held by an active, 
unmanaged process (PID: {foreign_pid})")
+            except ProcessLookupError:
+                _LOGGER.info(f"Task {task_id_held} was held by PID 
{foreign_pid}, which is no longer running")
+            except Exception as e:
+                _LOGGER.error(f"Unexpected error: {foreign_pid} holding task 
{task_id_held}: {e}")
+
     async def reset_broken_tasks(self) -> None:
-        """Reset any tasks that were being processed by exited workers."""
+        """Reset any tasks that were being processed by exited or unmanaged workers."""
         try:
             async with db.session() as data:
                 async with data.begin():
                     active_worker_pids = list(self.workers)
+                    try:
+                        await self._log_tasks_held_by_unmanaged_pids(data, 
active_worker_pids)
+                    except Exception:
+                        ...
 
                     update_stmt = (
                         sqlmodel.update(models.Task)
@@ -310,7 +341,7 @@ class WorkerManager:
 
                     result = await data.execute(update_stmt)
                     if result.rowcount > 0:
-                        _LOGGER.info(f"Reset {result.rowcount} tasks to state 
'QUEUED' as their worker died")
+                        _LOGGER.info(f"Reset {result.rowcount} tasks to state 
'QUEUED' due to worker issues")
 
         except Exception as e:
             _LOGGER.error(f"Error resetting broken tasks: {e}")
diff --git a/atr/routes/keys.py b/atr/routes/keys.py
index 85678cd..14d71d3 100644
--- a/atr/routes/keys.py
+++ b/atr/routes/keys.py
@@ -25,7 +25,6 @@ import hashlib
 import logging
 import logging.handlers
 import pathlib
-import re
 import textwrap
 from collections.abc import Sequence
 
@@ -283,6 +282,7 @@ async def keys(session: routes.CommitterSession) -> str:
         now=datetime.datetime.now(datetime.UTC),
         delete_form=delete_form,
         update_committee_keys_form=update_committee_keys_form,
+        email_from_key=util.email_from_uid,
     )
 
 
@@ -382,9 +382,8 @@ async def update_committee_keys(session: 
routes.CommitterSession, committee_name
         for key in sorted_keys:
             fingerprint_short = key.fingerprint[:16].upper()
             apache_uid = key.apache_uid
-            primary_declared_uid_str = key.primary_declared_uid or ""
-            email_match = re.search(r"<([^>]+)>", primary_declared_uid_str)
-            email = email_match.group(1) if email_match else 
primary_declared_uid_str
+            # TODO: What if there is no email?
+            email = util.email_from_uid(key.primary_declared_uid or "") or ""
             if email == f"{apache_uid}@apache.org":
                 comment_line = f"# {fingerprint_short} {email}"
             else:
diff --git a/atr/tasks/checks/signature.py b/atr/tasks/checks/signature.py
index 7da35b6..39a534e 100644
--- a/atr/tasks/checks/signature.py
+++ b/atr/tasks/checks/signature.py
@@ -85,19 +85,22 @@ async def _check_core_logic(committee_name: str, 
artifact_path: str, signature_p
             
.where(models.validate_instrumented_attribute(models.Committee.name) == 
committee_name)
         )
         result = await session.execute(statement)
-        public_keys = [key.ascii_armored_key for key in result.scalars().all()]
-    _LOGGER.info(f"Found {len(public_keys)} public keys for committee_name: 
'{committee_name}'")
+        db_public_keys = result.scalars().all()
+    _LOGGER.info(f"Found {len(db_public_keys)} public keys for committee_name: 
'{committee_name}'")
+    apache_uid_map = {key.fingerprint.lower(): bool(key.apache_uid) for key in 
db_public_keys if key.fingerprint}
+    public_keys = [key.ascii_armored_key for key in db_public_keys]
 
     return await asyncio.to_thread(
         _check_core_logic_verify_signature,
         signature_path=signature_path,
         artifact_path=artifact_path,
         ascii_armored_keys=public_keys,
+        apache_uid_map=apache_uid_map,
     )
 
 
 def _check_core_logic_verify_signature(
-    signature_path: str, artifact_path: str, ascii_armored_keys: list[str]
+    signature_path: str, artifact_path: str, ascii_armored_keys: list[str], 
apache_uid_map: dict[str, bool]
 ) -> dict[str, Any]:
     """Verify a GPG signature for a file."""
     with tempfile.TemporaryDirectory(prefix="gpg-") as gpg_dir, 
open(signature_path, "rb") as sig_file:
@@ -111,10 +114,13 @@ def _check_core_logic_verify_signature(
                 continue
         verified = gpg.verify_file(sig_file, str(artifact_path))
 
+    key_fp = verified.pubkey_fingerprint.lower() if 
verified.pubkey_fingerprint else None
+    apache_uid_ok = (key_fp is not None) and apache_uid_map.get(key_fp, False)
+
     # Collect all available information for debugging
     debug_info = {
         "key_id": verified.key_id or "Not available",
-        "fingerprint": verified.fingerprint.lower() if verified.fingerprint 
else "Not available",
+        "fingerprint": key_fp or "Not available",
         "pubkey_fingerprint": verified.pubkey_fingerprint.lower() if 
verified.pubkey_fingerprint else "Not available",
         "creation_date": verified.creation_date or "Not available",
         "timestamp": verified.timestamp or "Not available",
@@ -125,12 +131,17 @@ def _check_core_logic_verify_signature(
         "trust_text": verified.trust_text if hasattr(verified, "trust_text") 
else "Not available",
         "stderr": verified.stderr if hasattr(verified, "stderr") else "Not 
available",
         "num_committee_keys": len(ascii_armored_keys),
+        "key_has_apache_uid": apache_uid_ok,
     }
 
-    if not verified:
+    if (not verified) or (not apache_uid_ok):
+        error_msg = "No valid signature found"
+        if verified and (not apache_uid_ok):
+            error_msg = "Verifying key lacks an ASF UID"
+            debug_info["status"] = "Invalid: Key lacks ASF UID"
         return {
             "verified": False,
-            "error": "No valid signature found",
+            "error": error_msg,
             "debug_info": debug_info,
         }
 
@@ -139,7 +150,7 @@ def _check_core_logic_verify_signature(
         "key_id": verified.key_id,
         "timestamp": verified.timestamp,
         "username": verified.username or "Unknown",
-        "fingerprint": verified.pubkey_fingerprint.lower() or "Unknown",
+        "fingerprint": key_fp or "Unknown",
         "status": "Valid signature",
         "debug_info": debug_info,
     }
diff --git a/atr/templates/keys-review.html b/atr/templates/keys-review.html
index 3765e53..aeb5b6f 100644
--- a/atr/templates/keys-review.html
+++ b/atr/templates/keys-review.html
@@ -183,8 +183,8 @@
                 <td class="text-break font-monospace px-2">
                   <a href="{{ as_url(routes.keys.show_gpg_key, 
fingerprint=key.fingerprint) }}">{{ key.fingerprint[:16]|upper }}</a>
                 </td>
-                <td class="text-break px-2">{{ key.declared_uid or 'Not 
specified' }}</td>
-                <td class="text-break px-2">{{ key.apache_uid }}</td>
+                <td class="text-break px-2">{{ 
email_from_key(key.primary_declared_uid) or 'Not specified' }}</td>
+                <td class="text-break px-2">{{ key.apache_uid or "-" }}</td>
               </tr>
             {% endfor %}
           </tbody>
diff --git a/atr/util.py b/atr/util.py
index ca89278..a427210 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -214,6 +214,12 @@ async def create_hard_link_clone(
     await _clone_recursive(source_dir, dest_dir)
 
 
+def email_from_uid(uid: str) -> str | None:
+    if m := re.search(r"<([^>]+)>", uid):
+        return m.group(1)
+    return None
+
+
 async def file_sha3(path: str) -> str:
     """Compute SHA3-256 hash of a file."""
     sha3 = hashlib.sha3_256()
diff --git a/migrations/env.py b/migrations/env.py
index 6004efb..ecf78d9 100644
--- a/migrations/env.py
+++ b/migrations/env.py
@@ -126,6 +126,7 @@ def run_migrations_offline() -> None:
         dialect_opts={"paramstyle": "named"},
         render_item=render_item_override,
         process_revision_directives=process_revision_directives_custom_naming,
+        render_as_batch=True,
     )
 
     with alembic.context.begin_transaction():
@@ -149,6 +150,7 @@ def run_migrations_online() -> None:
             target_metadata=target_metadata,
             render_item=render_item_override,
             
process_revision_directives=process_revision_directives_custom_naming,
+            render_as_batch=True,
         )
 
         with alembic.context.begin_transaction():
diff --git a/migrations/versions/0004_2025.05.27_52cbd2b5.py 
b/migrations/versions/0004_2025.05.27_52cbd2b5.py
new file mode 100644
index 0000000..fb29d54
--- /dev/null
+++ b/migrations/versions/0004_2025.05.27_52cbd2b5.py
@@ -0,0 +1,27 @@
+"""Make ASF UID optional on public keys
+
+Revision ID: 0004_2025.05.27_52cbd2b5
+Revises: 0003_2025.05.21_ebed2397
+Create Date: 2025-05-27 13:18:44.637580+00:00
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+# Revision identifiers, used by Alembic
+revision: str = "0004_2025.05.27_52cbd2b5"
+down_revision: str | None = "0003_2025.05.21_ebed2397"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
+def upgrade() -> None:
+    with op.batch_alter_table("publicsigningkey", schema=None) as batch_op:
+        batch_op.alter_column("apache_uid", existing_type=sa.VARCHAR(), 
nullable=True)
+
+
+def downgrade() -> None:
+    with op.batch_alter_table("publicsigningkey", schema=None) as batch_op:
+        batch_op.alter_column("apache_uid", existing_type=sa.VARCHAR(), 
nullable=False)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to