This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-release.git


The following commit(s) were added to refs/heads/main by this push:
     new 82d09c8  Ensure that only new and modified files are checked
82d09c8 is described below

commit 82d09c85381d56230378b0573f03fd1e207881b2
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Mar 25 19:04:55 2025 +0200

    Ensure that only new and modified files are checked
---
 atr/db/__init__.py  | 51 +++++++++++++++++++++++++++++++++++----------------
 atr/routes/files.py | 27 ++-------------------------
 atr/tasks/rsync.py  | 37 +++++++++++++++++++++++++++----------
 3 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/atr/db/__init__.py b/atr/db/__init__.py
index 150c771..55845fa 100644
--- a/atr/db/__init__.py
+++ b/atr/db/__init__.py
@@ -517,22 +517,6 @@ def create_async_db_session() -> sqlalchemy.ext.asyncio.AsyncSession:
         return util.validate_as_type(_global_async_sessionmaker(), sqlalchemy.ext.asyncio.AsyncSession)
 
 
-def session() -> Session:
-    """Create a new asynchronous database session."""
-    global _global_atr_sessionmaker
-
-    if quart.has_app_context():
-        extensions = quart.current_app.extensions
-        return util.validate_as_type(extensions["atr_db_session"](), Session)
-    else:
-        if _global_atr_sessionmaker is None:
-            engine = create_async_engine(config.get())
-    _global_atr_sessionmaker = sqlalchemy.ext.asyncio.async_sessionmaker(
-                bind=engine, class_=Session, expire_on_commit=False
-            )
-        return util.validate_as_type(_global_atr_sessionmaker(), Session)
-
-
 # FIXME: this method is deprecated and should be removed
 def create_sync_db_engine() -> None:
     """Create a synchronous database engine."""
@@ -556,6 +540,25 @@ def create_sync_db_session() -> sqlalchemy.orm.Session:
     return sqlalchemy.orm.Session(_global_sync_engine)
 
 
+async def recent_tasks(data: Session, release_name: str, file_path: str, modified: int) -> dict[str, models.Task]:
+    """Get the most recent task for each task type for a specific file."""
+    tasks = await data.task(
+        release_name=release_name,
+        path=str(file_path),
+        modified=modified,
+    ).all()
+
+    # Group by task_type and keep the most recent one
+    # We use the highest id to determine the most recent task
+    recent_tasks: dict[str, models.Task] = {}
+    for task in tasks:
+        # If we haven't seen this task type before or if this task is newer
+        if (task.task_type not in recent_tasks) or (task.id > recent_tasks[task.task_type].id):
+            recent_tasks[task.task_type] = task
+
+    return recent_tasks
+
+
 def select_in_load(*entities: Any) -> orm.strategy_options._AbstractLoad:
     """Eagerly load the given entities from the query."""
     validated_entities = []
@@ -579,6 +582,22 @@ def select_in_load_nested(parent: Any, *descendants: Any) -> orm.strategy_option
     return result
 
 
+def session() -> Session:
+    """Create a new asynchronous database session."""
+    global _global_atr_sessionmaker
+
+    if quart.has_app_context():
+        extensions = quart.current_app.extensions
+        return util.validate_as_type(extensions["atr_db_session"](), Session)
+    else:
+        if _global_atr_sessionmaker is None:
+            engine = create_async_engine(config.get())
+            _global_atr_sessionmaker = sqlalchemy.ext.asyncio.async_sessionmaker(
+                bind=engine, class_=Session, expire_on_commit=False
+            )
+        return util.validate_as_type(_global_atr_sessionmaker(), Session)
+
+
 def validate_instrumented_attribute(obj: Any) -> orm.InstrumentedAttribute:
     """Check if the given object is an InstrumentedAttribute."""
     if not isinstance(obj, orm.InstrumentedAttribute):
diff --git a/atr/routes/files.py b/atr/routes/files.py
index dcb5846..b6543fe 100644
--- a/atr/routes/files.py
+++ b/atr/routes/files.py
@@ -124,27 +124,6 @@ def _authentication_failed() -> NoReturn:
     raise base.ASFQuartException("Not authenticated", errorcode=401)
 
 
-async def _get_recent_tasks_by_type(
-    data: db.Session, release_name: str, file_path: str, modified: int
-) -> dict[str, models.Task]:
-    """Get the most recent task for each task type for a specific file."""
-    tasks = await data.task(
-        release_name=release_name,
-        path=str(file_path),
-        modified=modified,
-    ).all()
-
-    # Group by task_type and keep the most recent one
-    # We use the highest id to determine the most recent task
-    recent_tasks: dict[str, models.Task] = {}
-    for task in tasks:
-        # If we haven't seen this task type before or if this task is newer
-        if (task.task_type not in recent_tasks) or (task.id > recent_tasks[task.task_type].id):
-            recent_tasks[task.task_type] = task
-
-    return recent_tasks
-
-
 async def _number_of_release_files(release: models.Release) -> int:
     """Return the number of files in the release."""
     path_project = release.project.name
@@ -411,9 +390,7 @@ async def root_files_list(session: CommitterSession, project_name: str, version_
         path_modified[path] = int(await aiofiles.os.path.getmtime(full_path))
 
         # Get the most recent task for each type
-        path_tasks[path] = await _get_recent_tasks_by_type(
-            data, f"{project_name}-{version_name}", str(path), path_modified[path]
-        )
+        path_tasks[path] = await db.recent_tasks(data, f"{project_name}-{version_name}", str(path), path_modified[path])
 
     return await quart.render_template(
         "files-list.html",
@@ -458,7 +435,7 @@ async def root_files_checks(session: CommitterSession, project_name: str, versio
         file_size = await aiofiles.os.path.getsize(full_path)
 
         # Get the most recent task for each task type
-        recent_tasks = await _get_recent_tasks_by_type(data, f"{project_name}-{version_name}", file_path, modified)
+        recent_tasks = await db.recent_tasks(data, f"{project_name}-{version_name}", file_path, modified)
 
         # Convert to a list for the template
         tasks = list(recent_tasks.values())
diff --git a/atr/tasks/rsync.py b/atr/tasks/rsync.py
index 764eb55..795cd1c 100644
--- a/atr/tasks/rsync.py
+++ b/atr/tasks/rsync.py
@@ -16,8 +16,9 @@
 # under the License.
 
 import logging
-from typing import Any, Final
+from typing import TYPE_CHECKING, Any, Final
 
+import aiofiles.os
 import pydantic
 
 import atr.db as db
@@ -26,6 +27,9 @@ import atr.tasks as tasks
 import atr.tasks.task as task
 import atr.util as util
 
+if TYPE_CHECKING:
+    from collections.abc import Callable, Coroutine
+
 # _CONFIG: Final = config.get()
 _LOGGER: Final = logging.getLogger(__name__)
 
@@ -57,18 +61,31 @@ async def _analyse_core(asf_uid: str, project_name: str, release_version: str) -
     base_path = util.get_candidate_draft_dir() / project_name / release_version
     paths = await util.paths_recursive(base_path)
     release_name = f"{project_name}-{release_version}"
+
     async with db.session() as data:
         release = await data.release(name=release_name, _committee=True).demand(RuntimeError("Release not found"))
         for path in paths:
+            # This works because path is relative
+            full_path = base_path / path
+
+            # We only want to analyse files that are new or have changed
+            # But rsync can set timestamps to the past, so we can't rely on them
+            # Instead, we can run any tasks when the file has a different modified time
+            # TODO: This may cause problems if the file is backdated
+            modified = int(await aiofiles.os.path.getmtime(full_path))
+            cached_tasks = await db.recent_tasks(data, release_name, str(path), modified)
+
             # Add new tasks for each path
-            if path.name.endswith(".asc"):
-                for task in await tasks.asc_checks(release, str(path)):
-                    data.add(task)
-            elif path.name.endswith(".sha256") or path.name.endswith(".sha512"):
-                for task in await tasks.sha_checks(release, str(path)):
-                    data.add(task)
-            elif path.name.endswith(".tar.gz"):
-                for task in await tasks.tar_gz_checks(release, str(path)):
-                    data.add(task)
+            task_functions: dict[str, Callable[..., Coroutine[Any, Any, list[models.Task]]]] = {
+                ".asc": tasks.asc_checks,
+                ".sha256": tasks.sha_checks,
+                ".sha512": tasks.sha_checks,
+                ".tar.gz": tasks.tar_gz_checks,
+            }
+            for task_type, task_function in task_functions.items():
+                if path.name.endswith(task_type):
+                    for task in await task_function(release, str(path)):
+                        if task.task_type not in cached_tasks:
+                            data.add(task)
         await data.commit()
     return {"paths": [str(path) for path in paths]}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to