This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-release.git
The following commit(s) were added to refs/heads/main by this push:
new 82d09c8 Ensure that only new and modified files are checked
82d09c8 is described below
commit 82d09c85381d56230378b0573f03fd1e207881b2
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Mar 25 19:04:55 2025 +0200
Ensure that only new and modified files are checked
---
atr/db/__init__.py | 51 +++++++++++++++++++++++++++++++++++----------------
atr/routes/files.py | 27 ++-------------------------
atr/tasks/rsync.py | 37 +++++++++++++++++++++++++++----------
3 files changed, 64 insertions(+), 51 deletions(-)
diff --git a/atr/db/__init__.py b/atr/db/__init__.py
index 150c771..55845fa 100644
--- a/atr/db/__init__.py
+++ b/atr/db/__init__.py
@@ -517,22 +517,6 @@ def create_async_db_session() -> sqlalchemy.ext.asyncio.AsyncSession:
return util.validate_as_type(_global_async_sessionmaker(), sqlalchemy.ext.asyncio.AsyncSession)
-def session() -> Session:
- """Create a new asynchronous database session."""
- global _global_atr_sessionmaker
-
- if quart.has_app_context():
- extensions = quart.current_app.extensions
- return util.validate_as_type(extensions["atr_db_session"](), Session)
- else:
- if _global_atr_sessionmaker is None:
- engine = create_async_engine(config.get())
- _global_atr_sessionmaker = sqlalchemy.ext.asyncio.async_sessionmaker(
- bind=engine, class_=Session, expire_on_commit=False
- )
- return util.validate_as_type(_global_atr_sessionmaker(), Session)
-
-
# FIXME: this method is deprecated and should be removed
def create_sync_db_engine() -> None:
"""Create a synchronous database engine."""
@@ -556,6 +540,25 @@ def create_sync_db_session() -> sqlalchemy.orm.Session:
return sqlalchemy.orm.Session(_global_sync_engine)
+async def recent_tasks(data: Session, release_name: str, file_path: str, modified: int) -> dict[str, models.Task]:
+ """Get the most recent task for each task type for a specific file."""
+ tasks = await data.task(
+ release_name=release_name,
+ path=str(file_path),
+ modified=modified,
+ ).all()
+
+ # Group by task_type and keep the most recent one
+ # We use the highest id to determine the most recent task
+ recent_tasks: dict[str, models.Task] = {}
+ for task in tasks:
+ # If we haven't seen this task type before or if this task is newer
+ if (task.task_type not in recent_tasks) or (task.id > recent_tasks[task.task_type].id):
+ recent_tasks[task.task_type] = task
+
+ return recent_tasks
+
+
def select_in_load(*entities: Any) -> orm.strategy_options._AbstractLoad:
"""Eagerly load the given entities from the query."""
validated_entities = []
@@ -579,6 +582,22 @@ def select_in_load_nested(parent: Any, *descendants: Any) -> orm.strategy_option
return result
+def session() -> Session:
+ """Create a new asynchronous database session."""
+ global _global_atr_sessionmaker
+
+ if quart.has_app_context():
+ extensions = quart.current_app.extensions
+ return util.validate_as_type(extensions["atr_db_session"](), Session)
+ else:
+ if _global_atr_sessionmaker is None:
+ engine = create_async_engine(config.get())
+ _global_atr_sessionmaker = sqlalchemy.ext.asyncio.async_sessionmaker(
+ bind=engine, class_=Session, expire_on_commit=False
+ )
+ return util.validate_as_type(_global_atr_sessionmaker(), Session)
+
+
def validate_instrumented_attribute(obj: Any) -> orm.InstrumentedAttribute:
"""Check if the given object is an InstrumentedAttribute."""
if not isinstance(obj, orm.InstrumentedAttribute):
diff --git a/atr/routes/files.py b/atr/routes/files.py
index dcb5846..b6543fe 100644
--- a/atr/routes/files.py
+++ b/atr/routes/files.py
@@ -124,27 +124,6 @@ def _authentication_failed() -> NoReturn:
raise base.ASFQuartException("Not authenticated", errorcode=401)
-async def _get_recent_tasks_by_type(
- data: db.Session, release_name: str, file_path: str, modified: int
-) -> dict[str, models.Task]:
- """Get the most recent task for each task type for a specific file."""
- tasks = await data.task(
- release_name=release_name,
- path=str(file_path),
- modified=modified,
- ).all()
-
- # Group by task_type and keep the most recent one
- # We use the highest id to determine the most recent task
- recent_tasks: dict[str, models.Task] = {}
- for task in tasks:
- # If we haven't seen this task type before or if this task is newer
- if (task.task_type not in recent_tasks) or (task.id > recent_tasks[task.task_type].id):
- recent_tasks[task.task_type] = task
-
- return recent_tasks
-
-
async def _number_of_release_files(release: models.Release) -> int:
"""Return the number of files in the release."""
path_project = release.project.name
@@ -411,9 +390,7 @@ async def root_files_list(session: CommitterSession, project_name: str, version_
path_modified[path] = int(await aiofiles.os.path.getmtime(full_path))
# Get the most recent task for each type
- path_tasks[path] = await _get_recent_tasks_by_type(
- data, f"{project_name}-{version_name}", str(path), path_modified[path]
- )
+ path_tasks[path] = await db.recent_tasks(data, f"{project_name}-{version_name}", str(path), path_modified[path])
return await quart.render_template(
"files-list.html",
@@ -458,7 +435,7 @@ async def root_files_checks(session: CommitterSession, project_name: str, versio
file_size = await aiofiles.os.path.getsize(full_path)
# Get the most recent task for each task type
- recent_tasks = await _get_recent_tasks_by_type(data, f"{project_name}-{version_name}", file_path, modified)
+ recent_tasks = await db.recent_tasks(data, f"{project_name}-{version_name}", file_path, modified)
# Convert to a list for the template
tasks = list(recent_tasks.values())
diff --git a/atr/tasks/rsync.py b/atr/tasks/rsync.py
index 764eb55..795cd1c 100644
--- a/atr/tasks/rsync.py
+++ b/atr/tasks/rsync.py
@@ -16,8 +16,9 @@
# under the License.
import logging
-from typing import Any, Final
+from typing import TYPE_CHECKING, Any, Final
+import aiofiles.os
import pydantic
import atr.db as db
@@ -26,6 +27,9 @@ import atr.tasks as tasks
import atr.tasks.task as task
import atr.util as util
+if TYPE_CHECKING:
+ from collections.abc import Callable, Coroutine
+
# _CONFIG: Final = config.get()
_LOGGER: Final = logging.getLogger(__name__)
@@ -57,18 +61,31 @@ async def _analyse_core(asf_uid: str, project_name: str, release_version: str) -
base_path = util.get_candidate_draft_dir() / project_name / release_version
paths = await util.paths_recursive(base_path)
release_name = f"{project_name}-{release_version}"
+
async with db.session() as data:
release = await data.release(name=release_name, _committee=True).demand(RuntimeError("Release not found"))
for path in paths:
+ # This works because path is relative
+ full_path = base_path / path
+
+ # We only want to analyse files that are new or have changed
+ # But rsync can set timestamps to the past, so we can't rely on them
+ # Instead, we can run any tasks when the file has a different modified time
+ # TODO: This may cause problems if the file is backdated
+ modified = int(await aiofiles.os.path.getmtime(full_path))
+ cached_tasks = await db.recent_tasks(data, release_name, str(path), modified)
+
# Add new tasks for each path
- if path.name.endswith(".asc"):
- for task in await tasks.asc_checks(release, str(path)):
- data.add(task)
- elif path.name.endswith(".sha256") or path.name.endswith(".sha512"):
- for task in await tasks.sha_checks(release, str(path)):
- data.add(task)
- elif path.name.endswith(".tar.gz"):
- for task in await tasks.tar_gz_checks(release, str(path)):
- data.add(task)
+ task_functions: dict[str, Callable[..., Coroutine[Any, Any, list[models.Task]]]] = {
+ ".asc": tasks.asc_checks,
+ ".sha256": tasks.sha_checks,
+ ".sha512": tasks.sha_checks,
+ ".tar.gz": tasks.tar_gz_checks,
+ }
+ for task_type, task_function in task_functions.items():
+ if path.name.endswith(task_type):
+ for task in await task_function(release, str(path)):
+ if task.task_type not in cached_tasks:
+ data.add(task)
await data.commit()
return {"paths": [str(path) for path in paths]}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]