This is an automated email from the ASF dual-hosted git repository.
tn pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-atr-experiments.git
The following commit(s) were added to refs/heads/main by this push:
new 733961a chore: create datasources package, move apache module to it,
refactor and add more tests
733961a is described below
commit 733961ae4470f1c64d43bb55e2d9c96f71e10e3d
Author: Thomas Neidhart <[email protected]>
AuthorDate: Mon Mar 3 10:34:52 2025 +0100
chore: create datasources package, move apache module to it, refactor and
add more tests
---
atr/blueprints/secret/secret.py | 10 +-
atr/datasources/__init__.py | 16 +++
atr/{ => datasources}/apache.py | 52 ++++++---
atr/util.py | 19 ++-
tests/datasources/__init__.py | 16 +++
tests/datasources/test_apache.py | 77 +++++++++++++
tests/datasources/testdata/committees.json | 41 +++++++
tests/datasources/testdata/groups.json | 8 ++
tests/datasources/testdata/podlings.json | 10 ++
tests/datasources/testdata/projects.json | 22 ++++
tests/datasources/testdata/retired_committees.json | 11 ++
tests/test_apache.py | 127 ---------------------
12 files changed, 259 insertions(+), 150 deletions(-)
diff --git a/atr/blueprints/secret/secret.py b/atr/blueprints/secret/secret.py
index 4d5b906..87f031f 100644
--- a/atr/blueprints/secret/secret.py
+++ b/atr/blueprints/secret/secret.py
@@ -28,10 +28,10 @@ from werkzeug.wrappers.response import Response
from asfquart.base import ASFQuartException
from asfquart.session import read as session_read
-from atr.apache import (
+from atr.datasources.apache import (
+ get_current_podlings_data,
get_groups_data,
- get_ldap_projects_data,
- get_podlings_data,
+ get_projects_data,
)
from atr.db import get_session
from atr.db.models import (
@@ -186,8 +186,8 @@ async def secret_projects_update() -> str | Response:
"""Update projects from remote data."""
if request.method == "POST":
try:
- apache_projects = await get_ldap_projects_data()
- podlings_data = await get_podlings_data()
+ apache_projects = await get_projects_data()
+ podlings_data = await get_current_podlings_data()
groups_data = await get_groups_data()
except httpx.RequestError as e:
await flash(f"Failed to fetch data: {e!s}", "error")
diff --git a/atr/datasources/__init__.py b/atr/datasources/__init__.py
new file mode 100644
index 0000000..13a8339
--- /dev/null
+++ b/atr/datasources/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/atr/apache.py b/atr/datasources/apache.py
similarity index 78%
rename from atr/apache.py
rename to atr/datasources/apache.py
index c161e35..13b12f7 100644
--- a/atr/apache.py
+++ b/atr/datasources/apache.py
@@ -15,17 +15,21 @@
# specific language governing permissions and limitations
# under the License.
+"""Apache specific data-sources."""
+
from __future__ import annotations
-from collections.abc import Generator, ItemsView
from datetime import datetime
-from typing import Annotated
+from typing import TYPE_CHECKING, Annotated
import httpx
from pydantic import BaseModel, Field, RootModel
from atr.util import DictToList
+if TYPE_CHECKING:
+ from collections.abc import Generator, ItemsView
+
_WHIMSY_COMMITTEE_INFO_URL =
"https://whimsy.apache.org/public/committee-info.json"
_WHIMSY_COMMITTEE_RETIRED_URL =
"https://whimsy.apache.org/public/committee-retired.json"
_WHIMSY_PROJECTS_URL =
"https://whimsy.apache.org/public/public_ldap_projects.json"
@@ -33,7 +37,7 @@ _PROJECT_PODLINGS_URL =
"https://projects.apache.org/json/foundation/podlings.js
_PROJECT_GROUPS_URL = "https://projects.apache.org/json/foundation/groups.json"
-class LDAPProjects(BaseModel):
+class ProjectData(BaseModel):
last_timestamp: str = Field(alias="lastTimestamp")
project_count: int
projects: Annotated[list[Project], DictToList(key="name")]
@@ -45,8 +49,8 @@ class LDAPProjects(BaseModel):
class Project(BaseModel):
name: str
- createTimestamp: str
- modifyTimestamp: str
+ create_timestamp: str = Field(alias="createTimestamp")
+ modify_timestamp: str = Field(alias="modifyTimestamp")
member_count: int
owner_count: int
members: list[str]
@@ -55,14 +59,14 @@ class Project(BaseModel):
podling: str | None = None
-class CommitteeInfo(BaseModel):
+class CommitteeData(BaseModel):
last_updated: str
committee_count: int
pmc_count: int
committees: Annotated[list[Committee], DictToList(key="name")]
-class CommitteeRetired(BaseModel):
+class RetiredCommitteeData(BaseModel):
last_updated: str
retired_count: int
retired: Annotated[list[RetiredCommittee], DictToList(key="name")]
@@ -76,7 +80,7 @@ class Committee(BaseModel):
mail_list: str
established: str
report: list[str]
- # chair: Annotated[list[User], DictToList(key="id")]
+ chair: Annotated[list[User], DictToList(key="id")]
roster_count: int
roster: Annotated[list[User], DictToList(key="id")]
pmc: bool
@@ -85,7 +89,7 @@ class Committee(BaseModel):
class User(BaseModel):
id: str
name: str
- date: str
+ date: str | None = None
class RetiredCommittee(BaseModel):
@@ -98,7 +102,7 @@ class RetiredCommittee(BaseModel):
class PodlingStatus(BaseModel):
description: str
homepage: str
- name: str
+ name: str = Field(alias="name")
pmc: str
podling: bool
started: str
@@ -119,6 +123,9 @@ class PodlingsData(RootModel):
def get(self, key: str) -> PodlingStatus | None:
return self.root.get(key)
+ def __len__(self) -> int:
+ return len(self.root)
+
class GroupsData(RootModel):
root: dict[str, list[str]]
@@ -132,35 +139,44 @@ class GroupsData(RootModel):
def get(self, key: str) -> list[str] | None:
return self.root.get(key)
+ def __len__(self) -> int:
+ return len(self.root)
+
-async def get_ldap_projects_data() -> LDAPProjects:
+async def get_projects_data() -> ProjectData:
async with httpx.AsyncClient() as client:
response = await client.get(_WHIMSY_PROJECTS_URL)
response.raise_for_status()
data = response.json()
- return LDAPProjects.model_validate(data)
+ return ProjectData.model_validate(data)
-async def get_committee_info_data() -> CommitteeInfo:
+async def get_active_committee_data() -> CommitteeData:
+ """Returns the list of currently active committees."""
+
async with httpx.AsyncClient() as client:
response = await client.get(_WHIMSY_COMMITTEE_INFO_URL)
response.raise_for_status()
data = response.json()
- return CommitteeInfo.model_validate(data)
+ return CommitteeData.model_validate(data)
+
+async def get_retired_committee_data() -> RetiredCommitteeData:
+ """Returns the list of retired committees."""
-async def get_committee_retired_data() -> CommitteeRetired:
async with httpx.AsyncClient() as client:
response = await client.get(_WHIMSY_COMMITTEE_RETIRED_URL)
response.raise_for_status()
data = response.json()
- return CommitteeRetired.model_validate(data)
+ return RetiredCommitteeData.model_validate(data)
-async def get_podlings_data() -> PodlingsData:
+async def get_current_podlings_data() -> PodlingsData:
+ """Returns the list of current podlings."""
+
async with httpx.AsyncClient() as client:
response = await client.get(_PROJECT_PODLINGS_URL)
response.raise_for_status()
@@ -169,6 +185,8 @@ async def get_podlings_data() -> PodlingsData:
async def get_groups_data() -> GroupsData:
+ """Returns LDAP Groups with their members."""
+
async with httpx.AsyncClient() as client:
response = await client.get(_PROJECT_GROUPS_URL)
response.raise_for_status()
diff --git a/atr/util.py b/atr/util.py
index 476575c..3e90be5 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -16,6 +16,7 @@
# under the License.
import hashlib
+from collections.abc import Mapping
from dataclasses import dataclass
from functools import cache
from pathlib import Path
@@ -63,6 +64,10 @@ def _get_dict_to_list_inner_type_adapter(source_type: Any,
key: str) -> TypeAdap
root_adapter = TypeAdapter(source_type)
schema = root_adapter.core_schema
+ # support further nesting of model classes
+ if schema["type"] == "definitions":
+ schema = schema["schema"]
+
assert schema["type"] == "list"
assert (item_schema := schema["items_schema"])
assert item_schema["type"] == "model"
@@ -80,9 +85,21 @@ def _get_dict_to_list_inner_type_adapter(source_type: Any,
key: str) -> TypeAdap
def _get_dict_to_list_validator(inner_adapter: TypeAdapter[dict[Any, Any]],
key: str) -> Any:
def validator(val: Any) -> Any:
+ from pydantic.fields import FieldInfo
+
if isinstance(val, dict):
validated = inner_adapter.validate_python(val)
- return [{key: k, **{f: getattr(v, f) for f in v.model_fields}} for
k, v in validated.items()]
+
+ # need to get the alias of the field in the nested model
+ # as this will be fed into the actual model class
+ def get_alias(field_name: str, field_infos: Mapping[str,
FieldInfo]) -> Any:
+ field = field_infos[field_name]
+ return field.alias if field.alias else field_name
+
+ return [
+ {key: k, **{get_alias(f, v.model_fields): getattr(v, f) for f
in v.model_fields}}
+ for k, v in validated.items()
+ ]
return val
diff --git a/tests/datasources/__init__.py b/tests/datasources/__init__.py
new file mode 100644
index 0000000..13a8339
--- /dev/null
+++ b/tests/datasources/__init__.py
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
diff --git a/tests/datasources/test_apache.py b/tests/datasources/test_apache.py
new file mode 100644
index 0000000..4cd47f4
--- /dev/null
+++ b/tests/datasources/test_apache.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import json
+import os
+from typing import Any
+
+from atr.datasources.apache import CommitteeData, GroupsData, PodlingsData,
ProjectData, RetiredCommitteeData
+
+
+def _load_test_data(name: str) -> Any:
+ with open(os.path.join(os.path.dirname(__file__), "testdata",
f"{name}.json")) as f:
+ return json.load(f)
+
+
+def test_projects_data_model():
+ projects = ProjectData.model_validate(_load_test_data("projects"))
+
+ assert projects is not None
+ assert projects.project_count == 1
+ assert projects.projects[0].name == "tooling"
+
+
+def test_committee_data_model():
+ committees = CommitteeData.model_validate(_load_test_data("committees"))
+
+ assert committees is not None
+ assert committees.pmc_count == 1
+
+ tooling = committees.committees[0]
+ assert tooling.name == "tooling"
+ assert len(tooling.roster) == 3
+ assert "tn" in map(lambda x: x.id, tooling.roster)
+
+ assert len(tooling.chair) == 1
+ assert "wave" in map(lambda x: x.id, tooling.chair)
+
+
+def test_retired_committee_data_model():
+ retired_committees =
RetiredCommitteeData.model_validate(_load_test_data("retired_committees"))
+
+ assert retired_committees is not None
+ assert retired_committees.retired_count == 1
+
+ pmc = retired_committees.retired[0]
+ assert pmc.name == "abdera"
+
+
+def test_podlings_data_model():
+ podlings = PodlingsData.model_validate(_load_test_data("podlings"))
+
+ assert len(podlings) == 1
+ podling = podlings.get("amoro")
+ assert podling is not None
+ assert podling.name == "Apache Amoro (Incubating)"
+
+
+def test_groups_data_model():
+ groups = GroupsData.model_validate(_load_test_data("groups"))
+
+ assert len(groups) == 2
+ assert groups.get("accumulo") is not None
+ assert groups.get("accumulo-pmc") is not None
diff --git a/tests/datasources/testdata/committees.json
b/tests/datasources/testdata/committees.json
new file mode 100644
index 0000000..f8e7f40
--- /dev/null
+++ b/tests/datasources/testdata/committees.json
@@ -0,0 +1,41 @@
+{
+ "last_updated": "2025-02-19 21:57:21 UTC",
+ "committee_count": 1,
+ "pmc_count": 1,
+ "committees": {
+ "tooling": {
+ "display_name": "Tooling",
+ "site": "http://tooling.apache.org/",
+ "description": "tools, tools, tools",
+ "mail_list": "tooling",
+ "established": "01/2025",
+ "report": [
+ "January",
+ "April",
+ "July",
+ "October"
+ ],
+ "chair": {
+ "wave": {
+ "name": "Dave Fisher"
+ }
+ },
+ "roster_count": 3,
+ "roster": {
+ "wave": {
+ "name": "Dave Fisher",
+ "date": "2025-01-01"
+ },
+ "sbp": {
+ "name": "Sean B. Palmer",
+ "date": "2025-02-01"
+ },
+ "tn": {
+ "name": "Thomas Neidhart",
+ "date": "2025-03-01"
+ }
+ },
+ "pmc": true
+ }
+ }
+}
diff --git a/tests/datasources/testdata/groups.json
b/tests/datasources/testdata/groups.json
new file mode 100644
index 0000000..867eaf3
--- /dev/null
+++ b/tests/datasources/testdata/groups.json
@@ -0,0 +1,8 @@
+{
+ "accumulo": [
+ "a"
+ ],
+ "accumulo-pmc": [
+ "b"
+ ]
+}
diff --git a/tests/datasources/testdata/podlings.json
b/tests/datasources/testdata/podlings.json
new file mode 100644
index 0000000..55ad0a7
--- /dev/null
+++ b/tests/datasources/testdata/podlings.json
@@ -0,0 +1,10 @@
+{
+ "amoro": {
+ "description": "Amoro is a Lakehouse management system built on open data
lake formats like Apache Iceberg and Apache Paimon.",
+ "homepage": "http://amoro.incubator.apache.org/",
+ "name": "Apache Amoro (Incubating)",
+ "pmc": "incubator",
+ "podling": true,
+ "started": "2024-03"
+ }
+}
diff --git a/tests/datasources/testdata/projects.json
b/tests/datasources/testdata/projects.json
new file mode 100644
index 0000000..213af2d
--- /dev/null
+++ b/tests/datasources/testdata/projects.json
@@ -0,0 +1,22 @@
+{
+ "lastTimestamp": "20250219115218Z",
+ "project_count": 1,
+ "projects": {
+ "tooling": {
+ "createTimestamp": "20170713020428Z",
+ "modifyTimestamp": "20240725001829Z",
+ "member_count": 3,
+ "owner_count": 3,
+ "members": [
+ "wave",
+ "sbp",
+ "tn"
+ ],
+ "owners": [
+ "wave",
+ "sbp",
+ "tn"
+ ]
+ }
+ }
+}
diff --git a/tests/datasources/testdata/retired_committees.json
b/tests/datasources/testdata/retired_committees.json
new file mode 100644
index 0000000..9ec94cd
--- /dev/null
+++ b/tests/datasources/testdata/retired_committees.json
@@ -0,0 +1,11 @@
+{
+ "last_updated": "2025-02-19 21:57:21 UTC",
+ "retired_count": 1,
+ "retired": {
+ "abdera": {
+ "display_name": "Abdera",
+ "description": "blablabla",
+ "retired": "2017-03"
+ }
+ }
+}
diff --git a/tests/test_apache.py b/tests/test_apache.py
deleted file mode 100644
index 8741d5a..0000000
--- a/tests/test_apache.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-import json
-
-from atr.apache import CommitteeInfo, CommitteeRetired, LDAPProjects
-
-
-def test_ldap_projects_model():
- json_data = """
-{
- "lastTimestamp": "20250219115218Z",
- "project_count": 1,
- "projects": {
- "tooling": {
- "createTimestamp": "20170713020428Z",
- "modifyTimestamp": "20240725001829Z",
- "member_count": 3,
- "owner_count": 3,
- "members": [
- "wave",
- "sbp",
- "tn"
- ],
- "owners": [
- "wave",
- "sbp",
- "tn"
- ]
- }
- }
-}"""
- projects = LDAPProjects.model_validate(json.loads(json_data))
-
- assert projects is not None
- assert projects.project_count == 1
- assert projects.projects[0].name == "tooling"
-
-
-def test_committee_info_model():
- json_data = """
-{
- "last_updated": "2025-02-19 21:57:21 UTC",
- "committee_count": 1,
- "pmc_count": 1,
- "committees": {
- "tooling": {
- "display_name": "Tooling",
- "site": "http://tooling.apache.org/",
- "description": "tools, tools, tools",
- "mail_list": "tooling",
- "established": "01/2025",
- "report": [
- "January",
- "April",
- "July",
- "October"
- ],
- "chair": {
- "wave": {
- "name": "Dave Fisher"
- }
- },
- "roster_count": 3,
- "roster": {
- "wave": {
- "name": "Dave Fisher",
- "date": "2025-01-01"
- },
- "sbp": {
- "name": "Sean B. Palmer",
- "date": "2025-02-01"
- },
- "tn": {
- "name": "Thomas Neidhart",
- "date": "2025-03-01"
- }
- },
- "pmc": true
- }
- }
-}"""
- committees = CommitteeInfo.model_validate(json.loads(json_data))
-
- assert committees is not None
- assert committees.pmc_count == 1
-
- tooling = committees.committees[0]
- assert tooling.name == "tooling"
- assert len(tooling.roster) == 3
- assert "tn" in map(lambda x: x.id, tooling.roster)
-
-
-def test_committee_retired_model():
- json_data = """
-{
- "last_updated": "2025-02-19 21:57:21 UTC",
- "retired_count": 1,
- "retired": {
- "abdera": {
- "display_name": "Abdera",
- "description": "blablabla",
- "retired": "2017-03"
- }
- }
-}"""
- retired_committees = CommitteeRetired.model_validate(json.loads(json_data))
-
- assert retired_committees is not None
- assert retired_committees.retired_count == 1
-
- pmc = retired_committees.retired[0]
- assert pmc.name == "abdera"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]