This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git


The following commit(s) were added to refs/heads/main by this push:
     new d9dad703b chore(dev/release): add changelog generator (#2456)
d9dad703b is described below

commit d9dad703bbaf499406f548276d46fad08b292144
Author: David Li <[email protected]>
AuthorDate: Fri Jan 17 03:29:06 2025 -0500

    chore(dev/release): add changelog generator (#2456)
    
    Fixes #2452.
    
    ---------
    
    Co-authored-by: Sutou Kouhei <[email protected]>
---
 .github/workflows/dev_adbc.yml                     |  72 ++++++++
 .github/workflows/dev_pr.yml                       |   2 +-
 ci/conda_env_dev.txt                               |   4 +-
 ci/conda_env_dev.txt => dev/adbc_dev/__init__.py   |   6 -
 dev/adbc_dev/changelog.py                          | 135 +++++++++++++++
 .../adbc_dev/tests/__init__.py                     |   6 -
 dev/adbc_dev/tests/test_changelog.py               | 184 +++++++++++++++++++++
 .../dev_pr => dev/adbc_dev}/title_check.py         |  61 +++++--
 dev/release/01-prepare.sh                          |  12 ++
 dev/release/utils-common.sh                        |  15 +-
 10 files changed, 456 insertions(+), 41 deletions(-)

diff --git a/.github/workflows/dev_adbc.yml b/.github/workflows/dev_adbc.yml
new file mode 100644
index 000000000..817166a12
--- /dev/null
+++ b/.github/workflows/dev_adbc.yml
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Dev ADBC
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "dev/**"
+      - ".github/workflows/dev_adbc.yml"
+  push:
+    paths:
+      - "dev/**"
+      - ".github/workflows/dev_adbc.yml"
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+defaults:
+  run:
+    shell: bash -l -eo pipefail {0}
+
+jobs:
+  pre-commit:
+    name: "pre-commit"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          persist-credentials: false
+
+      - name: Cache Conda
+        uses: actions/cache@v4
+        with:
+          path: ~/conda_pkgs_dir
+          key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }}
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-version: latest
+          use-only-tar-bz2: false
+          use-mamba: true
+
+      - name: Install Dependencies
+        run: |
+          mamba install -c conda-forge \
+            --file ci/conda_env_dev.txt \
+            pytest
+
+      - name: Test
+        run: |
+          pytest -vv dev/adbc_dev/
diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 2beba3120..c1723dcb9 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -63,7 +63,7 @@ jobs:
         env:
           PR_TITLE: ${{ github.event.pull_request.title }}
         run: |
-          python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout "$PR_TITLE"
+          python dev/adbc_dev/title_check.py $(pwd)/pr_checkout "$PR_TITLE"
 
       # Pings make it into the commit message where they annoy the user every
       # time the commit gets pushed somewhere
diff --git a/ci/conda_env_dev.txt b/ci/conda_env_dev.txt
index 0a631e884..ff4ea5b3f 100644
--- a/ci/conda_env_dev.txt
+++ b/ci/conda_env_dev.txt
@@ -15,8 +15,10 @@
 # specific language governing permissions and limitations
 # under the License.
 
-commitizen
 gh>=2.32.0
 jq
 pre-commit
+pygit2
+python
+python-dotenv
 twine
diff --git a/ci/conda_env_dev.txt b/dev/adbc_dev/__init__.py
similarity index 94%
copy from ci/conda_env_dev.txt
copy to dev/adbc_dev/__init__.py
index 0a631e884..13a83393a 100644
--- a/ci/conda_env_dev.txt
+++ b/dev/adbc_dev/__init__.py
@@ -14,9 +14,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-commitizen
-gh>=2.32.0
-jq
-pre-commit
-twine
diff --git a/dev/adbc_dev/changelog.py b/dev/adbc_dev/changelog.py
new file mode 100755
index 000000000..fec3b16c6
--- /dev/null
+++ b/dev/adbc_dev/changelog.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Generate a changelog from our commit log."""
+
+import argparse
+import datetime
+import sys
+from pathlib import Path
+
+import dotenv
+import pygit2
+
+from . import title_check
+
+
+def display(*args, **kwargs):
+    print(*args, file=sys.stderr, **kwargs)
+
+
+def get_commit(repo: pygit2.Repository, rev: str) -> pygit2.Oid:
+    try:
+        return repo.lookup_reference_dwim(rev).target
+    except KeyError:
+        return repo[rev].id
+
+
+def list_commits(
+    repo: pygit2.Repository, from_rev: str, to_rev: str
+) -> list[title_check.Commit]:
+    root = Path(repo.workdir)
+    from_commit = get_commit(repo, from_rev)
+    to_commit = get_commit(repo, to_rev)
+    walker = repo.walk(to_commit, pygit2.GIT_SORT_TIME)
+    walker.hide(from_commit)
+    commits = []
+    for commit in walker:
+        title = commit.message.strip().split("\n")[0]
+        commits.append(title_check.matches_commit_format(root, title))
+    return commits
+
+
+def format_commit(commit: title_check.Commit) -> str:
+    components = ""
+    warning = ""
+    if commit.components:
+        components = f"**{', '.join(commit.components)}**: "
+    if commit.breaking_change:
+        warning = "⚠️ "
+    return f"{warning}{components}{commit.subject}"
+
+
+def format_section(title: str, commits: list[title_check.Commit]) -> list[str]:
+    if not commits:
+        return []
+
+    lines = [f"### {title}", ""]
+    commits.sort(key=lambda commit: (commit.components, commit.subject))
+    lines.extend(f"- {format_commit(commit)}" for commit in commits)
+    lines.append("")
+    return lines
+
+
+def format_changelog(
+    title: str, release: dict[str, str], commits: list[title_check.Commit]
+) -> str:
+    date = datetime.date.today().strftime("%Y-%m-%d")
+    lines = [
+        f"## {title} ({date})",
+        "",
+        "### Versions",
+        "",
+        f"- C/C++/GLib/Go/Python/Ruby: {release['VERSION_NATIVE']}",
+        f"- C#: {release['VERSION_CSHARP']}",
+        f"- Java: {release['VERSION_JAVA']}",
+        f"- R: {release['VERSION_R']}",
+        f"- Rust: {release['VERSION_RUST']}",
+        "",
+    ]
+
+    breaking = [commit for commit in commits if commit.breaking_change]
+    lines.extend(format_section("Breaking Changes", breaking))
+
+    feat = [commit for commit in commits if commit.category == "feat"]
+    lines.extend(format_section("New Features", feat))
+
+    fix = [commit for commit in commits if commit.category == "fix"]
+    lines.extend(format_section("Bugfixes", fix))
+
+    docs = [commit for commit in commits if commit.category == "docs"]
+    lines.extend(format_section("Documentation Improvements", docs))
+
+    perf = [commit for commit in commits if commit.category == "perf"]
+    lines.extend(format_section("Performance Improvements", perf))
+
+    return "\n".join(lines)
+
+
+def main():
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("from_rev", help="The start revision.")
+    parser.add_argument("to_rev", help="The end revision.")
+    parser.add_argument("--name", required=True, help="The name of the release.")
+
+    args = parser.parse_args()
+
+    repo_root = Path(__file__).parent.parent.parent.resolve()
+    release = dotenv.dotenv_values(repo_root / "dev/release/versions.env")
+    display("Opening repository at", repo_root)
+    repo = pygit2.Repository(repo_root)
+
+    commits = list_commits(repo, args.from_rev, args.to_rev)
+    changelog = format_changelog(args.name, release, commits)
+    print(changelog, end="")
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/ci/conda_env_dev.txt b/dev/adbc_dev/tests/__init__.py
similarity index 94%
copy from ci/conda_env_dev.txt
copy to dev/adbc_dev/tests/__init__.py
index 0a631e884..13a83393a 100644
--- a/ci/conda_env_dev.txt
+++ b/dev/adbc_dev/tests/__init__.py
@@ -14,9 +14,3 @@
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
-
-commitizen
-gh>=2.32.0
-jq
-pre-commit
-twine
diff --git a/dev/adbc_dev/tests/test_changelog.py b/dev/adbc_dev/tests/test_changelog.py
new file mode 100644
index 000000000..d56e8e9c5
--- /dev/null
+++ b/dev/adbc_dev/tests/test_changelog.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import pygit2
+import pytest
+
+from .. import changelog, title_check
+
+root = Path(__file__).parent.parent.parent.parent.resolve()
+
+
+@pytest.fixture(scope="module")
+def repo() -> pygit2.Repository:
+    repo_root = Path(__file__).parent.parent.parent.parent.resolve()
+    return pygit2.Repository(repo_root)
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_basic(commit_type) -> None:
+    title = f"{commit_type}: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == []
+    assert not commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_breaking(commit_type) -> None:
+    title = f"{commit_type}!: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == []
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_component(commit_type) -> None:
+    title = f"{commit_type}(python): test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["python"]
+    assert not commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_multi(commit_type) -> None:
+    title = f"{commit_type}(c,format,python)!: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["c", "format", "python"]
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+    title = f"{commit_type}!(c,format,python): test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["c", "format", "python"]
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_nested(commit_type) -> None:
+    title = f"{commit_type}(c/driver,dev/release)!: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["c/driver", "dev/release"]
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize(
+    "msg",
+    [
+        "feat:",
+        "unknown: foo",
+        "feat(): test",
+        "feat()!: test",
+        "feat!(c)!: test",
+        "feat(nonexistent): test",
+        "feat(c,): test",
+        "feat(c ): test",
+        "feat( c): test",
+        "feat(a#): test",
+    ],
+)
+def test_title_check_bad(msg: str) -> None:
+    commit = title_check.matches_commit_format(root, msg)
+    assert commit.failed_validation_reasons
+
+
+def test_list_commits(repo: pygit2.Repository) -> None:
+    # the base rev is not included
+    commits = changelog.list_commits(
+        repo,
+        "2360993884e6f82a6da9080d9fcd0dcf8c362b1d",
+        "8f6ffe5bd1ee5667b5626f91fc3f928f93ae94cd",
+    )
+    assert len(commits) == 2
+    assert (
+        commits[0].subject
+        == "bump com.uber.nullaway:nullaway from 0.12.2 to 0.12.3 in /java (#2417)"
+    )
+    assert (
+        commits[1].subject
+        == "bump golang.org/x/tools from 0.28.0 to 0.29.0 in /go/adbc (#2419)"
+    )
+
+
+@pytest.mark.parametrize("ref", ["HEAD", "2360993884e6f82a6da9080d9fcd0dcf8c362b1d"])
+def test_get_commit(repo: pygit2.Repository, ref: str) -> None:
+    assert changelog.get_commit(repo, ref) is not None
+
+
+def test_format_commit(repo: pygit2.Repository) -> None:
+    commits = changelog.list_commits(
+        repo,
+        "196522bb2f11665c7b6e0d1ed98da174315a19d9",
+        "63bb903b7ddc7730beaa3d092dc5808632cd4b08",
+    )
+    assert len(commits) == 1
+
+    formatted = changelog.format_commit(commits[0])
+    assert (
+        formatted
+        == "**c**: don't use sketchy cast to test backwards compatibility (#2425)"
+    )
+
+    commits = changelog.list_commits(
+        repo,
+        "5299ea01ab31b276c27059d82efdbdead22029e9",
+        "460937c76b923420d07d5bcfd29166c80eb45d80",
+    )
+    assert len(commits) == 1
+
+    formatted = changelog.format_commit(commits[0])
+    assert formatted == (
+        "⚠️ **java/driver-manager**: support loading "
+        "AdbcDrivers from the ServiceLoader (#1475)"
+    )
+
+
+def test_format_section(repo: pygit2.Repository) -> None:
+    assert changelog.format_section("Breaking Changes", []) == []
+
+    commits = changelog.list_commits(
+        repo,
+        "5299ea01ab31b276c27059d82efdbdead22029e9",
+        "460937c76b923420d07d5bcfd29166c80eb45d80",
+    )
+    assert len(commits) == 1
+
+    assert changelog.format_section("Breaking Changes", commits) == [
+        "### Breaking Changes",
+        "",
+        f"- {changelog.format_commit(commits[0])}",
+        "",
+    ]
diff --git a/.github/workflows/dev_pr/title_check.py b/dev/adbc_dev/title_check.py
similarity index 65%
rename from .github/workflows/dev_pr/title_check.py
rename to dev/adbc_dev/title_check.py
index df5bf92ca..c67a06733 100644
--- a/.github/workflows/dev_pr/title_check.py
+++ b/dev/adbc_dev/title_check.py
@@ -38,31 +38,55 @@ COMMIT_TYPES = {
 }
 
 
-def matches_commit_format(root: Path, title: str) -> typing.List[str]:
+class Commit(typing.NamedTuple):
+    category: str
+    components: list[str]
+    breaking_change: bool
+    subject: str
+
+    failed_validation_reasons: list[str]
+
+
+def matches_commit_format(root: Path, title: str) -> list[str]:
     """Check a title and return a list of reasons why it's invalid."""
     if not root.is_dir():
-        return [f"Invalid root: must be a directory: {root}"]
+        return Commit(
+            category="",
+            components=[],
+            breaking_change=False,
+            subject="",
+            failed_validation_reasons=[f"Invalid root: must be a directory: {root}"],
+        )
 
     # Relax the initial regex a bit, do more friendly validation below
+    # We'll allow a deviation from Conventional Commits (feat!(foo) instead of
+    # feat(foo)!) since that appears to have snuck in already
     commit_type = "([a-z]+)"
+    breaking = "(!?)"
     scope = r"(?:\(([^\)]*)\))?"
-    delimiter = "!?:"
+    delimiter = "(!?):"
     subject = " (.+)"
-    commit = re.compile(f"^{commit_type}{scope}{delimiter}{subject}$")
+    commit = re.compile(f"^{commit_type}{breaking}{scope}{delimiter}{subject}$")
     valid_component = re.compile(r"^[a-zA-Z0-9_/\-\.]+$")
 
     m = commit.match(title)
     if m is None:
-        return [
-            "Format is incorrect, see https://www.conventionalcommits.org/en/v1.0.0/"
-        ]
+        commit_spec = "https://www.conventionalcommits.org/en/v1.0.0/"
+        return Commit(
+            category="",
+            components=[],
+            breaking_change=False,
+            subject="",
+            failed_validation_reasons=[f"Format is incorrect, see {commit_spec}"],
+        )
 
     reasons = []
     commit_type = m.group(1)
     if commit_type not in COMMIT_TYPES:
         reasons.append(f"Invalid commit type: {commit_type}")
 
-    components = m.group(2)
+    breaking = m.group(2)
+    components = m.group(3)
     if components is not None:
         if not components.strip():
             reasons.append("Invalid components: must not be empty")
@@ -84,13 +108,24 @@ def matches_commit_format(root: Path, title: str) -> typing.List[str]:
                         f"or directory in the repo: {component}"
                     )
 
-    subject = m.group(3)
+    delimiter = m.group(4)
+    subject = m.group(5)
     if subject.strip() != subject:
         reasons.append(f"Invalid subject: must have no trailing space: {subject}")
     if subject.strip().endswith("."):
         reasons.append(f"Invalid subject: must not end in a period: {subject}")
 
-    return reasons
+    if bool(breaking) and bool(delimiter):
+        # feat!(foo)!: subject
+        reasons.append("Can only provide breaking-change '!' once")
+
+    return Commit(
+        category=commit_type,
+        components=components or [],
+        breaking_change=bool(breaking) or bool(delimiter),
+        subject=subject,
+        failed_validation_reasons=reasons,
+    )
 
 
 def main():
@@ -102,13 +137,13 @@ def main():
 
     print(f'PR title: "{args.title}"')
 
-    reasons = matches_commit_format(args.root, args.title)
-    if not reasons:
+    commit = matches_commit_format(args.root, args.title)
+    if not commit.failed_validation_reasons:
         print("Title is valid")
         return 0
 
     print("Title is invalid:")
-    for reason in reasons:
+    for reason in commit.failed_validation_reasons:
         print("-", reason)
     return 1
 
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index 8767e91d8..bd3dec805 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -57,6 +57,18 @@ main() {
         echo ;
         changelog
     ) >> ${SOURCE_DIR}/../../CHANGELOG.md
+
+    read -p "Please review the changelog. Press ENTER to continue..." ignored
+    git diff ${SOURCE_DIR}/../../CHANGELOG.md
+
+    echo "Is the changelog correct?"
+    select yn in "y" "n"; do
+        case $yn in
+            y ) echo "Continuing"; break;;
+            n ) echo "Aborting"; return 1;;
+        esac
+    done
+
     git add ${SOURCE_DIR}/../../CHANGELOG.md
     git commit -m "chore: update CHANGELOG.md for ${RELEASE}"
 
diff --git a/dev/release/utils-common.sh b/dev/release/utils-common.sh
index 99c9b6633..af7c77adb 100644
--- a/dev/release/utils-common.sh
+++ b/dev/release/utils-common.sh
@@ -38,20 +38,7 @@ header() {
 
 changelog() {
     # Strip trailing blank line
-    local -r changelog=$(printf '%s\n' "$(cz ch --dry-run --unreleased-version "ADBC Libraries ${RELEASE}" --start-rev apache-arrow-adbc-${PREVIOUS_RELEASE})")
-    # Split off header
-    local -r header=$(echo "${changelog}" | head -n 1)
-    local -r trailer=$(echo "${changelog}" | tail -n+2)
-    echo "${header}"
-    echo
-    echo "### Versions"
-    echo
-    echo "- C/C++/GLib/Go/Python/Ruby: ${VERSION_NATIVE}"
-    echo "- C#: ${VERSION_CSHARP}"
-    echo "- Java: ${VERSION_JAVA}"
-    echo "- R: ${VERSION_R}"
-    echo "- Rust: ${VERSION_RUST}"
-    echo "${trailer}"
+    env PYTHONPATH=${SOURCE_TOP_DIR}/dev python -m adbc_dev.changelog --name "ADBC Libraries ${RELEASE}" apache-arrow-adbc-${PREVIOUS_RELEASE} HEAD 2>/dev/null
 }
 
 header "Config"

Reply via email to