This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new d9dad703b chore(dev/release): add changelog generator (#2456)
d9dad703b is described below
commit d9dad703bbaf499406f548276d46fad08b292144
Author: David Li <[email protected]>
AuthorDate: Fri Jan 17 03:29:06 2025 -0500
chore(dev/release): add changelog generator (#2456)
Fixes #2452.
---------
Co-authored-by: Sutou Kouhei <[email protected]>
---
.github/workflows/dev_adbc.yml | 72 ++++++++
.github/workflows/dev_pr.yml | 2 +-
ci/conda_env_dev.txt | 4 +-
ci/conda_env_dev.txt => dev/adbc_dev/__init__.py | 6 -
dev/adbc_dev/changelog.py | 135 +++++++++++++++
.../adbc_dev/tests/__init__.py | 6 -
dev/adbc_dev/tests/test_changelog.py | 184 +++++++++++++++++++++
.../dev_pr => dev/adbc_dev}/title_check.py | 61 +++++--
dev/release/01-prepare.sh | 12 ++
dev/release/utils-common.sh | 15 +-
10 files changed, 456 insertions(+), 41 deletions(-)
diff --git a/.github/workflows/dev_adbc.yml b/.github/workflows/dev_adbc.yml
new file mode 100644
index 000000000..817166a12
--- /dev/null
+++ b/.github/workflows/dev_adbc.yml
@@ -0,0 +1,72 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Runs the unit tests for the release/dev tooling under dev/adbc_dev.
+name: Dev ADBC
+
+on:
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "dev/**"
+      - ".github/workflows/dev_adbc.yml"
+  push:
+    paths:
+      - "dev/**"
+      - ".github/workflows/dev_adbc.yml"
+
+concurrency:
+  group: ${{ github.repository }}-${{ github.ref }}-${{ github.workflow }}
+  cancel-in-progress: true
+
+permissions:
+  contents: read
+
+defaults:
+  run:
+    shell: bash -l -eo pipefail {0}
+
+jobs:
+  pre-commit:
+    name: "pre-commit"
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # Full history: the changelog tests look up specific commit hashes.
+          fetch-depth: 0
+          persist-credentials: false
+
+      # Provides steps.get-date.outputs.today for the cache key below.
+      - name: Get Date
+        id: get-date
+        run: |
+          echo "today=$(/bin/date -u '+%Y%m%d')" >> "$GITHUB_OUTPUT"
+
+      - name: Cache Conda
+        uses: actions/cache@v4
+        env:
+          # Increment this to reset the cache manually.
+          CACHE_NUMBER: "0"
+        with:
+          path: ~/conda_pkgs_dir
+          key: conda-${{ runner.os }}-${{ steps.get-date.outputs.today }}-${{ env.CACHE_NUMBER }}-${{ hashFiles('ci/**') }}
+      - uses: conda-incubator/setup-miniconda@v3
+        with:
+          miniforge-version: latest
+          use-only-tar-bz2: false
+          use-mamba: true
+
+      - name: Install Dependencies
+        run: |
+          mamba install -c conda-forge \
+            --file ci/conda_env_dev.txt \
+            pytest
+
+      - name: Test
+        run: |
+          pytest -vv dev/adbc_dev/
diff --git a/.github/workflows/dev_pr.yml b/.github/workflows/dev_pr.yml
index 2beba3120..c1723dcb9 100644
--- a/.github/workflows/dev_pr.yml
+++ b/.github/workflows/dev_pr.yml
@@ -63,7 +63,7 @@ jobs:
env:
PR_TITLE: ${{ github.event.pull_request.title }}
run: |
- python .github/workflows/dev_pr/title_check.py $(pwd)/pr_checkout
"$PR_TITLE"
+ python dev/adbc_dev/title_check.py $(pwd)/pr_checkout "$PR_TITLE"
# Pings make it into the commit message where they annoy the user every
# time the commit gets pushed somewhere
diff --git a/ci/conda_env_dev.txt b/ci/conda_env_dev.txt
index 0a631e884..ff4ea5b3f 100644
--- a/ci/conda_env_dev.txt
+++ b/ci/conda_env_dev.txt
@@ -15,8 +15,10 @@
# specific language governing permissions and limitations
# under the License.
-commitizen
gh>=2.32.0
jq
pre-commit
+pygit2
+python
+python-dotenv
twine
diff --git a/ci/conda_env_dev.txt b/dev/adbc_dev/__init__.py
similarity index 94%
copy from ci/conda_env_dev.txt
copy to dev/adbc_dev/__init__.py
index 0a631e884..13a83393a 100644
--- a/ci/conda_env_dev.txt
+++ b/dev/adbc_dev/__init__.py
@@ -14,9 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-commitizen
-gh>=2.32.0
-jq
-pre-commit
-twine
diff --git a/dev/adbc_dev/changelog.py b/dev/adbc_dev/changelog.py
new file mode 100755
index 000000000..fec3b16c6
--- /dev/null
+++ b/dev/adbc_dev/changelog.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Generate a changelog from our commit log."""
+
+import argparse
+import datetime
+import sys
+from pathlib import Path
+
+import dotenv
+import pygit2
+
+from . import title_check
+
+
+def display(*args, **kwargs):
+    """Print a diagnostic message to stderr.
+
+    Takes the same arguments as :func:`print`; only the output stream is
+    fixed, which keeps diagnostics apart from the changelog that ``main``
+    prints to stdout.
+    """
+    stream = sys.stderr
+    print(*args, file=stream, **kwargs)
+
+
+def get_commit(repo: pygit2.Repository, rev: str) -> pygit2.Oid:
+    """Resolve a revision string to an object ID.
+
+    ``rev`` is first looked up as a reference name with
+    ``lookup_reference_dwim``.  If that raises ``KeyError``, ``rev`` is used
+    to index the repository directly instead (e.g. a commit hash).
+    """
+    try:
+        oid = repo.lookup_reference_dwim(rev).target
+    except KeyError:
+        # Not a known reference name: treat it as an object ID.
+        oid = repo[rev].id
+    return oid
+
+
+def list_commits(
+    repo: pygit2.Repository, from_rev: str, to_rev: str
+) -> list[title_check.Commit]:
+    """Parse the titles of the commits walked from ``to_rev``.
+
+    The walk starts at ``to_rev`` with ``from_rev`` hidden, so the base
+    revision itself is not part of the result.  Each title (the first line of
+    the stripped commit message) is parsed with
+    ``title_check.matches_commit_format``; the parsed result is returned for
+    every commit, whether or not it passed validation.
+    """
+    workdir = Path(repo.workdir)
+    start = get_commit(repo, from_rev)
+    end = get_commit(repo, to_rev)
+
+    history = repo.walk(end, pygit2.GIT_SORT_TIME)
+    history.hide(start)
+
+    return [
+        title_check.matches_commit_format(
+            workdir, entry.message.strip().split("\n")[0]
+        )
+        for entry in history
+    ]
+
+
+def format_commit(commit: title_check.Commit) -> str:
+    """Render one parsed commit as the text of a changelog bullet.
+
+    The result is the subject, preceded by the bold, comma-separated component
+    list when the commit has components, and by a warning sign when the commit
+    is a breaking change.
+    """
+    scope = f"**{', '.join(commit.components)}**: " if commit.components else ""
+    marker = "⚠️ " if commit.breaking_change else ""
+    return f"{marker}{scope}{commit.subject}"
+
+
+def format_section(title: str, commits: list[title_check.Commit]) -> list[str]:
+    """Render one changelog section as a list of Markdown lines.
+
+    Returns an empty list when ``commits`` is empty, so empty sections are
+    left out of the changelog.  Otherwise returns a ``###`` heading, a blank
+    line, one bullet per commit ordered by (components, subject), and a
+    trailing blank line.
+
+    ``commits`` is not modified; ordering is done on a sorted copy.
+    """
+    if not commits:
+        return []
+
+    # sorted() instead of list.sort(): do not reorder the caller's list.
+    ordered = sorted(commits, key=lambda commit: (commit.components, commit.subject))
+    lines = [f"### {title}", ""]
+    lines.extend(f"- {format_commit(commit)}" for commit in ordered)
+    lines.append("")
+    return lines
+
+
+def format_changelog(
+    title: str, release: dict[str, str], commits: list[title_check.Commit]
+) -> str:
+    """Assemble the Markdown changelog entry for one release.
+
+    The entry opens with a ``## <title> (<today's date>)`` heading and a
+    "Versions" list filled from ``release`` (keys ``VERSION_NATIVE``,
+    ``VERSION_CSHARP``, ``VERSION_JAVA``, ``VERSION_R``, ``VERSION_RUST``).
+    It is followed by a "Breaking Changes" section and one section each for
+    the ``feat``, ``fix``, ``docs`` and ``perf`` categories.  A breaking commit
+    in one of those categories is therefore listed twice.  Sections without
+    commits are omitted.
+    """
+    today = datetime.date.today().strftime("%Y-%m-%d")
+    lines = [
+        f"## {title} ({today})",
+        "",
+        "### Versions",
+        "",
+        f"- C/C++/GLib/Go/Python/Ruby: {release['VERSION_NATIVE']}",
+        f"- C#: {release['VERSION_CSHARP']}",
+        f"- Java: {release['VERSION_JAVA']}",
+        f"- R: {release['VERSION_R']}",
+        f"- Rust: {release['VERSION_RUST']}",
+        "",
+    ]
+
+    flagged = [entry for entry in commits if entry.breaking_change]
+    lines += format_section("Breaking Changes", flagged)
+
+    # (section heading, commit category), in output order.
+    sections = (
+        ("New Features", "feat"),
+        ("Bugfixes", "fix"),
+        ("Documentation Improvements", "docs"),
+        ("Performance Improvements", "perf"),
+    )
+    for heading, category in sections:
+        selected = [entry for entry in commits if entry.category == category]
+        lines += format_section(heading, selected)
+
+    return "\n".join(lines)
+
+
+def main():
+    """Generate the changelog for a revision range and print it to stdout.
+
+    Command-line arguments: the positional ``from_rev`` and ``to_rev``, and
+    the required ``--name`` (the release name used in the heading).  Version
+    numbers are read from ``dev/release/versions.env`` under the repository
+    root, taken to be three directory levels above this file.
+
+    Returns 0, which the caller passes on as the process exit status.
+    """
+    parser = argparse.ArgumentParser(description=__doc__)
+    parser.add_argument("from_rev", help="The start revision.")
+    parser.add_argument("to_rev", help="The end revision.")
+    parser.add_argument("--name", required=True, help="The name of the release.")
+
+    args = parser.parse_args()
+
+    repo_root = Path(__file__).parent.parent.parent.resolve()
+    release = dotenv.dotenv_values(repo_root / "dev/release/versions.env")
+    display("Opening repository at", repo_root)
+    repo = pygit2.Repository(repo_root)
+
+    commits = list_commits(repo, args.from_rev, args.to_rev)
+    changelog = format_changelog(args.name, release, commits)
+    # end="": emit the changelog text as-is, without an extra newline.
+    print(changelog, end="")
+
+    return 0
+
+
+# Run as ``python -m adbc_dev.changelog`` with ``dev/`` on PYTHONPATH: the
+# relative ``from . import title_check`` import needs the package context.
+if __name__ == "__main__":
+    raise SystemExit(main())
diff --git a/ci/conda_env_dev.txt b/dev/adbc_dev/tests/__init__.py
similarity index 94%
copy from ci/conda_env_dev.txt
copy to dev/adbc_dev/tests/__init__.py
index 0a631e884..13a83393a 100644
--- a/ci/conda_env_dev.txt
+++ b/dev/adbc_dev/tests/__init__.py
@@ -14,9 +14,3 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
-
-commitizen
-gh>=2.32.0
-jq
-pre-commit
-twine
diff --git a/dev/adbc_dev/tests/test_changelog.py
b/dev/adbc_dev/tests/test_changelog.py
new file mode 100644
index 000000000..d56e8e9c5
--- /dev/null
+++ b/dev/adbc_dev/tests/test_changelog.py
@@ -0,0 +1,184 @@
+#!/usr/bin/env python3
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from pathlib import Path
+
+import pygit2
+import pytest
+
+from .. import changelog, title_check
+
+# Repository root: four directory levels above this file
+# (dev/adbc_dev/tests/test_changelog.py).
+root = Path(__file__).parent.parent.parent.parent.resolve()
+
+
+@pytest.fixture(scope="module")
+def repo() -> pygit2.Repository:
+    """Open the repository containing this file, once per test module."""
+    # Reuse the module-level ``root`` instead of recomputing the same path.
+    return pygit2.Repository(root)
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_basic(commit_type) -> None:
+    """A bare ``type: subject`` title parses with no components and no '!'."""
+    title = f"{commit_type}: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == []
+    assert not commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_breaking(commit_type) -> None:
+    """``type!: subject`` is valid and is flagged as a breaking change."""
+    title = f"{commit_type}!: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == []
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_component(commit_type) -> None:
+    """``type(component): subject`` yields a one-element component list."""
+    title = f"{commit_type}(python): test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["python"]
+    assert not commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_multi(commit_type) -> None:
+    """Several components parse, with '!' either after or before the scope."""
+    # Conventional Commits placement: '!' after the scope.
+    title = f"{commit_type}(c,format,python)!: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["c", "format", "python"]
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+    # Tolerated deviation: '!' before the scope.
+    title = f"{commit_type}!(c,format,python): test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["c", "format", "python"]
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize("commit_type", title_check.COMMIT_TYPES)
+def test_title_check_nested(commit_type) -> None:
+    """Components may be nested paths such as ``c/driver``."""
+    title = f"{commit_type}(c/driver,dev/release)!: test"
+    commit = title_check.matches_commit_format(root, title)
+    assert not commit.failed_validation_reasons
+    assert commit.category == commit_type
+    assert commit.components == ["c/driver", "dev/release"]
+    assert commit.breaking_change
+    assert commit.subject == "test"
+
+
+@pytest.mark.parametrize(
+    "msg",
+    [
+        "feat:",
+        "unknown: foo",
+        "feat(): test",
+        "feat()!: test",
+        "feat!(c)!: test",
+        "feat(nonexistent): test",
+        "feat(c,): test",
+        "feat(c ): test",
+        "feat( c): test",
+        "feat(a#): test",
+    ],
+)
+def test_title_check_bad(msg: str) -> None:
+    """Each malformed title is reported with at least one failure reason."""
+    commit = title_check.matches_commit_format(root, msg)
+    assert commit.failed_validation_reasons
+
+
+def test_list_commits(repo: pygit2.Repository) -> None:
+    """list_commits returns the commits after the base revision.
+
+    The hard-coded hashes must exist in the checkout under test.
+    """
+    # the base rev is not included
+    commits = changelog.list_commits(
+        repo,
+        "2360993884e6f82a6da9080d9fcd0dcf8c362b1d",
+        "8f6ffe5bd1ee5667b5626f91fc3f928f93ae94cd",
+    )
+    assert len(commits) == 2
+    assert (
+        commits[0].subject
+        == "bump com.uber.nullaway:nullaway from 0.12.2 to 0.12.3 in /java (#2417)"
+    )
+    assert (
+        commits[1].subject
+        == "bump golang.org/x/tools from 0.28.0 to 0.29.0 in /go/adbc (#2419)"
+    )
+
+
+@pytest.mark.parametrize("ref", ["HEAD", "2360993884e6f82a6da9080d9fcd0dcf8c362b1d"])
+def test_get_commit(repo: pygit2.Repository, ref: str) -> None:
+    """get_commit resolves both a reference name and a full commit hash."""
+    assert changelog.get_commit(repo, ref) is not None
+
+
+def test_format_commit(repo: pygit2.Repository) -> None:
+    """format_commit renders components in bold and marks breaking changes.
+
+    The hard-coded hashes must exist in the checkout under test.
+    """
+    # A regular commit with a single component.
+    commits = changelog.list_commits(
+        repo,
+        "196522bb2f11665c7b6e0d1ed98da174315a19d9",
+        "63bb903b7ddc7730beaa3d092dc5808632cd4b08",
+    )
+    assert len(commits) == 1
+
+    formatted = changelog.format_commit(commits[0])
+    assert (
+        formatted
+        == "**c**: don't use sketchy cast to test backwards compatibility (#2425)"
+    )
+
+    # A breaking change: the warning sign goes first.
+    commits = changelog.list_commits(
+        repo,
+        "5299ea01ab31b276c27059d82efdbdead22029e9",
+        "460937c76b923420d07d5bcfd29166c80eb45d80",
+    )
+    assert len(commits) == 1
+
+    formatted = changelog.format_commit(commits[0])
+    assert formatted == (
+        "⚠️ **java/driver-manager**: support loading "
+        "AdbcDrivers from the ServiceLoader (#1475)"
+    )
+
+
+def test_format_section(repo: pygit2.Repository) -> None:
+    """format_section omits empty sections and wraps bullets with a heading."""
+    # No commits: no lines at all, not even the heading.
+    assert changelog.format_section("Breaking Changes", []) == []
+
+    commits = changelog.list_commits(
+        repo,
+        "5299ea01ab31b276c27059d82efdbdead22029e9",
+        "460937c76b923420d07d5bcfd29166c80eb45d80",
+    )
+    assert len(commits) == 1
+
+    section = changelog.format_section("Breaking Changes", commits)
+    bullet = f"- {changelog.format_commit(commits[0])}"
+    assert section == ["### Breaking Changes", "", bullet, ""]
diff --git a/.github/workflows/dev_pr/title_check.py
b/dev/adbc_dev/title_check.py
similarity index 65%
rename from .github/workflows/dev_pr/title_check.py
rename to dev/adbc_dev/title_check.py
index df5bf92ca..c67a06733 100644
--- a/.github/workflows/dev_pr/title_check.py
+++ b/dev/adbc_dev/title_check.py
@@ -38,31 +38,55 @@ COMMIT_TYPES = {
}
-def matches_commit_format(root: Path, title: str) -> typing.List[str]:
+class Commit(typing.NamedTuple):
+ """The result of parsing and validating a commit/PR title."""
+
+ # Commit type taken from the start of the title; "" when the title could
+ # not be parsed at all.
+ category: str
+ # Components named in the "(...)" scope; empty when there is no scope.
+ # NOTE(review): the tests expect a list of names here -- confirm the
+ # scope string is split before it reaches this field.
+ components: list[str]
+ # True when the title carries a breaking-change "!" marker.
+ breaking_change: bool
+ # The text after the ": " delimiter.
+ subject: str
+
+ # Reasons the title is invalid; empty when the title is valid.
+ failed_validation_reasons: list[str]
+
+
+def matches_commit_format(root: Path, title: str) -> list[str]:
"""Check a title and return a list of reasons why it's invalid."""
if not root.is_dir():
- return [f"Invalid root: must be a directory: {root}"]
+ return Commit(
+ category="",
+ components=[],
+ breaking_change=False,
+ subject="",
+ failed_validation_reasons=[f"Invalid root: must be a directory:
{root}"],
+ )
# Relax the initial regex a bit, do more friendly validation below
+ # We'll allow a deviation from Conventional Commits (feat!(foo) instead of
+ # feat(foo)!) since that appears to have snuck in already
commit_type = "([a-z]+)"
+ breaking = "(!?)"
scope = r"(?:\(([^\)]*)\))?"
- delimiter = "!?:"
+ delimiter = "(!?):"
subject = " (.+)"
- commit = re.compile(f"^{commit_type}{scope}{delimiter}{subject}$")
+ commit =
re.compile(f"^{commit_type}{breaking}{scope}{delimiter}{subject}$")
valid_component = re.compile(r"^[a-zA-Z0-9_/\-\.]+$")
m = commit.match(title)
if m is None:
- return [
- "Format is incorrect, see
https://www.conventionalcommits.org/en/v1.0.0/"
- ]
+ commit_spec = "https://www.conventionalcommits.org/en/v1.0.0/"
+ return Commit(
+ category="",
+ components=[],
+ breaking_change=False,
+ subject="",
+ failed_validation_reasons=[f"Format is incorrect, see
{commit_spec}"],
+ )
reasons = []
commit_type = m.group(1)
if commit_type not in COMMIT_TYPES:
reasons.append(f"Invalid commit type: {commit_type}")
- components = m.group(2)
+ breaking = m.group(2)
+ components = m.group(3)
if components is not None:
if not components.strip():
reasons.append("Invalid components: must not be empty")
@@ -84,13 +108,24 @@ def matches_commit_format(root: Path, title: str) ->
typing.List[str]:
f"or directory in the repo: {component}"
)
- subject = m.group(3)
+ delimiter = m.group(4)
+ subject = m.group(5)
if subject.strip() != subject:
reasons.append(f"Invalid subject: must have no trailing space:
{subject}")
if subject.strip().endswith("."):
reasons.append(f"Invalid subject: must not end in a period: {subject}")
- return reasons
+ if bool(breaking) and bool(delimiter):
+ # feat!(foo)!: subject
+ reasons.append("Can only provide breaking-change '!' once")
+
+ return Commit(
+ category=commit_type,
+ components=components or [],
+ breaking_change=bool(breaking) or bool(delimiter),
+ subject=subject,
+ failed_validation_reasons=reasons,
+ )
def main():
@@ -102,13 +137,13 @@ def main():
print(f'PR title: "{args.title}"')
- reasons = matches_commit_format(args.root, args.title)
- if not reasons:
+ commit = matches_commit_format(args.root, args.title)
+ if not commit.failed_validation_reasons:
print("Title is valid")
return 0
print("Title is invalid:")
- for reason in reasons:
+ for reason in commit.failed_validation_reasons:
print("-", reason)
return 1
diff --git a/dev/release/01-prepare.sh b/dev/release/01-prepare.sh
index 8767e91d8..bd3dec805 100755
--- a/dev/release/01-prepare.sh
+++ b/dev/release/01-prepare.sh
@@ -57,6 +57,18 @@ main() {
echo ;
changelog
) >> ${SOURCE_DIR}/../../CHANGELOG.md
+
+ read -p "Please review the changelog. Press ENTER to continue..." ignored
+ git diff ${SOURCE_DIR}/../../CHANGELOG.md
+
+ echo "Is the changelog correct?"
+ select yn in "y" "n"; do
+ case $yn in
+ y ) echo "Continuing"; break;;
+ n ) echo "Aborting"; return 1;;
+ esac
+ done
+
git add ${SOURCE_DIR}/../../CHANGELOG.md
git commit -m "chore: update CHANGELOG.md for ${RELEASE}"
diff --git a/dev/release/utils-common.sh b/dev/release/utils-common.sh
index 99c9b6633..af7c77adb 100644
--- a/dev/release/utils-common.sh
+++ b/dev/release/utils-common.sh
@@ -38,20 +38,7 @@ header() {
changelog() {
# Strip trailing blank line
- local -r changelog=$(printf '%s\n' "$(cz ch --dry-run --unreleased-version
"ADBC Libraries ${RELEASE}" --start-rev apache-arrow-adbc-${PREVIOUS_RELEASE})")
- # Split off header
- local -r header=$(echo "${changelog}" | head -n 1)
- local -r trailer=$(echo "${changelog}" | tail -n+2)
- echo "${header}"
- echo
- echo "### Versions"
- echo
- echo "- C/C++/GLib/Go/Python/Ruby: ${VERSION_NATIVE}"
- echo "- C#: ${VERSION_CSHARP}"
- echo "- Java: ${VERSION_JAVA}"
- echo "- R: ${VERSION_R}"
- echo "- Rust: ${VERSION_RUST}"
- echo "${trailer}"
+ env PYTHONPATH=${SOURCE_TOP_DIR}/dev python -m adbc_dev.changelog --name
"ADBC Libraries ${RELEASE}" apache-arrow-adbc-${PREVIOUS_RELEASE} HEAD
2>/dev/null
}
header "Config"