This is an automated email from the ASF dual-hosted git repository.
janhoy pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 5e44fb0ce07 SOLR-17979 Improve release tooling during 9.10 release (#3808)
5e44fb0ce07 is described below
commit 5e44fb0ce07980374e176dc1e57f7b9c00d4f349
Author: Jan Høydahl <[email protected]>
AuthorDate: Sun Nov 2 16:12:51 2025 +0100
SOLR-17979 Improve release tooling during 9.10 release (#3808)
- Port tool parseContributorsFromChanges.py to use logchange
- Release Wizard improvements for 9.10.0:
- support for persisting vars in commands
- changed logchange steps
- Many improvements in the one-time changes2logchange.py utility
- Bug fix in changes2html.py to handle ALPHA/BETA releases
- New validateChangelogs.py tool
---
dev-docs/changelog.adoc | 85 ++
dev-tools/scripts/README.md | 13 +
dev-tools/scripts/changes2logchange.py | 306 ++++-
dev-tools/scripts/parseContributorsFromChanges.py | 84 +-
dev-tools/scripts/releaseWizard.py | 105 +-
dev-tools/scripts/releaseWizard.yaml | 200 ++--
dev-tools/scripts/validateChangelogs.py | 1183 ++++++++++++++++++++
.../documentation/changes-to-html/changes2html.py | 2 +-
8 files changed, 1833 insertions(+), 145 deletions(-)
diff --git a/dev-docs/changelog.adoc b/dev-docs/changelog.adoc
index a89b522fcb9..fc4aa72cbc5 100644
--- a/dev-docs/changelog.adoc
+++ b/dev-docs/changelog.adoc
@@ -155,6 +155,91 @@ links:
url: https://issues.apache.org/jira/browse/SOLR-17960
----
+=== 6.3 Changelog validation tool
+
+The tool `dev-tools/scripts/validateChangelogs.py` performs a cross-branch
+validation of the changelog folders. It takes no arguments, discovers the
+branch structure automatically, and checks that each branch (unstable,
+stable, release, previous-stable, previous-bugfix) is in sync with its
+remote and "clean". It then verifies that the changelog folders are the
+same across branches and computes statistics on how many unreleased
+features originate from each branch.
+
+The tool can also output a consolidated `CHANGELOG.md` file that estimates
+which (unreleased) features are likely to be released in which version,
+similar to what `CHANGES.txt` on the main branch used to provide. The tool
+is integrated into the releaseWizard.
+
+ usage: validateChangelogs.py -h
+
+Example report output (JSON or Markdown):
+
+[source,json,title=Example json report]
+----
+{
+ "success": false,
+ "errors": [
+ "Folder v9.8.1 not found on branches: {'branch_9_9'}",
+ "Folder v9.9.0 not found on branches: {'branch_9_9'}"
+ ],
+ "warnings": [],
+ "branch_report": {
+ "branch_9_9": {
+ "version": "9.9.1",
+ "has_changelog_folder": false,
+ "id": "previous_major_bugfix"
+ },
+ "branch_9x": {
+ "version": "9.10.0",
+ "unreleased_count": 31,
+ "id": "previous_major_stable",
+ "new_count": 31,
+ "new": [
+ "SOLR-17541-deprecate -cloudhttp2solrclient-builder.yml",
+ "SOLR-17619 Use logchange for changelog management.yml"
+ ],
+ "not_in_newer_count": 1,
+ "not_in_newer": [
+ "SOLR-17541-deprecate -cloudhttp2solrclient-builder.yml"
+ ]
+ },
+ "branch_10_0": {
+ "version": "10.0.0",
+ "unreleased_count": 146,
+ "id": "release",
+ "new_count": 5,
+ "new": [
+ "GITHUB#3666-removing redundant check if field exists in.yml",
+ "SOLR-12089-remove deprecated -breaksugestiontiebreaker-.yml",
+ "SOLR-14070-deprecate cloudsolrclient-s zookeeper hosts.yml"
+ ],
+ "not_in_newer_count": 2,
+ "not_in_newer": [
+ "SOLR-16562-solr-16578- upgrade caffeine to 3-1-4 and.yml",
+ "SOLR-17012-update apache hadoop to 3-3-6 and apache curator.yml"
+ ]
+ },
+ "branch_10x": {
+ "version": "10.1.0",
+ "unreleased_count": 147,
+ "id": "stable",
+ "new_count": 2,
+ "new": [
+ "PR#3758-logs- removed webapp-solr and also removed from.yml",
+ "SOLR-17963-these solrcloud commands no longer aquire locks.yml"
+ ]
+ },
+ "main": {
+ "version": "11.0.0",
+ "unreleased_count": 147,
+ "id": "main",
+ "new_count": 0,
+ "new": []
+ }
+ }
+}
+----
+
== 7. Further Reading
* xref:https://github.com/logchange/logchange[Logchange web page]
diff --git a/dev-tools/scripts/README.md b/dev-tools/scripts/README.md
index 19e10326095..a16fc9094b6 100644
--- a/dev-tools/scripts/README.md
+++ b/dev-tools/scripts/README.md
@@ -182,6 +182,19 @@ Each YAML file complies with the schema outlined in `dev-docs/changelog.adoc`.
# Default behavior
python3 dev-tools/scripts/changes2logchange.py solr/CHANGES.txt
+### validateChangelogs.py
+
+Validates changelog folder structure and feature distribution across development branches (main, stable, release). See `dev-docs/changelog.adoc` for details.
+
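For reference, the releaseWizard invokes this tool roughly as follows (flags taken from releaseWizard.yaml in this same commit; the report path is a placeholder):

    python3 -u dev-tools/scripts/validateChangelogs.py --format json --skip-sync-check --report-file /tmp/changelog_validation_report.json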
+### parseContributorsFromChanges.py
+
+Extracts unique author names from all YAML changelog files in a version folder and outputs them as a comma-separated list sorted alphabetically. Used by the release manager (RM) to assemble release notes.
+
+ usage: parseContributorsFromChanges.py <version>
+
+ # Example: Extract contributors for version 9.10.0
+ python3 dev-tools/scripts/parseContributorsFromChanges.py 9.10.0
+
### gitignore-gen.sh
TBD
diff --git a/dev-tools/scripts/changes2logchange.py b/dev-tools/scripts/changes2logchange.py
index ae6f3f3e1e0..e51639d84a5 100755
--- a/dev-tools/scripts/changes2logchange.py
+++ b/dev-tools/scripts/changes2logchange.py
@@ -183,6 +183,10 @@ class AuthorParser:
# and then end of string
AUTHOR_PATTERN = re.compile(r'\s+\(([^()]+)\)\s*[.,]?\s*$', re.MULTILINE)
+ # Pattern to detect JIRA/GitHub issue references (should be extracted as links, not authors)
+ # Matches: SOLR-65, LUCENE-123, INFRA-456, PR#789, PR-789, GITHUB#123
+ ISSUE_PATTERN = re.compile(r'^(?:SOLR|LUCENE|INFRA)-\d+$|^PR[#-]\d+$|^GITHUB#\d+$')
+
@staticmethod
def parse_authors(entry_text: str) -> Tuple[str, List[Author]]:
"""
@@ -199,6 +203,9 @@ class AuthorParser:
Only matches author attribution at the END of the entry text,
not in the middle of descriptions like (aka Standalone)
+
+ Note: JIRA/GitHub issue IDs found in the author section are NOT added as authors,
+ but are preserved in the returned text so IssueExtractor can process them as links.
"""
# Find ALL matches and use the LAST one (rightmost)
# This ensures we get the actual author attribution, not mid-text parentheses
@@ -214,39 +221,55 @@ class AuthorParser:
cleaned_text = entry_text[:match.start()].rstrip()
authors = []
+ found_issues = [] # Track JIRA issues found in author section
- # Split by comma, but be aware of "via" keyword
- # Pattern: "Author via Committer" or just "Author"
- segments = [seg.strip() for seg in author_text.split(',')]
+ # Split by comma and slash, which are both used as delimiters in author sections
+ # Patterns handled:
+ # - "Author1, Author2" (comma delimiter)
+ # - "Author1 / Author2" (slash delimiter)
+ # - "Author1, Issue1 / Author2" (mixed delimiters)
+ # Also aware of "via" keyword: "Author via Committer"
+ segments = [seg.strip() for seg in re.split(r'[,/]', author_text)]
for segment in segments:
segment = segment.strip()
if not segment:
continue
+ # Check if this is a JIRA/GitHub issue reference
+ if AuthorParser.ISSUE_PATTERN.match(segment):
+ # Don't add as author, but remember to add it back to text for IssueExtractor
+ found_issues.append(segment)
+ continue
+
# Handle "via" prefix (standalone or after author name)
if segment.startswith('via '):
- # Malformed: standalone "via Committer" (comma was added incorrectly)
# Extract just the committer name
committer_name = segment[4:].strip() # Remove "via " prefix
- if committer_name:
+ if committer_name and not AuthorParser.ISSUE_PATTERN.match(committer_name):
authors.append(Author(name=committer_name))
elif ' via ' in segment:
# Format: "Author via Committer"
parts = segment.split(' via ')
author_name = parts[0].strip()
+ committer_name = parts[1].strip() if len(parts) > 1 else ""
- if author_name:
- # Normal case: "Author via Committer" - add the author
+ # Add author if not an issue ID
+ if author_name and not AuthorParser.ISSUE_PATTERN.match(author_name):
authors.append(Author(name=author_name))
- else:
- # Should not happen, but handle it
- committer_name = parts[1].strip() if len(parts) > 1 else ""
- if committer_name:
- authors.append(Author(name=committer_name))
+
+ # Also add committer (the part after "via") as an author
+ if committer_name and not AuthorParser.ISSUE_PATTERN.match(committer_name):
+ authors.append(Author(name=committer_name))
else:
- # Just an author name
- authors.append(Author(name=segment))
+ # Just an author name (if not an issue ID)
+ if not AuthorParser.ISSUE_PATTERN.match(segment):
+ authors.append(Author(name=segment))
+
+ # Add found issues back to the cleaned text so IssueExtractor can find them
+ if found_issues:
+ cleaned_text = cleaned_text + " " + " ".join(found_issues)
return cleaned_text, authors
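To make the new behavior concrete, here is a small standalone sketch (not part of the patch) of how issue references in the trailing author section are kept out of the author list and preserved for link extraction:

import re

ISSUE_PATTERN = re.compile(r'^(?:SOLR|LUCENE|INFRA)-\d+$|^PR[#-]\d+$|^GITHUB#\d+$')

author_text = "Jane Doe via John Smith, SOLR-1234 / PR#42"
segments = [s.strip() for s in re.split(r'[,/]', author_text)]
print([s for s in segments if s and not ISSUE_PATTERN.match(s)])  # ['Jane Doe via John Smith']
print([s for s in segments if ISSUE_PATTERN.match(s)])            # ['SOLR-1234', 'PR#42']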
@@ -304,7 +327,9 @@ class SlugGenerator:
"""
Generate a slug from issue ID and title.
- Format: ISSUE-12345-short-slug or VERSION-entry-001-short-slug
+ Format: ISSUE-12345 short slug or VERSION entry 001 short slug
+ Note: Previous slug formats used dashes ("ISSUE-12345-short-slug"), but this script now uses spaces between components (e.g., "ISSUE-12345 short slug").
+ Spaces are preferred over dashes for improved readability, better preservation of word boundaries, and to avoid unnecessary character substitutions. This change also ensures that filenames remain filesystem-safe while being more human-friendly.
Uses the actual issue ID without forcing SOLR- prefix
Ensures filesystem-safe filenames and respects word boundaries
Whitespace is preserved as spaces (not converted to dashes)
@@ -316,7 +341,7 @@ class SlugGenerator:
title_slug = SlugGenerator._sanitize_filename_part(title)
# Limit to reasonable length while respecting word boundaries
- # Target max length: 50 chars for slug (leaving room for base_issue and dash)
+ # Target max length: 50 chars for slug (leaving room for base_issue and space)
if len(title_slug) > 50:
# Find last word/space boundary within 50 chars
truncated = title_slug[:50]
@@ -333,7 +358,7 @@ class SlugGenerator:
# If no good boundary, use hard limit and clean up
title_slug = truncated.rstrip(' -')
- return f"{base_issue}-{title_slug}"
+ return f"{base_issue} {title_slug}"
@staticmethod
def _sanitize_issue_id(issue_id: str) -> str:
@@ -360,9 +385,9 @@ class SlugGenerator:
"""
Sanitize text for use in filenames.
- Convert to lowercase
- - Replace unsafe characters with dashes
- - Convert any whitespace to space (preserved in filename)
- - Remove multiple consecutive spaces or dashes
+ - Remove quotes, colons, backticks
+ - Replace other unsafe characters with dashes
+ - Convert any whitespace to single space
- Strip leading/trailing spaces and dashes
"""
# Convert to lowercase
@@ -371,17 +396,31 @@ class SlugGenerator:
# Normalize all whitespace to single spaces
text = re.sub(r'\s+', ' ', text)
- # Replace unsafe characters with dash
+ # Remove quotes, colons, backticks entirely (don't replace with dash)
+ text = re.sub(r'["\':´`]', '', text)
+
+ # Replace other unsafe characters (from UNSAFE_CHARS_PATTERN) with dash
+ # This covers: < > " / \ | ? * and control characters
+ # Note: we already removed quotes and colons above
text = SlugGenerator.UNSAFE_CHARS_PATTERN.sub('-', text)
- # Replace other non-alphanumeric (except space and dash) with dash
- text = re.sub(r'[^a-z0-9\s-]+', '-', text)
+ # Replace other non-alphanumeric (except space, dash, and dot) with dash
+ text = re.sub(r'[^a-z0-9\s.\-]+', '-', text)
# Replace multiple consecutive dashes with single dash (but preserve spaces)
text = re.sub(r'-+', '-', text)
- # Strip leading/trailing spaces and dashes
- text = text.strip(' -')
+ # Remove trailing dashes before we clean up space-dash sequences
+ text = text.rstrip('-')
+
+ # Handle " -" and "- " sequences: collapse to single space
+ text = re.sub(r'\s*-\s*', ' ', text)
+
+ # Replace multiple consecutive spaces with single space
+ text = re.sub(r'\s+', ' ', text)
+
+ # Strip leading/trailing spaces
+ text = text.strip(' ')
return text
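A minimal standalone sketch of the sanitization pipeline described above (steps mirrored from this diff; the real method additionally applies UNSAFE_CHARS_PATTERN first):

import re

def sanitize(text: str) -> str:
    text = text.lower()
    text = re.sub(r'\s+', ' ', text)              # normalize whitespace to single spaces
    text = re.sub(r'["\':´`]', '', text)          # drop quotes, colons, backticks
    text = re.sub(r'[^a-z0-9\s.\-]+', '-', text)  # other unsafe characters become dashes
    text = re.sub(r'-+', '-', text).rstrip('-')   # collapse and trim dashes
    text = re.sub(r'\s*-\s*', ' ', text)          # dashes (and surrounding spaces) collapse to a space
    return re.sub(r'\s+', ' ', text).strip(' ')

print(sanitize('Deprecate "CloudHttp2SolrClient.Builder" & friends'))
# deprecate cloudhttp2solrclient.builder friends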
@@ -406,7 +445,8 @@ class ChangesParser:
"""Main parser for CHANGES.txt file."""
# Pattern to match version headers: ================== 10.0.0 ==================
- VERSION_HEADER_PATTERN = re.compile(r'=+\s+([\d.]+)\s+=+')
+ # Also supports pre-release versions: 4.0.0-ALPHA, 4.0.0-BETA, 4.0.0-RC1, etc.
+ VERSION_HEADER_PATTERN = re.compile(r'=+\s+([\d.]+(?:-[A-Za-z0-9]+)?)\s+=+')
# Pattern to match section headers: "Section Name" followed by dashes
# Matches patterns like "New Features\n---------------------"
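A quick standalone check (not part of the patch) of the widened header pattern:

import re

VERSION_HEADER_PATTERN = re.compile(r'=+\s+([\d.]+(?:-[A-Za-z0-9]+)?)\s+=+')
for header in ("==================  10.0.0  ==================",
               "==================  4.0.0-ALPHA  =================="):
    print(VERSION_HEADER_PATTERN.search(header).group(1))
# 10.0.0
# 4.0.0-ALPHA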
@@ -643,6 +683,206 @@ class ReleaseDate:
return version_dates, latest_version
+class VersionWriter:
+ """Handles version enumeration, comparison, and release-date.txt writing."""
+
+ def __init__(self, changes_file_path: str, changelog_dir: str):
+ self.changes_file_path = changes_file_path
+ self.changelog_dir = Path(changelog_dir)
+ self.parser = ChangesParser(changes_file_path)
+
+ # Fetch release dates from Apache projects JSON
+ version_dates_raw, _ = ReleaseDate.fetch_release_dates_and_latest()
+
+ # Normalize version keys for consistent lookup (e.g., "3.1" -> "3.1.0")
+ self.version_dates = {}
+ for version, date in version_dates_raw.items():
+ normalized = self._normalize_version(version)
+ # Keep the first occurrence (most canonical form)
+ if normalized not in self.version_dates:
+ self.version_dates[normalized] = date
+
+ def run(self):
+ """Execute version comparison and release-date.txt writing."""
+ print("Parsing CHANGES.txt for versions...")
+ self.parser.parse()
+
+ # Extract versions from CHANGES.txt
+ changes_versions = set(vs.version for vs in self.parser.versions)
+ print(f"Found {len(changes_versions)} versions in CHANGES.txt")
+
+ # Get existing version folders
+ existing_folders = self.get_existing_version_folders()
+ print(f"Found {len(existing_folders)} existing version folders in
changelog/")
+
+ # Get versions from solr.json (which is what ReleaseDate fetches)
+ solr_json_versions = set(self.version_dates.keys())
+ print(f"Found {len(solr_json_versions)} versions in solr.json\n")
+
+ # Build normalized version mappings for matching (supports semver like
3.1 == 3.1.0)
+ changes_normalized = {self._normalize_version(v): v for v in
changes_versions}
+ existing_normalized = {self._normalize_version(v): v for v in
existing_folders}
+ solr_normalized = {self._normalize_version(v): v for v in
solr_json_versions}
+
+ # Combine all normalized versions
+ all_normalized = sorted(set(changes_normalized.keys()) |
set(solr_normalized.keys()) | set(existing_normalized.keys()),
+ key=self._version_sort_key)
+
+ # Print comparison report
+ self._print_comparison_report(all_normalized, changes_normalized,
solr_normalized, existing_normalized)
+
+ # Write release-date.txt for existing folders
+ self._write_release_dates(existing_normalized)
+
+ def get_existing_version_folders(self) -> set:
+ """Get all existing vX.Y.Z folders in changelog/."""
+ if not self.changelog_dir.exists():
+ return set()
+
+ folders = set()
+ for item in self.changelog_dir.iterdir():
+ if item.is_dir() and item.name.startswith('v') and
item.name[1:].replace('.', '').isdigit():
+ # Extract version without 'v' prefix
+ version = item.name[1:]
+ folders.add(version)
+
+ return folders
+
+ @staticmethod
+ def _normalize_version(version: str) -> str:
+ """
+ Normalize incomplete version strings to X.Y.Z format.
+ Complete versions (3+ numeric parts) are left unchanged.
+ Incomplete versions are padded with zeros.
+ Pre-release versions (e.g., 4.0.0-ALPHA) are handled correctly.
+
+ Supports semantic versioning where "3.1" matches "3.1.0".
+ But keeps distinct versions separate: 3.6.0, 3.6.1, 3.6.2 are NOT
normalized to the same value.
+
+ Examples:
+ - "3.1" -> "3.1.0" (2 parts, pad to 3)
+ - "3" -> "3.0.0" (1 part, pad to 3)
+ - "3.1.0" -> "3.1.0" (3 parts, unchanged)
+ - "3.6.1" -> "3.6.1" (3 parts, unchanged)
+ - "3.6.2" -> "3.6.2" (3 parts, unchanged - NOT collapsed!)
+ - "4.0.0-ALPHA" -> "4.0.0-ALPHA" (pre-release, unchanged)
+ - "4.0-ALPHA" -> "4.0.0-ALPHA" (incomplete pre-release, pad to 3
numeric parts)
+ - "4.0.0-ALPHA.0" -> "4.0.0-ALPHA" (remove spurious .0 from
pre-release)
+ - "3.1.0.0" -> "3.1.0.0" (4 parts, unchanged)
+ """
+ # Check if this is a pre-release version (contains dash)
+ if '-' in version:
+ # Split on the dash to separate numeric version from pre-release
identifier
+ base_version, prerelease = version.split('-', 1)
+ base_parts = base_version.split('.')
+
+ # Pad the base version to 3 parts
+ while len(base_parts) < 3:
+ base_parts.append('0')
+
+ # Take only first 3 numeric parts, then rejoin with pre-release
identifier
+ # This prevents "4.0.0-ALPHA.0" from being added
+ normalized_base = '.'.join(base_parts[:3])
+ return f"{normalized_base}-{prerelease}"
+ else:
+ # Non-pre-release version - use original logic
+ parts = version.split('.')
+
+ # If already 3+ parts, return as-is (complete version)
+ if len(parts) >= 3:
+ return version
+
+ # If less than 3 parts, pad with zeros to make it 3 parts
+ while len(parts) < 3:
+ parts.append('0')
+ return '.'.join(parts)
+
+ def _version_sort_key(self, version: str) -> tuple:
+ """Convert version string to sortable tuple for proper ordering."""
+ try:
+ from packaging import version as pkg_version
+ return (pkg_version.parse(version),)
+ except Exception:
+ return (version,)
+
+ def _print_comparison_report(self, all_normalized_versions: list,
changes_normalized: dict,
+ solr_normalized: dict, existing_normalized:
dict):
+ """
+ Print a comparison report of versions across sources.
+
+ Args:
+ all_normalized_versions: List of normalized versions to display
+ changes_normalized: Dict mapping normalized version -> original
version from CHANGES.txt
+ solr_normalized: Dict mapping normalized version -> original
version from solr.json
+ existing_normalized: Dict mapping normalized version -> original
version from folders
+ """
+ print("=" * 100)
+ print(f"{'Normalized':<15} | {'CHANGES.txt':<15} | {'solr.json':<15} |
{'Folder':<15} | {'Release Date':<20}")
+ print("-" * 100)
+
+ for norm_version in all_normalized_versions:
+ in_changes = "✓" if norm_version in changes_normalized else " "
+ in_solr_json = "✓" if norm_version in solr_normalized else " "
+ has_folder = "✓" if norm_version in existing_normalized else " "
+
+ # Get original version strings for display
+ orig_changes = changes_normalized.get(norm_version, "")
+ orig_solr = solr_normalized.get(norm_version, "")
+ orig_folder = existing_normalized.get(norm_version, "")
+
+ # Get release date using normalized version (all version_dates
keys are normalized)
+ release_date = self.version_dates.get(norm_version, "(no date)")
+
+ # Format original versions as "orig" if different from normalized
+ changes_str = f"{orig_changes}" if orig_changes and orig_changes
!= norm_version else ""
+ solr_str = f"{orig_solr}" if orig_solr and orig_solr !=
norm_version else ""
+ folder_str = f"{orig_folder}" if orig_folder and orig_folder !=
norm_version else ""
+
+ print(f"{norm_version:<15} | {in_changes} {changes_str:<13} |
{in_solr_json} {solr_str:<13} | {has_folder} {folder_str:<13} |
{release_date:<20}")
+
+ print("=" * 100)
+
+ def _write_release_dates(self, existing_normalized: dict):
+ """
+ Write release-date.txt files for existing version folders that don't
have them.
+
+ Args:
+ existing_normalized: Dict mapping normalized version -> original
folder version string
+ """
+ written_count = 0
+ skipped_count = 0
+
+ print("\nWriting release-date.txt files:")
+ for norm_version in sorted(existing_normalized.keys(),
key=self._version_sort_key):
+ orig_folder_version = existing_normalized[norm_version]
+ version_dir = self.changelog_dir / f"v{orig_folder_version}"
+ release_date_file = version_dir / "release-date.txt"
+
+ # Get release date using normalized version (all version_dates
keys are normalized)
+ release_date = self.version_dates.get(norm_version)
+
+ if release_date:
+ if release_date_file.exists():
+ existing_content = release_date_file.read_text().strip()
+ if existing_content == release_date:
+ print(f" ✓ {orig_folder_version}: already has
release-date.txt")
+ else:
+ print(f" ⚠ {orig_folder_version}: already has
release-date.txt with different date ({existing_content})")
+ skipped_count += 1
+ else:
+ with open(release_date_file, 'w', encoding='utf-8') as f:
+ f.write(release_date + '\n')
+ version_display = f"{orig_folder_version} (normalized:
{norm_version})" if orig_folder_version != norm_version else orig_folder_version
+ print(f" ✓ {version_display}: wrote release-date.txt
({release_date})")
+ written_count += 1
+ else:
+ version_display = f"{orig_folder_version} (normalized:
{norm_version})" if orig_folder_version != norm_version else orig_folder_version
+ print(f" ⚠ {version_display}: no date found in solr.json")
+ skipped_count += 1
+
+ print(f"\nSummary: {written_count} files written, {skipped_count}
skipped/existing")
+
+
class MigrationRunner:
"""Orchestrates the complete migration process."""
@@ -707,8 +947,8 @@ class MigrationRunner:
print(f" Found {len(version_section.entries)} entries")
- # Write release-date.txt if we have a date for this version
- if version_section.version in self.version_dates:
+ # Write release-date.txt if we have a date for this version (only for
released versions)
+ if not is_unreleased and version_section.version in self.version_dates:
release_date = self.version_dates[version_section.version]
release_date_file = version_dir / "release-date.txt"
version_dir.mkdir(parents=True, exist_ok=True)
@@ -910,6 +1150,11 @@ def main():
help="Last released version (e.g., 9.9.0). Versions newer than this go
to unreleased/. "
"If not specified, fetches from Apache projects JSON."
)
+ parser.add_argument(
+ "--write-versions",
+ action="store_true",
+ help="Parse CHANGES.txt to enumerate versions, compare with solr.json,
and write release-date.txt files to existing changelog folders"
+ )
args = parser.parse_args()
@@ -922,6 +1167,13 @@ def main():
print(f"Error: CHANGES.txt file not found: {args.changes_file}",
file=sys.stderr)
sys.exit(1)
+ # Handle --write-versions mode
+ if args.write_versions:
+ writer = VersionWriter(args.changes_file, args.output_dir)
+ writer.run()
+ return
+
+ # Standard migration mode
runner = MigrationRunner(args.changes_file, args.output_dir,
args.last_released)
runner.run()
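For reference, the new mode would be invoked with the same positional CHANGES.txt argument shown in the README:

    python3 dev-tools/scripts/changes2logchange.py solr/CHANGES.txt --write-versions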
diff --git a/dev-tools/scripts/parseContributorsFromChanges.py b/dev-tools/scripts/parseContributorsFromChanges.py
old mode 100644
new mode 100755
index de298420905..d8f6eb42b57
--- a/dev-tools/scripts/parseContributorsFromChanges.py
+++ b/dev-tools/scripts/parseContributorsFromChanges.py
@@ -1,3 +1,5 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
@@ -14,51 +16,59 @@
# limitations under the License.
import sys
-import re
-from collections import defaultdict
+import yaml
+from pathlib import Path
-# Read data from standard input
-data = sys.stdin.read()
+def print_usage():
+ print("Usage: parseContributorsFromChanges.py <version>")
+ print(" <version>: Version number (e.g., 9.10.0)")
+ print("\nThis script parses all YAML files in changelog/v<version>/ and
extracts unique authors.")
+ print("Output is a comma-separated list of authors sorted by name.")
+ sys.exit(1)
-# Replace all carriage return line feed (Windows) with line feed
-data = data.replace('\r\n', '\n')
+if len(sys.argv) < 2:
+ print("Error: Missing required argument <version>")
+ print_usage()
-# Replace all carriage return (Mac OS before X) with line feed
-data = data.replace('\r', '\n')
+version = sys.argv[1]
+changelog_dir = Path(f"changelog/v{version}")
-# Split data at blank lines
-paras = data.split('\n\n')
+if not changelog_dir.exists():
+ print(f"Error: Directory '{changelog_dir}' does not exist")
+ sys.exit(1)
-# Initialize a default dictionary to store contributors and their counts
-contributors = defaultdict(int)
+# Collect all unique authors
+authors = set()
-# Regular expression to find the attribution in parentheses at the end of a
line
-pattern = re.compile(r"\(([^()]*)\)$")
+# Process all .yml and .yaml files in the changelog directory
+yaml_files = list(changelog_dir.glob("*.yml")) +
list(changelog_dir.glob("*.yaml"))
-for para in paras:
- # Normalize whitespace (replace all whitespace with a single space)
- para = re.sub(r"\s+", ' ', para).strip()
- #print(f'> {para}')
+if not yaml_files:
+ print(f"Warning: No YAML files found in {changelog_dir}")
+ sys.exit(0)
- # Find all contributors in the line
- match = pattern.search(para.strip())
- if match:
- attribution = match.group(1)
- # might have a "via" committer; we only want the author here
- attribution = attribution.split(" via ")[0] # keep left side
- # Split the contributors by comma and strip whitespace
- for contributor in attribution.split(','):
- contributor = contributor.strip()
- contributors[contributor] += 1
+for yaml_file in sorted(yaml_files):
+ try:
+ with open(yaml_file, 'r') as f:
+ data = yaml.safe_load(f)
+ if data and 'authors' in data:
+ author_list = data['authors']
+ if isinstance(author_list, list):
+ for author_entry in author_list:
+ if isinstance(author_entry, dict) and 'name' in
author_entry:
+ author_name = author_entry['name'].strip()
+ # Filter out solrbot
+ if author_name.lower() != 'solrbot':
+ authors.add(author_name)
+ except Exception as e:
+ print(f"Warning: Error parsing {yaml_file}: {e}", file=sys.stderr)
-if 'solrbot' in contributors:
- del contributors['solrbot']
+# Sort authors by name
+sorted_authors = sorted(list(authors))
-sorted_contributors = sorted(contributors.items(), key=lambda item: item[1],
reverse=True)
+# Print contributors
+for author in sorted_authors:
+ print(author)
-# Print the contributors and their counts
-for contributor, count in sorted_contributors:
- print(f'{contributor}: {count}')
-
-print('\n\nThanks to all contributors!: ')
-print(', '.join([contributor for contributor, count in sorted_contributors]))
\ No newline at end of file
+print('\nThanks to all contributors!: ')
+print(', '.join(sorted_authors))
\ No newline at end of file
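As a small aside, here is a standalone sketch (hypothetical entry; field names taken from the parsing code above) of the changelog YAML shape this script consumes:

import yaml

entry = yaml.safe_load("""
title: Improve release tooling
authors:
  - name: Jan Høydahl
  - name: solrbot
""")
# Apply the same filtering as the script: drop solrbot, keep unique names sorted.
names = sorted({a["name"].strip() for a in entry["authors"]
                if a["name"].strip().lower() != "solrbot"})
print(", ".join(names))  # Jan Høydahl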
diff --git a/dev-tools/scripts/releaseWizard.py b/dev-tools/scripts/releaseWizard.py
index dffe84c0fd3..de33ce4acb5 100755
--- a/dev-tools/scripts/releaseWizard.py
+++ b/dev-tools/scripts/releaseWizard.py
@@ -56,7 +56,7 @@ try:
import holidays
import yaml
from ics import Calendar, Event
- from jinja2 import Environment
+ from jinja2 import Environment, Undefined
except:
print("You lack some of the module dependencies to run this script.")
print("Please run 'pip3 install -r requirements.txt' and try again.")
@@ -74,6 +74,25 @@ state = None
templates = None
solr_news_file = None
+
+class ReadableUndefined(Undefined):
+ """Custom Undefined handler that renders undefined variables as {{ varname }}
+
+ This allows users to see which variables are not yet defined when displaying
+ command templates before execution, particularly useful for persist_vars
+ that haven't been captured yet.
+ """
+ def __str__(self):
+ return "{{ %s }}" % self._undefined_name
+
+ def __getattr__(self, name):
+ # Handle special Python attributes normally
+ if name[:2] == '__':
+ raise AttributeError(name)
+ # Chain undefined attribute access for nested vars like {{ todo_id.var_name }}
+ return ReadableUndefined(name="%s.%s" % (self._undefined_name, name))
+
+
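A short standalone sketch of the effect (the class is re-declared here only for illustration; the real implementation above also chains attribute access for nested names):

from jinja2 import Environment, Undefined

class ReadableUndefined(Undefined):
    def __str__(self):
        return "{{ %s }}" % self._undefined_name

env = Environment(undefined=ReadableUndefined)
print(env.from_string("git cherry-pick {{ changelog_rm_sha }}").render())
# git cherry-pick {{ changelog_rm_sha }}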
# Edit this to add other global jinja2 variables or filters
def expand_jinja(text, vars=None):
global_vars = OrderedDict({
@@ -141,7 +160,7 @@ def expand_jinja(text, vars=None):
filled = replace_templates(text)
try:
- env = Environment(lstrip_blocks=True, keep_trailing_newline=False,
trim_blocks=True)
+ env = Environment(lstrip_blocks=True, keep_trailing_newline=False,
trim_blocks=True, undefined=ReadableUndefined)
env.filters['path_join'] = lambda paths: os.path.join(*paths)
env.filters['expanduser'] = lambda path: os.path.expanduser(path)
env.filters['formatdate'] = lambda date: (datetime.strftime(date, "%-d
%B %Y") if date else "<date>" )
@@ -1420,19 +1439,22 @@ def tail_file(file, lines):
break
-def run_with_log_tail(command, cwd, logfile=None, tail_lines=10, tee=False,
live=False, shell=None):
+def run_with_log_tail(command, cwd, logfile=None, tail_lines=10, tee=False,
live=False, shell=None, capture_output=False):
fh = sys.stdout
if logfile:
logdir = os.path.dirname(logfile)
if not os.path.exists(logdir):
os.makedirs(logdir)
fh = open(logfile, 'w')
- rc = run_follow(command, cwd, fh=fh, tee=tee, live=live, shell=shell)
+
+ rc, captured_output = run_follow(command, cwd, fh=fh, tee=tee, live=live,
shell=shell, capture_output=capture_output)
+
if logfile:
fh.close()
if not tee and tail_lines and tail_lines > 0:
tail_file(logfile, tail_lines)
- return rc
+
+ return rc, captured_output
def ask_yes_no(text):
@@ -1463,7 +1485,7 @@ def print_line_cr(line, linenum, stdout=True, tee=False):
print(line.rstrip())
-def run_follow(command, cwd=None, fh=sys.stdout, tee=False, live=False,
shell=None):
+def run_follow(command, cwd=None, fh=sys.stdout, tee=False, live=False,
shell=None, capture_output=False):
doShell = '&&' in command or '&' in command or shell is not None
if not doShell and not isinstance(command, list):
command = shlex.split(command)
@@ -1479,6 +1501,7 @@ def run_follow(command, cwd=None, fh=sys.stdout,
tee=False, live=False, shell=No
endstdout = endstderr = False
errlines = []
+ captured_lines = [] if capture_output else None
while not (endstderr and endstdout):
lines_before = lines_written
if not endstdout:
@@ -1490,6 +1513,8 @@ def run_follow(command, cwd=None, fh=sys.stdout,
tee=False, live=False, shell=No
else:
fh.write(chars)
fh.flush()
+ if capture_output:
+ captured_lines.append(chars)
if '\n' in chars:
lines_written += 1
else:
@@ -1499,6 +1524,8 @@ def run_follow(command, cwd=None, fh=sys.stdout,
tee=False, live=False, shell=No
else:
fh.write("%s\n" % line.rstrip())
fh.flush()
+ if capture_output:
+ captured_lines.append(line)
lines_written += 1
print_line_cr(line, lines_written, stdout=(fh ==
sys.stdout), tee=tee)
@@ -1536,7 +1563,12 @@ def run_follow(command, cwd=None, fh=sys.stdout,
tee=False, live=False, shell=No
for line in errlines:
fh.write("%s\n" % line.rstrip())
fh.flush()
- return rc
+
+ captured_output = None
+ if capture_output and captured_lines is not None:
+ captured_output = "".join(captured_lines)
+
+ return rc, captured_output
def is_windows():
@@ -1637,6 +1669,7 @@ class Commands(SecretYamlObject):
logfilename = cmd.logfile
logfile = None
cmd_to_run = "%s%s" % ("echo Dry run, command is: " if
dry_run else "", cmd.get_cmd())
+ need_capture = cmd.persist_vars and not dry_run
if cmd.redirect:
try:
out = run(cmd_to_run, cwd=cwd)
@@ -1645,6 +1678,7 @@ class Commands(SecretYamlObject):
outfile.write(out)
outfile.flush()
print("Wrote %s bytes to redirect file %s" %
(len(out), cmd.get_redirect()))
+ cmd_output = out
except Exception as e:
print("Command %s failed: %s" % (cmd_to_run,
e))
success = False
@@ -1668,8 +1702,8 @@ class Commands(SecretYamlObject):
if cmd.comment:
print("# %s\n" % cmd.get_comment())
start_time = time.time()
- returncode = run_with_log_tail(cmd_to_run, cwd,
logfile=logfile, tee=cmd.tee, tail_lines=25,
- live=cmd.live,
shell=cmd.shell)
+ returncode, cmd_output =
run_with_log_tail(cmd_to_run, cwd, logfile=logfile, tee=cmd.tee, tail_lines=25,
+
live=cmd.live, shell=cmd.shell, capture_output=need_capture)
elapsed = time.time() - start_time
if not returncode == 0:
if cmd.should_fail:
@@ -1684,9 +1718,23 @@ class Commands(SecretYamlObject):
print("Expected command to fail, but it
succeeded.")
success = False
break
- else:
- if elapsed > 30:
- print("Command completed in %s
seconds" % elapsed)
+
+ # Handle persist_vars: capture stdout and parse for --wizard-var markers
+ if cmd.persist_vars and not dry_run and cmd_output:
+ try:
+ parsed_vars = parse_wizard_vars(cmd_output)
+ if parsed_vars:
+ todo = state.get_todo_by_id(self.todo_id)
+ if todo:
+ for var_name, var_value in
parsed_vars.items():
+ todo.state[var_name] = var_value
+ state.save()
+ for var_name, var_value in
parsed_vars.items():
+ print("Saved variable '%s' = '%s'"
% (var_name, var_value))
+ except Exception as e:
+ print("WARNING: Failed to persist variables:
%s" % e)
+ if elapsed > 30:
+ print("Command completed in %s seconds" % elapsed)
if not success:
print("WARNING: One or more commands failed, you may want to
check the logs")
return success
@@ -1719,7 +1767,7 @@ class Commands(SecretYamlObject):
return None
v = self.get_vars()
if self.todo_id:
- v.update(state.get_todo_by_id(self.todo_id).get_vars())
+ v.update(state.get_todo_by_id(self.todo_id).get_vars_and_state())
if isinstance(data, list):
if join:
return expand_jinja(" ".join(data), v)
@@ -1744,11 +1792,37 @@ def abbreviate_homedir(line):
return re.sub(r'([^/]|\b)%s' % os.path.expanduser('~'), "\\1~", line)
+def parse_wizard_vars(stdout_text):
+ """Parse --wizard-var markers from command stdout.
+
+ Format: --wizard-var KEY=VALUE
+
+ Returns a dict of extracted variables, with last value winning for duplicates.
+ """
+ variables = {}
+ if not stdout_text:
+ return variables
+
+ for line in stdout_text.splitlines():
+ # Check if line starts with --wizard-var marker
+ if line.startswith("--wizard-var "):
+ # Extract the KEY=VALUE part
+ var_part = line[len("--wizard-var "):].strip()
+ if '=' in var_part:
+ key, _, value = var_part.partition('=')
+ key = key.strip()
+ value = value.strip()
+ if key: # Only store if key is not empty
+ variables[key] = value
+
+ return variables
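A standalone sketch of the marker protocol this helper implements (the sample output line is hypothetical):

sample = "Pushing...\n--wizard-var changelog_rm_sha=ab12cd3\nDone.\n"
variables = {}
for line in sample.splitlines():
    if line.startswith("--wizard-var "):
        key, _, value = line[len("--wizard-var "):].partition("=")
        if key.strip():
            variables[key.strip()] = value.strip()
print(variables)  # {'changelog_rm_sha': 'ab12cd3'}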
+
+
class Command(SecretYamlObject):
yaml_tag = u'!Command'
hidden_fields = ['todo_id']
def __init__(self, cmd, cwd=None, stdout=None, logfile=None, tee=None,
live=None, comment=None, vars=None,
- todo_id=None, should_fail=None, redirect=None,
redirect_append=None, shell=None):
+ todo_id=None, should_fail=None, redirect=None,
redirect_append=None, shell=None, persist_vars=None):
self.cmd = cmd
self.cwd = cwd
self.comment = comment
@@ -1762,6 +1836,7 @@ class Command(SecretYamlObject):
self.todo_id = todo_id
self.redirect_append = redirect_append
self.redirect = redirect
+ self.persist_vars = persist_vars
if tee and stdout:
self.stdout = None
print("Command %s specifies 'tee' and 'stdout', using only 'tee'"
% self.cmd)
@@ -1806,7 +1881,7 @@ class Command(SecretYamlObject):
def jinjaify(self, data, join=False):
v = self.get_vars()
if self.todo_id:
- v.update(state.get_todo_by_id(self.todo_id).get_vars())
+ v.update(state.get_todo_by_id(self.todo_id).get_vars_and_state())
if isinstance(data, list):
if join:
return expand_jinja(" ".join(data), v)
diff --git a/dev-tools/scripts/releaseWizard.yaml b/dev-tools/scripts/releaseWizard.yaml
index eed7943e9c5..8cf0ab118c6 100644
--- a/dev-tools/scripts/releaseWizard.yaml
+++ b/dev-tools/scripts/releaseWizard.yaml
@@ -161,6 +161,7 @@ templates:
# depends: # One or more dependencies which will bar execution
# - todo_id1
# - todo_id2
+# types: ['major', 'minor', 'bugfix'] # Limit TODO to specific release
types
# vars: # Dictionary of jinja2 variables local to this TODO, e.g.
# logfile_path: "{{ [rc_folder, 'logs'] | path_join }}"
# # Vars can contain global jinja vars or local vars earlier defined
(ordered dict)
@@ -171,11 +172,14 @@ templates:
# function: my_python_function # Will call the named function for complex
tasks
# commands: !Commands # A !Commands object holding commands to execute
for this todo
# root_folder: '{{ git_checkout_folder }}' # path to where commands will
run
-# commands_text: Introduction text to be displayed just before the
commands
+# commands_text: Introduction text to be displayed just before the
commands
# enable_execute: true # Set to false to never offer to run commands
automatically
# confirm_each_command: true # Set to false to run all commands without
prompting
# remove_files: ['file1', 'folder2'] # List of files or folders that
must be gone
# logs_prefix: prefix # Lets you prefix logs file names with this string
+# run_text: Optional text to display before asking to run commands
+# env: {} # Dictionary of environment variables to set before running
commands
+# vars: {} # Dictionary of local jinja2 variables for this Commands block
# commands: # List of !Commands to execute
# - !Command # One single command
# cmd: "ls {{ folder_to_ls }}" # A single command. May reference
jinja vars
@@ -183,13 +187,17 @@ templates:
# cwd: relative_path # Where to run command, relative to root_folder
# comment: # Will display a # or REM comment above the command in
printouts
# vars: {} # Possible to define local vars for this command only
-# logfile: my.og # Overrides log file name which may grow very long :)
+# logfile: my.log # Overrides log file name which may grow very long :)
# tee: false # If true, sends output to console and file
# stdout: false # if true, sends output only to console, not log
file
# live: false # If true, sends output live byte-by-byte to console
# redirect: file.txt # Send output to file. Use instead of >
# redirect_append: false # Will cause output to be appended, like >>
-# shell: false $ Set to true to use built-in shell commands
+# shell: false # Set to true to use built-in shell commands
+# should_fail: false # Set to true if command is expected to fail (for
testing)
+# persist_vars: false # If true, parses stdout for --wizard-var
KEY=VALUE markers
+# # and persists them to TODO state.
+# # Format: --wizard-var var_name=value with
spaces allowed in value
# user_input: # An array of !UserInput objects for requesting input from
user
# - !UserInput
# prompt: Please enter your gpg key ID, e.g. 0D8D0B93
@@ -680,46 +688,146 @@ groups:
cmd: python3 -u dev-tools/scripts/addDepsToChanges.py --user solrbot --version {{ release_version }}
tee: true
- !Command
- cmd: git add -u . && git commit -m "Add dependency updates to changelog for {{ release_version }}" && git push
+ cmd: git add changelog && git commit -m "Add dependency updates to changelog for {{ release_version }}" && git push
logfile: dependency-changes.log
- !Todo
- id: generate_changelog_release
- title: Generate CHANGELOG.md for release
+ id: logchange_release
+ title: Run logchange release to prepare changelog folder
description: |
- Generate `CHANGELOG.md` by running `logchange release` and `logchange generate`. This will prepare both a
- `changelog/v{{ release_version }}` folder and the `CHANGELOG.md` file in one commit.
+ This task will run `logchange release` to prepare the new `changelog/v{{ release_version }}` folder
+ and record it in two separate commits. These commits are then cherry-picked to the stable and unstable branches.
+ This task does not push anything upstream.
+
+ **IMPORTANT**: You must run each command in this TODO individually, as the cherry-pick command relies on there being two
+ new commits on the release branch to cherry-pick from.
depends: dependency_updates_changes
commands: !Commands
root_folder: '{{ git_checkout_folder }}'
- commands_text: Generate CHANGELOG.md
+ commands_text: Run `logchange release` to create the changelog folder
+ confirm_each_command: true
commands:
- !Command
- cmd: git checkout {{ release_branch }}
- stdout: true
- - !Command
- cmd: git pull --ff-only
+ cmd: |
+ git checkout {{ release_branch }} && \
+ echo "Running logchange release" && \
+ {{ gradle_cmd }} logchangeRelease && \
+ echo "Recording in two commits" && \
+ git rm -r changelog/unreleased && \
+ git commit -m "Logchange release on {{ release_branch }} - rm
changelog/unreleased" && \
+ echo "--wizard-var changelog_rm_sha=$(git rev-parse --short HEAD)"
&& \
+ git add changelog && \
+ git commit -m "Logchange release on {{ release_branch }} - add
changelog/v{{ release_version }}" && \
+ echo "--wizard-var changelog_add_sha=$(git rev-parse --short HEAD)"
+ comment: Creates `changelog/v{{ release_version }}` folder and records it in two separate commits
+ logfile: logchange-release.log
+ shell: true
tee: true
+ persist_vars: true
- !Command
- cmd: "{{ gradle_cmd }} logchangeRelease"
- comment: Create `changelog/v{{ release_version }}` folder and move
unreleased entries
+ cmd: |
+ echo "--- Cherry-picking to stable branch" && \
+ git checkout {{ stable_branch }} && \
+ git cherry-pick -X ours -X no-renames {{ changelog_rm_sha }} && \
+ git cherry-pick {{ changelog_add_sha }} && \
+ echo "--- Cherry-picking to unstable branch" && \
+ git checkout main && \
+ git cherry-pick -X ours -X no-renames {{ changelog_rm_sha }} && \
+ git cherry-pick {{ changelog_add_sha }}
+ comment: Cherry-pick the release changelog commits to stable and
unstable branches
+ logfile: cherry-pick-changelog-release.log
+ shell: true
tee: true
+ - !Todo
+ id: validate_changelog_unreleased
+ title: Validate changelog structure across branches
+ description: |
+ Validate that the changelog folder structure is correct and consistent across branches,
+ and verify that changelog entries are properly distributed (no released files in the unreleased folder).
+ This ensures the changelog is in good shape before CHANGELOG.md is generated for the release.
+ depends: logchange_release
+ vars:
+ report_file: "{{ [rc_folder, 'changelog_validation_report.json'] |
path_join }}"
+ commands: !Commands
+ root_folder: '{{ git_checkout_folder }}'
+ commands_text: Run the changelog validation script to help assess state
of changelog folder
+ confirm_each_command: false
+ commands:
+ - !Command
+ cmd: python3 -u dev-tools/scripts/validateChangelogs.py --report-file {{ report_file }} --format json --skip-sync-check
+ logfile: validateChangelogs.log
+ tee: true
+ post_description: |
+ Review the validation report at {{ report_file }}. If there are any errors, you may need to
+ go back and fix the changelog entries and commit the fix to the relevant branch. If there are only
+ warnings, you can proceed. The validation ensures:
+ - All changelog/vX.Y.Z folders are identical across branches
+ - No released JIRAs exist in the unreleased folder
+ - You see a list of `changelog/unreleased/` files that are *new* to each version
+ - !Todo
+ id: logchange_generate
+ title: Generate CHANGELOG.md using logchange generate
+ description: |
+ This task will run `logchange generate` to generate the `/CHANGELOG.md` file
+ and commit it to the release branch. The generated CHANGELOG will be cherry-picked
+ to the stable and unstable branches afterward.
+ depends: validate_changelog_unreleased
+ commands: !Commands
+ root_folder: '{{ git_checkout_folder }}'
+ commands_text: Generate `/CHANGELOG.md` using logchange
+ commands:
- !Command
- cmd: "{{ gradle_cmd }} logchangeGenerate"
- comment: Generate `CHANGELOG.md` in repository root
+ cmd: |
+ git checkout {{ release_branch }} && \
+ echo "Running logchange generate" && \
+ {{ gradle_cmd }} logchangeGenerate && \
+ sed '/^\[unreleased\]$/,+3d' ./CHANGELOG.md >/tmp/CHANGELOG.md.tmp
&& \
+ mv /tmp/CHANGELOG.md.tmp CHANGELOG.md && \
+ rm -f changelog/unreleased/version-summary.md && \
+ git add CHANGELOG.md changelog && \
+ git commit -m "CHANGELOG.md generated for release v{{
release_version }}" && \
+ echo "--wizard-var changelog_md_sha=$(git rev-parse --short HEAD)"
+ comment: Generate `/CHANGELOG.md` and commit to {{ release_branch }}
+ logfile: logchange-generate.log
+ shell: true
tee: true
+ persist_vars: true
- !Command
- cmd: git add CHANGELOG.md changelog && git commit -m "Changelog for
release v{{ release_version }}" && git push
- comment: Commit and push changelog changes
- logfile: commit_changelog.log
+ cmd: |
+ generate_sha=$(git rev-parse --short HEAD) && \
+ echo "--- Cherry-picking to stable branch" && \
+ git checkout {{ stable_branch }} && \
+ git cherry-pick {{ changelog_md_sha }} && \
+ echo "--- Cherry-picking to unstable branch" && \
+ git checkout main && \
+ git cherry-pick {{ changelog_md_sha }}
+ comment: Cherry-pick the CHANGELOG generation commit to stable and
unstable branches
+ logfile: cherry-pick-changelog-generate.log
+ shell: true
tee: true
- !Todo
- id: persist_changelog_sha
- title: Persist the SHA of the changelog commit
- description: Store the current git sha
- depends: generate_changelog_release
- vars:
- changelog_sha: '{{ current_git_rev }}'
- persist_vars: changelog_sha
+ id: push_changelog_to_branches
+ title: Push changelog edits to all branches
+ description: |
+ This task will push all the changelog edits (release, generate, and other changes)
+ to each branch (release, stable, and unstable) upstream to the git repository.
+
+ Perform this step after all changelog validations have passed and you're satisfied
+ with the changelog state on all branches.
+ depends: logchange_generate
+ commands: !Commands
+ root_folder: '{{ git_checkout_folder }}'
+ commands_text: Push changelog commits to all branches
+ commands:
+ - !Command
+ cmd: |
+ for branch in {{ release_branch }} {{ stable_branch }} main; do
+ git checkout $branch
+ git push origin $branch
+ done
+ comment: Push changelog commits to all branches
+ logfile: push-changelog-all-branches.log
+ shell: true
+ tee: true
- !Todo
id: draft_release_notes
title: Get a draft of the release notes in place
@@ -1721,44 +1829,6 @@ groups:
- !Command
cmd: git add -u . && git commit -m "Add bugfix version {{
release_version }}" && git push
logfile: commit-stable.log
- - !Todo
- id: sync_changelog_cherry_pick
- title: Cherry-pick changelog to stable and unstable branches
- description: |
- Cherry-pick the changelog commit from the release branch to the stable
and main branches.
- This syncs both CHANGELOG.md and changelog folder changes across all
active branches.
-
- The changelog commit was created in the previous step
(generate_changelog_release) and will
- be referenced here. Cherry-pick will automatically handle conflict
resolution if needed.
- depends: generate_changelog_release
- commands: !Commands
- root_folder: '{{ git_checkout_folder }}'
- commands_text: Cherry-pick changelog changes to other branches
- commands:
- - !Command
- cmd: git checkout {{ stable_branch }}
- comment: Checkout stable branch
- stdout: true
- - !Command
- cmd: git pull --ff-only
- tee: true
- - !Command
- cmd: git cherry-pick {{ persist_changelog_sha.changelog_sha }} && git
push
- comment: Cherry-pick changelog commit
- logfile: cherry_pick_changelog_stable.log
- tee: true
- - !Command
- cmd: git checkout main
- comment: Checkout main branch
- stdout: true
- - !Command
- cmd: git pull --ff-only
- tee: true
- - !Command
- cmd: git cherry-pick {{ persist_changelog_sha.changelog_sha }} && git
push
- comment: Cherry-pick changelog commit
- logfile: cherry_pick_changelog_main.log
- tee: true
- !Todo
id: increment_release_version
title: Add the next version on release branch
diff --git a/dev-tools/scripts/validateChangelogs.py b/dev-tools/scripts/validateChangelogs.py
new file mode 100755
index 00000000000..337b2fbaff7
--- /dev/null
+++ b/dev-tools/scripts/validateChangelogs.py
@@ -0,0 +1,1183 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""
+Validate Solr changelog structure across branches.
+
+This tool helps release managers validate that the changelog folder structure
+and CHANGELOG.md file are correct across the five active development branches:
+- main (next major/minor release, e.g., 11.0.0)
+- branch_10x (stable branch, e.g., 10.1.0 next release)
+- branch_10_0 (release branch, e.g., 10.0.0, 10.0.1)
+- branch_9x (previous stable, e.g., 9.11.0)
+- branch_9_9 (previous bugfix, e.g., 9.9.0)
+
+It checks that:
+1. Git status is clean (no uncommitted changes)
+2. All changelog/vX.Y.Z folders are identical across branches
+3. Released files don't exist in the 'unreleased' folder
+4. Generates a report showing features scheduled for each branch
+5. Checks for possible duplicate JIRA issues across yml files
+"""
+
+import os
+import sys
+import re
+import json
+import yaml
+import shutil
+import subprocess
+import argparse
+from pathlib import Path
+from dataclasses import dataclass, field
+from typing import Dict, List, Set, Optional, Tuple
+from collections import defaultdict
+
+@dataclass
+class BranchInfo:
+ """Information about a branch."""
+ name: str
+ version: str
+ is_main: bool = False
+ is_stable: bool = False
+ is_release: bool = False
+ is_previous_major_bugfix: bool = False # e.g., branch_9_9 (9.9.0)
+ is_previous_major_stable: bool = False # e.g., branch_9x (9.8.0)
+ changelog_path: Optional[Path] = None
+ unreleased_files: Set[str] = field(default_factory=set)
+ versioned_folders: Dict[str, Set[str]] = field(default_factory=dict)
+ has_changelog_folder: bool = True # False if changelog folder doesn't exist yet
+ duplicate_issues: Dict[str, List[str]] = field(default_factory=dict) # Maps issue ID -> list of files
+ new_count: int = 0 # Count of features new to this version (first appearing in this branch)
+ not_in_newer: Set[str] = field(default_factory=set) # Files in unreleased that don't appear in any newer branch
+
+
+class ChangelogValidator:
+ """Main validator for Solr changelog structure."""
+
+ def __init__(
+ self,
+ git_root: Optional[Path] = None,
+ work_dir: Optional[Path] = None,
+ report_file: Optional[Path] = None,
+ changelog_file: Optional[Path] = None,
+ fetch_remote: bool = False,
+ report_format: str = "md",
+ skip_sync_check: bool = False,
+ check_duplicates: bool = False,
+ ):
+ """Initialize the validator.
+
+ Args:
+ git_root: Root of git repository (auto-detected if not provided)
+ work_dir: Working directory for temporary branches (default: auto
in /tmp)
+ report_file: File to write validation report to (default: stdout)
+ changelog_file: File to write generated CHANGELOG.md to. (default:
none)
+ fetch_remote: If True, fetch from remote.
+ report_format: Report format ("md" for Markdown or "json" for JSON)
+ skip_sync_check: If True, skip git branch sync check
+ check_duplicates: If True, check for duplicate JIRA issues
(default: False)
+ """
+ if git_root is None:
+ git_root = self._find_git_root()
+ self.git_root = git_root
+ self.changelog_root = git_root / "changelog"
+ self.build_gradle = git_root / "build.gradle"
+ self.changelog_md = git_root / "CHANGELOG.md"
+ self.report_file = report_file
+ self.changelog_file = changelog_file
+ self.work_dir = work_dir
+ self.fetch_remote = fetch_remote
+ self.report_format = report_format
+ self.skip_sync_check = skip_sync_check
+ self.check_duplicates = check_duplicates
+ self.branches = {}
+ self.remote_branches = set()
+ self.errors = []
+ self.warnings = []
+ self.info_messages = []
+ self.current_branch = None
+ self.temp_branch = None
+
+ @staticmethod
+ def _find_git_root() -> Path:
+ """Find the git root directory."""
+ try:
+ result = subprocess.run(
+ ["git", "rev-parse", "--show-toplevel"],
+ capture_output=True,
+ text=True,
+ check=True
+ )
+ return Path(result.stdout.strip())
+ except subprocess.CalledProcessError:
+ print("Error: Not in a git repository")
+ sys.exit(1)
+
+ def run_git(self, args: List[str], check: bool = True) ->
subprocess.CompletedProcess:
+ """Run a git command."""
+ return subprocess.run(
+ ["git"] + args,
+ cwd=self.git_root,
+ capture_output=True,
+ text=True,
+ check=check
+ )
+
+ def validate_git_status(self) -> bool:
+ """Verify that git status is clean with no uncommitted changes."""
+ self.info_messages.append("Checking git status...")
+ result = self.run_git(["status", "--porcelain"], check=False)
+
+ if result.stdout.strip():
+ self.errors.append(
+ "Git status is not clean. Please commit or stash all
changes:\n" +
+ result.stdout
+ )
+ return False
+
+ # Store current branch for later restoration
+ result = self.run_git(["rev-parse", "--abbrev-ref", "HEAD"])
+ self.current_branch = result.stdout.strip()
+
+ self.info_messages.append("✓ Git status is clean")
+ return True
+
+ def _find_apache_remote(self) -> Optional[str]:
+ """Find the official Apache Solr remote (matching 'apache' and 'solr'
in URL)."""
+ result = self.run_git(["remote", "-v"], check=False)
+ if result.returncode != 0:
+ return None
+
+ for parts in (line.split() for line in
result.stdout.strip().split("\n") if line):
+ if len(parts) >= 2 and "apache" in parts[1].lower() and "solr" in
parts[1].lower():
+ return parts[0]
+ return None
+
+ def _get_remote_branches(self, remote: str) -> set:
+ """Get list of available branches from remote."""
+ result = self.run_git(["ls-remote", "--heads", remote], check=False)
+ if result.returncode != 0:
+ return set()
+
+ return {parts[1].replace("refs/heads/", "") for line in
result.stdout.strip().split("\n")
+ if (parts := line.split()) and len(parts) >= 2 and
parts[1].startswith("refs/heads/")}
+
+ def validate_branches_up_to_date(self) -> bool:
+ """Validate remote branches are available.
+
+ By default (fetch_remote=False): Uses cached remote-tracking branches
from last fetch
+ If fetch_remote=True: Fetches fresh list from Apache remote
+ """
+ apache_remote = self._find_apache_remote()
+ if not apache_remote:
+ self.errors.append(
+ "Could not find Apache Solr remote (matching 'apache/solr'). "
+ "Please ensure you have the official remote configured."
+ )
+ return False
+
+ if self.fetch_remote:
+ # Fetch fresh data from remote
+ self._log_and_print("Fetching fresh branch list from Apache
remote...")
+ self._log_and_print(f" Found Apache remote: {apache_remote}")
+
+ self._log_and_print(f" Fetching from {apache_remote}...",
flush=True)
+ result = self.run_git(["fetch", apache_remote], check=False)
+ if result.returncode != 0:
+ self.errors.append(
+ f"Failed to fetch from {apache_remote}: {result.stderr}"
+ )
+ return False
+ print(" ✓ Fetch complete")
+
+ remote_branches = self._get_remote_branches(apache_remote)
+ if not remote_branches:
+ self.errors.append(
+ f"Could not retrieve branch list from {apache_remote}"
+ )
+ return False
+
+ # Store the fetched remote branches for use in discover_branches()
+ self.remote_branches = remote_branches
+ else:
+ # Use cached remote-tracking branches
+ self._log_and_print("Using cached remote-tracking branches (run
with --fetch-remote to update)")
+
+ self._log_and_print(f" Found Apache remote: {apache_remote}")
+ return True
+
+ def parse_version_from_build_gradle(self, branch: str) -> Optional[str]:
+ """Parse baseVersion from build.gradle on a specific branch.
+
+ Tries to read from local branch first, then from remote if available.
+ In offline mode, also tries remote-tracking branches (e.g.,
origin/branch_X_Y).
+ """
+ gradle_path = self.build_gradle.relative_to(self.git_root)
+ content = self._get_file_from_branch(branch, str(gradle_path))
+
+ if not content:
+ self.warnings.append(f"Could not read build.gradle from branch
{branch}")
+ return None
+
+ match = re.search(r"String\s+baseVersion\s*=\s*['\"]([^'\"]+)['\"]",
content)
+ if match:
+ return match.group(1)
+
+ self.warnings.append(f"Could not find baseVersion in build.gradle on
branch {branch}")
+ return None
+
+ @staticmethod
+ def _extract_version(name: str) -> int:
+ """Extract major version number from branch name (e.g., 10 from
branch_10_0)."""
+ if m := re.search(r"branch_(\d+)", name):
+ return int(m.group(1))
+ return -1
+
+ @staticmethod
+ def _extract_branch_version_tuple(name: str) -> tuple:
+ """Extract full version from branch name as tuple for comparison.
+
+ Examples:
+ - branch_9_9 -> (9, 9)
+ - branch_9_1 -> (9, 1)
+ - branch_10_0 -> (10, 0)
+ """
+ if m := re.search(r"branch_(\d+)_(\d+)", name):
+ return (int(m.group(1)), int(m.group(2)))
+ return (-1, -1)
+
+ @staticmethod
+ def _parse_version_string(version: str) -> tuple:
+ """Convert version string to sortable tuple (e.g., '9.9.1' -> (9, 9,
1))."""
+ return tuple(int(p) for p in version.split("."))
+
+ def _log_and_print(self, msg: str, flush: bool = False) -> None:
+ """Log a message and print it to stdout."""
+ self.info_messages.append(msg)
+ print(msg, flush=flush)
+
+ def _format_error_for_display(self, error) -> str:
+ """Format an error for display. Handles both strings and dict
objects."""
+ if isinstance(error, dict):
+ return json.dumps(error, indent=2)
+ return str(error)
+
+ def _git_ref_output(self, cmd: List[str], branch: str, rel_path: str) -> Optional[str]:
+ """Execute git command on a branch ref, trying local then remote. Helper for file/tree operations."""
+ result = self.run_git([*cmd, f"{branch}:{rel_path}"], check=False)
+ if result.returncode != 0 and (remote := self._find_apache_remote()):
+ result = self.run_git([*cmd, f"{remote}/{branch}:{rel_path}"],
check=False)
+ return result.stdout if result.returncode == 0 else None
+
+ def _get_file_from_branch(self, branch: str, rel_path: str) -> Optional[str]:
+ """Read a file from a branch, trying local first, then remote."""
+ return self._git_ref_output(["show"], branch, rel_path)
+
+ def _get_tree_from_branch(self, branch: str, rel_path: str) -> Optional[str]:
+ """List tree contents from a branch, trying local first, then remote."""
+ return self._git_ref_output(["ls-tree", "-r", "--name-only"], branch, rel_path)
+
+ def discover_branches(self) -> bool:
+ """Discover available branches and determine their types."""
+ self._log_and_print("Discovering branches...")
+
+ # Get branch list (cached or fetched)
+ if not self.fetch_remote:
+ result = self.run_git(["branch", "-r"], check=False)
+ if result.returncode != 0:
+ self.errors.append(f"Failed to list branches: {result.stderr}")
+ return False
+ branches = sorted(set(b.split("/", 1)[1] for line in
result.stdout.split("\n")
+ if (b := line.strip()) and not
b.startswith("HEAD")))
+ msg = f" Found {len(branches)} branches (cached remote)"
+ else:
+ if not self.remote_branches:
+ self.errors.append("Remote branches not discovered. Run with
remote validation first.")
+ return False
+ branches = sorted(self.remote_branches)
+ msg = f" Found {len(branches)} branches from remote"
+
+ self._log_and_print(msg)
+
+ # Categorize and validate branches
+ main_b, stable_b, release_b, feature_b = self._categorize_branches(branches)
+ if not all([main_b, stable_b, release_b]):
+ missing = [k for k, v in [("main", main_b), ("stable (branch_*x)", stable_b),
+ ("release (branch_*_0)", release_b)] if not v]
+ location = "in cached remote" if not self.fetch_remote else "on fetched remote"
+ self.errors.append(f"Missing branches {location}: {', '.join(missing)}")
+ return False
+
+ # Get current versions
+ stable = max(stable_b, key=self._extract_version)
+ release = max(release_b, key=self._extract_version)
+ prev_major_stable, prev_major_bugfix = self._find_previous_major_branches(
+ stable_b, feature_b, self._extract_branch_version_tuple(release))
+
+ # Register branches
+ configs = [(main_b, True, False, False, False, False),
+ (stable, False, True, False, False, False),
+ (release, False, False, True, False, False)]
+ if prev_major_bugfix:
+ self._log_and_print("Not using previous major")
+ #configs.append((prev_major_bugfix, False, False, False, True, False))
+ if prev_major_stable:
+ configs.append((prev_major_stable, False, False, False, False, True))
+
+ return self._register_branches(configs)
+
+ def validate_branches_in_sync(self) -> bool:
+ """Validate that all discovered branches are up to date with their
remote tracking branches."""
+ if self.skip_sync_check:
+ self._log_and_print("Skipping branch sync check (--skip-sync-check
enabled)")
+ return True
+
+ self._log_and_print("Validating that all branches are in sync with
remote...")
+
+ out_of_sync = []
+ for branch_name in self.branches.keys():
+ # Check if branch has a tracking branch
+ result = self.run_git(
+ ["rev-list", "--left-right",
f"{branch_name}...origin/{branch_name}"],
+ check=False
+ )
+
+ if result.returncode != 0:
+ # Branch has no remote tracking branch; skip the sync check for it
+ continue
+
+ # If there's any output, the branch is not in sync
+ # Format: commits only in left side (local) are prefixed with <
+ # commits only in right side (remote) are prefixed with >
+ lines = result.stdout.strip().split("\n")
+ local_only = [l for l in lines if l.startswith("<")]
+ remote_only = [l for l in lines if l.startswith(">")]
+
+ if local_only or remote_only:
+ local_count = len(local_only)
+ remote_count = len(remote_only)
+ out_of_sync.append(f"{branch_name} ({local_count} local,
{remote_count} remote)")
+
+ if out_of_sync:
+ self.errors.append(
+ f"The following branches are not in sync with remote:\n "
+ + "\n ".join(out_of_sync) +
+ "\nPlease run 'git pull' on these branches to update them, or
use --skip-sync-check to ignore this check (for testing only)."
+ )
+ return False
+
+ self._log_and_print(" ✓ All branches are in sync with remote")
+ return True
+
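For reference, the "<"/">" prefixes counted above are exactly what "git rev-list --left-right local...remote" prints; a hypothetical example (hashes are made up, and the parenthetical notes are annotations, not command output):

    git rev-list --left-right branch_9x...origin/branch_9x
    <1a2b3c4d...   (commit only on the local branch      -> counted as "local")
    >9f8e7d6c...   (commit only on the remote-tracking branch -> counted as "remote")
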
+ def get_branch_changelog_structure(self, branch: str) -> Tuple[Set[str], Dict[str, Set[str]]]:
+ """Get changelog structure for a specific branch.
+
+ Tries local branch first, then remote if available.
+ In offline mode, also tries remote-tracking branches (e.g., origin/branch_X_Y).
+ """
+ unreleased = set()
+ versioned = defaultdict(set)
+ changelog_rel_path = self.changelog_root.relative_to(self.git_root)
+
+ output = self._get_tree_from_branch(branch, str(changelog_rel_path))
+ if not output:
+ # Only warn if this is a branch that should have changelog
+ # (not expected to warn for older branches without changelog
folder yet)
+ return unreleased, dict(versioned)
+
+ for line in output.strip().split("\n"):
+ if not line:
+ continue
+ # Extract relative path from changelog root
+ parts = line.split("/")
+ if len(parts) < 2:
+ continue
+
+ folder = parts[0]
+ filename = "/".join(parts[1:])
+
+ # Only include YAML files, skip metadata files like
version-summary.md
+ if not filename.endswith(('.yml', '.yaml')):
+ continue
+
+ if folder == "unreleased":
+ unreleased.add(filename)
+ elif re.match(r"v\d+\.\d+\.\d+", folder):
+ versioned[folder].add(filename)
+
+ return unreleased, dict(versioned)
+
+ def load_branch_data(self) -> bool:
+ """Load changelog data for all branches."""
+ self._log_and_print("Loading changelog data for all branches...")
+ for name, info in self.branches.items():
+ info.unreleased_files, info.versioned_folders = self.get_branch_changelog_structure(name)
+ info.has_changelog_folder = bool(info.unreleased_files or info.versioned_folders)
+ detail = (f"{len(info.unreleased_files)} unreleased, {len(info.versioned_folders)} versioned"
+ if info.has_changelog_folder else "(no changelog folder yet)")
+ self._log_and_print(f" {name}: {detail}")
+ return True
+
+ def _extract_issues_from_file(self, file_content: str) -> Set[str]:
+ """Extract JIRA and GitHub issue IDs from a changelog YAML file.
+
+ Returns a set of issue identifiers (e.g., 'SOLR-12345',
'GITHUB-PR-789').
+ """
+ issues = set()
+ try:
+ data = yaml.safe_load(file_content)
+ if data and isinstance(data, dict):
+ # Look for links section with issue references
+ links = data.get('links', [])
+ if isinstance(links, list):
+ for link in links:
+ if isinstance(link, dict):
+ name = link.get('name', '').strip()
+ if name:
+ # Extract just the issue ID part from the link name,
+ # e.g. "SOLR-17961" or "GITHUB-PR-123"
+ match = re.search(r'(SOLR-\d+|GITHUB-PR-\d+)', name)
+ if match:
+ issues.add(match.group(1))
+ except Exception:
+ # If YAML parsing fails, silently continue
+ pass
+ return issues
+
+ def detect_duplicate_issues(self) -> bool:
+ """Detect duplicate JIRA/GitHub issue references within each branch.
+
+ Returns False if duplicates are found (adds warnings), True otherwise.
+ """
+ self._log_and_print("Detecting duplicate issues within each branch...")
+
+ has_duplicates = False
+
+ for branch_name, branch_info in self.branches.items():
+ if not branch_info.has_changelog_folder:
+ continue
+
+ # Collect all issues and their files for the unreleased section
+ issue_to_files = defaultdict(list)
+
+ for filename in branch_info.unreleased_files:
+ # Get the file content
+ file_content = self._get_file_from_branch(branch_name, f"changelog/unreleased/{filename}")
+ if file_content:
+ issues = self._extract_issues_from_file(file_content)
+ for issue in issues:
+ issue_to_files[issue].append(filename)
+
+ # Find duplicates
+ duplicates = {issue: files for issue, files in
issue_to_files.items() if len(files) > 1}
+
+ if duplicates:
+ has_duplicates = True
+ branch_info.duplicate_issues = duplicates
+
+ # Create warning messages
+ for issue, files in sorted(duplicates.items()):
+ files_str = ", ".join(sorted(files))
+ msg = f"Branch {branch_name}: Issue {issue} appears in
multiple files: {files_str}"
+ self.warnings.append(msg)
+ self._log_and_print(f" ⚠ {msg}")
+
+ if not has_duplicates:
+ self._log_and_print(" ✓ No duplicate issues found")
+
+ return not has_duplicates
+
+ def _log_validation_result(self, errors_before: int, success_msg: str) ->
None:
+ """Log validation result based on error count."""
+ if len(self.errors) == errors_before:
+ self.info_messages.append(f" ✓ {success_msg}")
+ else:
+ self.info_messages.append(" ✗ Validation failed - see errors
above")
+
+ def _run_validation_step(self, step_func) -> bool:
+ """Run a validation step and report failure."""
+ if not step_func():
+ self.print_report(None)
+ return False
+ return True
+
+ def _generate_error_only_report(self) -> str:
+ """Generate a simple report with only errors and warnings."""
+ report_lines = []
+ if self.errors:
+ report_lines.append("ERRORS:")
+ report_lines.extend(f" ✗ {self._format_error_for_display(e)}" for
e in self.errors)
+ if self.warnings:
+ report_lines.append("\nWARNINGS:")
+ report_lines.extend(f" ⚠ {w}" for w in self.warnings)
+ return "\n".join(report_lines)
+
+ def validate_versioned_folders_identical(self) -> bool:
+ """Verify that all changelog/vX.Y.Z folders are identical across
branches."""
+ self.info_messages.append("Validating versioned folders are identical
across branches...")
+
+ all_folders = set().union(*(info.versioned_folders.keys() for info in
self.branches.values()))
+ if not all_folders:
+ self.info_messages.append(" No versioned folders found")
+ return True
+
+ errors_before = len(self.errors)
+
+ for folder in sorted(all_folders):
+ contents_by_branch = {b: info.versioned_folders.get(folder)
+ for b, info in self.branches.items() if
folder in info.versioned_folders}
+
+ # Check if folder exists on all branches
+ if len(contents_by_branch) != len(self.branches):
+ missing_branches = set(self.branches.keys()) -
set(contents_by_branch.keys())
+ error_obj = {
+ "folder": folder,
+ "missing_on_branches": sorted(missing_branches)
+ }
+ self.errors.append(error_obj)
+ continue
+
+ # Find union of all files and check for differences
+ all_files = set().union(*contents_by_branch.values())
+
+ # Build file-centric diffs: which branches have each file
+ diffs = {}
+ all_branches = sorted(contents_by_branch.keys())
+ for file in sorted(all_files):
+ branches_with_file = sorted([b for b, contents in
contents_by_branch.items() if file in contents])
+ # Only include files that don't exist in all branches
+ if len(branches_with_file) != len(contents_by_branch):
+ branches_without_file = sorted([b for b in all_branches if
b not in branches_with_file])
+ diffs[file] = {
+ "present_in": branches_with_file,
+ "missing_in": branches_without_file
+ }
+
+ # If there are any differences, create structured error
+ if diffs:
+ error_obj = {
+ "folder": folder,
+ "diffs": diffs
+ }
+ self.errors.append(error_obj)
+
+ self._log_validation_result(errors_before, f"All {len(all_folders)}
versioned folders are identical")
+ return True
+
+ def validate_no_released_in_unreleased(self) -> bool:
+ """Verify that no YAML changelog files from released versions exist in
unreleased folder."""
+ self.info_messages.append("Validating that released files don't exist
in unreleased folder...")
+ errors_before = len(self.errors)
+
+ for branch, info in self.branches.items():
+ released = set().union(*info.versioned_folders.values())
+ # Filter to only check YAML/YML changelog entry files
+ unreleased_yaml = {f for f in info.unreleased_files if
f.endswith(('.yml', '.yaml'))}
+ released_yaml = {f for f in released if f.endswith(('.yml',
'.yaml'))}
+ if conflicts := (unreleased_yaml & released_yaml):
+ self.errors.append(f"Branch {branch}: Files in both unreleased
and released: {conflicts}")
+
+ self._log_validation_result(errors_before, "No released files found in
unreleased folder")
+ return len(self.errors) == errors_before
+
+ def _get_branch_by_type(self, **kwargs) -> BranchInfo:
+ """Helper to retrieve a branch by its type flags."""
+ return next(i for i in self.branches.values()
+ if all(getattr(i, k) == v for k, v in kwargs.items()))
+
+ def _map_analysis_to_branches(self, analysis: Optional[Dict]) -> Dict[str,
tuple]:
+ """Map analysis keys to branch info. Returns {branch_name:
(analysis_key, analysis_data)}."""
+ if not analysis:
+ return {}
+ key_to_flags = {
+ "release": {"is_release": True},
+ "stable": {"is_stable": True},
+ "main": {"is_main": True},
+ "previous_major_bugfix": {"is_previous_major_bugfix": True},
+ "previous_major_stable": {"is_previous_major_stable": True},
+ }
+ return {(b := self._get_branch_by_type(**key_to_flags[k])).name: (k, analysis[k])
+ for k in analysis.keys() if k in key_to_flags and (b := self._get_branch_by_type(**key_to_flags[k]))}
+
+ def _get_branch_configs_for_report(self, analysis: Dict) -> List[tuple]:
+ """Build branch configs for report. Returns list of (display_name,
key, label) tuples sorted by version."""
+ branch_configs = [
+ ("Release Branch", "release", "Features scheduled:"),
+ ("Stable Branch", "stable", "Additional features (not in
release):"),
+ ("Main Branch", "main", "Main-only features:"),
+ ]
+ if "previous_major_bugfix" in analysis:
+ branch_configs.append(("Previous Major Bugfix Branch",
"previous_major_bugfix", "Features (not in release):"))
+ if "previous_major_stable" in analysis:
+ branch_configs.append(("Previous Major Stable Branch",
"previous_major_stable", "Features (not in release):"))
+ branch_configs.sort(key=lambda cfg:
self._parse_version_string(analysis[cfg[1]]['version']))
+ return branch_configs
+
+ def _categorize_branches(self, branches: List[str]) -> tuple:
+ """Categorize branches by type patterns. Returns (main, stable,
release, feature) lists."""
+ return (next((b for b in branches if b == "main"), None),
+ [b for b in branches if re.match(r"branch_\d+x$", b)],
+ [b for b in branches if re.match(r"branch_\d+_0$", b)],
+ [b for b in branches if re.match(r"branch_\d+_[1-9]\d*$", b)])
+
+ def _find_previous_major_branches(self, stable_b: List[str], feature_b:
List[str], release_version: tuple) -> tuple:
+ """Find previous major stable and bugfix branches. Returns
(prev_major_stable, prev_major_bugfix)."""
+ older_stable = [b for b in stable_b if self._extract_version(b) <
release_version[0]]
+ prev_major_stable = max(older_stable, key=self._extract_version) if
older_stable else None
+
+ older_features = [b for b in feature_b if
self._extract_branch_version_tuple(b)[0] < release_version[0]]
+ prev_major_bugfix = max(older_features,
key=self._extract_branch_version_tuple) if older_features else None
+
+ return prev_major_stable, prev_major_bugfix
+
+ def _register_branches(self, configs: List[tuple]) -> bool:
+ """Register discovered branches. Returns True on success, False if
version parsing fails."""
+ for name, is_main, is_stable, is_release, is_prev_bugfix,
is_prev_stable in configs:
+ version = self.parse_version_from_build_gradle(name)
+ if not version:
+ self.errors.append(f"Could not parse version for branch
{name}")
+ return False
+ self.branches[name] = BranchInfo(
+ name=name, version=version, is_main=is_main,
is_stable=is_stable, is_release=is_release,
+ is_previous_major_bugfix=is_prev_bugfix,
is_previous_major_stable=is_prev_stable,
+ changelog_path=self.changelog_root,
+ )
+ self.info_messages.append(f" {name}: version {version}")
+ return True
+
+ def analyze_feature_distribution(self) -> Dict:
+ """Analyze which features are scheduled for each branch."""
+ self.info_messages.append("Analyzing feature distribution...")
+
+ release_info = self._get_branch_by_type(is_release=True)
+ stable_info = self._get_branch_by_type(is_stable=True)
+ main_info = self._get_branch_by_type(is_main=True)
+ prev_bugfix_info = self._get_branch_by_type(is_previous_major_bugfix=True) if any(b.is_previous_major_bugfix for b in self.branches.values()) else None
+ prev_stable_info = self._get_branch_by_type(is_previous_major_stable=True) if any(b.is_previous_major_stable for b in self.branches.values()) else None
+
+ # Calculate feature sets - ordered from oldest to newest branch
+ # Each branch shows only files that first appear in that branch
+ prev_stable_only = prev_stable_info.unreleased_files if prev_stable_info else set()
+ prev_bugfix_only = (prev_bugfix_info.unreleased_files - prev_stable_only) if prev_bugfix_info else set()
+ release_features = release_info.unreleased_files - prev_stable_only - prev_bugfix_only
+ stable_only = stable_info.unreleased_files - prev_stable_only - prev_bugfix_only - release_features
+ main_only = main_info.unreleased_files - prev_stable_only - prev_bugfix_only - release_features - stable_only
+
+ # Calculate files not in any newer versions
+ # For each branch, find files that don't appear in any newer branch's
unreleased files
+ all_newer_files = {}
+ newer_cumulative = set()
+
+ # Build cumulative sets from newest to oldest
+ for branch_info in reversed(sorted(self.branches.values(), key=lambda
b: self._parse_version_string(b.version))):
+ newer_cumulative = newer_cumulative | branch_info.unreleased_files
+ all_newer_files[branch_info.name] = newer_cumulative.copy()
+
+ # Calculate not_in_newer for each branch (skip main branch since it's
always the newest)
+ for branch_info in self.branches.values():
+ if branch_info.is_main:
+ # Main branch is always newest, so it will never have files
not in newer versions
+ branch_info.not_in_newer = set()
+ continue
+
+ # Get all files from newer branches (excluding current branch's
own files from newer set)
+ newer_files = set()
+ for other_info in self.branches.values():
+ if self._parse_version_string(other_info.version) >
self._parse_version_string(branch_info.version):
+ newer_files |= other_info.unreleased_files
+
+ # Files in this branch's unreleased that don't appear in any newer
branch
+ branch_info.not_in_newer = branch_info.unreleased_files -
newer_files
+
+ # Build analysis dictionary
+ def build_entry(info: BranchInfo, features: Set[str]) -> Dict:
+ return {"version": info.version, **({"count": len(features),
"files": sorted(features)} if info.has_changelog_folder else
{"has_changelog_folder": False})}
+
+ analysis = {
+ "release": build_entry(release_info, release_features),
+ "stable": build_entry(stable_info, stable_only),
+ "main": build_entry(main_info, main_only),
+ }
+ if prev_bugfix_info:
+ analysis["previous_major_bugfix"] = build_entry(prev_bugfix_info,
prev_bugfix_only)
+ if prev_stable_info:
+ analysis["previous_major_stable"] = build_entry(prev_stable_info,
prev_stable_only)
+
+ # Log summary
+ for key, label in [("release", "Release"), ("stable", "Stable"),
("main", "Main"),
+ ("previous_major_bugfix", "Previous Major Bugfix"),
("previous_major_stable", "Previous Major Stable")]:
+ if key in analysis:
+ version = analysis[key]["version"]
+ if "has_changelog_folder" in analysis[key] and not
analysis[key]["has_changelog_folder"]:
+ self.info_messages.append(f" {label} branch ({version}):
(no changelog folder yet)")
+ else:
+ count = analysis[key].get("count", 0)
+ self.info_messages.append(f" {label} branch ({version}):
{count} features")
+
+ return analysis
+
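The subtractions above attribute each unreleased entry to the oldest branch it appears on; a minimal standalone sketch of the same idea (file names are made up):

    # Toy unreleased-file sets, oldest branch first, newest last
    prev_stable = {"SOLR-1.yml"}
    release     = {"SOLR-1.yml", "SOLR-2.yml"}
    stable      = {"SOLR-1.yml", "SOLR-2.yml", "SOLR-3.yml"}
    main        = {"SOLR-1.yml", "SOLR-2.yml", "SOLR-3.yml", "SOLR-4.yml"}

    release_only = release - prev_stable                            # {'SOLR-2.yml'}
    stable_only  = stable - prev_stable - release_only              # {'SOLR-3.yml'}
    main_only    = main - prev_stable - release_only - stable_only  # {'SOLR-4.yml'}
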
+ def _copy_files_to_snapshot(self, branch_info: BranchInfo, files:
Set[str], snapshot_dir: Path) -> None:
+ """Helper to copy files from a branch to a snapshot directory."""
+ changelog_rel = self.changelog_root.relative_to(self.git_root)
+ for file in files:
+ result = self.run_git(
+ ["show",
f"{branch_info.name}:{changelog_rel}/unreleased/{file}"],
+ check=False
+ )
+ if result.returncode == 0:
+ (snapshot_dir / file).write_text(result.stdout)
+ else:
+ self.warnings.append(f"Could not retrieve unreleased/{file}
from {branch_info.name}")
+
+ def create_temp_branch_with_changelog(self, analysis: Dict) ->
Optional[str]:
+ """Create temporary git branch with merged changelog for generation."""
+ self.info_messages.append("Creating temporary branch for changelog
generation...")
+
+ try:
+ # Generate a unique branch name
+ self.temp_branch = f"__changelog-validation-{os.getpid()}__"
+
+ # Create the temp branch from current branch
+ result = self.run_git(["checkout", "-b", self.temp_branch],
check=False)
+ if result.returncode != 0:
+ self.errors.append(f"Failed to create temporary branch:
{result.stderr}")
+ return None
+
+ self.info_messages.append(f" Created temporary branch:
{self.temp_branch}")
+
+ # Get branch info
+ release_info = self._get_branch_by_type(is_release=True)
+ stable_info = self._get_branch_by_type(is_stable=True)
+ main_info = self._get_branch_by_type(is_main=True)
+
+ # Check if previous major branches exist
+ has_prev_bugfix = any(b.is_previous_major_bugfix for b in
self.branches.values())
+ prev_bugfix_info =
self._get_branch_by_type(is_previous_major_bugfix=True) if has_prev_bugfix else
None
+ has_prev_stable = any(b.is_previous_major_stable for b in
self.branches.values())
+ prev_stable_info =
self._get_branch_by_type(is_previous_major_stable=True) if has_prev_stable else
None
+
+ # Prepare changelog folder structure
+ changelog_dir = self.changelog_root
+
+ # Clear existing unreleased folder
+ unreleased_dir = changelog_dir / "unreleased"
+ if unreleased_dir.exists():
+ shutil.rmtree(unreleased_dir)
+ unreleased_dir.mkdir(parents=True, exist_ok=True)
+
+ # Create and prepare snapshot folders
+ release_features = release_info.unreleased_files
+ stable_features = stable_info.unreleased_files - release_features
+ main_features = main_info.unreleased_files - release_features -
stable_features
+
+ snapshots = {
+ changelog_dir / f"v{release_info.version}-SNAPSHOT":
(release_info, release_features),
+ changelog_dir / f"v{stable_info.version}-SNAPSHOT":
(stable_info, stable_features),
+ changelog_dir / f"v{main_info.version}-SNAPSHOT": (main_info,
main_features),
+ }
+ if prev_bugfix_info and prev_bugfix_info.has_changelog_folder:
+ snapshots[changelog_dir /
f"v{prev_bugfix_info.version}-SNAPSHOT"] = (prev_bugfix_info,
prev_bugfix_info.unreleased_files)
+ if prev_stable_info and prev_stable_info.has_changelog_folder:
+ snapshots[changelog_dir /
f"v{prev_stable_info.version}-SNAPSHOT"] = (prev_stable_info,
prev_stable_info.unreleased_files)
+
+ # Create snapshot directories and copy files
+ for snapshot_path, (branch_info, files) in snapshots.items():
+ if snapshot_path.exists():
+ shutil.rmtree(snapshot_path)
+ snapshot_path.mkdir(parents=True, exist_ok=True)
+ if branch_info and files:
+ self._copy_files_to_snapshot(branch_info, files,
snapshot_path)
+
+ self.info_messages.append(f" ✓ Prepared changelog structure in
temporary branch")
+ return self.temp_branch
+
+ except Exception as e:
+ self.errors.append(f"Failed to create temporary branch: {e}")
+ return None
+
+ def generate_changelog_preview(self, temp_branch: str) -> Optional[str]:
+ """Generate CHANGELOG.md preview using gradle task."""
+ self.info_messages.append("Generating changelog preview...")
+ try:
+ # Run logchangeGenerate task
+ result = subprocess.run(
+ ["./gradlew", "logchangeGenerate"],
+ cwd=self.git_root,
+ capture_output=True,
+ text=True,
+ timeout=60
+ )
+
+ if result.returncode != 0:
+ self.warnings.append(f"logchangeGenerate task failed:
{result.stderr}")
+ return None
+
+ # Read the generated CHANGELOG.md
+ if self.changelog_md.exists():
+ preview = re.sub(r'\[unreleased\]\s*\n-+\s*\n\s*\n', '', self.changelog_md.read_text())
+ self.info_messages.append(" ✓ Generated changelog preview")
+ return preview
+ else:
+ self.warnings.append("CHANGELOG.md not generated")
+ return None
+
+ except subprocess.TimeoutExpired:
+ self.warnings.append("Changelog generation timed out")
+ return None
+ except Exception as e:
+ self.warnings.append(f"Could not generate changelog preview: {e}")
+ return None
+
+ def _print_git_status(self, message: str, git_result:
subprocess.CompletedProcess) -> None:
+ """Helper to print git command status."""
+ if git_result.returncode != 0:
+ error_msg = f"{message}: {git_result.stderr}"
+ print(f" ✗ {error_msg}")
+ self.warnings.append(error_msg)
+
+ def cleanup_temp_branch(self):
+ """Clean up temporary branch and restore original branch."""
+ if not self.temp_branch:
+ return
+
+ print(f"\nCleaning up temporary branch: {self.temp_branch}")
+
+ # Restore original branch
+ if self.current_branch:
+ #print(f" Restoring branch: {self.current_branch}")
+ result = self.run_git(["checkout", self.current_branch],
check=False)
+ self._print_git_status(f"Restored branch: {self.current_branch}",
result)
+ else:
+ print(" Warning: Could not determine original branch")
+
+ # Delete temporary branch
+ #print(f" Deleting temporary branch: {self.temp_branch}")
+ result = self.run_git(["branch", "-D", self.temp_branch], check=False)
+ self._print_git_status(f"Deleted temporary branch:
{self.temp_branch}", result)
+
+ # Clean up working directory
+ #print(f" Cleaning up working directory")
+ result = self.run_git(["reset", "--hard"], check=False)
+ if result.returncode == 0:
+ result = self.run_git(["clean", "-fd"], check=False)
+ self._print_git_status("Cleaned up working directory", result)
+
+ @staticmethod
+ def _get_branch_type(branch_info: BranchInfo) -> str:
+ """Get human-readable branch type abbreviation."""
+ type_map = [
+ (lambda b: b.is_previous_major_bugfix, "prev_bug"),
+ (lambda b: b.is_previous_major_stable, "prev_sta"),
+ (lambda b: b.is_release, "release"),
+ (lambda b: b.is_stable, "stable"),
+ ]
+ return next((t for check, t in type_map if check(branch_info)), "main")
+
+ def generate_report(self, analysis: Dict) -> str:
+ """Generate validation report in Markdown format."""
+ # Build mapping of branch names to analysis data
+ analysis_by_branch = self._map_analysis_to_branches(analysis)
+
+ # Generate branch information table
+ branches_table_rows = []
+ for i in sorted(self.branches.values(), key=lambda b:
self._parse_version_string(b.version)):
+ btype = self._get_branch_type(i)
+ new_count = ""
+ not_in_newer_count = ""
+ if i.name in analysis_by_branch:
+ _, analysis_data = analysis_by_branch[i.name]
+ new_count = str(analysis_data.get("count", ""))
+
+ if i.has_changelog_folder:
+ not_in_newer_count = str(len(i.not_in_newer))
+ row = f"| {i.name:15} | {btype:8} | {i.version:7} |
{len(i.unreleased_files):>10} | {new_count:>6} | {not_in_newer_count:>13} |"
+ else:
+ row = f"| {i.name:15} | {btype:8} | {i.version:7} |
{'N/A':>10} | {'N/A':>6} | {'N/A':>13} |"
+ branches_table_rows.append(row)
+
+ report = f"""# Solr Changelog Validation Report
+
+## Repository Status
+- **Git root:** `{self.git_root}`
+
+## Branch Information
+
+| Branch | Type | Version | Unreleased | New | Not in Newer |
+|-----------------|----------|---------|------------|--------|---------------|
+{chr(10).join(branches_table_rows)}
+
+## Feature Distribution
+"""
+
+ branch_configs = self._get_branch_configs_for_report(analysis)
+
+ for branch_name, key, label in branch_configs:
+ d = analysis[key]
+ if "has_changelog_folder" in d and not d["has_changelog_folder"]:
+ report += f"\n### {branch_name} (v{d['version']})\n- (no
changelog folder yet)\n"
+ else:
+ report += f"\n### {branch_name} (v{d['version']})\n-
**{label}** {d['count']}\n"
+ if d['files']:
+ files_str = "\n".join(f" - `{f}`" for f in d['files'][:5])
+ if len(d['files']) > 5:
+ files_str += f"\n - ... and {len(d['files']) - 5}
more"
+ report += files_str + "\n"
+
+ # Add duplicate issues section if found
+ has_duplicates = any(info.duplicate_issues for info in
self.branches.values())
+ if has_duplicates:
+ report += "\n## Duplicate Issues\n"
+ for branch_info in sorted(self.branches.values(), key=lambda b:
self._parse_version_string(b.version)):
+ if branch_info.duplicate_issues:
+ report += f"\n### {branch_info.name}
(v{branch_info.version})\n"
+ for issue, files in
sorted(branch_info.duplicate_issues.items()):
+ files_str = ", ".join(f"`{f}`" for f in sorted(files))
+ report += f"- Issue **{issue}** appears in:
{files_str}\n"
+
+ report += "\n## Validation Results\n"
+ if self.errors:
+ report += f"\n### ✗ {len(self.errors)} Error(s) Found\n"
+ for i, e in enumerate(self.errors, 1):
+ report += f"\n**Error
{i}:**\n```json\n{self._format_error_for_display(e)}\n```\n"
+ else:
+ report += "\n### ✓ All Validations Passed\n"
+
+ if self.warnings:
+ report += f"\n### ⚠ {len(self.warnings)} Warning(s)\n"
+ for w in self.warnings:
+ report += f"- {w}\n"
+
+ return report
+
+ def run(self) -> bool:
+ """Run the complete validation."""
+ print("\nStarting Solr changelog validation...\n")
+
+ try:
+ # Step 1: Check git status
+ if not self._run_validation_step(self.validate_git_status):
+ return False
+
+ # Step 2: Check if branches are up to date with remote (before
discovery)
+ if not
self._run_validation_step(self.validate_branches_up_to_date):
+ return False
+
+ # Step 3: Discover branches (uses remote or local branch list)
+ if not self._run_validation_step(self.discover_branches):
+ return False
+
+ # Step 3.5: Validate all discovered branches are in sync with
remote
+ if not self._run_validation_step(self.validate_branches_in_sync):
+ return False
+
+ # Step 4: Load branch data
+ if not self._run_validation_step(self.load_branch_data):
+ return False
+
+ # Step 5: Validate versioned folders
+ self.validate_versioned_folders_identical()
+
+ # Step 6: Validate no released files in unreleased
+ self.validate_no_released_in_unreleased()
+
+ # Step 7: Detect duplicate issues (warnings, not errors) - only if
enabled
+ if self.check_duplicates:
+ self.detect_duplicate_issues()
+
+ # Step 8: Analyze feature distribution
+ analysis = self.analyze_feature_distribution()
+
+ # Step 9: Create temporary branch and generate changelog
+ temp_branch = self.create_temp_branch_with_changelog(analysis)
+ changelog_preview = None
+
+ if temp_branch:
+ changelog_preview =
self.generate_changelog_preview(temp_branch)
+
+ # Step 10: Generate and print report
+ self.print_report(analysis, changelog_preview)
+
+ # Return success if no errors
+ success = len(self.errors) == 0
+
+ return success
+
+ finally:
+ # Always cleanup temp branch
+ self.cleanup_temp_branch()
+
+ def _generate_json_report(self, analysis: Optional[Dict] = None) -> str:
+ """Generate validation report in JSON format."""
+ analysis_by_branch = self._map_analysis_to_branches(analysis)
+ report_data = {
+ "success": len(self.errors) == 0,
+ "errors": self.errors,
+ "warnings": self.warnings,
+ "branch_report": {}
+ }
+
+ # Add branch information sorted by version in ascending order
+ sorted_branches = sorted(self.branches.values(), key=lambda b:
self._parse_version_string(b.version))
+ for info in sorted_branches:
+ branch_entry = {"version": info.version}
+
+ # Add unreleased count and files if changelog folder exists
+ if info.has_changelog_folder:
+ branch_entry["unreleased_count"] = len(info.unreleased_files)
+ # Don't include all unreleased files in JSON, keep it clean
+ else:
+ branch_entry["has_changelog_folder"] = False
+
+ # Add feature distribution info if available for this branch
+ if info.name in analysis_by_branch:
+ analysis_key, analysis_data = analysis_by_branch[info.name]
+ branch_entry["id"] = analysis_key
+ if "count" in analysis_data:
+ branch_entry["new_count"] = analysis_data["count"]
+ if "files" in analysis_data and info.has_changelog_folder:
+ branch_entry["new"] = analysis_data["files"]
+
+ # Add files not in any newer versions
+ if info.has_changelog_folder and info.not_in_newer:
+ branch_entry["not_in_newer_count"] = len(info.not_in_newer)
+ branch_entry["not_in_newer"] = sorted(info.not_in_newer)
+
+ # Add duplicate issues if found for this branch
+ if info.duplicate_issues:
+ branch_entry["duplicate_issues"] = {
+ issue: sorted(files) for issue, files in
info.duplicate_issues.items()
+ }
+
+ report_data["branch_report"][info.name] = branch_entry
+
+ return json.dumps(report_data, indent=2)
+
+ def print_report(self, analysis: Optional[Dict] = None, changelog_preview: Optional[str] = None):
+ """Print/write the validation report.
+
+ If report_file is set, writes to that file. Otherwise prints to stdout.
+ If changelog_file is set, also writes the generated CHANGELOG.md to
that file.
+
+ Note: Info messages are printed live during validation, not repeated
here.
+ """
+ # Generate report based on format
+ if self.report_format == "json":
+ report = self._generate_json_report(analysis)
+ elif analysis:
+ report = self.generate_report(analysis)
+ else:
+ report = self._generate_error_only_report()
+
+ # Output report to file or stdout
+ if self.report_file:
+ self.report_file.write_text(report)
+ # Always print errors to stdout so user is alerted even when
writing to file
+ if self.errors:
+ print("ERRORS:")
+ for error in self.errors:
+ print(f" ✗ {self._format_error_for_display(error)}")
+ if self.warnings:
+ print("WARNINGS:")
+ for warning in self.warnings:
+ print(f" ⚠ {warning}")
+ print(f"Report written to: {self.report_file}")
+ else:
+ print(report)
+
+ # Write changelog preview if requested
+ if changelog_preview and self.changelog_file:
+ self.changelog_file.write_text(changelog_preview)
+ print(f"Changelog written to: {self.changelog_file}")
+
+
+def main():
+ """Main entry point with command-line argument parsing."""
+ parser = argparse.ArgumentParser(
+ description="Validate Solr changelog structure across branches",
+ )
+
+ parser.add_argument(
+ "-r", "--report-file",
+ type=Path,
+ help="File to write report to (default: stdout)",
+ metavar="PATH",
+ )
+
+ parser.add_argument(
+ "-c", "--changelog-file",
+ type=Path,
+ help="File to write generated CHANGELOG.md preview to",
+ metavar="PATH",
+ )
+
+ parser.add_argument(
+ "-w", "--work-dir",
+ type=Path,
+ help="Working directory (default TEMP dir)",
+ metavar="PATH",
+ )
+
+ parser.add_argument(
+ "--fetch-remote",
+ action="store_true",
+ help="Fetch fresh branch list from remote",
+ )
+
+ parser.add_argument(
+ "-f", "--format",
+ choices=["md", "json"],
+ default="md",
+ help="Report output format (default: md)",
+ )
+
+ parser.add_argument(
+ "--skip-sync-check",
+ action="store_true",
+ help="Skip branch in sync validation",
+ )
+
+ parser.add_argument(
+ "--check-duplicates",
+ action="store_true",
+ help="Check for duplicate JIRA issues",
+ )
+
+ args = parser.parse_args()
+
+ # Create validator with provided options
+ validator = ChangelogValidator(
+ report_file=args.report_file,
+ changelog_file=args.changelog_file,
+ work_dir=args.work_dir,
+ fetch_remote=args.fetch_remote,
+ report_format=args.format,
+ skip_sync_check=args.skip_sync_check,
+ check_duplicates=args.check_duplicates,
+ )
+
+ success = validator.run()
+ # JSON format always exits with 0, Markdown exits with 1 on errors
+ if args.format == "json":
+ sys.exit(0)
+ else:
+ sys.exit(0 if success else 1)
+
+
+if __name__ == "__main__":
+ main()
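An example invocation of the tool (the output paths are just placeholders), combining JSON output with a report file and a generated CHANGELOG preview:

    python3 dev-tools/scripts/validateChangelogs.py --fetch-remote -f json \
        -r /tmp/changelog-report.json -c /tmp/CHANGELOG-preview.md

Without -r/-c the report is printed to stdout; --skip-sync-check and --check-duplicates toggle the optional validation steps handled above.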
diff --git a/gradle/documentation/changes-to-html/changes2html.py b/gradle/documentation/changes-to-html/changes2html.py
index 1cd7bb93fbd..4827efa40d1 100755
--- a/gradle/documentation/changes-to-html/changes2html.py
+++ b/gradle/documentation/changes-to-html/changes2html.py
@@ -31,7 +31,7 @@ from pathlib import Path
class ChangelogParser:
"""Parse CHANGELOG.md generated by logchange"""
- RELEASE_PATTERN = re.compile(r'^\[(\d+(?:\.\d+)*)\](\s+-\s+(.+))?$')
+ RELEASE_PATTERN = re.compile(r'^\[(\d+(?:\.\d+)*(?:-[a-zA-Z0-9.]+)?)\](\s+-\s+(.+))?$')
SECTION_PATTERN = re.compile(r'^###\s+(\w+(?:\s+\w+)*)\s*(?:\(\d+\s+changes?\))?')
ITEM_PATTERN = re.compile(r'^###|^\[|^- ')
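For illustration, the widened RELEASE_PATTERN now also accepts ALPHA/BETA-style headings; a small standalone check (the heading text and date below are made up):

    import re
    RELEASE_PATTERN = re.compile(r'^\[(\d+(?:\.\d+)*(?:-[a-zA-Z0-9.]+)?)\](\s+-\s+(.+))?$')
    m = RELEASE_PATTERN.match('[4.0.0-BETA] - 2012-08-13')
    print(m.group(1))  # -> 4.0.0-BETA; the previous, purely numeric pattern rejected this heading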