This is an automated email from the ASF dual-hosted git repository.

janhoy pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 74e6debe888 Tweak script addDepsToChanges.py (#3667)
74e6debe888 is described below

commit 74e6debe88856cff33fc212868387c30759ebab7
Author: Jan Høydahl <[email protected]>
AuthorDate: Wed Oct 8 15:06:41 2025 +0200

    Tweak script addDepsToChanges.py (#3667)
    
    Tweak addDepsToChanges script to skip (branch_9x) from solrbot commit 
message in CHANGES.txt
    
    (cherry picked from commit ebcaca8a14d61cc9d82b0a123528b485b6ff247a)
---
 dev-tools/scripts/addDepsToChanges.py | 102 +++++++++++++++++++++++++++++++---
 1 file changed, 93 insertions(+), 9 deletions(-)

diff --git a/dev-tools/scripts/addDepsToChanges.py 
b/dev-tools/scripts/addDepsToChanges.py
index 7220cb5a323..1fb5ceb7f5d 100755
--- a/dev-tools/scripts/addDepsToChanges.py
+++ b/dev-tools/scripts/addDepsToChanges.py
@@ -27,7 +27,36 @@ from scriptutil import *
 import argparse
 import re
 
-line_re = re.compile(r"(.*?) \(#(\d+)\)$")
+line_re = re.compile(r"(.*?) (\(branch_\d+x\) )?\(#(\d+)\)$")
+
+
+class ChangeEntry:
+    """
+    Represents one dependency change entry.
+    Fields:
+      - pr_num: string PR number (e.g., '3605')
+      - message: cleaned commit message text to be shown in CHANGES (noise 
removed)
+      - author: Git author (e.g., 'solrbot')
+    """
+
+    def __init__(self, pr_num: str, message: str, author: str):
+        self.pr_num = pr_num
+        self.message = message
+        self.author = author
+
+    def dep_key(self) -> str:
+        """
+        Extract a dependency key from the message after 'Update ' and before ' 
to', '(' or end.
+        This is used for de-duplication and sorting. Case-insensitive.
+        """
+        m = re.search(r"(?i)update\s+(.+?)(?:\s+to\b|\s*\(|$)", self.message)
+        if m:
+            return m.group(1).strip()
+        return self.message.strip()
+
+    def __str__(self) -> str:
+        # Keep trailing newline to preserve existing blank-line formatting by 
update_changes
+        return f"* PR#{self.pr_num}: {self.message} ({self.author})\n"
 
 
 def get_prev_release_tag(ver):
@@ -57,15 +86,50 @@ def read_config():
     return newconf
 
 
+def get_gitlog_lines(user: str, prev_tag: str):
+    """
+    Run git log and return a list of raw subject lines (%s), newest first.
+    """
+    output = run('git log --author=' + user + ' --oneline --no-merges 
--pretty=format:"%s" ' + prev_tag + '..')
+    return list(filter(None, output.split("\n")))
+
+
+def parse_gitlog_lines(lines, author: str):
+    """
+    Parse raw git log subject lines into ChangeEntry objects.
+    - Extract pr_num and message
+    - Strip '(branch_Nx)' noise
+    - Remove optional leading 'chore(deps): '
+    - Normalize 'update dependency X' to 'Update X' (case-insensitive)
+    """
+    entries = []
+    for line in lines:
+        match = line_re.search(line)
+        if not match:
+            print("Skipped un-parsable line: %s" % line)
+            continue
+        text = match.group(1)
+        pr_num = match.group(3)
+        # Clean message noise
+        msg = text
+        msg = re.sub(r"^chore\(deps\):\s*", "", msg, flags=re.IGNORECASE)
+        # Normalize 'update dependency' to 'Update '
+        msg = re.sub(r"(?i)^update\s+dependenc(y|ies)\s+", "Update ", msg)
+        entries.append(ChangeEntry(pr_num=pr_num, message=msg, author=author))
+    return entries
+
+
 def gitlog_to_changes(line, user="solrbot"):
     """
-    Converts a git log formatted line ending in (#<pr-num) into a CHANGES 
style line
+    DEPRECATED: Use parse_gitlog_lines + ChangeEntry.__str__ instead.
+    Converts a git log formatted line ending in (#<pr-num) into a CHANGES 
style line.
+    Strips any '(branch_Nx)' text from the commit message, as that is not 
needed in CHANGES.txt
     """
     match = line_re.search(line)
 
     if match:
         text = match.group(1)
-        pr_num = match.group(2)
+        pr_num = match.group(3)
         return "* PR#%s: %s (%s)\n" % (pr_num, text, user)
     else:
         print("Skipped un-parsable line: %s" % line)
@@ -123,6 +187,26 @@ def update_changes(filename, version, changes_lines):
         exit(1)
 
 
+def dedupe_entries(entries):
+    """
+    De-duplicate dependency update entries (newest first input) by dep_key.
+    Keeps the first occurrence for each dependency key.
+    """
+    seen = set()
+    result = []
+    for e in entries:
+        key = e.dep_key().lower()
+        if key not in seen:
+            seen.add(key)
+            result.append(e)
+    return result
+
+
+def sort_entries(entries):
+    """Return a new list sorted alphabetically by dependency key 
(case-insensitive)."""
+    return sorted(entries, key=lambda e: e.dep_key().lower())
+
+
 def main():
     if not os.path.exists('solr/CHANGES.txt'):
         sys.exit("Tool must be run from the root of a source checkout.")
@@ -130,12 +214,12 @@ def main():
     prev_tag = get_prev_release_tag(newconf.version)
     print("Adding dependency updates since git tag %s" % prev_tag)
     try:
-        gitlog_lines = run(
-            'git log --author=' + newconf.user + ' --oneline --no-merges 
--pretty=format:"%s" ' + prev_tag + '..').split(
-            "\n")
-        changes_lines = list(map(lambda l: gitlog_to_changes(l, newconf.user), 
list(filter(None, gitlog_lines))))
-        if changes_lines and len(changes_lines) > 0:
-            update_changes('solr/CHANGES.txt', newconf.version, changes_lines)
+        gitlog_lines = get_gitlog_lines(newconf.user, prev_tag)
+        entries = parse_gitlog_lines(gitlog_lines, author=newconf.user)
+        if entries:
+            deduped = dedupe_entries(entries)
+            change_lines = [str(e) for e in deduped]
+            update_changes('solr/CHANGES.txt', newconf.version, change_lines)
         else:
             print("No changes found for version %s" % newconf.version.dot)
         print("Done")

Reply via email to