Hi.

Since we moved to git world and we're in the preparation for ChangeLog messages
being in git commit messages, I think it's the right time to also simplify mklog
script.

I'm sending a new version (which should eventually replace contrib/mklog and 
contrib/mklog.pl).
Changes made in the version:

- the script uses unifdiff - it rapidly simplifies parsing of the '+-!' lines 
that is done
  in contrib/mklog
- no author nor date stamp is used - that all can be get from git
- --inline option is not supported - I don't see a use-case for it now
- the new script has a unit tests (just few of them for now)

I compares results in between the old Python script for last 80 commits and 
it's very close,
in some cases it does even better.

I'm planning to maintain and improve the script for the future.

Thoughts?
Martin
>From 9fa5d13856f0f5ba153801baf57d4a732829f609 Mon Sep 17 00:00:00 2001
From: Martin Liska <mli...@suse.cz>
Date: Fri, 15 May 2020 00:44:07 +0200
Subject: [PATCH] Add mklog-ng.py and gcc-mklog git alias.

contrib/ChangeLog:

	* gcc-git-customization.sh: Add gcc-mklog alias.
	* mklog_ng.py: New file.
	* test_mklog_ng.py: New file.
---
 contrib/gcc-git-customization.sh |   2 +
 contrib/mklog_ng.py              | 192 +++++++++++++++++++++++++++++++
 contrib/test_mklog_ng.py         | 158 +++++++++++++++++++++++++
 3 files changed, 352 insertions(+)
 create mode 100755 contrib/mklog_ng.py
 create mode 100755 contrib/test_mklog_ng.py

diff --git a/contrib/gcc-git-customization.sh b/contrib/gcc-git-customization.sh
index a932bf8c06a..b7b97327be3 100755
--- a/contrib/gcc-git-customization.sh
+++ b/contrib/gcc-git-customization.sh
@@ -25,6 +25,8 @@ git config alias.svn-rev '!f() { rev=$1; shift; git log --all --grep="^From-SVN:
 git config alias.gcc-descr \!"f() { if test \${1:-no} = --full; then c=\${2:-master}; r=\$(git describe --all --abbrev=40 --match 'basepoints/gcc-[0-9]*' \$c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-,r,p'); expr match \${r:-no} '^r[0-9]\\+\$' >/dev/null && r=\${r}-0-g\$(git rev-parse \${2:-master}); else c=\${1:-master}; r=\$(git describe --all --match 'basepoints/gcc-[0-9]*' \$c | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)-\\([0-9]\\+\\)-g[0-9a-f]*\$,r\\2-\\3,p;s,^\\(tags/\\)\\?basepoints/gcc-\\([0-9]\\+\\)\$,r\\2-0,p'); fi; if test -n \$r; then o=\$(git config --get gcc-config.upstream); rr=\$(echo \$r | sed -n 's,^r\\([0-9]\\+\\)-[0-9]\\+\\(-g[0-9a-f]\\+\\)\\?\$,\\1,p'); if git rev-parse --verify --quiet \${o:-origin}/releases/gcc-\$rr >/dev/null; then m=releases/gcc-\$rr; else m=master; fi; git merge-base --is-ancestor \$c \${o:-origin}/\$m && \echo \${r}; fi; }; f"
 git config alias.gcc-undescr \!"f() { o=\$(git config --get gcc-config.upstream); r=\$(echo \$1 | sed -n 's,^r\\([0-9]\\+\\)-[0-9]\\+\$,\\1,p'); n=\$(echo \$1 | sed -n 's,^r[0-9]\\+-\\([0-9]\\+\\)\$,\\1,p'); test -z \$r && echo Invalid id \$1 && exit 1; h=\$(git rev-parse --verify --quiet \${o:-origin}/releases/gcc-\$r); test -z \$h && h=\$(git rev-parse --verify --quiet \${o:-origin}/master); p=\$(git describe --all --match 'basepoints/gcc-'\$r \$h | sed -n 's,^\\(tags/\\)\\?basepoints/gcc-[0-9]\\+-\\([0-9]\\+\\)-g[0-9a-f]*\$,\\2,p;s,^\\(tags/\\)\\?basepoints/gcc-[0-9]\\+\$,0,p'); git rev-parse --verify \$h~\$(expr \$p - \$n); }; f"
 
+git config alias.gcc-mklog '!f() { "`git rev-parse --show-toplevel`/contrib/mklog_ng.py" $@; } ; f'
+
 # Make diff on MD files use "(define" as a function marker.
 # Use this in conjunction with a .gitattributes file containing
 # *.md    diff=md
diff --git a/contrib/mklog_ng.py b/contrib/mklog_ng.py
new file mode 100755
index 00000000000..a67fc007759
--- /dev/null
+++ b/contrib/mklog_ng.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2020 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING.  If not, write to
+# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
+# and adds a skeleton ChangeLog file to the file. It does not try to be
+# too smart when parsing function names, but it produces a reasonable
+# approximation.
+#
+# Author: Martin Liska <mli...@suse.cz>
+
+import argparse
+import os
+import re
+import sys
+
+from unidiff import PatchSet
+
+pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
+identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
+comment_regex = re.compile(r'^\/\*')
+struct_regex = re.compile(r'^((class|struct|union|enum)\s+[a-zA-Z0-9_]+)')
+macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
+super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
+fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
+template_and_param_regex = re.compile(r'<[^<>]*>')
+
+function_extensions = set(['.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def'])
+
+help_message = """\
+Generate ChangeLog template for PATCH.
+PATCH must be generated using diff(1)'s -up or -cp options
+(or their equivalent in git).
+"""
+
+script_folder = os.path.realpath(__file__)
+gcc_root = os.path.dirname(os.path.dirname(script_folder))
+
+
+def find_changelog(path):
+    folder = os.path.split(path)[0]
+    while True:
+        if os.path.exists(os.path.join(gcc_root, folder, 'ChangeLog')):
+            return folder
+        folder = os.path.dirname(folder)
+        if folder == '':
+            return folder
+    raise AssertionError()
+
+
+def extract_function_name(line):
+    if comment_regex.match(line):
+        return None
+    m = struct_regex.search(line)
+    if m:
+        # Struct declaration
+        return m.group(1)
+    m = macro_regex.search(line)
+    if m:
+        # Macro definition
+        return m.group(2)
+    m = super_macro_regex.search(line)
+    if m:
+        # Supermacro
+        return m.group(1)
+    m = fn_regex.search(line)
+    if m:
+        # Discard template and function parameters.
+        fn = m.group(1)
+        fn = re.sub(template_and_param_regex, '', fn)
+        return fn.rstrip()
+    return None
+
+
+def try_add_function(functions, line):
+    fn = extract_function_name(line)
+    if fn and fn not in functions:
+        functions.append(fn)
+    return bool(fn)
+
+
+def generate_changelog(data, no_functions=False):
+    changelogs = {}
+    sorted_changelogs = []
+    prs = []
+    out = ''
+    diff = PatchSet(data)
+
+    for file in diff:
+        changelog = find_changelog(file.path)
+        if changelog not in changelogs:
+            changelogs[changelog] = []
+            sorted_changelogs.append(changelog)
+        changelogs[changelog].append(file)
+
+        if 'testsuite' in file.path and file.is_added_file:
+            for line in list(file)[0]:
+                m = pr_regex.search(line.value)
+                if m:
+                    pr = m.group('pr')
+                    if pr not in prs:
+                        prs.append(pr)
+                else:
+                    break
+
+    for changelog in sorted_changelogs:
+        files = changelogs[changelog]
+        out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
+        out += '\n'
+        for pr in prs:
+            out += '\t%s\n' % pr
+        for file in files:
+            assert file.path.startswith(changelog)
+            in_tests = 'testsuite' in changelog or 'testsuite' in file.path
+            relative_path = file.path[len(changelog):].lstrip('/')
+            functions = []
+            if file.is_added_file:
+                msg = 'New test' if in_tests else 'New file'
+                out += '\t* %s: %s.\n' % (relative_path, msg)
+            elif file.is_removed_file:
+                out += '\t* %s: Removed.\n' % (relative_path)
+            else:
+                if not no_functions:
+                    for hunk in file:
+                        # Do not add function names for testsuite files
+                        extension = os.path.splitext(relative_path)[1]
+                        if not in_tests and extension in function_extensions:
+                            last_fn = None
+                            modified_visited = False
+                            success = False
+                            for line in hunk:
+                                m = identifier_regex.match(line.value)
+                                if line.is_added or line.is_removed:
+                                    if not line.value.strip():
+                                        continue
+                                    modified_visited = True
+                                    if m and try_add_function(functions,
+                                                              m.group(1)):
+                                        last_fn = None
+                                        success = True
+                                elif line.is_context:
+                                    if last_fn and modified_visited:
+                                        try_add_function(functions, last_fn)
+                                        last_fn = None
+                                        modified_visited = False
+                                        success = True
+                                    elif m:
+                                        last_fn = m.group(1)
+                                        modified_visited = False
+                            if not success:
+                                try_add_function(functions,
+                                                 hunk.section_header)
+                if functions:
+                    out += '\t* %s (%s):\n' % (relative_path, functions[0])
+                    for fn in functions[1:]:
+                        out += '\t(%s):\n' % fn
+                else:
+                    out += '\t* %s:\n' % relative_path
+        out += '\n'
+    return out
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser(description=help_message)
+    parser.add_argument('input', nargs='?',
+                        help='Patch file (or missing, read standard input)')
+    parser.add_argument('-s', '--no-functions', action='store_true',
+                        help='Do not generate function names in ChangeLogs')
+    args = parser.parse_args()
+    if args.input == '-':
+        args.input = None
+
+    input = open(args.input) if args.input else sys.stdin
+    output = generate_changelog(input.read(), args.no_functions)
+    print(output, end='')
diff --git a/contrib/test_mklog_ng.py b/contrib/test_mklog_ng.py
new file mode 100755
index 00000000000..6e68277b1bb
--- /dev/null
+++ b/contrib/test_mklog_ng.py
@@ -0,0 +1,158 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2020 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING.  If not, write to
+# the Free Software Foundation, 51 Franklin Street, Fifth Floor,
+# Boston, MA 02110-1301, USA.
+
+# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
+# and adds a skeleton ChangeLog file to the file. It does not try to be
+# too smart when parsing function names, but it produces a reasonable
+# approximation.
+#
+# Author: Martin Liska <mli...@suse.cz>
+
+import unittest
+
+from mklog_ng import generate_changelog
+
+PATCH1 = '''\
+diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
+index 567c23380fe..e6209ede9d6 100644
+--- a/gcc/config/riscv/riscv.h
++++ b/gcc/config/riscv/riscv.h
+@@ -920,6 +920,7 @@ extern unsigned riscv_stack_boundary;
+ #define SHIFT_RS1 15
+ #define SHIFT_IMM 20
+ #define IMM_BITS 12
++#define C_S_BITS 5
+ #define C_SxSP_BITS 6
+ 
+ #define IMM_REACH (1LL << IMM_BITS)
+@@ -929,6 +930,10 @@ extern unsigned riscv_stack_boundary;
+ #define SWSP_REACH (4LL << C_SxSP_BITS)
+ #define SDSP_REACH (8LL << C_SxSP_BITS)
+ 
++/* This is the maximum value that can be represented in a compressed load/store
++   offset (an unsigned 5-bit value scaled by 4).  */
++#define CSW_MAX_OFFSET ((4LL << C_S_BITS) - 1) & ~3
++
+ /* Called from RISCV_REORG, this is defined in riscv-sr.c.  */
+ 
+ extern void riscv_remove_unneeded_save_restore_calls (void);
+
+'''
+
+EXPECTED1 = '''\
+gcc/ChangeLog:
+
+	* config/riscv/riscv.h (C_S_BITS):
+	(CSW_MAX_OFFSET):
+
+'''
+
+PATCH2 = '''\
+diff --git a/gcc/targhooks.h b/gcc/targhooks.h
+index 9704d23f1db..b572a36e8cf 100644
+--- a/gcc/targhooks.h
++++ b/gcc/targhooks.h
+@@ -120,7 +120,7 @@ extern bool default_empty_mask_is_expensive (unsigned);
+ extern void *default_init_cost (class loop *);
+ extern unsigned default_add_stmt_cost (class vec_info *, void *, int,
+ 				       enum vect_cost_for_stmt,
+-				       class _stmt_vec_info *, int,
++				       class _stmt_vec_info *, tree, int,
+ 				       enum vect_cost_model_location);
+ extern void default_finish_cost (void *, unsigned *, unsigned *, unsigned *);
+ extern void default_destroy_cost_data (void *);
+@@ -186,6 +186,7 @@ extern tree default_emutls_var_init (tree, tree, tree);
+ extern unsigned int default_hard_regno_nregs (unsigned int, machine_mode);
+ extern bool default_hard_regno_scratch_ok (unsigned int);
+ extern bool default_mode_dependent_address_p (const_rtx, addr_space_t);
++extern bool default_new_address_profitable_p (rtx, rtx_insn *, rtx);
+ extern bool default_target_option_valid_attribute_p (tree, tree, tree, int);
+ extern bool default_target_option_pragma_parse (tree, tree);
+ extern bool default_target_can_inline_p (tree, tree);
+
+'''
+
+EXPECTED2 = '''\
+gcc/ChangeLog:
+
+	* targhooks.h (default_add_stmt_cost):
+	(default_new_address_profitable_p):
+
+'''
+
+PATCH3 = '''\
+diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h
+index 2b1e33f94ae..7f47402f9b9 100644
+--- a/libcpp/include/cpplib.h
++++ b/libcpp/include/cpplib.h
+@@ -173,7 +173,7 @@ enum c_lang {CLK_GNUC89 = 0, CLK_GNUC99, CLK_GNUC11, CLK_GNUC17, CLK_GNUC2X,
+ 	     CLK_STDC2X,
+ 	     CLK_GNUCXX, CLK_CXX98, CLK_GNUCXX11, CLK_CXX11,
+ 	     CLK_GNUCXX14, CLK_CXX14, CLK_GNUCXX17, CLK_CXX17,
+-	     CLK_GNUCXX2A, CLK_CXX2A, CLK_ASM};
++	     CLK_GNUCXX20, CLK_CXX20, CLK_ASM};
+ 
+ /* Payload of a NUMBER, STRING, CHAR or COMMENT token.  */
+ struct GTY(()) cpp_string {
+@@ -484,7 +484,7 @@ struct cpp_options
+   /* Nonzero for C2X decimal floating-point constants.  */
+   unsigned char dfp_constants;
+ 
+-  /* Nonzero for C++2a __VA_OPT__ feature.  */
++  /* Nonzero for C++20 __VA_OPT__ feature.  */
+   unsigned char va_opt;
+ 
+   /* Nonzero for the '::' token.  */
+
+'''
+
+EXPECTED3 = '''\
+libcpp/ChangeLog:
+
+	* include/cpplib.h (enum c_lang):
+	(struct cpp_options):
+
+'''
+
+EXPECTED3B = '''\
+libcpp/ChangeLog:
+
+	* include/cpplib.h:
+
+'''
+
+
+class TestMklog(unittest.TestCase):
+    def test_macro_definition(self):
+        changelog = generate_changelog(PATCH1)
+        assert changelog == EXPECTED1
+
+    def test_changed_argument(self):
+        changelog = generate_changelog(PATCH2)
+        assert changelog == EXPECTED2
+
+    def test_enum_and_struct(self):
+        changelog = generate_changelog(PATCH3)
+        assert changelog == EXPECTED3
+
+    def test_no_function(self):
+        changelog = generate_changelog(PATCH3, True)
+        assert changelog == EXPECTED3B
-- 
2.26.2

Reply via email to