scripts/regression-hotspots.py | 50 ++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 27 deletions(-)
New commits:
commit 48d6a3e2c4294f2f41ce7c8fe226ccfc87f47be0
Author: Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>
AuthorDate: Sun Jul 4 09:51:59 2021 +0300
Commit: Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>
CommitDate: Tue Jul 6 08:31:27 2021 +0200

    regression-hotspots: replace filename regex with handy git log options

    Old regex had a bug that made it skip some files.
    Switch from sh to GitPython.
    Introduce an excluding regex to leave out uninteresting files.
    Reorder the output sections and add wikitext headings.

    Change-Id: I1103d8d34d2a146d64f8aae57ca921716e88987d
    Reviewed-on: https://gerrit.libreoffice.org/c/dev-tools/+/118367
    Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>
    Tested-by: Ilmari Lauhakangas <ilmari.lauhakan...@libreoffice.org>

diff --git a/scripts/regression-hotspots.py b/scripts/regression-hotspots.py
index c2b5600..cec2781 100755
--- a/scripts/regression-hotspots.py
+++ b/scripts/regression-hotspots.py
@@ -6,15 +6,16 @@
 # License, v. 2.0. If a copy of the MPL was not distributed with this
 # file, You can obtain one at https://mozilla.org/MPL/2.0/.
# +# Uses https://github.com/gitpython-developers/GitPython +# Results published in https://wiki.documentfoundation.org/Development/RegressionHotspots import sys import re -import sh +import git from urllib.request import urlopen, URLError from io import BytesIO - def get_fixed_regression_bugs(): - url = 'https://bugs.libreoffice.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0' + url = 'https://bugs.documentfoundation.org/buglist.cgi?bug_status=UNCONFIRMED&bug_status=NEW&bug_status=ASSIGNED&bug_status=REOPENED&bug_status=RESOLVED&bug_status=VERIFIED&bug_status=CLOSED&bug_status=NEEDINFO&bug_status=PLEASETEST&columnlist=&keywords=regression%2C%20&keywords_type=allwords&limit=0&list_id=354018&product=LibreOffice&query_format=advanced&resolution=FIXED&ctype=csv&human=0' try: resp = urlopen(url) except URLError: @@ -24,7 +25,6 @@ def get_fixed_regression_bugs(): for line in [raw.decode('utf-8').strip('\n') for raw in BytesIO(resp.read())][1:]: bug_ids.append(int(line)) return bug_ids - def get_dir_counts(file_counts, level): dir_counts = {} for (filename, count) in file_counts.items(): @@ -36,37 +36,33 @@ def get_dir_counts(file_counts, level): else: dir_counts[dirpart]=count return dir_counts - def print_counts(counts): printorder = reversed(sorted((count, name) for (name, count) in counts.items())) for count in printorder: print('%5d %s' % (count[0], count[1])) - if __name__ == '__main__': file_counts = {} - statregex = re.compile('^ ([^ ]+) \|') + excluderegex = re.compile(r'qa/|icon-themes/|extras/source/gallery/|extras/source/palettes/|extras/source/templates/|extras/source/truetype/|helpcontent2|dictionaries|translations|download\.lst|\.png|\.patch') 
fixed_regression_ids = get_fixed_regression_bugs() sys.stderr.write('found %d fixed regressions: %s\n' % (len(fixed_regression_ids), fixed_regression_ids)) for bug_id in fixed_regression_ids: sys.stderr.write('working on bug %d\n' % bug_id) - # FIXME: use --numstat instead, which does not abbreviate filenames - logstat = sh.git('--no-pager', 'log', '--grep', '[fdo|tdf]#%d' % bug_id, '--stat') - for line in logstat: - match = statregex.search(str(line)) - if match and match.group(1): - filename = match.group(1) - sys.stderr.write('regression fix touched file: %s\n' % filename) - if filename in file_counts: - file_counts[filename]+=1 - else: - file_counts[filename]=1 - print('top level dirs:') - print_counts(get_dir_counts(file_counts, 1)) - print('\nsecond level dirs:') - print_counts(get_dir_counts(file_counts, 2)) - print('\nthird level dirs:') - print_counts(get_dir_counts(file_counts, 3)) - print('\nfourth level dirs:') - print_counts(get_dir_counts(file_counts, 4)) - print('\nfiles:') + lognames = git.Git('.').execute(['git', 'log', '--grep=[fdo|tdf]#'+str(bug_id), '--pretty=tformat:', '--name-only']) + if lognames: + for filename in lognames.split('\n'): + if not excluderegex.search(filename): + sys.stderr.write('regression fix touched file: %s\n' % filename) + if filename in file_counts: + file_counts[filename]+=1 + else: + file_counts[filename]=1 + print('=== files ===\n') print_counts(file_counts) + print('\n=== fourth level dirs ===\n') + print_counts(get_dir_counts(file_counts, 4)) + print('\n=== third level dirs ===\n') + print_counts(get_dir_counts(file_counts, 3)) + print('\n=== second level dirs ===\n') + print_counts(get_dir_counts(file_counts, 2)) + print('\n=== top level dirs ===\n') + print_counts(get_dir_counts(file_counts, 1)) _______________________________________________ Libreoffice-commits mailing list libreoffice-comm...@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/libreoffice-commits