Krisztian Szucs created ARROW-8456:
--------------------------------------

Summary: [Release] Add python script to help curating JIRA
Key: ARROW-8456
URL: https://issues.apache.org/jira/browse/ARROW-8456
Project: Apache Arrow
Issue Type: Task
Components: Developer Tools
Reporter: Krisztian Szucs
Fix For: 1.0.0

The following script produces reports like https://gist.github.com/kszucs/9857ef69c92a230ce5a5068551b83ed8 {code:python} from jira import JIRA import warnings import pygit2 import pandas as pd from io import StringIO class Patch: def __init__(self, commit): self.commit = commit self.issue_key, self.msg = self._parse(commit.message) def _parse(self, message): first_line = message.splitlines()[0] m = re.match("(?P<ticket>((ARROW|PARQUET)\-\d+)):?(?P<msg>.*)", first_line) if m is None: return None, '' values = m.groupdict() return values['ticket'], values['msg'] @property def shortmessage(self): if not self.msg: return self.commit.message.splitlines()[0] else: return self.msg @property def sha(self): return self.commit.id @property def issue_url(self): return 'https://issues.apache.org/jira/browse/{}'.format(self.issue_key) @property def commit_url(self): return 'https://github.com/apache/arrow/commit/{}'.format(self.sha) def to_markdown(self): if self.issue_key is None: return "[{}]({})\n".format( self.shortmessage, self.commit_url ) else: return "[{}]({}): [{}]({})\n".format( self.issue_key, self.issue_url, self.shortmessage, self.commit_url ) JIRA_SEARCH_LIMIT = 10000 # JIRA_SEARCH_LIMIT = 50 class Release: """Release object for querying issues and commits Usage: jira = JIRA( {'server': 'https://issues.apache.org/jira'}, basic_auth=(user, password) ) repo = pygit2.Repository('path/to/arrow/repo') release = Release(jira, repo, '0.15.1', '0.15.0') # show the commits in application order for commit in release.commits(): print(commit.oid) # cherry-pick the patches to a branch release.apply_patches_to('a-branch') """ def __init__(self, jira, repo, version, previous_version): self.jira = jira self.repo = repo self.version = version self.previous_version = previous_version self._issues = None self._patches = None def _tag(self, version): return self.repo.revparse_single(f'refs/tags/apache-arrow-{version}') def issues(self): # FIXME(kszucs): paginate instead of maxresults 
if self._issues is None: query = f'project=ARROW AND fixVersion={self.version}' self._issues = self.jira.search_issues(query, maxResults=JIRA_SEARCH_LIMIT) return self._issues def patches(self): """Commits belonging to release applied on master branch The returned commits' order corresponds to the output of git log. """ if self._patches is None: previous_tag = self._tag(self.previous_version) master = self.repo.branches['master'] ordering = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE walker = self.repo.walk(master.target, ordering) walker.hide(previous_tag.oid) self._patches = list(map(Patch, walker)) return self._patches def curate(self): issues = self.issues() patches = self.patches() issue_keys = {issue.key for issue in self.issues()} within, outside, nojira = [], [], [] for p in patches: if p.issue_key is None: nojira.append(p) elif p.issue_key in issue_keys: within.append(p) issue_keys.remove(p.issue_key) else: outside.append(p) # remaining jira tickets nopatch = list(issue_keys) return within, outside, nojira, nopatch def curation_report(self): out = StringIO() out.write('Total number of JIRA tickets assigned to version {}: {}\n' .format(self.version, len(self.issues()))) out.write('\n') out.write('Total number of applied patches since {}: {}\n' .format(self.previous_version, len(self.patches()))) out.write('\n\n') within, outside, nojira, nopatch = self.curate() out.write('Patches with assigned issue in {}:\n'.format(self.version)) for p in within: out.write("- {}".format(p.to_markdown())) out.write('\n\n') out.write('Patches with assigned issue outside of {}:\n'.format(self.version)) for p in outside: out.write("- {}".format(p.to_markdown())) out.write('\n\n') out.write('Patches without assigned issue:\n') for p in nojira: out.write("- {}".format(p.to_markdown())) out.write('\n\n') out.write('JIRAs in {} without assigned patch:\n'.format(self.version)) for issue_key in nopatch: url = 'https://issues.apache.org/jira/browse/{}'.format(issue_key) 
out.write("- [{}]({})\n".format(issue_key, url)) return out.getvalue() def apply_patches_to(self, branch_name): previous_tag = self._tag(self.previous_version) branch = repo.create_branch(branch_name, previous_tag.get_object()) try: head = branch.target for commit in self.patches(): base = repo.merge_base(commit.oid, head) parent_tree = commit.parents[0].tree index = repo.merge_trees(parent_tree, head, commit.oid) tree_id = index.write_tree(repo) head = repo.create_commit( branch.name, commit.author, commit.committer, commit.message, tree_id, [head] ) except pygit2.GitError: repo.branches[branch_name].delete() raise import os import pygit2 from jira import JIRA jira = JIRA( {'server': 'https://issues.apache.org/jira'}, basic_auth=( os.environ.get('APACHE_JIRA_USER'), os.environ.get('APACHE_JIRA_PASSWORD') ) ) repo = pygit2.Repository('.') release = Release(jira, repo, version='0.17.0', previous_version='0.16.0') report = release.curation_report() {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)