It is now possible to parse and store series, so do just that. The parsing at the moment is based on both RFC822 headers and subject lines.
Signed-off-by: Stephen Finucane <step...@that.guru> Reviewed-by: Andy Doan <andy.d...@linaro.org> Reviewed-by: Daniel Axtens <d...@axtens.net> Reviewed-by: Andrew Donnellan <andrew.donnel...@au1.ibm.com> Tested-by: Russell Currey <rus...@russell.cc> --- v7: - Update to reference 'Series', not 'SeriesRevision' - Clarify why we need to save SeriesReference objects - Rename some functions v4: - Update per new 'SeriesRevision'-'Patch' relationship v3: - Rework how nested series are handled once again - Don't search for references when creating a cover letter v2: - Rebase onto master, moving changes into 'parser' - Add unit tests - Merge "Handle 'xxx (v2)' style messages" patch - Merge "Handle series sent 'in-reply-to'" patch - Handle capitalized version prefixes like [V2] - Trivial cleanup of some parser functions --- patchwork/parser.py | 138 ++++++++++++++++++++++++++++++++++++++--- patchwork/tests/test_parser.py | 112 +++++++++++++++++++++++++++------ patchwork/tests/utils.py | 25 ++++++++ 3 files changed, 248 insertions(+), 27 deletions(-) diff --git a/patchwork/parser.py b/patchwork/parser.py index 15476b1..a4036ff 100644 --- a/patchwork/parser.py +++ b/patchwork/parser.py @@ -21,8 +21,10 @@ import codecs import datetime -from email.header import decode_header, make_header -from email.utils import parsedate_tz, mktime_tz +from email.header import decode_header +from email.header import make_header +from email.utils import mktime_tz +from email.utils import parsedate_tz from fnmatch import fnmatch import logging import re @@ -30,9 +32,17 @@ import re from django.contrib.auth.models import User from django.utils import six -from patchwork.models import (Patch, Project, Person, Comment, State, - DelegationRule, Submission, CoverLetter, - get_default_initial_patch_state) +from patchwork.models import Comment +from patchwork.models import CoverLetter +from patchwork.models import DelegationRule +from patchwork.models import get_default_initial_patch_state +from patchwork.models import Patch +from patchwork.models import Person +from patchwork.models import Project +from patchwork.models import Series +from patchwork.models import SeriesReference +from patchwork.models import State +from patchwork.models import Submission _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') @@ -162,6 +172,42 @@ def find_project_by_header(mail): return project +def find_series(mail): + """Find a patch's series. + + Traverse RFC822 headers, starting with most recent first, to find + ancestors and the related series. Headers are traversed in reverse + to handle series sent in reply to previous series, like so: + + [PATCH 0/3] A cover letter + [PATCH 1/3] The first patch + ... + [PATCH v2 0/3] A cover letter + [PATCH v2 1/3] The first patch + ... + + This means we evaluate headers like so: + + - first, check for a Series that directly matches this message's + Message-ID + - then, check for a series that matches the In-Reply-To + - then, check for a series that matches the References, from most + recent (the patch's closest ancestor) to least recent + + Args: + mail (email.message.Message): The mail to extract series from + + Returns: + The matching ``Series`` instance, if any + """ + for ref in [mail.get('Message-Id')] + find_references(mail): + # try parsing by RFC5322 fields first + try: + return SeriesReference.objects.get(msgid=ref).series + except SeriesReference.DoesNotExist: + pass + + def find_author(mail): from_header = clean_header(mail.get('From')) name, email = (None, None) @@ -243,6 +289,13 @@ def find_references(mail): return refs +def _find_matching_prefix(subject_prefixes, regex): + for prefix in subject_prefixes: + m = regex.match(prefix) + if m: + return m + + def parse_series_marker(subject_prefixes): """Extract series markers from subject. @@ -258,14 +311,36 @@ def parse_series_marker(subject_prefixes): """ regex = re.compile('^([0-9]+)/([0-9]+)$') - for prefix in subject_prefixes: - m = regex.match(prefix) - if not m: - continue + m = _find_matching_prefix(subject_prefixes, regex) + if m: return (int(m.group(1)), int(m.group(2))) + return (None, None) +def parse_version(subject, subject_prefixes): + """Extract patch version. + + Args: + subject: Main body of subject line + subject_prefixes: List of subject prefixes to extract version + from + + Returns: + version if found, else 1 + """ + regex = re.compile('^[vV](\d+)$') + m = _find_matching_prefix(subject_prefixes, regex) + if m: + return int(m.group(1)) + + m = re.search(r'\([vV](\d+)\)', subject) + if m: + return int(m.group(1)) + + return 1 + + def find_content(project, mail): """Extract a comment and potential diff from a mail.""" patchbuf = None @@ -696,6 +771,7 @@ def parse_mail(mail, list_id=None): name, prefixes = clean_subject(subject, [project.linkname]) is_comment = subject_check(subject) x, n = parse_series_marker(prefixes) + version = parse_version(name, prefixes) refs = find_references(mail) date = find_date(mail) headers = find_headers(mail) @@ -712,6 +788,23 @@ def parse_mail(mail, list_id=None): filenames = find_filenames(diff) delegate = auto_delegate(project, filenames) + series = find_series(mail) + if not series and n: # the series markers indicates a series + series = Series(date=date, + submitter=author, + version=version, + total=n) + series.save() + + # NOTE(stephenfin) We must save references for series. We + # do this to handle the case where a later patch is + # received first. Without storing references, it would not + # be possible to identify the relationship between patches + # as the earlier patch does not reference the later one. + for ref in refs + [msgid]: + # we don't want duplicates + SeriesReference.objects.get_or_create(series=series, msgid=ref) + patch = Patch( msgid=msgid, project=project, @@ -727,6 +820,9 @@ def parse_mail(mail, list_id=None): patch.save() logger.debug('Patch saved') + if series: + series.add_patch(patch, x) + return patch elif x == 0: # (potential) cover letters # if refs are empty, it's implicitly a cover letter. If not, @@ -749,6 +845,28 @@ def parse_mail(mail, list_id=None): if is_cover_letter: author.save() + # we don't use 'find_series' here as a cover letter will + # always be the first item in a thread, thus the references + # could only point to a different series or unrelated + # message + try: + series = SeriesReference.objects.get(msgid=msgid).series + except SeriesReference.DoesNotExist: + series = None + + if not series: + series = Series(date=date, + submitter=author, + version=version, + total=n) + series.save() + + # we don't save the in-reply-to or references fields + # for a cover letter, as they can't refer to the same + # series + SeriesReference.objects.get_or_create(series=series, + msgid=msgid) + cover_letter = CoverLetter( msgid=msgid, project=project, @@ -760,6 +878,8 @@ def parse_mail(mail, list_id=None): cover_letter.save() logger.debug('Cover letter saved') + series.add_cover_letter(cover_letter) + return cover_letter # comments diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py index e16ffbc..96166ad 100644 --- a/patchwork/tests/test_parser.py +++ b/patchwork/tests/test_parser.py @@ -35,13 +35,16 @@ from patchwork.parser import clean_subject from patchwork.parser import find_author from patchwork.parser import find_content from patchwork.parser import find_project_by_header +from patchwork.parser import find_series from patchwork.parser import parse_mail as _parse_mail from patchwork.parser import parse_pull_request from patchwork.parser import parse_series_marker +from patchwork.parser import parse_version from patchwork.parser import split_prefixes from patchwork.parser import subject_check from patchwork.tests import TEST_MAIL_DIR from patchwork.tests.utils import create_project +from patchwork.tests.utils import create_series_reference from patchwork.tests.utils import create_state from patchwork.tests.utils import create_user from patchwork.tests.utils import read_patch @@ -328,6 +331,72 @@ class SenderCorrelationTest(TestCase): self.assertEqual(person_b.id, person_a.id) +class SeriesCorrelationTest(TestCase): + """Validate correct behavior of find_series.""" + + @staticmethod + def _create_email(msgid, references=None): + """Create a sample mail. + + Arguments: + msgid (str): The message's msgid + references (list): A list of preceding messages' msgids, + oldest first + """ + mail = 'Message-Id: %s\n' % msgid + \ + 'From: example user <u...@example.com>\n' + \ + 'Subject: Tests\n' + + if references: + mail += 'In-Reply-To: %s\n' % references[-1] + mail += 'References: %s\n' % '\n\t'.join(references) + + mail += 'test\n\n' + SAMPLE_DIFF + return message_from_string(mail) + + def test_new_series(self): + msgid = make_msgid() + email = self._create_email(msgid) + + self.assertIsNone(find_series(email)) + + def test_first_reply(self): + msgid_a = make_msgid() + msgid_b = make_msgid() + email = self._create_email(msgid_b, [msgid_a]) + + # assume msgid_a was already handled + ref = create_series_reference(msgid=msgid_a) + + series = find_series(email) + self.assertEqual(series, ref.series) + + def test_nested_series(self): + """Handle a series sent in-reply-to an existing series.""" + # create an old series with a "cover letter" + msgids = [make_msgid()] + ref_v1 = create_series_reference(msgid=msgids[0]) + + # ...and three patches + for i in range(3): + msgids.append(make_msgid()) + create_series_reference(msgid=msgids[-1], + series=ref_v1.series) + + # now create a new series with "cover letter" + msgids.append(make_msgid()) + ref_v2 = create_series_reference(msgid=msgids[-1]) + + # ...and the "first patch" of this new series + msgid = make_msgid() + email = self._create_email(msgid, msgids) + series = find_series(email) + + # this should link to the second series - not the first + self.assertEqual(len(msgids), 4 + 1) # old series + new cover + self.assertEqual(series, ref_v2.series) + + class SubjectEncodingTest(TestCase): """Validate correct handling of encoded subjects.""" @@ -692,24 +761,6 @@ class ParseInitialTagsTest(PatchTest): tag__name='Tested-by').count, 1) -class PrefixTest(TestCase): - - def test_split_prefixes(self): - self.assertEqual(split_prefixes('PATCH'), ['PATCH']) - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) - self.assertEqual(split_prefixes(''), []) - self.assertEqual(split_prefixes('PATCH,'), ['PATCH']) - self.assertEqual(split_prefixes('PATCH '), ['PATCH']) - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) - self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2']) - - def test_series_markers(self): - self.assertEqual(parse_series_marker([]), (None, None)) - self.assertEqual(parse_series_marker(['bar']), (None, None)) - self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2)) - self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12)) - - class SubjectTest(TestCase): def test_clean_subject(self): @@ -748,3 +799,28 @@ class SubjectTest(TestCase): self.assertIsNotNone(subject_check('RE meep')) self.assertIsNotNone(subject_check('Re meep')) self.assertIsNotNone(subject_check('re meep')) + + def test_split_prefixes(self): + self.assertEqual(split_prefixes('PATCH'), ['PATCH']) + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) + self.assertEqual(split_prefixes(''), []) + self.assertEqual(split_prefixes('PATCH,'), ['PATCH']) + self.assertEqual(split_prefixes('PATCH '), ['PATCH']) + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC']) + self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2']) + + def test_series_markers(self): + self.assertEqual(parse_series_marker([]), (None, None)) + self.assertEqual(parse_series_marker(['bar']), (None, None)) + self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2)) + self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12)) + + def test_version(self): + self.assertEqual(parse_version('', []), 1) + self.assertEqual(parse_version('Hello, world', []), 1) + self.assertEqual(parse_version('Hello, world', ['version']), 1) + self.assertEqual(parse_version('Hello, world', ['v2']), 2) + self.assertEqual(parse_version('Hello, world', ['V6']), 6) + self.assertEqual(parse_version('Hello, world', ['v10']), 10) + self.assertEqual(parse_version('Hello, world (v2)', []), 2) + self.assertEqual(parse_version('Hello, world (V6)', []), 6) diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py index 23b0c87..52cd0f9 100644 --- a/patchwork/tests/utils.py +++ b/patchwork/tests/utils.py @@ -31,6 +31,8 @@ from patchwork.models import CoverLetter from patchwork.models import Patch from patchwork.models import Person from patchwork.models import Project +from patchwork.models import Series +from patchwork.models import SeriesReference from patchwork.models import State from patchwork.tests import TEST_PATCH_DIR @@ -217,6 +219,29 @@ def create_check(**kwargs): return Check.objects.create(**values) +def create_series(**kwargs): + """Create 'Series' object.""" + values = { + 'date': dt.now(), + 'submitter': create_person() if 'submitter' not in kwargs else None, + 'total': 1, + } + values.update(**kwargs) + + return Series.objects.create(**values) + + +def create_series_reference(**kwargs): + """Create 'SeriesReference' object.""" + values = { + 'series': create_series() if 'series' not in kwargs else None, + 'msgid': make_msgid(), + } + values.update(**kwargs) + + return SeriesReference.objects.create(**values) + + def _create_submissions(create_func, count=1, **kwargs): """Create 'count' Submission-based objects. -- 2.7.4 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork