Applied with a release note. Daniel Axtens <d...@axtens.net> writes:
> Andrew Donnellan <a...@linux.ibm.com> writes: > >> To avoid triggering spam filters due to failed signature validation, many >> mailing lists mangle the From header to change the From address to be the >> address of the list, typically where the sender's domain has a strict DMARC >> policy enabled. >> >> In this case, we should try to unmangle the From header. >> >> Add support for using the X-Original-From or Reply-To headers, as used by >> Google Groups and Mailman respectively, to unmangle the From header when >> necessary and associate the patch with the correct submitter based on the >> unmangled email address. >> >> When downloading mboxes, rewrite the From header using the unmangled >> address, and preserve the original header as X-Patchwork-Original-From in >> case someone needs it for some reason. The original From header will still >> be stored in the database and exposed via the API, as we want to keep >> messages as close to the original received format as possible. >> >> Closes: #64 ("Incorrect submitter when using googlegroups") >> Reported-by: Alexandre Belloni <alexandre.bell...@bootlin.com> >> Reported-by: Stephen Rothwell <s...@canb.auug.org.au> >> Signed-off-by: Andrew Donnellan <a...@linux.ibm.com> > > Tested-by: Daniel Axtens <d...@axtens.net> # mailman only > > I'll apply it shortly to master. You could convince me to get it > backported to stable if you really wanted. > > Do we need something to go through the db and fix up old ones? Or is it > best to let sleeping patches lie? > > Regards, > Daniel >> >> --- >> >> v1->v2: >> - use X-Original-From rather than X-Original-Sender >> - unmangle From header when downloading mbox >> >> rewrite from header >> >> Signed-off-by: Andrew Donnellan <a...@linux.ibm.com> >> >> use x original from >> >> Signed-off-by: Andrew Donnellan <a...@linux.ibm.com> >> --- >> patchwork/parser.py | 75 +++++++++++++++++++++++++++---- >> patchwork/tests/test_mboxviews.py | 21 +++++++++ >> patchwork/tests/test_parser.py | 68 ++++++++++++++++++++++++++-- >> patchwork/views/utils.py | 12 +++++ >> 4 files changed, 163 insertions(+), 13 deletions(-) >> >> diff --git a/patchwork/parser.py b/patchwork/parser.py >> index 7dc66bc05a5b..be1e51652dd3 100644 >> --- a/patchwork/parser.py >> +++ b/patchwork/parser.py >> @@ -321,12 +321,7 @@ def find_series(project, mail, author): >> return _find_series_by_markers(project, mail, author) >> >> >> -def get_or_create_author(mail): >> - from_header = clean_header(mail.get('From')) >> - >> - if not from_header: >> - raise ValueError("Invalid 'From' header") >> - >> +def split_from_header(from_header): >> name, email = (None, None) >> >> # tuple of (regex, fn) >> @@ -355,6 +350,65 @@ def get_or_create_author(mail): >> (name, email) = fn(match.groups()) >> break >> >> + return (name, email) >> + >> + >> +# Unmangle From addresses that have been mangled for DMARC purposes. >> +# >> +# To avoid triggering spam filters due to failed signature validation, many >> +# mailing lists mangle the From header to change the From address to be the >> +# address of the list, typically where the sender's domain has a strict >> +# DMARC policy enabled. >> +# >> +# Unfortunately, there's no standardised way of preserving the original >> +# From address. >> +# >> +# Google Groups adds an X-Original-From header. If present, we use that. >> +# >> +# Mailman preserves the original address by adding a Reply-To, except in the >> +# case where the list is set to either reply to list, or reply to a specific >> +# address, in which case the original From is added to Cc instead. These >> corner >> +# cases are dumb, but we try and handle things as sensibly as possible by >> +# looking for a name in Reply-To/Cc that matches From. It's inexact but >> should >> +# be good enough for our purposes. >> +def get_original_sender(mail, name, email): >> + if name and ' via ' in name: >> + # Mailman uses the format "<name> via <list>" >> + # Google Groups uses "'<name>' via <list>" >> + stripped_name = name[:name.rfind(' via ')].strip().strip("'") >> + >> + original_from = clean_header(mail.get('X-Original-From', '')) >> + if original_from: >> + new_email = split_from_header(original_from)[1].strip()[:255] >> + return (stripped_name, new_email) >> + >> + addrs = [] >> + reply_to_headers = mail.get_all('Reply-To') or [] >> + cc_headers = mail.get_all('Cc') or [] >> + for header in reply_to_headers + cc_headers: >> + header = clean_header(header) >> + addrs = header.split(",") >> + for addr in addrs: >> + new_name, new_email = split_from_header(addr) >> + if new_name: >> + new_name = new_name.strip()[:255] >> + if new_email: >> + new_email = new_email.strip()[:255] >> + if new_name == stripped_name: >> + return (stripped_name, new_email) >> + >> + # If we can't figure out the original sender, just keep it as is >> + return (name, email) >> + >> + >> +def get_or_create_author(mail, project=None): >> + from_header = clean_header(mail.get('From')) >> + >> + if not from_header: >> + raise ValueError("Invalid 'From' header") >> + >> + name, email = split_from_header(from_header) >> + >> if not email: >> raise ValueError("Invalid 'From' header") >> >> @@ -362,6 +416,9 @@ def get_or_create_author(mail): >> if name is not None: >> name = name.strip()[:255] >> >> + if project and email.lower() == project.listemail.lower(): >> + name, email = get_original_sender(mail, name, email) >> + >> # this correctly handles the case where we lose the race to create >> # the person and another process beats us to it. (If the record >> # does not exist, g_o_c invokes _create_object_from_params which >> @@ -1004,7 +1061,7 @@ def parse_mail(mail, list_id=None): >> >> if not is_comment and (diff or pull_url): # patches or pull requests >> # we delay the saving until we know we have a patch. >> - author = get_or_create_author(mail) >> + author = get_or_create_author(mail, project) >> >> delegate = find_delegate_by_header(mail) >> if not delegate and diff: >> @@ -1099,7 +1156,7 @@ def parse_mail(mail, list_id=None): >> is_cover_letter = True >> >> if is_cover_letter: >> - author = get_or_create_author(mail) >> + author = get_or_create_author(mail, project) >> >> # we don't use 'find_series' here as a cover letter will >> # always be the first item in a thread, thus the references >> @@ -1159,7 +1216,7 @@ def parse_mail(mail, list_id=None): >> if not submission: >> return >> >> - author = get_or_create_author(mail) >> + author = get_or_create_author(mail, project) >> >> try: >> comment = Comment.objects.create( >> diff --git a/patchwork/tests/test_mboxviews.py >> b/patchwork/tests/test_mboxviews.py >> index 3854a856c4db..8f67778dbacb 100644 >> --- a/patchwork/tests/test_mboxviews.py >> +++ b/patchwork/tests/test_mboxviews.py >> @@ -183,6 +183,27 @@ class MboxHeaderTest(TestCase): >> patch.url_msgid])) >> self.assertContains(response, from_email) >> >> + def test_dmarc_from_header(self): >> + """Validate 'From' header is rewritten correctly when DMARC-munged. >> + >> + Test that when an email with a DMARC-munged From header is >> processed, >> + the From header will be unmunged and the munged address will be >> saved >> + as 'X-Patchwork-Original-From'. >> + """ >> + orig_from_header = 'Person via List <l...@example.com>' >> + rewritten_from_header = 'Person <per...@example.com>' >> + project = create_project(listemail='l...@example.com') >> + person = create_person(name='Person', email='per...@example.com') >> + patch = create_patch(project=project, >> + headers='From: ' + orig_from_header, >> + submitter=person) >> + response = self.client.get( >> + reverse('patch-mbox', args=[patch.project.linkname, >> + patch.url_msgid])) >> + mail = email.message_from_string(response.content.decode()) >> + self.assertEqual(mail['From'], rewritten_from_header) >> + self.assertEqual(mail['X-Patchwork-Original-From'], >> orig_from_header) >> + >> def test_date_header(self): >> patch = create_patch() >> response = self.client.get( >> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py >> index e5a2fca3044e..85c6c52f0e4b 100644 >> --- a/patchwork/tests/test_parser.py >> +++ b/patchwork/tests/test_parser.py >> @@ -265,11 +265,23 @@ class SenderCorrelationTest(TestCase): >> """ >> >> @staticmethod >> - def _create_email(from_header): >> + def _create_email(from_header, reply_tos=None, ccs=None, >> + x_original_from=None): >> mail = 'Message-Id: %s\n' % make_msgid() + \ >> - 'From: %s\n' % from_header + \ >> - 'Subject: Tests\n\n'\ >> - 'test\n' >> + 'From: %s\n' % from_header >> + >> + if reply_tos: >> + mail += 'Reply-To: %s\n' % ', '.join(reply_tos) >> + >> + if ccs: >> + mail += 'Cc: %s\n' % ', '.join(ccs) >> + >> + if x_original_from: >> + mail += 'X-Original-From: %s\n' % x_original_from >> + >> + mail += 'Subject: Tests\n\n'\ >> + 'test\n' >> + >> return message_from_string(mail) >> >> def test_existing_sender(self): >> @@ -311,6 +323,54 @@ class SenderCorrelationTest(TestCase): >> self.assertEqual(person_b._state.adding, False) >> self.assertEqual(person_b.id, person_a.id) >> >> + def test_mailman_dmarc_munging(self): >> + project = create_project() >> + real_sender = 'Existing Sender <exist...@example.com>' >> + munged_sender = 'Existing Sender via List <{}>'.format( >> + project.listemail) >> + other_email = 'Other Person <ot...@example.com>' >> + >> + # Unmunged author >> + mail = self._create_email(real_sender) >> + person_a = get_or_create_author(mail, project) >> + person_a.save() >> + >> + # Single Reply-To >> + mail = self._create_email(munged_sender, [real_sender]) >> + person_b = get_or_create_author(mail, project) >> + self.assertEqual(person_b._state.adding, False) >> + self.assertEqual(person_b.id, person_a.id) >> + >> + # Single Cc >> + mail = self._create_email(munged_sender, [], [real_sender]) >> + person_b = get_or_create_author(mail, project) >> + self.assertEqual(person_b._state.adding, False) >> + self.assertEqual(person_b.id, person_a.id) >> + >> + # Multiple Reply-Tos and Ccs >> + mail = self._create_email(munged_sender, [other_email, real_sender], >> + [other_email, other_email]) >> + person_b = get_or_create_author(mail, project) >> + self.assertEqual(person_b._state.adding, False) >> + self.assertEqual(person_b.id, person_a.id) >> + >> + def test_google_dmarc_munging(self): >> + project = create_project() >> + real_sender = 'Existing Sender <exist...@example.com>' >> + munged_sender = "'Existing Sender' via List <{}>".format( >> + project.listemail) >> + >> + # Unmunged author >> + mail = self._create_email(real_sender) >> + person_a = get_or_create_author(mail, project) >> + person_a.save() >> + >> + # X-Original-From header >> + mail = self._create_email(munged_sender, None, None, real_sender) >> + person_b = get_or_create_author(mail, project) >> + self.assertEqual(person_b._state.adding, False) >> + self.assertEqual(person_b.id, person_a.id) >> + >> >> class SeriesCorrelationTest(TestCase): >> """Validate correct behavior of find_series.""" >> diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py >> index d65aa5816336..905807935459 100644 >> --- a/patchwork/views/utils.py >> +++ b/patchwork/views/utils.py >> @@ -18,6 +18,7 @@ from django.utils import six >> >> from patchwork.models import Comment >> from patchwork.models import Patch >> +from patchwork.parser import split_from_header >> >> if settings.ENABLE_REST_API: >> from rest_framework.authtoken.models import Token >> @@ -92,6 +93,17 @@ def _submission_to_mbox(submission): >> # [1] https://tools.ietf.org/html/rfc1847 >> if key == 'Content-Type' and val == 'multipart/signed': >> continue >> + >> + if key == 'From': >> + name, addr = split_from_header(val) >> + if addr == submission.project.listemail: >> + # If From: is the list address (typically DMARC munging), >> then >> + # use the submitter details (which are cleaned up in the >> + # parser) in the From: field so that the patch author >> details >> + # are correct when applied with git am. >> + mail['X-Patchwork-Original-From'] = val >> + val = mail['X-Patchwork-Submitter'] >> + >> mail[key] = val >> >> if 'Date' not in mail: >> -- >> 2.20.1 >> >> _______________________________________________ >> Patchwork mailing list >> Patchwork@lists.ozlabs.org >> https://lists.ozlabs.org/listinfo/patchwork _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork