Re: [PATCH 9/9] parser: don't fail on multiple SeriesReferences

2018-02-24 Thread Daniel Axtens
Andrew Donnellan  writes:

> On 22/02/18 01:17, Daniel Axtens wrote:
>> Parallel parsing would occasonally fail with:
>> 
>> patchwork.models.MultipleObjectsReturned: get() returned more than one 
>> SeriesReference -- it returned 2!
>> 
>> I think these are happening if you have different processes parsing
>> e.g. 1/3 and 2/3 simultaneously: both will have a reference to 1/3,
>> in the case of 1 it will be the msgid, in the case of 2 it will be
>> in References. So when we come to parse 3/3, .get() finds 2 and
>> throws the exception.
>> 
>> This does not fix the creation of multiple series references; it
>> just causes them to be ignored. We still have serious race conditions
>> with series creation, but I don't yet have clear answers for them.
>> With this patch, they will at least not stop patches from being
>> processed - they'll just lead to wonky series, which we already have.
>> 
>> Signed-off-by: Daniel Axtens 
>
> Label that open bug with a FIXME for searchability? Logging it is a 
> great idea.

Done!

>
> Reviewed-by: Andrew Donnellan 

Thanks!

Regards,
Daniel

>
>> ---
>>   patchwork/parser.py | 23 +--
>>   1 file changed, 21 insertions(+), 2 deletions(-)
>> 
>> diff --git a/patchwork/parser.py b/patchwork/parser.py
>> index 0e53e6b9a3af..4c9e636336d9 100644
>> --- a/patchwork/parser.py
>> +++ b/patchwork/parser.py
>> @@ -240,6 +240,13 @@ def _find_series_by_references(project, mail):
>>   msgid=ref[:255], series__project=project).series
>>   except SeriesReference.DoesNotExist:
>>   continue
>> +except SeriesReference.MultipleObjectsReturned:
>> +# Open bug: this can happen when we're processing messages
>> +# in parallel. Pick the first and log.
>> +logger.error("Multiple SeriesReferences for %s in project %s!" %
>> + (ref[:255], project.name))
>> +return SeriesReference.objects.filter(
>> +msgid=ref[:255], series__project=project).first().series
>> 
>> 
>>   def _find_series_by_markers(project, mail, author):
>> @@ -1037,6 +1044,9 @@ def parse_mail(mail, list_id=None):
>>   series__project=project)
>>   except SeriesReference.DoesNotExist:
>>   SeriesReference.objects.create(series=series, 
>> msgid=ref)
>> +except SeriesReference.MultipleObjectsReturned:
>> +logger.error("Multiple SeriesReferences for %s"
>> + " in project %s!" % (ref, project.name))
>> 
>>   # add to a series if we have found one, and we have a numbered
>>   # patch. Don't add unnumbered patches (for example diffs sent
>> @@ -1075,6 +1085,11 @@ def parse_mail(mail, list_id=None):
>>   msgid=msgid, series__project=project).series
>>   except SeriesReference.DoesNotExist:
>>   series = None
>> +except SeriesReference.MultipleObjectsReturned:
>> +logger.error("Multiple SeriesReferences for %s"
>> + " in project %s!" % (msgid, project.name))
>> +series = SeriesReference.objects.filter(
>> +msgid=msgid, series__project=project).first().series
>> 
>>   if not series:
>>   series = Series(project=project,
>> @@ -1087,8 +1102,12 @@ def parse_mail(mail, list_id=None):
>>   # we don't save the in-reply-to or references fields
>>   # for a cover letter, as they can't refer to the same
>>   # series
>> -SeriesReference.objects.get_or_create(series=series,
>> -  msgid=msgid)
>> +try:
>> +SeriesReference.objects.get_or_create(series=series,
>> +  msgid=msgid)
>> +except SeriesReference.MultipleObjectsReturned:
>> +logger.error("Multiple SeriesReferences for %s"
>> + " in project %s!" % (msgid, project.name))
>> 
>>   cover_letter = CoverLetter(
>>   msgid=msgid,
>> 
>
> -- 
> Andrew Donnellan  OzLabs, ADL Canberra
> andrew.donnel...@au1.ibm.com  IBM Australia Limited
___
Patchwork mailing list
Patchwork@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/patchwork


Re: [PATCH 9/9] parser: don't fail on multiple SeriesReferences

2018-02-21 Thread Andrew Donnellan

On 22/02/18 01:17, Daniel Axtens wrote:

Parallel parsing would occasonally fail with:

patchwork.models.MultipleObjectsReturned: get() returned more than one 
SeriesReference -- it returned 2!

I think these are happening if you have different processes parsing
e.g. 1/3 and 2/3 simultaneously: both will have a reference to 1/3,
in the case of 1 it will be the msgid, in the case of 2 it will be
in References. So when we come to parse 3/3, .get() finds 2 and
throws the exception.

This does not fix the creation of multiple series references; it
just causes them to be ignored. We still have serious race conditions
with series creation, but I don't yet have clear answers for them.
With this patch, they will at least not stop patches from being
processed - they'll just lead to wonky series, which we already have.

Signed-off-by: Daniel Axtens 


Label that open bug with a FIXME for searchability? Logging it is a 
great idea.


Reviewed-by: Andrew Donnellan 


---
  patchwork/parser.py | 23 +--
  1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/patchwork/parser.py b/patchwork/parser.py
index 0e53e6b9a3af..4c9e636336d9 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -240,6 +240,13 @@ def _find_series_by_references(project, mail):
  msgid=ref[:255], series__project=project).series
  except SeriesReference.DoesNotExist:
  continue
+except SeriesReference.MultipleObjectsReturned:
+# Open bug: this can happen when we're processing messages
+# in parallel. Pick the first and log.
+logger.error("Multiple SeriesReferences for %s in project %s!" %
+ (ref[:255], project.name))
+return SeriesReference.objects.filter(
+msgid=ref[:255], series__project=project).first().series


  def _find_series_by_markers(project, mail, author):
@@ -1037,6 +1044,9 @@ def parse_mail(mail, list_id=None):
  series__project=project)
  except SeriesReference.DoesNotExist:
  SeriesReference.objects.create(series=series, msgid=ref)
+except SeriesReference.MultipleObjectsReturned:
+logger.error("Multiple SeriesReferences for %s"
+ " in project %s!" % (ref, project.name))

  # add to a series if we have found one, and we have a numbered
  # patch. Don't add unnumbered patches (for example diffs sent
@@ -1075,6 +1085,11 @@ def parse_mail(mail, list_id=None):
  msgid=msgid, series__project=project).series
  except SeriesReference.DoesNotExist:
  series = None
+except SeriesReference.MultipleObjectsReturned:
+logger.error("Multiple SeriesReferences for %s"
+ " in project %s!" % (msgid, project.name))
+series = SeriesReference.objects.filter(
+msgid=msgid, series__project=project).first().series

  if not series:
  series = Series(project=project,
@@ -1087,8 +1102,12 @@ def parse_mail(mail, list_id=None):
  # we don't save the in-reply-to or references fields
  # for a cover letter, as they can't refer to the same
  # series
-SeriesReference.objects.get_or_create(series=series,
-  msgid=msgid)
+try:
+SeriesReference.objects.get_or_create(series=series,
+  msgid=msgid)
+except SeriesReference.MultipleObjectsReturned:
+logger.error("Multiple SeriesReferences for %s"
+ " in project %s!" % (msgid, project.name))

  cover_letter = CoverLetter(
  msgid=msgid,



--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited

___
Patchwork mailing list
Patchwork@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/patchwork


[PATCH 9/9] parser: don't fail on multiple SeriesReferences

2018-02-21 Thread Daniel Axtens
Parallel parsing would occasonally fail with:

patchwork.models.MultipleObjectsReturned: get() returned more than one 
SeriesReference -- it returned 2!

I think these are happening if you have different processes parsing
e.g. 1/3 and 2/3 simultaneously: both will have a reference to 1/3,
in the case of 1 it will be the msgid, in the case of 2 it will be
in References. So when we come to parse 3/3, .get() finds 2 and
throws the exception.

This does not fix the creation of multiple series references; it
just causes them to be ignored. We still have serious race conditions
with series creation, but I don't yet have clear answers for them.
With this patch, they will at least not stop patches from being
processed - they'll just lead to wonky series, which we already have.

Signed-off-by: Daniel Axtens 
---
 patchwork/parser.py | 23 +--
 1 file changed, 21 insertions(+), 2 deletions(-)

diff --git a/patchwork/parser.py b/patchwork/parser.py
index 0e53e6b9a3af..4c9e636336d9 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -240,6 +240,13 @@ def _find_series_by_references(project, mail):
 msgid=ref[:255], series__project=project).series
 except SeriesReference.DoesNotExist:
 continue
+except SeriesReference.MultipleObjectsReturned:
+# Open bug: this can happen when we're processing messages
+# in parallel. Pick the first and log.
+logger.error("Multiple SeriesReferences for %s in project %s!" %
+ (ref[:255], project.name))
+return SeriesReference.objects.filter(
+msgid=ref[:255], series__project=project).first().series
 
 
 def _find_series_by_markers(project, mail, author):
@@ -1037,6 +1044,9 @@ def parse_mail(mail, list_id=None):
 series__project=project)
 except SeriesReference.DoesNotExist:
 SeriesReference.objects.create(series=series, msgid=ref)
+except SeriesReference.MultipleObjectsReturned:
+logger.error("Multiple SeriesReferences for %s"
+ " in project %s!" % (ref, project.name))
 
 # add to a series if we have found one, and we have a numbered
 # patch. Don't add unnumbered patches (for example diffs sent
@@ -1075,6 +1085,11 @@ def parse_mail(mail, list_id=None):
 msgid=msgid, series__project=project).series
 except SeriesReference.DoesNotExist:
 series = None
+except SeriesReference.MultipleObjectsReturned:
+logger.error("Multiple SeriesReferences for %s"
+ " in project %s!" % (msgid, project.name))
+series = SeriesReference.objects.filter(
+msgid=msgid, series__project=project).first().series
 
 if not series:
 series = Series(project=project,
@@ -1087,8 +1102,12 @@ def parse_mail(mail, list_id=None):
 # we don't save the in-reply-to or references fields
 # for a cover letter, as they can't refer to the same
 # series
-SeriesReference.objects.get_or_create(series=series,
-  msgid=msgid)
+try:
+SeriesReference.objects.get_or_create(series=series,
+  msgid=msgid)
+except SeriesReference.MultipleObjectsReturned:
+logger.error("Multiple SeriesReferences for %s"
+ " in project %s!" % (msgid, project.name))
 
 cover_letter = CoverLetter(
 msgid=msgid,
-- 
2.14.1

___
Patchwork mailing list
Patchwork@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/patchwork