Stephen Finucane <step...@that.guru> writes: > On Wed, 2018-04-11 at 02:23 +1000, Daniel Axtens wrote: >> > diff --git >> > a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py >> > b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py >> > new file mode 100644 >> > index 00000000..5dca85c5 >> > --- /dev/null >> > +++ >> > b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py >> > @@ -0,0 +1,61 @@ >> > +# -*- coding: utf-8 -*- >> > +from __future__ import unicode_literals >> > + >> > +from django.db import migrations, models >> > + >> > +from patchwork.api.event import PayloadSerializer >> > +from patchwork.models import Event >> > + >> > + >> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer >> > +class Payload(object): >> > + >> > + def __init__(self, **kwargs): >> > + for kwarg in kwargs: >> > + setattr(self, kwarg, kwargs[kwarg]) >> > + >> > + >> > +def generate_payload(apps, schema_editor): >> > + # NOTE(stephenfin): We could convert this to native SQL in the future >> > + # by using e.g. 'JSON_OBJECT' for MySQL >> > + EventBase = apps.get_model('patchwork', 'Event') >> > + >> > + for event in EventBase.objects.all(): >> > + category = event.category >> > + # TODO(stephenfin): This logic should be moved into the >> > EventSerializer >> > + # class >> > + if category == Event.CATEGORY_COVER_CREATED: >> > + payload_obj = Payload(cover=event.cover) >> > + elif category == Event.CATEGORY_PATCH_CREATED: >> > + payload_obj = Payload(patch=event.patch) >> > + elif category == Event.CATEGORY_PATCH_STATE_CHANGED: >> > + payload_obj = Payload(patch=event.patch, >> > + previous_state=event.previous_state, >> > + current_state=event.previous_state) >> > + elif category == Event.CATEGORY_PATCH_DELEGATED: >> > + payload_obj = Payload(patch=event.patch, >> > + >> > previous_delegate=event.previous_delegate, >> > + current_delegate=event.current_delegate) >> > + elif category == Event.CATEGORY_PATCH_COMPLETED: >> > + payload_obj = Payload(patch=event.patch) >> > + elif category == Event.CATEGORY_CHECK_CREATED: >> > + payload_obj = Payload(patch=event.patch, >> > check=event.created_check) >> > + elif category == Event.CATEGORY_SERIES_CREATED: >> > + payload_obj = Payload(series=event.series) >> > + elif category == Event.CATEGORY_SERIES_COMPLETED: >> > + payload_obj = Payload(series=event.series) >> > + >> > + payload = PayloadSerializer(payload_obj).data
Converting this to json.dumps(PayloadSerializer(payload_obj).data) WFM. It now 'only' uses about a third of my memory for 318k events. The results seem to render although they are more nested than I remember: { "id": 318637, "category": "patch-completed", "project": { "id": 2, "url": "http://localhost:8000/api/projects/2/", "name": "Kernel Team", "link_name": "kernel-team", "list_id": "kernel-team.lists.ubuntu.com", "list_email": "kernel-t...@lists.ubuntu.com", "web_url": "", "scm_url": "", "webscm_url": "" }, "date": "2018-04-10T15:24:19.650185", "payload": { "patch": { "id": 105448, "url": "http://localhost:8000/api/patches/105448/", "msgid": "<20170828081014.24028-1-po-hsu....@canonical.com>", "date": "2017-08-28T08:10:14", "name": "[CVE-2016-10200,SRU,Trusty] l2tp: fix racy SOCK_ZAPPED flag check in l2tp_ip{, 6}_bind()", "mbox": "http://localhost:8000/patch/105448/mbox/" } } }, ISTR they looked a bit different previously, although I can't remember if the details (here things like mbox) were previous at the top level or in some object. Timings to come. Regards, Daniel >> > + event.payload = payload >> > + event.save() >> > + >> > + >> > +class Migration(migrations.Migration): >> > + >> > + dependencies = [ >> > + ('patchwork', '0026_add_event_payload_field'), >> > + ] >> > + >> > + operations = [ >> > + migrations.RunPython(generate_payload, atomic=True), >> > + ] >> >> >> I tried to run this migration on my laptop (16GB ram + swap) on a db >> with 102k patches. (The postgres sql dump is 781 MB). >> >> It chewed up over 16GB of ram before being OOM killed. >> >> This somewhat derailed my attempts to see if this offers any performance >> gain to justify the additional complexity. > > Darn, I'd tested this with the Patchwork archives and, while slow, it > did work. Guess it must be loading a lot of stuff into memory. I'll try > this again later in the week with a larger archive and see if I can > optimize anything. > >> BTW - I asked some questions last time around about versioning and >> dealing with any future changes. Did you have any thoughts on that? I >> think it's also a big issue. > > Right, sorry. I'll address those now. > > Stephen > >> Regards, >> Daniel >> >> >> > diff --git a/patchwork/migrations/0028_remove_old_event_fields.py >> > b/patchwork/migrations/0028_remove_old_event_fields.py >> > new file mode 100644 >> > index 00000000..2f7ba7bf >> > --- /dev/null >> > +++ b/patchwork/migrations/0028_remove_old_event_fields.py >> > @@ -0,0 +1,34 @@ >> > +# -*- coding: utf-8 -*- >> > +from __future__ import unicode_literals >> > + >> > +from django.db import migrations >> > + >> > + >> > +class Migration(migrations.Migration): >> > + >> > + dependencies = [ >> > + ('patchwork', '0027_migrate_data_from_event_fields_to_payload'), >> > + ] >> > + >> > + operations = [ >> > + migrations.RemoveField( >> > + model_name='event', >> > + name='created_check', >> > + ), >> > + migrations.RemoveField( >> > + model_name='event', >> > + name='current_delegate', >> > + ), >> > + migrations.RemoveField( >> > + model_name='event', >> > + name='current_state', >> > + ), >> > + migrations.RemoveField( >> > + model_name='event', >> > + name='previous_delegate', >> > + ), >> > + migrations.RemoveField( >> > + model_name='event', >> > + name='previous_state', >> > + ), >> > + ] >> > diff --git a/patchwork/models.py b/patchwork/models.py >> > index f91b994c..a5992d68 100644 >> > --- a/patchwork/models.py >> > +++ b/patchwork/models.py >> > @@ -37,6 +37,7 @@ from django.utils.functional import cached_property >> > from patchwork.compat import is_authenticated >> > from patchwork.compat import reverse >> > from patchwork.fields import HashField >> > +from patchwork.fields import JSONField >> > from patchwork.hasher import hash_diff >> > >> > if settings.ENABLE_REST_API: >> > @@ -943,32 +944,9 @@ class Event(models.Model): >> > on_delete=models.CASCADE, >> > help_text='The cover letter that this event was created for.') >> > >> > - # fields for 'patch-state-changed' events >> > + # additional data >> > >> > - previous_state = models.ForeignKey( >> > - State, related_name='+', null=True, blank=True, >> > - on_delete=models.CASCADE) >> > - current_state = models.ForeignKey( >> > - State, related_name='+', null=True, blank=True, >> > - on_delete=models.CASCADE) >> > - >> > - # fields for 'patch-delegate-changed' events >> > - >> > - previous_delegate = models.ForeignKey( >> > - User, related_name='+', null=True, blank=True, >> > - on_delete=models.CASCADE) >> > - current_delegate = models.ForeignKey( >> > - User, related_name='+', null=True, blank=True, >> > - on_delete=models.CASCADE) >> > - >> > - # fields or 'patch-check-created' events >> > - >> > - created_check = models.ForeignKey( >> > - Check, related_name='+', null=True, blank=True, >> > - on_delete=models.CASCADE) >> > - >> > - # TODO(stephenfin): Validate that the correct fields are being set by >> > way >> > - # of a 'clean' method >> > + payload = JSONField(null=True, blank=True) >> > >> > def __repr__(self): >> > return "<Event id='%d' category='%s'" % (self.id, self.category) >> > diff --git a/patchwork/signals.py b/patchwork/signals.py >> > index b083b51a..c0f4c08d 100644 >> > --- a/patchwork/signals.py >> > +++ b/patchwork/signals.py >> > @@ -24,6 +24,7 @@ from django.db.models.signals import post_save >> > from django.db.models.signals import pre_save >> > from django.dispatch import receiver >> > >> > +from patchwork.api.event import PayloadSerializer >> > from patchwork.models import Check >> > from patchwork.models import CoverLetter >> > from patchwork.models import Event >> > @@ -33,6 +34,14 @@ from patchwork.models import Series >> > from patchwork.models import SeriesPatch >> > >> > >> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer >> > +class Payload(object): >> > + >> > + def __init__(self, **kwargs): >> > + for kwarg in kwargs: >> > + setattr(self, kwarg, kwargs[kwarg]) >> > + >> > + >> > @receiver(pre_save, sender=Patch) >> > def patch_change_callback(sender, instance, raw, **kwargs): >> > # we only want notification of modified patches >> > @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, >> > **kwargs): >> > def create_cover_created_event(sender, instance, created, raw, **kwargs): >> > >> > def create_event(cover): >> > + payload = PayloadSerializer(Payload( >> > + cover=cover)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_COVER_CREATED, >> > project=cover.project, >> > + payload=payload.data, >> > cover=cover) >> > >> > # don't trigger for items loaded from fixtures or new items >> > @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, >> > created, raw, **kwargs): >> > def create_patch_created_event(sender, instance, created, raw, **kwargs): >> > >> > def create_event(patch): >> > + payload = PayloadSerializer(Payload( >> > + patch=patch)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_PATCH_CREATED, >> > project=patch.project, >> > + payload=payload.data, >> > patch=patch) >> > >> > # don't trigger for items loaded from fixtures or new items >> > @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, >> > created, raw, **kwargs): >> > def create_patch_state_changed_event(sender, instance, raw, **kwargs): >> > >> > def create_event(patch, before, after): >> > + payload = PayloadSerializer(Payload( >> > + patch=patch, >> > + previous_state=before, >> > + current_state=after)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_PATCH_STATE_CHANGED, >> > project=patch.project, >> > - patch=patch, >> > - previous_state=before, >> > - current_state=after) >> > + payload=payload.data, >> > + patch=patch) >> > >> > # don't trigger for items loaded from fixtures or new items >> > if raw or not instance.pk: >> > @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, >> > instance, raw, **kwargs): >> > def create_patch_delegated_event(sender, instance, raw, **kwargs): >> > >> > def create_event(patch, before, after): >> > + payload = PayloadSerializer(Payload( >> > + patch=patch, >> > + previous_delegate=before, >> > + current_delegate=after)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_PATCH_DELEGATED, >> > project=patch.project, >> > - patch=patch, >> > - previous_delegate=before, >> > - current_delegate=after) >> > + payload=payload.data, >> > + patch=patch) >> > >> > # don't trigger for items loaded from fixtures or new items >> > if raw or not instance.pk: >> > @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, >> > created, raw, **kwargs): >> > """Create patch completed event for patches with series.""" >> > >> > def create_event(patch, series): >> > + payload = PayloadSerializer(Payload( >> > + patch=patch, >> > + series=series)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_PATCH_COMPLETED, >> > project=patch.project, >> > + payload=payload.data, >> > patch=patch, >> > series=series) >> > >> > @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, >> > created, raw, **kwargs): >> > def create_check_created_event(sender, instance, created, raw, **kwargs): >> > >> > def create_event(check): >> > + payload = PayloadSerializer(Payload( >> > + patch=check.patch, >> > + check=check)) >> > + >> > # TODO(stephenfin): It might make sense to add a 'project' field >> > to >> > # 'check' to prevent lookups here and in the REST API >> > return Event.objects.create( >> > category=Event.CATEGORY_CHECK_CREATED, >> > project=check.patch.project, >> > - patch=check.patch, >> > - created_check=check) >> > + payload=payload.data, >> > + patch=check.patch) >> > >> > # don't trigger for items loaded from fixtures or existing items >> > if raw or not created: >> > @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, >> > created, raw, **kwargs): >> > def create_series_created_event(sender, instance, created, raw, **kwargs): >> > >> > def create_event(series): >> > + payload = PayloadSerializer(Payload( >> > + series=series)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_SERIES_CREATED, >> > project=series.project, >> > + payload=payload.data, >> > series=series) >> > >> > # don't trigger for items loaded from fixtures or existing items >> > @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, >> > created, raw, **kwargs): >> > # in that case. >> > >> > def create_event(series): >> > + payload = PayloadSerializer(Payload( >> > + series=series)) >> > + >> > return Event.objects.create( >> > category=Event.CATEGORY_SERIES_COMPLETED, >> > project=series.project, >> > + payload=payload.data, >> > series=series) >> > >> > # don't trigger for items loaded from fixtures or existing items >> > diff --git a/patchwork/tests/test_events.py >> > b/patchwork/tests/test_events.py >> > index 70d563de..39dff6f1 100644 >> > --- a/patchwork/tests/test_events.py >> > +++ b/patchwork/tests/test_events.py >> > @@ -17,32 +17,20 @@ >> > # along with Patchwork; if not, write to the Free Software >> > # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 >> > USA >> > >> > +from collections import OrderedDict >> > + >> > from django.test import TestCase >> > >> > from patchwork.models import Event >> > from patchwork.tests import utils >> > >> > -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate', >> > - 'current_delegate'] >> > - >> > >> > def _get_events(**filters): >> > # These are sorted by reverse normally, so reverse it once again >> > return Event.objects.filter(**filters).order_by('date') >> > >> > >> > -class _BaseTestCase(TestCase): >> > - >> > - def assertEventFields(self, event, parent_type='patch', **fields): >> > - for field_name in [x for x in BASE_FIELDS]: >> > - field = getattr(event, field_name) >> > - if field_name in fields: >> > - self.assertEqual(field, fields[field_name]) >> > - else: >> > - self.assertIsNone(field) >> > - >> > - >> > -class PatchCreateTest(_BaseTestCase): >> > +class PatchCreateTest(TestCase): >> > >> > def test_patch_created(self): >> > """No series, so patch dependencies implicitly exist.""" >> > @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase): >> > # This should raise both the CATEGORY_PATCH_CREATED and >> > # CATEGORY_PATCH_COMPLETED events as there are no specific >> > dependencies >> > events = _get_events(patch=patch) >> > + >> > self.assertEqual(events.count(), 1) >> > self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED) >> > self.assertEqual(events[0].project, patch.project) >> > - self.assertEventFields(events[0]) >> > + >> > + payload = events[0].payload >> > + self.assertIsInstance(payload, OrderedDict) >> > + self.assertIn('patch', payload) >> > + self.assertEqual(payload['patch']['id'], patch.id) >> > >> > def test_patch_dependencies_present_series(self): >> > """Patch dependencies already exist.""" >> > @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase): >> > # This should raise both the CATEGORY_PATCH_CREATED and >> > # CATEGORY_PATCH_COMPLETED events >> > events = _get_events(patch=series_patch.patch) >> > + >> > self.assertEqual(events.count(), 2) >> > self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED) >> > self.assertEqual(events[0].project, series_patch.patch.project) >> > self.assertEqual(events[1].category, >> > Event.CATEGORY_PATCH_COMPLETED) >> > self.assertEqual(events[1].project, series_patch.patch.project) >> > - self.assertEventFields(events[0]) >> > - self.assertEventFields(events[1]) >> > + >> > + payload = events[0].payload >> > + self.assertIn('patch', payload) >> > + self.assertEqual(payload['patch']['id'], series_patch.patch.id) >> > + >> > + payload = events[1].payload >> > + self.assertIn('patch', payload) >> > + self.assertEqual(payload['patch']['id'], series_patch.patch.id) >> > + self.assertIn('series', payload) >> > + self.assertEqual(payload['series']['id'], series_patch.series.id) >> > >> > def test_patch_dependencies_out_of_order(self): >> > series = utils.create_series() >> > @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase): >> > events = _get_events(patch=series_patch.patch) >> > self.assertEqual(events.count(), 1) >> > self.assertEqual(events[0].category, >> > Event.CATEGORY_PATCH_CREATED) >> > - self.assertEventFields(events[0]) >> > >> > series_patch_1 = utils.create_series_patch(series=series, >> > number=1) >> > >> > @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase): >> > self.assertEqual(events[0].category, >> > Event.CATEGORY_PATCH_CREATED) >> > self.assertEqual(events[1].category, >> > Event.CATEGORY_PATCH_COMPLETED) >> > - self.assertEventFields(events[0]) >> > - self.assertEventFields(events[1]) >> > >> > def test_patch_dependencies_missing(self): >> > series_patch = utils.create_series_patch(number=2) >> > @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase): >> > events = _get_events(patch=series_patch.patch) >> > self.assertEqual(events.count(), 1) >> > self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED) >> > - self.assertEventFields(events[0]) >> > >> > >> > -class PatchChangedTest(_BaseTestCase): >> > +class PatchChangedTest(TestCase): >> > >> > def test_patch_state_changed(self): >> > patch = utils.create_patch() >> > @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase): >> > patch.save() >> > >> > events = _get_events(patch=patch) >> > + >> > self.assertEqual(events.count(), 2) >> > # we don't care about the CATEGORY_PATCH_CREATED event here >> > self.assertEqual(events[1].category, >> > Event.CATEGORY_PATCH_STATE_CHANGED) >> > self.assertEqual(events[1].project, patch.project) >> > - self.assertEventFields(events[1], previous_state=old_state, >> > - current_state=new_state) >> > + >> > + payload = events[1].payload >> > + self.assertIn('previous_state', payload) >> > + self.assertEqual(payload['previous_state'], old_state.slug) >> > + self.assertIn('current_state', payload) >> > + self.assertEqual(payload['current_state'], new_state.slug) >> > >> > def test_patch_delegated(self): >> > patch = utils.create_patch() >> > @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase): >> > patch.save() >> > >> > events = _get_events(patch=patch) >> > + >> > self.assertEqual(events.count(), 2) >> > # we don't care about the CATEGORY_PATCH_CREATED event here >> > self.assertEqual(events[1].category, >> > Event.CATEGORY_PATCH_DELEGATED) >> > self.assertEqual(events[1].project, patch.project) >> > - self.assertEventFields(events[1], current_delegate=delegate_a) >> > + >> > + payload = events[1].payload >> > + self.assertIn('previous_delegate', payload) >> > + self.assertIsNone(payload['previous_delegate']) >> > + self.assertIn('current_delegate', payload) >> > + self.assertEqual(payload['current_delegate']['id'], delegate_a.id) >> > >> > delegate_b = utils.create_user() >> > >> > @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase): >> > patch.save() >> > >> > events = _get_events(patch=patch) >> > + >> > self.assertEqual(events.count(), 3) >> > self.assertEqual(events[2].category, >> > Event.CATEGORY_PATCH_DELEGATED) >> > - self.assertEventFields(events[2], previous_delegate=delegate_a, >> > - current_delegate=delegate_b) >> > + >> > + payload = events[2].payload >> > + self.assertIn('previous_delegate', payload) >> > + self.assertEqual(payload['previous_delegate']['id'], >> > delegate_a.id) >> > + self.assertIn('current_delegate', payload) >> > + self.assertEqual(payload['current_delegate']['id'], delegate_b.id) >> > >> > # Delegate B -> None >> > >> > @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase): >> > patch.save() >> > >> > events = _get_events(patch=patch) >> > + >> > self.assertEqual(events.count(), 4) >> > self.assertEqual(events[3].category, >> > Event.CATEGORY_PATCH_DELEGATED) >> > - self.assertEventFields(events[3], previous_delegate=delegate_b) >> > >> > + payload = events[3].payload >> > + self.assertIn('previous_delegate', payload) >> > + self.assertEqual(payload['previous_delegate']['id'], >> > delegate_b.id) >> > + self.assertIn('current_delegate', payload) >> > + self.assertIsNone(payload['current_delegate']) >> > >> > -class CheckCreateTest(_BaseTestCase): >> > + >> > +class CheckCreateTest(TestCase): >> > >> > def test_check_created(self): >> > check = utils.create_check() >> > - events = _get_events(created_check=check) >> > - self.assertEqual(events.count(), 1) >> > - self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED) >> > - self.assertEqual(events[0].project, check.patch.project) >> > - self.assertEventFields(events[0]) >> > + # we don't care about the CATEGORY_PATCH_CREATED event here >> > + events = _get_events(patch=check.patch) >> > >> > + self.assertEqual(events.count(), 2) >> > + # we don't care about the CATEGORY_PATCH_CREATED event here >> > + self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED) >> > + self.assertEqual(events[1].project, check.patch.project) >> > + >> > + payload = events[1].payload >> > + self.assertIn('check', payload) >> > + self.assertEqual(payload['check']['id'], check.id) >> > >> > -class CoverCreateTest(_BaseTestCase): >> > + >> > +class CoverCreateTest(TestCase): >> > >> > def test_cover_created(self): >> > cover = utils.create_cover() >> > @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase): >> > self.assertEqual(events.count(), 1) >> > self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED) >> > self.assertEqual(events[0].project, cover.project) >> > - self.assertEventFields(events[0]) >> > + >> > + payload = events[0].payload >> > + self.assertIn('cover', payload) >> > + self.assertEqual(payload['cover']['id'], cover.id) >> > >> > >> > -class SeriesCreateTest(_BaseTestCase): >> > +class SeriesCreateTest(TestCase): >> > >> > def test_series_created(self): >> > series = utils.create_series() >> > @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase): >> > self.assertEqual(events.count(), 1) >> > self.assertEqual(events[0].category, >> > Event.CATEGORY_SERIES_CREATED) >> > self.assertEqual(events[0].project, series.project) >> > - self.assertEventFields(events[0]) >> > + >> > + payload = events[0].payload >> > + self.assertIn('series', payload) >> > + self.assertEqual(payload['series']['id'], series.id) >> > -- >> > 2.14.3 >> > >> > _______________________________________________ >> > Patchwork mailing list >> > Patchwork@lists.ozlabs.org >> > https://lists.ozlabs.org/listinfo/patchwork _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork