#35099: Combining QuerySets with "|" or "&" produce side effects affecting 
further
queries
-------------------------------------+-------------------------------------
               Reporter:  Alan       |          Owner:  nobody
                   Type:  Bug        |         Status:  new
              Component:  Database   |        Version:  5.0
  layer (models, ORM)                |
               Severity:  Normal     |       Keywords:
           Triage Stage:             |      Has patch:  0
  Unreviewed                         |
    Needs documentation:  0          |    Needs tests:  0
Patch needs improvement:  0          |  Easy pickings:  0
                  UI/UX:  0          |
-------------------------------------+-------------------------------------
 Hello everyone.

 Combining some queries with  "|" or "&" somehow affects queries involved
 in the operation, leading to malformed SQL and unexpected results.

 Here are details and steps to reproduce. Apologise, for maybe a bit
 confusing model names, I copied them from production.
 {{{
 class SiteUser(models.Model):
     pass

 class Notification(models.Model):
     user = models.ForeignKey(to=SiteUser, on_delete=models.CASCADE)


 class PayoutRequest(models.Model):
     requester = models.ForeignKey(to=SiteUser, on_delete=models.CASCADE)
 }}}

 Test:
 {{{
 from django.test import TestCase
 from django.db.models import OuterRef, Exists
 from reproduce.models import Notification, SiteUser, PayoutRequest


 class Reproduce(TestCase):

     def test(self):
         u01 = SiteUser.objects.create()
         u02 = SiteUser.objects.create()
         u03 = SiteUser.objects.create()

         Notification.objects.create(user=u01)
         PayoutRequest.objects.create(requester=u01)
         Notification.objects.create(user=u02)
         PayoutRequest.objects.create(requester=u03)

         are_active = SiteUser.objects.all().distinct()
         got_money = SiteUser.objects.filter(
             Exists(PayoutRequest.objects.filter(requester=OuterRef('pk')))
         ).distinct()
         whatever_query = SiteUser.objects.all().distinct()

         # Execute queries first time
         need_help = are_active.exclude(pk__in=got_money)
         notified =
 Notification.objects.filter(user__in=need_help).values_list('user_id',
 flat=True)
         query_before = str(notified.query)

         self.assertEqual(len(notified), 1)  # correct

         whatever_query | got_money  # Touch "got_money" with any other
 query

         # Execute same queries second time
         need_help = are_active.exclude(pk__in=got_money)
         notified =
 Notification.objects.filter(user__in=need_help).values_list('user_id',
 flat=True)
         query_after = str(notified.query)

         print(query_before)
         print(query_after)
         self.assertEqual(len(notified), 1)  # expected 1, got 0
         self.assertEqual(query_before, query_after)  # false
 }}}

 As you can see, merely touching the `got_money` query with any other query
 leads to modifying the results of the same queries executed after that.
 This test case probably may be simplified even further, but unfortunately,
 I have no more time resources to dig much deeper.

 I had another queries built using simple .filter() and .exclude(). Those
 were not affected by combining.
 I found only this query `got_money` using `Exists()` and `OuterRef()` to
 be affected. There might be more of which I am not aware of.

 The reason for this I don't know, but `query_before` and `query_after`
 differs.
 `query_before` correctly separates subqueries using W0, U0, V0 aliases,
 while the `query_after` uses a single U0 alias for all subqueries, leading
 to incorrect results.

 Before
 {{{
 SELECT
   "reproduce_notification"."user_id"
 FROM
   "reproduce_notification"
 WHERE
   "reproduce_notification"."user_id" IN (
     SELECT
       DISTINCT W0."id"
     FROM
       "reproduce_siteuser" W0
     WHERE
       NOT (
         W0."id" IN (
           SELECT
             DISTINCT V0."id"
           FROM
             "reproduce_siteuser" V0
           WHERE
             EXISTS(
               SELECT
                 1 AS "a"
               FROM
                 "reproduce_payoutrequest" U0
               WHERE
                 U0."requester_id" = (V0."id")
               LIMIT
                 1
             )
         )
       )
   )
 }}}

 After
 {{{
 SELECT
   "reproduce_notification"."user_id"
 FROM
   "reproduce_notification"
 WHERE
   "reproduce_notification"."user_id" IN (
     SELECT
       DISTINCT U0."id"
     FROM
       "reproduce_siteuser" U0
     WHERE
       NOT (
         U0."id" IN (
           SELECT
             DISTINCT U0."id"
           FROM
             "reproduce_siteuser" U0
           WHERE
             EXISTS(
               SELECT
                 1 AS "a"
               FROM
                 "reproduce_payoutrequest" U0
               WHERE
                 U0."requester_id" = (U0."id")
               LIMIT
                 1
             )
         )
       )
   )
 }}}

-- 
Ticket URL: <https://code.djangoproject.com/ticket/35099>
Django <https://code.djangoproject.com/>
The Web framework for perfectionists with deadlines.

-- 
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to django-updates+unsubscr...@googlegroups.com.
To view this discussion on the web visit 
https://groups.google.com/d/msgid/django-updates/0107018cefb81ba8-b17a2d82-dc0a-4141-bbe4-e0a7fc2a27d9-000000%40eu-central-1.amazonses.com.

Reply via email to