Author: lukeplant
Date: 2011-10-05 16:14:52 -0700 (Wed, 05 Oct 2011)
New Revision: 16930

Added:
   django/trunk/tests/modeltests/prefetch_related/
   django/trunk/tests/modeltests/prefetch_related/__init__.py
   django/trunk/tests/modeltests/prefetch_related/models.py
   django/trunk/tests/modeltests/prefetch_related/tests.py
Modified:
   django/trunk/django/contrib/contenttypes/generic.py
   django/trunk/django/db/models/fields/related.py
   django/trunk/django/db/models/manager.py
   django/trunk/django/db/models/query.py
   django/trunk/docs/ref/models/querysets.txt
   django/trunk/docs/releases/1.4.txt
   django/trunk/docs/topics/db/optimization.txt
Log:
Fixed #16937 - added `QuerySet.prefetch_related` to prefetch many related 
objects.

Many thanks to akaariai for lots of review and feedback, bug finding,
additional unit tests and performance testing.

Modified: django/trunk/django/contrib/contenttypes/generic.py
===================================================================
--- django/trunk/django/contrib/contenttypes/generic.py 2011-10-05 22:56:09 UTC 
(rev 16929)
+++ django/trunk/django/contrib/contenttypes/generic.py 2011-10-05 23:14:52 UTC 
(rev 16930)
@@ -225,11 +225,7 @@
             content_type = content_type,
             content_type_field_name = self.field.content_type_field_name,
             object_id_field_name = self.field.object_id_field_name,
-            core_filters = {
-                '%s__pk' % self.field.content_type_field_name: content_type.id,
-                '%s__exact' % self.field.object_id_field_name: 
instance._get_pk_val(),
-            }
-
+            prefetch_cache_name = self.field.attname,
         )
 
         return manager
@@ -250,12 +246,12 @@
     """
 
     class GenericRelatedObjectManager(superclass):
-        def __init__(self, model=None, core_filters=None, instance=None, 
symmetrical=None,
+        def __init__(self, model=None, instance=None, symmetrical=None,
                      source_col_name=None, target_col_name=None, 
content_type=None,
-                     content_type_field_name=None, object_id_field_name=None):
+                     content_type_field_name=None, object_id_field_name=None,
+                     prefetch_cache_name=None):
 
             super(GenericRelatedObjectManager, self).__init__()
-            self.core_filters = core_filters
             self.model = model
             self.content_type = content_type
             self.symmetrical = symmetrical
@@ -264,12 +260,30 @@
             self.target_col_name = target_col_name
             self.content_type_field_name = content_type_field_name
             self.object_id_field_name = object_id_field_name
+            self.prefetch_cache_name = prefetch_cache_name
             self.pk_val = self.instance._get_pk_val()
+            self.core_filters = {
+                '%s__pk' % content_type_field_name: content_type.id,
+                '%s__exact' % object_id_field_name: instance._get_pk_val(),
+            }
 
         def get_query_set(self):
-            db = self._db or router.db_for_read(self.model, 
instance=self.instance)
-            return super(GenericRelatedObjectManager, 
self).get_query_set().using(db).filter(**self.core_filters)
+            try:
+                return 
self.instance._prefetched_objects_cache[self.prefetch_cache_name]
+            except (AttributeError, KeyError):
+                db = self._db or router.db_for_read(self.model, 
instance=self.instance)
+                return super(GenericRelatedObjectManager, 
self).get_query_set().using(db).filter(**self.core_filters)
 
+        def get_prefetch_query_set(self, instances):
+            db = self._db or router.db_for_read(self.model)
+            query = {
+                '%s__pk' % self.content_type_field_name: self.content_type.id,
+                '%s__in' % self.object_id_field_name:
+                    [obj._get_pk_val() for obj in instances]
+                }
+            qs = super(GenericRelatedObjectManager, 
self).get_query_set().using(db).filter(**query)
+            return (qs, self.object_id_field_name, 'pk')
+
         def add(self, *objs):
             for obj in objs:
                 if not isinstance(obj, self.model):

Modified: django/trunk/django/db/models/fields/related.py
===================================================================
--- django/trunk/django/db/models/fields/related.py     2011-10-05 22:56:09 UTC 
(rev 16929)
+++ django/trunk/django/db/models/fields/related.py     2011-10-05 23:14:52 UTC 
(rev 16930)
@@ -432,9 +432,23 @@
                 self.model = rel_model
 
             def get_query_set(self):
-                db = self._db or router.db_for_read(self.model, 
instance=self.instance)
-                return super(RelatedManager, 
self).get_query_set().using(db).filter(**(self.core_filters))
+                try:
+                    return 
self.instance._prefetched_objects_cache[rel_field.related_query_name()]
+                except (AttributeError, KeyError):
+                    db = self._db or router.db_for_read(self.model, 
instance=self.instance)
+                    return super(RelatedManager, 
self).get_query_set().using(db).filter(**self.core_filters)
 
+            def get_prefetch_query_set(self, instances):
+                """
+                Return a queryset that does the bulk lookup needed
+                by prefetch_related functionality.
+                """
+                db = self._db or router.db_for_read(self.model)
+                query = {'%s__%s__in' % (rel_field.name, attname):
+                             [getattr(obj, attname) for obj in instances]}
+                qs = super(RelatedManager, 
self).get_query_set().using(db).filter(**query)
+                return (qs, rel_field.get_attname(), attname)
+
             def add(self, *objs):
                 for obj in objs:
                     if not isinstance(obj, self.model):
@@ -482,26 +496,61 @@
     """Creates a manager that subclasses 'superclass' (which is a Manager)
     and adds behavior for many-to-many related objects."""
     class ManyRelatedManager(superclass):
-        def __init__(self, model=None, core_filters=None, instance=None, 
symmetrical=None,
+        def __init__(self, model=None, query_field_name=None, instance=None, 
symmetrical=None,
                      source_field_name=None, target_field_name=None, 
reverse=False,
-                     through=None):
+                     through=None, prefetch_cache_name=None):
             super(ManyRelatedManager, self).__init__()
             self.model = model
-            self.core_filters = core_filters
+            self.query_field_name = query_field_name
+            self.core_filters = {'%s__pk' % query_field_name: 
instance._get_pk_val()}
             self.instance = instance
             self.symmetrical = symmetrical
             self.source_field_name = source_field_name
             self.target_field_name = target_field_name
             self.reverse = reverse
             self.through = through
+            self.prefetch_cache_name = prefetch_cache_name
             self._pk_val = self.instance.pk
             if self._pk_val is None:
                 raise ValueError("%r instance needs to have a primary key 
value before a many-to-many relationship can be used." % 
instance.__class__.__name__)
 
         def get_query_set(self):
-            db = self._db or router.db_for_read(self.instance.__class__, 
instance=self.instance)
-            return super(ManyRelatedManager, 
self).get_query_set().using(db)._next_is_sticky().filter(**(self.core_filters))
+            try:
+                return 
self.instance._prefetched_objects_cache[self.prefetch_cache_name]
+            except (AttributeError, KeyError):
+                db = self._db or router.db_for_read(self.instance.__class__, 
instance=self.instance)
+                return super(ManyRelatedManager, 
self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters)
 
+        def get_prefetch_query_set(self, instances):
+            """
+            Returns a tuple:
+            (queryset of instances of self.model that are related to passed in 
instances
+             attr of returned instances needed for matching
+             attr of passed in instances needed for matching)
+            """
+            from django.db import connections
+            db = self._db or router.db_for_read(self.model)
+            query = {'%s__pk__in' % self.query_field_name:
+                         [obj._get_pk_val() for obj in instances]}
+            qs = super(ManyRelatedManager, 
self).get_query_set().using(db)._next_is_sticky().filter(**query)
+
+            # M2M: need to annotate the query in order to get the primary model
+            # that the secondary model was actually related to. We know that
+            # there will already be a join on the join table, so we can just 
add
+            # the select.
+
+            # For non-autocreated 'through' models, can't assume we are
+            # dealing with PK values.
+            fk = self.through._meta.get_field(self.source_field_name)
+            source_col = fk.column
+            join_table = self.through._meta.db_table
+            connection = connections[db]
+            qn = connection.ops.quote_name
+            qs = qs.extra(select={'_prefetch_related_val':
+                                      '%s.%s' % (qn(join_table), 
qn(source_col))})
+            select_attname = fk.rel.get_related_field().get_attname()
+            return (qs, '_prefetch_related_val', select_attname)
+
         # If the ManyToMany relation has an intermediary model,
         # the add and remove methods do not exist.
         if rel.through._meta.auto_created:
@@ -683,7 +732,8 @@
 
         manager = self.related_manager_cls(
             model=rel_model,
-            core_filters={'%s__pk' % self.related.field.name: 
instance._get_pk_val()},
+            query_field_name=self.related.field.name,
+            prefetch_cache_name=self.related.field.related_query_name(),
             instance=instance,
             symmetrical=False,
             source_field_name=self.related.field.m2m_reverse_field_name(),
@@ -739,7 +789,8 @@
 
         manager = self.related_manager_cls(
             model=self.field.rel.to,
-            core_filters={'%s__pk' % self.field.related_query_name(): 
instance._get_pk_val()},
+            query_field_name=self.field.related_query_name(),
+            prefetch_cache_name=self.field.name,
             instance=instance,
             symmetrical=self.field.rel.symmetrical,
             source_field_name=self.field.m2m_field_name(),

Modified: django/trunk/django/db/models/manager.py
===================================================================
--- django/trunk/django/db/models/manager.py    2011-10-05 22:56:09 UTC (rev 
16929)
+++ django/trunk/django/db/models/manager.py    2011-10-05 23:14:52 UTC (rev 
16930)
@@ -172,6 +172,9 @@
     def select_related(self, *args, **kwargs):
         return self.get_query_set().select_related(*args, **kwargs)
 
+    def prefetch_related(self, *args, **kwargs):
+        return self.get_query_set().prefetch_related(*args, **kwargs)
+
     def values(self, *args, **kwargs):
         return self.get_query_set().values(*args, **kwargs)
 

Modified: django/trunk/django/db/models/query.py
===================================================================
--- django/trunk/django/db/models/query.py      2011-10-05 22:56:09 UTC (rev 
16929)
+++ django/trunk/django/db/models/query.py      2011-10-05 23:14:52 UTC (rev 
16930)
@@ -36,6 +36,8 @@
         self._iter = None
         self._sticky_filter = False
         self._for_write = False
+        self._prefetch_related_lookups = []
+        self._prefetch_done = False
 
     ########################
     # PYTHON MAGIC METHODS #
@@ -81,9 +83,17 @@
                 self._result_cache = list(self.iterator())
         elif self._iter:
             self._result_cache.extend(self._iter)
+        if self._prefetch_related_lookups and not self._prefetch_done:
+            self._prefetch_related_objects()
         return len(self._result_cache)
 
     def __iter__(self):
+        if self._prefetch_related_lookups and not self._prefetch_done:
+            # We need all the results in order to be able to do the prefetch
+            # in one go. To minimize code duplication, we use the __len__
+            # code path which also forces this, and also does the prefetch
+            len(self)
+
         if self._result_cache is None:
             self._iter = self.iterator()
             self._result_cache = []
@@ -106,6 +116,12 @@
                 self._fill_cache()
 
     def __nonzero__(self):
+        if self._prefetch_related_lookups and not self._prefetch_done:
+            # We need all the results in order to be able to do the prefetch
+            # in one go. To minimize code duplication, we use the __len__
+            # code path which also forces this, and also does the prefetch
+            len(self)
+
         if self._result_cache is not None:
             return bool(self._result_cache)
         try:
@@ -527,6 +543,11 @@
             return self.query.has_results(using=self.db)
         return bool(self._result_cache)
 
+    def _prefetch_related_objects(self):
+        # This method can only be called once the result cache has been filled.
+        prefetch_related_objects(self._result_cache, 
self._prefetch_related_lookups)
+        self._prefetch_done = True
+
     ##################################################
     # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
     ##################################################
@@ -650,6 +671,23 @@
             obj.query.max_depth = depth
         return obj
 
+    def prefetch_related(self, *lookups):
+        """
+        Returns a new QuerySet instance that will prefetch the specified
+        Many-To-One and Many-To-Many related objects when the QuerySet is
+        evaluated.
+
+        When prefetch_related() is called more than once, the list of lookups 
to
+        prefetch is appended to. If prefetch_related(None) is called, the
+        list is cleared.
+        """
+        clone = self._clone()
+        if lookups == (None,):
+            clone._prefetch_related_lookups = []
+        else:
+            clone._prefetch_related_lookups.extend(lookups)
+        return clone
+
     def dup_select_related(self, other):
         """
         Copies the related selection status from the QuerySet 'other' to the
@@ -799,6 +837,7 @@
             query.filter_is_sticky = True
         c = klass(model=self.model, query=query, using=self._db)
         c._for_write = self._for_write
+        c._prefetch_related_lookups = self._prefetch_related_lookups[:]
         c.__dict__.update(kwargs)
         if setup and hasattr(c, '_setup_query'):
             c._setup_query()
@@ -864,6 +903,7 @@
     # empty" result.
     value_annotation = True
 
+
 class ValuesQuerySet(QuerySet):
     def __init__(self, *args, **kwargs):
         super(ValuesQuerySet, self).__init__(*args, **kwargs)
@@ -993,6 +1033,7 @@
                     % self.__class__.__name__)
         return self
 
+
 class ValuesListQuerySet(ValuesQuerySet):
     def iterator(self):
         if self.flat and len(self._fields) == 1:
@@ -1502,6 +1543,7 @@
                 self._model_fields[converter(column)] = field
         return self._model_fields
 
+
 def insert_query(model, objs, fields, return_id=False, raw=False, using=None):
     """
     Inserts a new record for the given model. This provides an interface to
@@ -1511,3 +1553,140 @@
     query = sql.InsertQuery(model)
     query.insert_values(fields, objs, raw=raw)
     return query.get_compiler(using=using).execute_sql(return_id)
+
+
+def prefetch_related_objects(result_cache, related_lookups):
+    """
+    Helper function for prefetch_related functionality
+
+    Populates prefetched objects caches for a list of results
+    from a QuerySet
+    """
+    from django.db.models.sql.constants import LOOKUP_SEP
+
+    if len(result_cache) == 0:
+        return # nothing to do
+
+    model = result_cache[0].__class__
+
+    # We need to be able to dynamically add to the list of prefetch_related
+    # lookups that we look up (see below).  So we need some book keeping to
+    # ensure we don't do duplicate work.
+    done_lookups = set() # list of lookups like foo__bar__baz
+    done_queries = {}    # dictionary of things like 'foo__bar': [results]
+    related_lookups = list(related_lookups)
+
+    # We may expand related_lookups, so need a loop that allows for that
+    for lookup in related_lookups:
+        if lookup in done_lookups:
+            # We've done exactly this already, skip the whole thing
+            continue
+        done_lookups.add(lookup)
+
+        # Top level, the list of objects to decorate is the result cache
+        # from the primary QuerySet. It won't be for deeper levels.
+        obj_list = result_cache
+
+        attrs = lookup.split(LOOKUP_SEP)
+        for level, attr in enumerate(attrs):
+            # Prepare main instances
+            if len(obj_list) == 0:
+                break
+
+            good_objects = True
+            for obj in obj_list:
+                if not hasattr(obj, '_prefetched_objects_cache'):
+                    try:
+                        obj._prefetched_objects_cache = {}
+                    except AttributeError:
+                        # Must be in a QuerySet subclass that is not returning
+                        # Model instances, either in Django or 3rd
+                        # party. prefetch_related() doesn't make sense, so quit
+                        # now.
+                        good_objects = False
+                        break
+                else:
+                    # We already did this list
+                    break
+            if not good_objects:
+                break
+
+            # Descend down tree
+            try:
+                rel_obj = getattr(obj_list[0], attr)
+            except AttributeError:
+                raise AttributeError("Cannot find '%s' on %s object, '%s' is 
an invalid "
+                                     "parameter to prefetch_related()" %
+                                     (attr, obj_list[0].__class__.__name__, 
lookup))
+
+            can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set')
+            if level == len(attrs) - 1 and not can_prefetch:
+                # Last one, this *must* resolve to a related manager.
+                raise ValueError("'%s' does not resolve to a supported 'many 
related"
+                                 " manager' for model %s - this is an invalid"
+                                 " parameter to prefetch_related()."
+                                 % (lookup, model.__name__))
+
+            if can_prefetch:
+                # Check we didn't do this already
+                current_lookup = LOOKUP_SEP.join(attrs[0:level+1])
+                if current_lookup in done_queries:
+                    obj_list = done_queries[current_lookup]
+                else:
+                    relmanager = rel_obj
+                    obj_list, additional_prl = prefetch_one_level(obj_list, 
relmanager, attr)
+                    for f in additional_prl:
+                        new_prl = LOOKUP_SEP.join([current_lookup, f])
+                        related_lookups.append(new_prl)
+                    done_queries[current_lookup] = obj_list
+            else:
+                # Assume we've got some singly related object. We replace
+                # the current list of parent objects with that list.
+                obj_list = [getattr(obj, attr) for obj in obj_list]
+
+                # Filter out 'None' so that we can continue with nullable
+                # relations.
+                obj_list = [obj for obj in obj_list if obj is not None]
+
+
+def prefetch_one_level(instances, relmanager, attname):
+    """
+    Helper function for prefetch_related_objects
+
+    Runs prefetches on all instances using the manager relmanager,
+    assigning results to queryset against instance.attname.
+
+    The prefetched objects are returned, along with any additional
+    prefetches that must be done due to prefetch_related lookups
+    found from default managers.
+    """
+    rel_qs, rel_obj_attr, instance_attr = 
relmanager.get_prefetch_query_set(instances)
+    # We have to handle the possibility that the default manager itself added
+    # prefetch_related lookups to the QuerySet we just got back. We don't want 
to
+    # trigger the prefetch_related functionality by evaluating the query.
+    # Rather, we need to merge in the prefetch_related lookups.
+    additional_prl = getattr(rel_qs, '_prefetch_related_lookups', [])
+    if additional_prl:
+        # Don't need to clone because the manager should have given us a fresh
+        # instance, so we access an internal instead of using public interface
+        # for performance reasons.
+        rel_qs._prefetch_related_lookups = []
+
+    all_related_objects = list(rel_qs)
+
+    rel_obj_cache = {}
+    for rel_obj in all_related_objects:
+        rel_attr_val = getattr(rel_obj, rel_obj_attr)
+        if rel_attr_val not in rel_obj_cache:
+            rel_obj_cache[rel_attr_val] = []
+        rel_obj_cache[rel_attr_val].append(rel_obj)
+
+    for obj in instances:
+        qs = getattr(obj, attname).all()
+        instance_attr_val = getattr(obj, instance_attr)
+        qs._result_cache = rel_obj_cache.get(instance_attr_val, [])
+        # We don't want the individual qs doing prefetch_related now, since we
+        # have merged this into the current work.
+        qs._prefetch_done = True
+        obj._prefetched_objects_cache[attname] = qs
+    return all_related_objects, additional_prl

Modified: django/trunk/docs/ref/models/querysets.txt
===================================================================
--- django/trunk/docs/ref/models/querysets.txt  2011-10-05 22:56:09 UTC (rev 
16929)
+++ django/trunk/docs/ref/models/querysets.txt  2011-10-05 23:14:52 UTC (rev 
16930)
@@ -571,8 +571,6 @@
 manager or a ``QuerySet`` and do further filtering on the result. After calling
 ``all()`` on either object, you'll definitely have a ``QuerySet`` to work with.
 
-.. _select-related:
-
 select_related
 ~~~~~~~~~~~~~~
 
@@ -690,6 +688,107 @@
 A :class:`~django.db.models.OneToOneField` is not traversed in the reverse
 direction if you are performing a depth-based ``select_related()`` call.
 
+prefetch_related
+~~~~~~~~~~~~~~~~
+
+.. method:: prefetch_related(*lookups)
+
+.. versionadded:: 1.4
+
+Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
+related many-to-many and many-to-one objects for each of the specified lookups.
+
+This is similar to ``select_related`` for the 'many related objects' case, but
+note that ``prefetch_related`` causes a separate query to be issued for each 
set
+of related objects that you request, unlike ``select_related`` which modifies
+the original query with joins in order to get the related objects. With
+``prefetch_related``, the additional queries are done as soon as the QuerySet
+begins to be evaluated.
+
+For example, suppose you have these models::
+
+    class Topping(models.Model):
+        name = models.CharField(max_length=30)
+
+    class Pizza(models.Model):
+        name = models.CharField(max_length=50)
+        toppings = models.ManyToManyField(Topping)
+
+        def __unicode__(self):
+            return u"%s (%s)" % (self.name, u", ".join([topping.name
+                                                        for topping in 
self.toppings.all()]))
+
+and run this code::
+
+    >>> Pizza.objects.all()
+    [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"...
+
+The problem with this code is that it will run a query on the Toppings table 
for
+**every** item in the Pizza ``QuerySet``.  Using ``prefetch_related``, this can
+be reduced to just two queries:
+
+    >>> Pizza.objects.all().prefetch_related('toppings')
+
+All the relevant toppings will be fetched in a single query, and used to make
+``QuerySets`` that have a pre-filled cache of the relevant results. These
+``QuerySets`` are then used in the ``self.toppings.all()`` calls.
+
+Please note that use of ``prefetch_related`` will mean that the additional
+queries run will **always** be executed - even if you never use the related
+objects - and it always fully populates the result cache on the primary
+``QuerySet`` (which can sometimes be avoided in other cases).
+
+Also remember that, as always with QuerySets, any subsequent chained methods
+will ignore previously cached results, and retrieve data using a fresh database
+query. So, if you write the following:
+
+    >>> pizzas = Pizza.objects.prefetch_related('toppings')
+    >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas]
+
+...then the fact that ``pizza.toppings.all()`` has been prefetched will not help
+you - in fact it hurts performance, since you have done a database query that
+you haven't used. So use this feature with caution!
+
+The lookups that must be supplied to this method can be any attributes on the
+model instances which represent related queries that return multiple
+objects. This includes attributes representing the 'many' side of 
``ForeignKey``
+relationships, forward and reverse ``ManyToManyField`` attributes, and also any
+``GenericRelations``.
+
+You can also use the normal join syntax to do related fields of related
+fields. Suppose we have an additional model to the example above::
+
+    class Restaurant(models.Model):
+        pizzas = models.ManyToManyField(Pizza, related_name='restaurants')
+        best_pizza = models.ForeignKey(Pizza, related_name='championed_by')
+
+The following are all legal:
+
+    >>> Restaurant.objects.prefetch_related('pizzas__toppings')
+
+This will prefetch all pizzas belonging to restaurants, and all toppings
+belonging to those pizzas. This will result in a total of 3 database queries -
+one for the restaurants, one for the pizzas, and one for the toppings.
+
+    >>> 
Restaurant.objects.select_related('best_pizza').prefetch_related('best_pizza__toppings')
+
+This will fetch the best pizza and all the toppings for the best pizza for each
+restaurant. This will be done in 2 database queries - one for the restaurants
+and 'best pizzas' combined (achieved through use of ``select_related``), and 
one
+for the toppings.
+
+Chaining ``prefetch_related`` calls will accumulate the fields that should have
+this behavior applied. To clear any ``prefetch_related`` behavior, pass ``None``
+as a parameter::
+
+   >>> non_prefetched = qs.prefetch_related(None)
+
+One difference when using ``prefetch_related`` is that, in some circumstances,
+objects created by a query can be shared between the different objects that 
they
+are related to, i.e. a single Python model instance can appear at more than one
+point in the tree of objects that are returned. Normally this behavior will not
+be a problem, and will in fact save both memory and CPU time.
+
 extra
 ~~~~~
 

Modified: django/trunk/docs/releases/1.4.txt
===================================================================
--- django/trunk/docs/releases/1.4.txt  2011-10-05 22:56:09 UTC (rev 16929)
+++ django/trunk/docs/releases/1.4.txt  2011-10-05 23:14:52 UTC (rev 16930)
@@ -63,6 +63,19 @@
 See the :meth:`~django.db.models.query.QuerySet.bulk_create` docs for more
 information.
 
+``QuerySet.prefetch_related``
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Analogous to :meth:`~django.db.models.query.QuerySet.select_related` but for
+many-to-many relationships,
+:meth:`~django.db.models.query.QuerySet.prefetch_related` has been added to
+:class:`~django.db.models.query.QuerySet`. This method returns a new 
``QuerySet``
+that will prefetch in a single batch each of the specified related lookups as
+soon as it begins to be evaluated (e.g. by iterating over it). This enables you
+to fix many instances of a very common performance problem, in which your code
+ends up doing O(n) database queries (or worse) if objects on your primary
+``QuerySet`` each have many related objects that you also need.
+
 HTML5
 ~~~~~
 

Modified: django/trunk/docs/topics/db/optimization.txt
===================================================================
--- django/trunk/docs/topics/db/optimization.txt        2011-10-05 22:56:09 UTC 
(rev 16929)
+++ django/trunk/docs/topics/db/optimization.txt        2011-10-05 23:14:52 UTC 
(rev 16930)
@@ -141,10 +141,12 @@
 query that is executed in a loop, and could therefore end up doing many 
database
 queries, when only one was needed. So:
 
-Use ``QuerySet.select_related()``
----------------------------------
+Use ``QuerySet.select_related()`` and ``prefetch_related()``
+------------------------------------------------------------
 
-Understand :ref:`QuerySet.select_related() <select-related>` thoroughly, and 
use it:
+Understand :meth:`~django.db.models.query.QuerySet.select_related` and
+:meth:`~django.db.models.query.QuerySet.prefetch_related` thoroughly, and use
+them:
 
 * in view code,
 

Added: django/trunk/tests/modeltests/prefetch_related/__init__.py
===================================================================
Added: django/trunk/tests/modeltests/prefetch_related/models.py
===================================================================
--- django/trunk/tests/modeltests/prefetch_related/models.py                    
        (rev 0)
+++ django/trunk/tests/modeltests/prefetch_related/models.py    2011-10-05 
23:14:52 UTC (rev 16930)
@@ -0,0 +1,165 @@
+from django.contrib.contenttypes.models import ContentType
+from django.contrib.contenttypes import generic
+from django.db import models
+
+## Basic tests
+
+class Author(models.Model):
+    name = models.CharField(max_length=50, unique=True)
+    first_book = models.ForeignKey('Book', related_name='first_time_authors')
+    favorite_authors = models.ManyToManyField(
+        'self', through='FavoriteAuthors', symmetrical=False, 
related_name='favors_me')
+
+    def __unicode__(self):
+        return self.name
+
+    class Meta:
+        ordering = ['id']
+
+
+class AuthorWithAge(Author):
+    author = models.OneToOneField(Author, parent_link=True)
+    age = models.IntegerField()
+
+
+class FavoriteAuthors(models.Model):
+    author = models.ForeignKey(Author, to_field='name', related_name='i_like')
+    likes_author = models.ForeignKey(Author, to_field='name', 
related_name='likes_me')
+
+    class Meta:
+         ordering = ['id']
+
+
+class AuthorAddress(models.Model):
+    author = models.ForeignKey(Author, to_field='name', 
related_name='addresses')
+    address = models.TextField()
+
+    class Meta:
+        ordering = ['id']
+
+    def __unicode__(self):
+        return self.address
+
+
+class Book(models.Model):
+    title = models.CharField(max_length=255)
+    authors = models.ManyToManyField(Author, related_name='books')
+
+    def __unicode__(self):
+        return self.title
+
+    class Meta:
+        ordering = ['id']
+
+class BookWithYear(Book):
+    book = models.OneToOneField(Book, parent_link=True)
+    published_year = models.IntegerField()
+    aged_authors = models.ManyToManyField(
+        AuthorWithAge, related_name='books_with_year')
+
+
+class Reader(models.Model):
+    name = models.CharField(max_length=50)
+    books_read = models.ManyToManyField(Book, related_name='read_by')
+
+    def __unicode__(self):
+        return self.name
+
+    class Meta:
+        ordering = ['id']
+
+
+## Models for default manager tests
+
+class Qualification(models.Model):
+    name = models.CharField(max_length=10)
+
+    class Meta:
+        ordering = ['id']
+
+
+class TeacherManager(models.Manager):
+    def get_query_set(self):
+        return super(TeacherManager, 
self).get_query_set().prefetch_related('qualifications')
+
+
+class Teacher(models.Model):
+    name = models.CharField(max_length=50)
+    qualifications = models.ManyToManyField(Qualification)
+
+    objects = TeacherManager()
+
+    def __unicode__(self):
+        return "%s (%s)" % (self.name, ", ".join(q.name for q in 
self.qualifications.all()))
+
+    class Meta:
+        ordering = ['id']
+
+
+class Department(models.Model):
+    name = models.CharField(max_length=50)
+    teachers = models.ManyToManyField(Teacher)
+
+    class Meta:
+        ordering = ['id']
+
+
+## Generic relation tests
+
+class TaggedItem(models.Model):
+    tag = models.SlugField()
+    content_type = models.ForeignKey(ContentType, 
related_name="taggeditem_set2")
+    object_id = models.PositiveIntegerField()
+    content_object = generic.GenericForeignKey('content_type', 'object_id')
+
+    def __unicode__(self):
+        return self.tag
+
+
+class Bookmark(models.Model):
+    url = models.URLField()
+    tags = generic.GenericRelation(TaggedItem)
+
+
+## Models for lookup ordering tests
+
+
+class House(models.Model):
+    address = models.CharField(max_length=255)
+
+    class Meta:
+        ordering = ['id']
+
+class Room(models.Model):
+    name = models.CharField(max_length=50)
+    house = models.ForeignKey(House, related_name='rooms')
+
+    class Meta:
+        ordering = ['id']
+
+
+class Person(models.Model):
+    name = models.CharField(max_length=50)
+    houses = models.ManyToManyField(House, related_name='occupants')
+
+    @property
+    def primary_house(self):
+        # Assume business logic forces every person to have at least one house.
+        return sorted(self.houses.all(), key=lambda house: 
-house.rooms.count())[0]
+
+    class Meta:
+        ordering = ['id']
+
+
+## Models for nullable FK tests
+
+class Employee(models.Model):
+    name = models.CharField(max_length=50)
+    boss = models.ForeignKey('self', null=True,
+                             related_name='serfs')
+
+    def __unicode__(self):
+        return self.name
+
+    class Meta:
+        ordering = ['id']

Added: django/trunk/tests/modeltests/prefetch_related/tests.py
===================================================================
--- django/trunk/tests/modeltests/prefetch_related/tests.py                     
        (rev 0)
+++ django/trunk/tests/modeltests/prefetch_related/tests.py     2011-10-05 
23:14:52 UTC (rev 16930)
@@ -0,0 +1,418 @@
+from __future__ import with_statement
+
+from django.contrib.contenttypes.models import ContentType
+from django.test import TestCase
+from django.utils import unittest
+
+from models import (Author, Book, Reader, Qualification, Teacher, Department,
+                    TaggedItem, Bookmark, AuthorAddress, FavoriteAuthors,
+                    AuthorWithAge, BookWithYear, Person, House, Room,
+                    Employee)
+
+
+class PrefetchRelatedTests(TestCase):
+
+    def setUp(self):
+
+        self.book1 = Book.objects.create(title="Poems")
+        self.book2 = Book.objects.create(title="Jane Eyre")
+        self.book3 = Book.objects.create(title="Wuthering Heights")
+        self.book4 = Book.objects.create(title="Sense and Sensibility")
+
+        self.author1 = Author.objects.create(name="Charlotte",
+                                             first_book=self.book1)
+        self.author2 = Author.objects.create(name="Anne",
+                                             first_book=self.book1)
+        self.author3 = Author.objects.create(name="Emily",
+                                             first_book=self.book1)
+        self.author4 = Author.objects.create(name="Jane",
+                                             first_book=self.book4)
+
+        self.book1.authors.add(self.author1, self.author2, self.author3)
+        self.book2.authors.add(self.author1)
+        self.book3.authors.add(self.author3)
+        self.book4.authors.add(self.author4)
+
+        self.reader1 = Reader.objects.create(name="Amy")
+        self.reader2 = Reader.objects.create(name="Belinda")
+
+        self.reader1.books_read.add(self.book1, self.book4)
+        self.reader2.books_read.add(self.book2, self.book4)
+
+    def test_m2m_forward(self):
+        with self.assertNumQueries(2):
+            lists = [list(b.authors.all()) for b in 
Book.objects.prefetch_related('authors')]
+
+        normal_lists = [list(b.authors.all()) for b in Book.objects.all()]
+        self.assertEqual(lists, normal_lists)
+
+
+    def test_m2m_reverse(self):
+        with self.assertNumQueries(2):
+            lists = [list(a.books.all()) for a in 
Author.objects.prefetch_related('books')]
+
+        normal_lists = [list(a.books.all()) for a in Author.objects.all()]
+        self.assertEqual(lists, normal_lists)
+
+    def test_foreignkey_reverse(self):
+        with self.assertNumQueries(2):
+            lists = [list(b.first_time_authors.all())
+                     for b in 
Book.objects.prefetch_related('first_time_authors')]
+
+        self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: 
Charlotte>"])
+
+    def test_survives_clone(self):
+        with self.assertNumQueries(2):
+            lists = [list(b.first_time_authors.all())
+                     for b in 
Book.objects.prefetch_related('first_time_authors').exclude(id=1000)]
+
+    def test_len(self):
+        with self.assertNumQueries(2):
+            qs = Book.objects.prefetch_related('first_time_authors')
+            length = len(qs)
+            lists = [list(b.first_time_authors.all())
+                     for b in qs]
+
+    def test_bool(self):
+        with self.assertNumQueries(2):
+            qs = Book.objects.prefetch_related('first_time_authors')
+            x = bool(qs)
+            lists = [list(b.first_time_authors.all())
+                     for b in qs]
+
+    def test_count(self):
+        with self.assertNumQueries(2):
+            qs = Book.objects.prefetch_related('first_time_authors')
+            [b.first_time_authors.count() for b in qs]
+
+    def test_exists(self):
+        with self.assertNumQueries(2):
+            qs = Book.objects.prefetch_related('first_time_authors')
+            [b.first_time_authors.exists() for b in qs]
+
+    def test_clear(self):
+        """
+        Test that we can clear the behavior by calling prefetch_related(None)
+        """
+        with self.assertNumQueries(5):
+            with_prefetch = Author.objects.prefetch_related('books')
+            without_prefetch = with_prefetch.prefetch_related(None)
+            lists = [list(a.books.all()) for a in without_prefetch]
+
+    def test_m2m_then_m2m(self):
+        """
+        Test we can follow a m2m and another m2m
+        """
+        with self.assertNumQueries(3):
+            qs = Author.objects.prefetch_related('books__read_by')
+            lists = [[[unicode(r) for r in b.read_by.all()]
+                      for b in a.books.all()]
+                     for a in qs]
+            self.assertEqual(lists,
+            [
+                [[u"Amy"], [u"Belinda"]],  # Charlotte - Poems, Jane Eyre
+                [[u"Amy"]],                # Anne - Poems
+                [[u"Amy"], []],            # Emily - Poems, Wuthering Heights
+                [[u"Amy", u"Belinda"]],    # Jane - Sense and Sensibility
+            ])
+
+    def test_overriding_prefetch(self):
+        with self.assertNumQueries(3):
+            qs = Author.objects.prefetch_related('books', 'books__read_by')
+            lists = [[[unicode(r) for r in b.read_by.all()]
+                      for b in a.books.all()]
+                     for a in qs]
+            self.assertEqual(lists,
+            [
+                [[u"Amy"], [u"Belinda"]],  # Charlotte - Poems, Jane Eyre
+                [[u"Amy"]],                # Anne - Poems
+                [[u"Amy"], []],            # Emily - Poems, Wuthering Heights
+                [[u"Amy", u"Belinda"]],    # Jane - Sense and Sensibility
+            ])
+        with self.assertNumQueries(3):
+            qs = Author.objects.prefetch_related('books__read_by', 'books')
+            lists = [[[unicode(r) for r in b.read_by.all()]
+                      for b in a.books.all()]
+                     for a in qs]
+            self.assertEqual(lists,
+            [
+                [[u"Amy"], [u"Belinda"]],  # Charlotte - Poems, Jane Eyre
+                [[u"Amy"]],                # Anne - Poems
+                [[u"Amy"], []],            # Emily - Poems, Wuthering Heights
+                [[u"Amy", u"Belinda"]],    # Jane - Sense and Sensibility
+            ])
+
+    def test_get(self):
+        """
+        Test that objects retrieved with .get() get the prefetch behaviour
+        """
+        # Need a double
+        with self.assertNumQueries(3):
+            author = 
Author.objects.prefetch_related('books__read_by').get(name="Charlotte")
+            lists = [[unicode(r) for r in b.read_by.all()]
+                      for b in author.books.all()]
+            self.assertEqual(lists, [[u"Amy"], [u"Belinda"]])  # Poems, Jane 
Eyre
+
+    def test_foreign_key_then_m2m(self):
+        """
+        Test we can follow an m2m relation after a relation like ForeignKey
+        that doesn't have many objects
+        """
+        with self.assertNumQueries(2):
+            qs = 
Author.objects.select_related('first_book').prefetch_related('first_book__read_by')
+            lists = [[unicode(r) for r in a.first_book.read_by.all()]
+                     for a in qs]
+            self.assertEqual(lists, [[u"Amy"],
+                                     [u"Amy"],
+                                     [u"Amy"],
+                                     [u"Amy", "Belinda"]])
+
+    def test_attribute_error(self):
+        qs = Reader.objects.all().prefetch_related('books_read__xyz')
+        with self.assertRaises(AttributeError) as cm:
+            list(qs)
+
+        self.assertTrue('prefetch_related' in str(cm.exception))
+
+    def test_invalid_final_lookup(self):
+        qs = Book.objects.prefetch_related('authors__first_book')
+        with self.assertRaises(ValueError) as cm:
+            list(qs)
+
+        self.assertTrue('prefetch_related' in str(cm.exception))
+        self.assertTrue("first_book" in str(cm.exception))
+
+
+class DefaultManagerTests(TestCase):
+
+    def setUp(self):
+        self.qual1 = Qualification.objects.create(name="BA")
+        self.qual2 = Qualification.objects.create(name="BSci")
+        self.qual3 = Qualification.objects.create(name="MA")
+        self.qual4 = Qualification.objects.create(name="PhD")
+
+        self.teacher1 = Teacher.objects.create(name="Mr Cleese")
+        self.teacher2 = Teacher.objects.create(name="Mr Idle")
+        self.teacher3 = Teacher.objects.create(name="Mr Chapman")
+
+        self.teacher1.qualifications.add(self.qual1, self.qual2, self.qual3, 
self.qual4)
+        self.teacher2.qualifications.add(self.qual1)
+        self.teacher3.qualifications.add(self.qual2)
+
+        self.dept1 = Department.objects.create(name="English")
+        self.dept2 = Department.objects.create(name="Physics")
+
+        self.dept1.teachers.add(self.teacher1, self.teacher2)
+        self.dept2.teachers.add(self.teacher1, self.teacher3)
+
+    def test_m2m_then_m2m(self):
+        with self.assertNumQueries(3):
+            # When we prefetch the teachers, and force the query, we don't want
+            # the default manager on teachers to immediately get all the 
related
+            # qualifications, since this will do one query per teacher.
+            qs = Department.objects.prefetch_related('teachers')
+            depts = "".join(["%s department: %s\n" %
+                             (dept.name, ", ".join(unicode(t) for t in 
dept.teachers.all()))
+                             for dept in qs])
+
+            self.assertEqual(depts,
+                             "English department: Mr Cleese (BA, BSci, MA, 
PhD), Mr Idle (BA)\n"
+                             "Physics department: Mr Cleese (BA, BSci, MA, 
PhD), Mr Chapman (BSci)\n")
+
+
+class GenericRelationTests(TestCase):
+
+    def test_traverse_GFK(self):
+        """
+        Test that we can traverse a 'content_object' with prefetch_related()
+        """
+        # In fact, there is no special support for this in prefetch_related 
code
+        # - we can traverse any object that will lead us to objects that have
+        # related managers.
+
+        book1 = Book.objects.create(title="Winnie the Pooh")
+        book2 = Book.objects.create(title="Do you like green eggs and spam?")
+
+        reader1 = Reader.objects.create(name="me")
+        reader2 = Reader.objects.create(name="you")
+
+        book1.read_by.add(reader1)
+        book2.read_by.add(reader2)
+
+        TaggedItem.objects.create(tag="awesome", content_object=book1)
+        TaggedItem.objects.create(tag="awesome", content_object=book2)
+
+        ct = ContentType.objects.get_for_model(Book)
+
+        # We get 4 queries - 1 for the main query, 2 for the accesses to
+        # 'content_object' (one per tagged item, because these can't be
+        # handled by select_related), and 1 for the 'read_by' relation.
+        with self.assertNumQueries(4):
+            # If we limit to books, we know that they will have 'read_by'
+            # attributes, so the following makes sense:
+            qs = 
TaggedItem.objects.select_related('content_type').prefetch_related('content_object__read_by').filter(tag='awesome').filter(content_type=ct,
 tag='awesome')
+            readers_of_awesome_books = [r.name for tag in qs
+                                        for r in 
tag.content_object.read_by.all()]
+            self.assertEqual(readers_of_awesome_books, ["me", "you"])
+
+
+    def test_generic_relation(self):
+        b = Bookmark.objects.create(url='http://www.djangoproject.com/')
+        t1 = TaggedItem.objects.create(content_object=b, tag='django')
+        t2 = TaggedItem.objects.create(content_object=b, tag='python')
+
+        with self.assertNumQueries(2):
+            tags = [t.tag for b in Bookmark.objects.prefetch_related('tags')
+                    for t in b.tags.all()]
+            self.assertEqual(sorted(tags), ["django", "python"])
+
+
+class MultiTableInheritanceTest(TestCase):
+
+    def setUp(self):
+        self.book1 = BookWithYear.objects.create(
+            title="Poems", published_year=2010)
+        self.book2 = BookWithYear.objects.create(
+            title="More poems", published_year=2011)
+        self.author1 = AuthorWithAge.objects.create(
+            name='Jane', first_book=self.book1, age=50)
+        self.author2 = AuthorWithAge.objects.create(
+            name='Tom', first_book=self.book1, age=49)
+        self.author3 = AuthorWithAge.objects.create(
+            name='Robert', first_book=self.book2, age=48)
+        self.authorAddress = AuthorAddress.objects.create(
+            author=self.author1, address='SomeStreet 1')
+        self.book2.aged_authors.add(self.author2, self.author3)
+
+    def test_foreignkey(self):
+        with self.assertNumQueries(2):
+            qs = AuthorWithAge.objects.prefetch_related('addresses')
+            addresses = [[unicode(address) for address in obj.addresses.all()]
+                         for obj in qs]
+        self.assertEquals(addresses, [[unicode(self.authorAddress)], [], []])
+
+    def test_m2m_to_inheriting_model(self):
+        qs = AuthorWithAge.objects.prefetch_related('books_with_year')
+        with self.assertNumQueries(2):
+            lst = [[unicode(book) for book in author.books_with_year.all()]
+                   for author in qs]
+        qs = AuthorWithAge.objects.all()
+        lst2 = [[unicode(book) for book in author.books_with_year.all()]
+                for author in qs]
+        self.assertEquals(lst, lst2)
+
+        qs = BookWithYear.objects.prefetch_related('aged_authors')
+        with self.assertNumQueries(2):
+            lst = [[unicode(author) for author in book.aged_authors.all()]
+                   for book in qs]
+        qs = BookWithYear.objects.all()
+        lst2 = [[unicode(author) for author in book.aged_authors.all()]
+               for book in qs]
+        self.assertEquals(lst, lst2)
+
+    def test_parent_link_prefetch(self):
+        with self.assertRaises(ValueError) as cm:
+            qs = list(AuthorWithAge.objects.prefetch_related('author'))
+        self.assertTrue('prefetch_related' in str(cm.exception))
+
+
+class ForeignKeyToFieldTest(TestCase):
+
+    def setUp(self):
+        self.book = Book.objects.create(title="Poems")
+        self.author1 = Author.objects.create(name='Jane', first_book=self.book)
+        self.author2 = Author.objects.create(name='Tom', first_book=self.book)
+        self.author3 = Author.objects.create(name='Robert', 
first_book=self.book)
+        self.authorAddress = AuthorAddress.objects.create(
+            author=self.author1, address='SomeStreet 1'
+        )
+        FavoriteAuthors.objects.create(author=self.author1,
+                                       likes_author=self.author2)
+        FavoriteAuthors.objects.create(author=self.author2,
+                                       likes_author=self.author3)
+        FavoriteAuthors.objects.create(author=self.author3,
+                                       likes_author=self.author1)
+
+    def test_foreignkey(self):
+        with self.assertNumQueries(2):
+            qs = Author.objects.prefetch_related('addresses')
+            addresses = [[unicode(address) for address in obj.addresses.all()]
+                         for obj in qs]
+        self.assertEquals(addresses, [[unicode(self.authorAddress)], [], []])
+
+    def test_m2m(self):
+        with self.assertNumQueries(3):
+            qs = Author.objects.all().prefetch_related('favorite_authors', 
'favors_me')
+            favorites = [(
+                 [unicode(i_like) for i_like in author.favorite_authors.all()],
+                 [unicode(likes_me) for likes_me in author.favors_me.all()]
+                ) for author in qs]
+            self.assertEquals(
+                favorites,
+                [
+                    ([unicode(self.author2)],[unicode(self.author3)]),
+                    ([unicode(self.author3)],[unicode(self.author1)]),
+                    ([unicode(self.author1)],[unicode(self.author2)])
+                ]
+            )
+
+
+class LookupOrderingTest(TestCase):
+    """
+    Test cases that demonstrate that ordering of lookups is important, and
+    ensure it is preserved.
+    """
+
+    def setUp(self):
+        self.person1 = Person.objects.create(name="Joe")
+        self.person2 = Person.objects.create(name="Mary")
+
+        self.house1 = House.objects.create(address="123 Main St")
+        self.house2 = House.objects.create(address="45 Side St")
+        self.house3 = House.objects.create(address="6 Downing St")
+        self.house4 = House.objects.create(address="7 Regents St")
+
+        self.room1_1 = Room.objects.create(name="Dining room", 
house=self.house1)
+        self.room1_2 = Room.objects.create(name="Lounge", house=self.house1)
+        self.room1_3 = Room.objects.create(name="Kitchen", house=self.house1)
+
+        self.room2_1 = Room.objects.create(name="Dining room", 
house=self.house2)
+        self.room2_2 = Room.objects.create(name="Lounge", house=self.house2)
+
+        self.room3_1 = Room.objects.create(name="Dining room", 
house=self.house3)
+        self.room3_2 = Room.objects.create(name="Lounge", house=self.house3)
+        self.room3_3 = Room.objects.create(name="Kitchen", house=self.house3)
+
+        self.room4_1 = Room.objects.create(name="Dining room", 
house=self.house4)
+        self.room4_2 = Room.objects.create(name="Lounge", house=self.house4)
+
+        self.person1.houses.add(self.house1, self.house2)
+        self.person2.houses.add(self.house3, self.house4)
+
+    def test_order(self):
+        with self.assertNumQueries(4):
+            # The following two queries must be done in the same order as 
written,
+            # otherwise 'primary_house' will cause non-prefetched lookups
+            qs = Person.objects.prefetch_related('houses__rooms',
+                                                 'primary_house__occupants')
+            [list(p.primary_house.occupants.all()) for p in qs]
+
+
+class NullableTest(TestCase):
+
+    def setUp(self):
+        boss = Employee.objects.create(name="Peter")
+        worker1 = Employee.objects.create(name="Joe", boss=boss)
+        worker2 = Employee.objects.create(name="Angela", boss=boss)
+
+    def test_traverse_nullable(self):
+        with self.assertNumQueries(2):
+            qs = 
Employee.objects.select_related('boss').prefetch_related('boss__serfs')
+            co_serfs = [list(e.boss.serfs.all()) if e.boss is not None else []
+                        for e in qs]
+
+        qs2 =  Employee.objects.select_related('boss')
+        co_serfs2 =  [list(e.boss.serfs.all()) if e.boss is not None else []
+                        for e in qs2]
+
+        self.assertEqual(co_serfs, co_serfs2)

-- 
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To post to this group, send email to django-updates@googlegroups.com.
To unsubscribe from this group, send email to 
django-updates+unsubscr...@googlegroups.com.
For more options, visit this group at 
http://groups.google.com/group/django-updates?hl=en.

Reply via email to