queries

noreply Thu, 19 Mar 2009 02:06:09 -0700

Author: mtredinnick
Date: 2009-03-19 04:06:04 -0500 (Thu, 19 Mar 2009)
New Revision: 10090


Added:
   django/trunk/tests/modeltests/defer/
   django/trunk/tests/modeltests/defer/__init__.py
   django/trunk/tests/modeltests/defer/models.py
Modified:
   django/trunk/django/db/models/base.py
   django/trunk/django/db/models/manager.py
   django/trunk/django/db/models/options.py
   django/trunk/django/db/models/query.py
   django/trunk/django/db/models/query_utils.py
   django/trunk/django/db/models/sql/query.py
   django/trunk/docs/ref/models/querysets.txt
   django/trunk/tests/regressiontests/queries/models.py
Log:
Fixed #5420 -- Added support for delayed loading of model fields.

In extreme cases, some fields are expensive to load from the database
(e.g. GIS fields requiring conversion, or large text fields). This
commit adds defer() and only() methods to querysets that allow the
caller to specify which fields should not be loaded unless they are
accessed.

Modified: django/trunk/django/db/models/base.py
===================================================================
--- django/trunk/django/db/models/base.py       2009-03-19 09:04:19 UTC (rev 10089)
+++ django/trunk/django/db/models/base.py       2009-03-19 09:06:04 UTC (rev 10090)
@@ -12,7 +12,8 @@
 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned, FieldError
 from django.db.models.fields import AutoField, FieldDoesNotExist
 from django.db.models.fields.related import OneToOneRel, ManyToOneRel, OneToOneField
-from django.db.models.query import delete_objects, Q, CollectedObjects
+from django.db.models.query import delete_objects, Q
+from django.db.models.query_utils import CollectedObjects, DeferredAttribute
 from django.db.models.options import Options
 from django.db import connection, transaction, DatabaseError
 from django.db.models import signals
@@ -235,6 +236,7 @@
 
 class Model(object):
     __metaclass__ = ModelBase
+    _deferred = False
 
     def __init__(self, *args, **kwargs):
         signals.pre_init.send(sender=self.__class__, args=args, kwargs=kwargs)
@@ -271,6 +273,13 @@
         for field in fields_iter:
             is_related_object = False
             if kwargs:
+                # This slightly odd construct is so that we can access any
+                # data-descriptor object (DeferredAttribute) without triggering
+                # its __get__ method.
+                if (field.attname not in kwargs and
+                        isinstance(self.__class__.__dict__.get(field.attname), DeferredAttribute)):
+                    # This field will be populated on request.
+                    continue
                 if isinstance(field.rel, ManyToOneRel):
                     try:
                         # Assume object instance was passed in.
@@ -332,6 +341,31 @@
     def __hash__(self):
         return hash(self._get_pk_val())
 
+    def __reduce__(self):
+        """
+        Provide pickling support. Normally, this just dispatches to Python's
+        standard handling. However, for models with deferred field loading, we
+        need to do things manually, as they're dynamically created classes and
+        only module-level classes can be pickled by the default path.
+        """
+        if not self._deferred:
+            return super(Model, self).__reduce__()
+        data = self.__dict__
+        defers = []
+        pk_val = None
+        for field in self._meta.fields:
+            if isinstance(self.__class__.__dict__.get(field.attname),
+                    DeferredAttribute):
+                defers.append(field.attname)
+                if pk_val is None:
+                    # The pk_val and model values are the same for all
+                    # DeferredAttribute classes, so we only need to do this
+                    # once.
+                    obj = self.__class__.__dict__[field.attname]
+                    pk_val = obj.pk_value
+                    model = obj.model_ref()
+        return (model_unpickle, (model, pk_val, defers), data)
+
     def _get_pk_val(self, meta=None):
         if not meta:
             meta = self._meta
@@ -591,6 +625,15 @@
 class Empty(object):
     pass
 
+def model_unpickle(model, pk_val, attrs):
+    """
+    Used to unpickle Model subclasses with deferred fields.
+    """
+    from django.db.models.query_utils import deferred_class_factory
+    cls = deferred_class_factory(model, pk_val, attrs)
+    return cls.__new__(cls)
+model_unpickle.__safe_for_unpickle__ = True
+
 if sys.version_info < (2, 5):
     # Prior to Python 2.5, Exception was an old-style class
     def subclass_exception(name, parent, unused):

Modified: django/trunk/django/db/models/manager.py
===================================================================
--- django/trunk/django/db/models/manager.py    2009-03-19 09:04:19 UTC (rev 10089)
+++ django/trunk/django/db/models/manager.py    2009-03-19 09:06:04 UTC (rev 10090)
@@ -167,6 +167,12 @@
     def reverse(self, *args, **kwargs):
         return self.get_query_set().reverse(*args, **kwargs)
 
+    def defer(self, *args, **kwargs):
+        return self.get_query_set().defer(*args, **kwargs)
+
+    def only(self, *args, **kwargs):
+        return self.get_query_set().only(*args, **kwargs)
+
     def _insert(self, values, **kwargs):
         return insert_query(self.model, values, **kwargs)
 

Modified: django/trunk/django/db/models/options.py
===================================================================
--- django/trunk/django/db/models/options.py    2009-03-19 09:04:19 UTC (rev 10089)
+++ django/trunk/django/db/models/options.py    2009-03-19 09:06:04 UTC (rev 10090)
@@ -477,3 +477,9 @@
             self._ordered_objects = objects
         return self._ordered_objects
 
+    def pk_index(self):
+        """
+        Returns the index of the primary key field in the self.fields list.
+        """
+        return self.fields.index(self.pk)
+

Modified: django/trunk/django/db/models/query.py
===================================================================
--- django/trunk/django/db/models/query.py      2009-03-19 09:04:19 UTC (rev 10089)
+++ django/trunk/django/db/models/query.py      2009-03-19 09:06:04 UTC (rev 10090)
@@ -1,3 +1,7 @@
+"""
+The main QuerySet implementation. This provides the public API for the ORM.
+"""
+
 try:
     set
 except NameError:
@@ -6,9 +10,8 @@
 from django.db import connection, transaction, IntegrityError
 from django.db.models.aggregates import Aggregate
 from django.db.models.fields import DateField
-from django.db.models.query_utils import Q, select_related_descend
+from django.db.models.query_utils import Q, select_related_descend, CollectedObjects, CyclicDependency, deferred_class_factory
 from django.db.models import signals, sql
-from django.utils.datastructures import SortedDict
 
 
 # Used to control how many objects are worked with at once in some cases (e.g.
@@ -22,102 +25,6 @@
 # Pull into this namespace for backwards compatibility.
 EmptyResultSet = sql.EmptyResultSet
 
-
-class CyclicDependency(Exception):
-    """
-    An error when dealing with a collection of objects that have a cyclic
-    dependency, i.e. when deleting multiple objects.
-    """
-    pass
-
-
-class CollectedObjects(object):
-    """
-    A container that stores keys and lists of values along with remembering the
-    parent objects for all the keys.
-
-    This is used for the database object deletion routines so that we can
-    calculate the 'leaf' objects which should be deleted first.
-    """
-
-    def __init__(self):
-        self.data = {}
-        self.children = {}
-
-    def add(self, model, pk, obj, parent_model, nullable=False):
-        """
-        Adds an item to the container.
-
-        Arguments:
-        * model - the class of the object being added.
-        * pk - the primary key.
-        * obj - the object itself.
-        * parent_model - the model of the parent object that this object was
-          reached through.
-        * nullable - should be True if this relation is nullable.
-
-        Returns True if the item already existed in the structure and
-        False otherwise.
-        """
-        d = self.data.setdefault(model, SortedDict())
-        retval = pk in d
-        d[pk] = obj
-        # Nullable relationships can be ignored -- they are nulled out before
-        # deleting, and therefore do not affect the order in which objects
-        # have to be deleted.
-        if parent_model is not None and not nullable:
-            self.children.setdefault(parent_model, []).append(model)
-        return retval
-
-    def __contains__(self, key):
-        return self.data.__contains__(key)
-
-    def __getitem__(self, key):
-        return self.data[key]
-
-    def __nonzero__(self):
-        return bool(self.data)
-
-    def iteritems(self):
-        for k in self.ordered_keys():
-            yield k, self[k]
-
-    def items(self):
-        return list(self.iteritems())
-
-    def keys(self):
-        return self.ordered_keys()
-
-    def ordered_keys(self):
-        """
-        Returns the models in the order that they should be dealt with (i.e.
-        models with no dependencies first).
-        """
-        dealt_with = SortedDict()
-        # Start with items that have no children
-        models = self.data.keys()
-        while len(dealt_with) < len(models):
-            found = False
-            for model in models:
-                if model in dealt_with:
-                    continue
-                children = self.children.setdefault(model, [])
-                if len([c for c in children if c not in dealt_with]) == 0:
-                    dealt_with[model] = None
-                    found = True
-            if not found:
-                raise CyclicDependency(
-                    "There is a cyclic dependency of items to be processed.")
-
-        return dealt_with.keys()
-
-    def unordered_keys(self):
-        """
-        Fallback for the case where is a cyclic dependency but we don't  care.
-        """
-        return self.data.keys()
-
-
 class QuerySet(object):
     """
     Represents a lazy database lookup for a set of objects.
@@ -275,6 +182,11 @@
         extra_select = self.query.extra_select.keys()
         aggregate_select = self.query.aggregate_select.keys()
 
+        only_load = self.query.get_loaded_field_names()
+        if not fill_cache:
+            fields = self.model._meta.fields
+            pk_idx = self.model._meta.pk_index()
+
         index_start = len(extra_select)
         aggregate_start = index_start + len(self.model._meta.fields)
 
@@ -282,10 +194,31 @@
             if fill_cache:
                 obj, _ = get_cached_row(self.model, row,
                             index_start, max_depth,
-                            requested=requested, offset=len(aggregate_select))
+                            requested=requested, offset=len(aggregate_select),
+                            only_load=only_load)
             else:
-                # omit aggregates in object creation
-                obj = self.model(*row[index_start:aggregate_start])
+                load_fields = only_load.get(self.model)
+                if load_fields:
+                    # Some fields have been deferred, so we have to initialise
+                    # via keyword arguments.
+                    row_data = row[index_start:aggregate_start]
+                    pk_val = row_data[pk_idx]
+                    skip = set()
+                    init_list = []
+                    for field in fields:
+                        if field.name not in load_fields:
+                            skip.add(field.attname)
+                        else:
+                            init_list.append(field.attname)
+                    if skip:
+                        model_cls = deferred_class_factory(self.model, pk_val,
+                                skip)
+                        obj = model_cls(**dict(zip(init_list, row_data)))
+                    else:
+                        obj = self.model(*row[index_start:aggregate_start])
+                else:
+                    # Omit aggregates in object creation.
+                    obj = self.model(*row[index_start:aggregate_start])
 
             for i, k in enumerate(extra_select):
                 setattr(obj, k, row[i])
@@ -655,6 +588,35 @@
         clone.query.standard_ordering = not clone.query.standard_ordering
         return clone
 
+    def defer(self, *fields):
+        """
+        Defers the loading of data for certain fields until they are accessed.
+        The set of fields to defer is added to any existing set of deferred
+        fields. The only exception to this is if None is passed in as the only
+        parameter, in which case all deferrals are removed (None acts as a
+        reset option).
+        """
+        clone = self._clone()
+        if fields == (None,):
+            clone.query.clear_deferred_loading()
+        else:
+            clone.query.add_deferred_loading(fields)
+        return clone
+
+    def only(self, *fields):
+        """
+        Essentially, the opposite of defer. Only the fields passed into this
+        method and that are not already specified as deferred are loaded
+        immediately when the queryset is evaluated.
+        """
+        if fields == [None]:
+            # Can only pass None to defer(), not only(), as the reset option.
+            # That won't stop people trying to do this, so let's be explicit.
+            raise TypeError("Cannot pass None as an argument to only().")
+        clone = self._clone()
+        clone.query.add_immediate_loading(fields)
+        return clone
+
     ###################
     # PRIVATE METHODS #
     ###################
@@ -757,6 +719,7 @@
         Called by the _clone() method after initializing the rest of the
         instance.
         """
+        self.query.clear_deferred_loading()
         self.query.clear_select_fields()
 
         if self._fields:
@@ -847,9 +810,9 @@
             for row in self.query.results_iter():
                 yield tuple(row)
         else:
-            # When extra(select=...) or an annotation is involved, the extra cols are
-            # always at the start of the row, and we need to reorder the fields
-            # to match the order in self._fields.
+            # When extra(select=...) or an annotation is involved, the extra
+            # cols are always at the start of the row, and we need to reorder
+            # the fields to match the order in self._fields.
             extra_names = self.query.extra_select.keys()
             field_names = self.field_names
             aggregate_names = self.query.aggregate_select.keys()
@@ -884,6 +847,7 @@
         Called by the _clone() method after initializing the rest of the
         instance.
         """
+        self.query.clear_deferred_loading()
         self.query = self.query.clone(klass=sql.DateQuery, setup=True)
         self.query.select = []
         field = self.model._meta.get_field(self._field_name, many_to_many=False)
@@ -935,7 +899,7 @@
 
 
 def get_cached_row(klass, row, index_start, max_depth=0, cur_depth=0,
-                   requested=None, offset=0):
+                   requested=None, offset=0, only_load=None):
     """
     Helper function that recursively returns an object with the specified
     related attributes already populated.
@@ -951,7 +915,24 @@
         # If we only have a list of Nones, there was not related object.
         obj = None
     else:
-        obj = klass(*fields)
+        load_fields = only_load and only_load.get(klass) or None
+        if load_fields:
+            # Handle deferred fields.
+            skip = set()
+            init_list = []
+            pk_val = fields[klass._meta.pk_index()]
+            for field in klass._meta.fields:
+                if field.name not in load_fields:
+                    skip.add(field.name)
+                else:
+                    init_list.append(field.attname)
+            if skip:
+                klass = deferred_class_factory(klass, pk_val, skip)
+                obj = klass(**dict(zip(init_list, fields)))
+            else:
+                obj = klass(*fields)
+        else:
+            obj = klass(*fields)
     index_end += offset
     for f in klass._meta.fields:
         if not select_related_descend(f, restricted, requested):

Modified: django/trunk/django/db/models/query_utils.py
===================================================================
--- django/trunk/django/db/models/query_utils.py        2009-03-19 09:04:19 UTC (rev 10089)
+++ django/trunk/django/db/models/query_utils.py        2009-03-19 09:06:04 UTC (rev 10090)
@@ -1,14 +1,116 @@
 """
 Various data structures used in query construction.
 
-Factored out from django.db.models.query so that they can also be used by other
-modules without getting into circular import difficulties.
+Factored out from django.db.models.query to avoid making the main module very
+large and/or so that they can be used by other modules without getting into
+circular import difficulties.
 """
 
+import weakref
 from copy import deepcopy
 
 from django.utils import tree
+from django.utils.datastructures import SortedDict
 
+try:
+    sorted
+except NameError:
+    from django.utils.itercompat import sorted  # For Python 2.3.
+
+
+class CyclicDependency(Exception):
+    """
+    An error when dealing with a collection of objects that have a cyclic
+    dependency, i.e. when deleting multiple objects.
+    """
+    pass
+
+class CollectedObjects(object):
+    """
+    A container that stores keys and lists of values along with remembering the
+    parent objects for all the keys.
+
+    This is used for the database object deletion routines so that we can
+    calculate the 'leaf' objects which should be deleted first.
+    """
+
+    def __init__(self):
+        self.data = {}
+        self.children = {}
+
+    def add(self, model, pk, obj, parent_model, nullable=False):
+        """
+        Adds an item to the container.
+
+        Arguments:
+        * model - the class of the object being added.
+        * pk - the primary key.
+        * obj - the object itself.
+        * parent_model - the model of the parent object that this object was
+          reached through.
+        * nullable - should be True if this relation is nullable.
+
+        Returns True if the item already existed in the structure and
+        False otherwise.
+        """
+        d = self.data.setdefault(model, SortedDict())
+        retval = pk in d
+        d[pk] = obj
+        # Nullable relationships can be ignored -- they are nulled out before
+        # deleting, and therefore do not affect the order in which objects
+        # have to be deleted.
+        if parent_model is not None and not nullable:
+            self.children.setdefault(parent_model, []).append(model)
+        return retval
+
+    def __contains__(self, key):
+        return self.data.__contains__(key)
+
+    def __getitem__(self, key):
+        return self.data[key]
+
+    def __nonzero__(self):
+        return bool(self.data)
+
+    def iteritems(self):
+        for k in self.ordered_keys():
+            yield k, self[k]
+
+    def items(self):
+        return list(self.iteritems())
+
+    def keys(self):
+        return self.ordered_keys()
+
+    def ordered_keys(self):
+        """
+        Returns the models in the order that they should be dealt with (i.e.
+        models with no dependencies first).
+        """
+        dealt_with = SortedDict()
+        # Start with items that have no children
+        models = self.data.keys()
+        while len(dealt_with) < len(models):
+            found = False
+            for model in models:
+                if model in dealt_with:
+                    continue
+                children = self.children.setdefault(model, [])
+                if len([c for c in children if c not in dealt_with]) == 0:
+                    dealt_with[model] = None
+                    found = True
+            if not found:
+                raise CyclicDependency(
+                    "There is a cyclic dependency of items to be processed.")
+
+        return dealt_with.keys()
+
+    def unordered_keys(self):
+        """
+        Fallback for the case where there is a cyclic dependency but we don't care.
+        """
+        return self.data.keys()
+
 class QueryWrapper(object):
     """
     A type that indicates the contents are an SQL fragment and the associate
@@ -51,6 +153,39 @@
         obj.negate()
         return obj
 
+class DeferredAttribute(object):
+    """
+    A wrapper for a deferred-loading field. When the value is read from this
+    object the first time, the query is executed.
+    """
+    def __init__(self, field_name, pk_value, model):
+        self.field_name = field_name
+        self.pk_value = pk_value
+        self.model_ref = weakref.ref(model)
+        self.loaded = False
+
+    def __get__(self, instance, owner):
+        """
+        Retrieves and caches the value from the datastore on the first lookup.
+        Returns the cached value.
+        """
+        assert instance is not None
+        if not self.loaded:
+            obj = self.model_ref()
+            if obj is None:
+                return
+            self.value = list(obj._base_manager.filter(pk=self.pk_value).values_list(self.field_name, flat=True))[0]
+            self.loaded = True
+        return self.value
+
+    def __set__(self, name, value):
+        """
+        Deferred loading attributes can be set normally (which means there will
+        never be a database lookup involved).
+        """
+        self.value = value
+        self.loaded = True
+
 def select_related_descend(field, restricted, requested):
     """
     Returns True if this field should be used to descend deeper for
@@ -67,3 +202,35 @@
     if not restricted and field.null:
         return False
     return True
+
+# This function is needed because data descriptors must be defined on a class
+# object, not an instance, to have any effect.
+
+def deferred_class_factory(model, pk_value, attrs):
+    """
+    Returns a class object that is a copy of "model" with the specified "attrs"
+    being replaced with DeferredAttribute objects. The "pk_value" ties the
+    deferred attributes to a particular instance of the model.
+    """
+    class Meta:
+        pass
+    setattr(Meta, "proxy", True)
+    setattr(Meta, "app_label", model._meta.app_label)
+
+    # The app_cache wants a unique name for each model, otherwise the new class
+    # won't be created (we get an old one back). Therefore, we generate the
+    # name using the passed in attrs. It's OK to reuse an old class if the attrs
+    # are identical.
+    name = "%s_Deferred_%s" % (model.__name__, '_'.join(sorted(list(attrs))))
+
+    overrides = dict([(attr, DeferredAttribute(attr, pk_value, model))
+            for attr in attrs])
+    overrides["Meta"] = Meta
+    overrides["__module__"] = model.__module__
+    overrides["_deferred"] = True
+    return type(name, (model,), overrides)
+
+# The above function is also used to unpickle model instances with deferred
+# fields.
+deferred_class_factory.__safe_for_unpickling__ = True
+

Modified: django/trunk/django/db/models/sql/query.py
===================================================================
--- django/trunk/django/db/models/sql/query.py  2009-03-19 09:04:19 UTC (rev 10089)
+++ django/trunk/django/db/models/sql/query.py  2009-03-19 09:06:04 UTC (rev 10090)
@@ -94,6 +94,11 @@
         self.extra_params = ()
         self.extra_order_by = ()
 
+        # A tuple that is a set of model field names and either True, if these
+        # are the fields to defer, or False if these are the only fields to
+        # load.
+        self.deferred_loading = (set(), True)
+
     def __str__(self):
         """
         Returns the query as a string of SQL with the parameter values
@@ -206,6 +211,7 @@
         obj.extra_where = self.extra_where
         obj.extra_params = self.extra_params
         obj.extra_order_by = self.extra_order_by
+        obj.deferred_loading = deepcopy(self.deferred_loading)
         if self.filter_is_sticky and self.used_aliases:
             obj.used_aliases = self.used_aliases.copy()
         else:
@@ -550,9 +556,101 @@
         if self.select_related and not self.related_select_cols:
             self.fill_related_selections()
 
+    def deferred_to_data(self, target, callback):
+        """
+        Converts the self.deferred_loading data structure to an alternate data
+        structure, describing the fields that *will* be loaded. This is used to
+        compute the columns to select from the database and also by the
+        QuerySet class to work out which fields are being initialised on each
+        model. Models that have all their fields included aren't mentioned in
+        the result, only those that have field restrictions in place.
+
+        The "target" parameter is the instance that is populated (in place).
+        The "callback" is a function that is called whenever a (model, field)
+        pair needs to be added to "target". It accepts three parameters:
+        "target", and the model and list of fields being added for that model.
+        """
+        field_names, defer = self.deferred_loading
+        if not field_names:
+            return
+        columns = set()
+        cur_model = self.model
+        opts = cur_model._meta
+        seen = {}
+        must_include = {cur_model: set([opts.pk])}
+        for field_name in field_names:
+            parts = field_name.split(LOOKUP_SEP)
+            for name in parts[:-1]:
+                old_model = cur_model
+                source = opts.get_field_by_name(name)[0]
+                cur_model = opts.get_field_by_name(name)[0].rel.to
+                opts = cur_model._meta
+                # Even if we're "just passing through" this model, we must add
+                # both the current model's pk and the related reference field
+                # to the things we select.
+                must_include[old_model].add(source)
+                add_to_dict(must_include, cur_model, opts.pk)
+            field, model, _, _ = opts.get_field_by_name(parts[-1])
+            if model is None:
+                model = cur_model
+            add_to_dict(seen, model, field)
+
+        if defer:
+            # We need to load all fields for each model, except those that
+            # appear in "seen" (for all models that appear in "seen"). The only
+            # slight complexity here is handling fields that exist on parent
+            # models.
+            workset = {}
+            for model, values in seen.iteritems():
+                for field, f_model in model._meta.get_fields_with_model():
+                    if field in values:
+                        continue
+                    add_to_dict(workset, f_model or model, field)
+            for model, values in must_include.iteritems():
+                # If we haven't included a model in workset, we don't add the
+                # corresponding must_include fields for that model, since an
+                # empty set means "include all fields". That's why there's no
+                # "else" branch here.
+                if model in workset:
+                    workset[model].update(values)
+            for model, values in workset.iteritems():
+                callback(target, model, values)
+        else:
+            for model, values in must_include.iteritems():
+                if model in seen:
+                    seen[model].update(values)
+                else:
+                    # As we've passed through this model, but not explicitly
+                    # included any fields, we have to make sure it's mentioned
+                    # so that only the "must include" fields are pulled in.
+                    seen[model] = values
+            for model, values in seen.iteritems():
+                callback(target, model, values)
+
+    def deferred_to_columns(self):
+        """
+        Converts the self.deferred_loading data structure to a mapping of table
+        names to sets of column names which are to be loaded. Returns the
+        dictionary.
+        """
+        columns = {}
+        self.deferred_to_data(columns, self.deferred_to_columns_cb)
+        return columns
+
+    def deferred_to_columns_cb(self, target, model, fields):
+        """
+        Callback used by deferred_to_columns(). The "target" parameter should
+        be a dictionary mapping table names to sets of column names.
+        """
+        table = model._meta.db_table
+        if table not in target:
+            target[table] = set()
+        for field in fields:
+            target[table].add(field.column)
+
     def get_columns(self, with_aliases=False):
         """
-        Return the list of columns to use in the select statement. If no
+        Returns the list of columns to use in the select statement. If no
         columns have been specified, returns all columns relating to fields in
         the model.
 
@@ -569,9 +667,14 @@
         else:
             col_aliases = set()
         if self.select:
+            only_load = self.deferred_to_columns()
             for col in self.select:
                 if isinstance(col, (list, tuple)):
-                    r = '%s.%s' % (qn(col[0]), qn(col[1]))
+                    alias, column = col
+                    table = self.alias_map[alias][TABLE_NAME]
+                    if table in only_load and col not in only_load[table]:
+                        continue
+                    r = '%s.%s' % (qn(alias), qn(column))
                     if with_aliases:
                         if col[1] in col_aliases:
                             c_alias = 'Col%d' % len(col_aliases)
@@ -641,6 +744,7 @@
         qn = self.quote_name_unless_alias
         qn2 = self.connection.ops.quote_name
         aliases = set()
+        only_load = self.deferred_to_columns()
         proxied_model = opts.proxy and opts.proxy_for_model or 0
         if start_alias:
             seen = {None: start_alias}
@@ -661,6 +765,9 @@
                 # aliases will have already been set up in pre_sql_setup(), so
                 # we can save time here.
                 alias = self.included_inherited_models[model]
+            table = self.alias_map[alias][TABLE_NAME]
+            if table in only_load and field.column not in only_load[table]:
+                continue
             if as_pairs:
                 result.append((alias, field.column))
                 continue
@@ -2014,6 +2121,70 @@
         if order_by:
             self.extra_order_by = order_by
 
+    def clear_deferred_loading(self):
+        """
+        Remove any fields from the deferred loading set.
+        """
+        self.deferred_loading = (set(), True)
+
+    def add_deferred_loading(self, field_names):
+        """
+        Add the given list of model field names to the set of fields to
+        exclude from loading from the database when automatic column selection
+        is done. The new field names are added to any existing field names that
+        are deferred (or removed from any existing field names that are marked
+        as the only ones for immediate loading).
+        """
+        # Fields on related models are stored in the literal double-underscore
+        # format, so that we can use a set datastructure. We do the foo__bar
+        # splitting and handling when computing the SQL column names (as part
+        # of get_columns()).
+        existing, defer = self.deferred_loading
+        if defer:
+            # Add to existing deferred names.
+            self.deferred_loading = existing.union(field_names), True
+        else:
+            # Remove names from the set of any existing "immediate load" names.
+            self.deferred_loading = existing.difference(field_names), False
+
+    def add_immediate_loading(self, field_names):
+        """
+        Add the given list of model field names to the set of fields to
+        retrieve when the SQL is executed ("immediate loading" fields). The
+        field names replace any existing immediate loading field names. If
+        there are field names already specified for deferred loading, those
+        names are removed from the new field_names before storing the new names
+        for immediate loading. (That is, immediate loading overrides any
+        existing immediate values, but respects existing deferrals.)
+        """
+        existing, defer = self.deferred_loading
+        if defer:
+            # Remove any existing deferred names from the current set before
+            # setting the new names.
+            self.deferred_loading = set(field_names).difference(existing), 
False
+        else:
+            # Replace any existing "immediate load" field names.
+            self.deferred_loading = set(field_names), False
+
+    def get_loaded_field_names(self):
+        """
+        If any fields are marked to be deferred, returns a dictionary mapping
+        models to a set of names in those fields that will be loaded. If a
+        model is not in the returned dictionary, none of its fields are
+        deferred.
+
+        If no fields are marked for deferral, returns an empty dictionary.
+        """
+        collection = {}
+        self.deferred_to_data(collection, self.get_loaded_field_names_cb)
+        return collection
+
+    def get_loaded_field_names_cb(self, target, model, fields):
+        """
+        Callback used by get_loaded_field_names().
+        """
+        target[model] = set([f.name for f in fields])
+
     def trim_extra_select(self, names):
         """
         Removes any aliases in the extra_select dictionary that aren't in
@@ -2180,3 +2351,13 @@
 
 signals.class_prepared.connect(setup_join_cache)
 
+def add_to_dict(data, key, value):
+    """
+    A helper function to add "value" to the set of values for "key", whether or
+    not "key" already exists.
+    """
+    if key in data:
+        data[key].add(value)
+    else:
+        data[key] = set([value])
+

Modified: django/trunk/docs/ref/models/querysets.txt
===================================================================
--- django/trunk/docs/ref/models/querysets.txt  2009-03-19 09:04:19 UTC (rev 
10089)
+++ django/trunk/docs/ref/models/querysets.txt  2009-03-19 09:06:04 UTC (rev 
10090)
@@ -768,6 +768,101 @@
 
         Entry.objects.extra(where=['headline=%s'], params=['Lennon'])
 
+``defer(*fields)``
+~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1
+
+In some complex data-modeling situations, your models might contain a lot of
+fields, some of which could contain a lot of data (for example, text fields),
+or require expensive processing to convert them to Python objects. If you are
+using the results of a queryset in some situation where you know you don't
+need those particular fields, you can tell Django not to retrieve them from
+the database.
+
+This is done by passing the names of the fields to not load to ``defer()``::
+
+    Entry.objects.defer("lede", "body")
+
+A queryset that has deferred fields will still return model instances. Each
+deferred field will be retrieved from the database if you access that field
+(one at a time, not all the deferred fields at once).
+
+You can make multiple calls to ``defer()``. Each call adds new fields to the
+deferred set::
+
+    # Defers both the body and lede fields.
+    Entry.objects.defer("body").filter(headline="Lennon").defer("lede")
+
+The order in which fields are added to the deferred set does not matter. Calling ``defer()`` with a field name that has already been deferred is harmless (the field will still be deferred).
+
+You can defer loading of fields in related models (if the related models are
+loaded via ``select_related()``) by using the standard double-underscore
+notation to separate related fields::
+
+    Blog.objects.select_related().defer("entry__lede", "entry__body")
+
+If you want to clear the set of deferred fields, pass ``None`` as a parameter
+to ``defer()``::
+
+    # Load all fields immediately.
+    my_queryset.defer(None)
+
+Some fields in a model won't be deferred, even if you ask for them. You can
+never defer the loading of the primary key. If you are using
+``select_related()`` to retrieve other models at the same time you shouldn't
+defer the loading of the field that connects from the primary model to the
+related one (at the moment, that doesn't raise an error, but it will
+eventually).
+
+.. note::
+
+    The ``defer()`` method (and its cousin, ``only()``, below) are only for
+    advanced use-cases. They provide an optimization for when you have
+    analyzed your queries closely and understand *exactly* what information
+    you need and have measured that the difference between returning the
+    fields you need and the full set of fields for the model will be
+    significant. When you are initially developing your applications, don't
+    bother using ``defer()``; leave it until your query construction has
+    settled down and you understand where the hot-points are.
+
+``only(*fields)``
+~~~~~~~~~~~~~~~~~~
+
+.. versionadded:: 1.1
+
+The ``only()`` method is more or less the opposite of ``defer()``. You
+call it with the fields that should *not* be deferred when retrieving a model.
+If you have a model where almost all the fields need to be deferred, using
+``only()`` to specify the complementary set of fields could result in simpler
+code.
+
+If you have a model with fields ``name``, ``age`` and ``biography``, the
+following two querysets are the same, in terms of deferred fields::
+
+    Person.objects.defer("age", "biography")
+    Person.objects.only("name")
+
+Whenever you call ``only()`` it *replaces* the set of fields to load
+immediately. The method's name is mnemonic: **only** those fields are loaded
+immediately; the remainder are deferred. Thus, successive calls to ``only()``
+result in only the final fields being considered::
+
+    # This will defer all fields except the headline.
+    Entry.objects.only("body", "lede").only("headline")
+
+Since ``defer()`` acts incrementally (adding fields to the deferred list), you
+can combine calls to ``only()`` and ``defer()`` and things will behave
+logically::
+
+    # Final result is that everything except "headline" is deferred.
+    Entry.objects.only("headline", "body").defer("body")
+
+    # Final result loads headline and body immediately (only() replaces any
+    # existing set of fields).
+    Entry.objects.defer("body").only("headline", "body")
+
+
 QuerySet methods that do not return QuerySets
 ---------------------------------------------
 

Added: django/trunk/tests/modeltests/defer/__init__.py
===================================================================

Added: django/trunk/tests/modeltests/defer/models.py
===================================================================
--- django/trunk/tests/modeltests/defer/models.py                               
(rev 0)
+++ django/trunk/tests/modeltests/defer/models.py       2009-03-19 09:06:04 UTC 
(rev 10090)
@@ -0,0 +1,89 @@
+"""
+Tests for defer() and only().
+"""
+
+from django.db import models
+from django.db.models.query_utils import DeferredAttribute
+
+class Secondary(models.Model):
+    first = models.CharField(max_length=50)
+    second = models.CharField(max_length=50)
+
+class Primary(models.Model):
+    name = models.CharField(max_length=50)
+    value = models.CharField(max_length=50)
+    related = models.ForeignKey(Secondary)
+
+def count_delayed_fields(obj, debug=False):
+    """
+    Returns the number of delayed attributes on the given model instance.
+    """
+    count = 0
+    for field in obj._meta.fields:
+        if isinstance(obj.__class__.__dict__.get(field.attname),
+                DeferredAttribute):
+            if debug:
+                print field.name, field.attname
+            count += 1
+    return count
+
+
+__test__ = {"API_TEST": """
+To all outward appearances, instances with deferred fields look the same as
+normal instances when we examine attribute values. Therefore we test for the
+number of deferred fields on returned instances (by poking at the internals),
+as a way to observe what is going on.
+
+>>> s1 = Secondary.objects.create(first="x1", second="y1")
+>>> p1 = Primary.objects.create(name="p1", value="xx", related=s1)
+
+>>> qs = Primary.objects.all()
+
+>>> count_delayed_fields(qs.defer('name')[0])
+1
+>>> count_delayed_fields(qs.only('name')[0])
+2
+>>> count_delayed_fields(qs.defer('related__first')[0])
+0
+>>> obj = qs.select_related().only('related__first')[0]
+>>> count_delayed_fields(obj)
+2
+>>> obj.related_id == s1.pk
+True
+>>> count_delayed_fields(qs.defer('name').extra(select={'a': 1})[0])
+1
+>>> count_delayed_fields(qs.extra(select={'a': 1}).defer('name')[0])
+1
+>>> count_delayed_fields(qs.defer('name').defer('value')[0])
+2
+>>> count_delayed_fields(qs.only('name').only('value')[0])
+2
+>>> count_delayed_fields(qs.only('name').defer('value')[0])
+2
+>>> count_delayed_fields(qs.only('name', 'value').defer('value')[0])
+2
+>>> count_delayed_fields(qs.defer('name').only('value')[0])
+2
+>>> obj = qs.only()[0]
+>>> count_delayed_fields(qs.defer(None)[0])
+0
+>>> count_delayed_fields(qs.only('name').defer(None)[0])
+0
+
+Using values() won't defer anything (you get the full list of dictionaries
+back), but it still works.
+>>> qs.defer('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 
'xx', 'related_id': s1.id}
+True
+>>> qs.only('name').values()[0] == {'id': p1.id, 'name': u'p1', 'value': 'xx', 
'related_id': s1.id}
+True
+
+Using defer() and only() with get() is also valid.
+>>> count_delayed_fields(qs.defer('name').get(pk=p1.pk))
+1
+>>> count_delayed_fields(qs.only('name').get(pk=p1.pk))
+2
+
+# KNOWN NOT TO WORK: >>> count_delayed_fields(qs.only('name').select_related('related')[0])
+# KNOWN NOT TO WORK >>> count_delayed_fields(qs.defer('related').select_related('related')[0])
+
+"""}

Modified: django/trunk/tests/regressiontests/queries/models.py
===================================================================
--- django/trunk/tests/regressiontests/queries/models.py        2009-03-19 
09:04:19 UTC (rev 10089)
+++ django/trunk/tests/regressiontests/queries/models.py        2009-03-19 
09:06:04 UTC (rev 10090)
@@ -890,6 +890,12 @@
 >>> query2.as_sql()[0] == query
 True
 
+Check pickling of deferred-loading querysets
+>>> qs = Item.objects.defer('name', 'creator')
+>>> q2 = pickle.loads(pickle.dumps(qs))
+>>> list(qs) == list(q2)
+True
+
 Bug #7277
 >>> n1.annotation_set.filter(Q(tag=t5) | Q(tag__children=t5) | 
 >>> Q(tag__children__children=t5))
 [<Annotation: a1>]


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To post to this group, send email to django-updates@googlegroups.com
To unsubscribe from this group, send email to 
django-updates+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/django-updates?hl=en
-~----------~----~----~----~------~----~------~--~---

[Changeset] r10090 - in django/trunk: django/db/models django/db/models/sql docs/ref/models tests/modeltests tests/modeltests/defer tests/regressiontests/queries

Reply via email to