Hi,

Django allows an iterator to be passed as response content when instantiating an
HttpResponse.  However, doing so causes problems with the following classes and
functions:

UpdateCacheMiddleware:
  Caches the response object using the configured cache backend, but most
  iterators cannot be pickled.
CommonMiddleware:
  Calculates a value for the ETag header, causing the iterator to be drained
  prematurely.
GzipMiddleware:
  Calculates the response content length, causing the iterator to be drained
  prematurely.
ConditionalGetMiddleware
  Calculates the response content length and sets the Content-Length header,
  causing the iterator to be drained prematurely.
patch_response_headers
  Calculates a value for the ETag header, causing the iterator to be drained
  prematurely.

Some of these problems are discussed in the following tickets:

#6027: FileWrapper iterator drained by GzipMiddleware before content can be 
returned
#6527: A bug in HttpResponse with iterators
#12214: never_cache decorator breaks HttpResponse with iterator content

Attached is a proof-of-concept patch illustrating how I'd like to tackle this
issue.

Roughly, my approach is as follows:

* Introduce a ResponseContent class with subclasses MutableResponseContent and
  IteratorResponseContent to limit operations on the response content depending
  on the type of the value provided (string type or iterator).
* Forbid premature draining of the content iterator via response.content by only
  evaluating the content iterator if accessed via iter(response) and raising an
  exception if it is accessed via response.content.
* Modify middleware/view decorator code that accesses response.content to check
  response.content_obj.is_readable() before trying to read the content.

This approach should be backwards compatible, with the exception that if a
middleware or view decorator does inappropriately access response.content, a
TypeError will be raised.  Previously, the content iterator would be prematurely
drained and an empty response would be returned.  Since the previous behavior
was not usable, it seems unlikely anyone would be depending on it.

I have a few questions:

* Does this approach make sense to others?  If so, I'll add a few tests and look
  at how the docs might need updating.
* Is this type of change too invasive for 1.2?
* Should I create a "master" ticket to deal with this issue and continue
  development work there?  The other tickets focus on specific symptoms of this
  problem rather than the root cause more generally.

Any comments are appreciated.

Thanks,
Forest
-- 
Forest Bond
http://www.alittletooquiet.net
http://www.pytagsfs.org
=== modified file 'django/http/__init__.py'
--- django/http/__init__.py	2010-01-23 23:13:00 +0000
+++ django/http/__init__.py	2010-02-15 20:24:31 +0000
@@ -1,5 +1,6 @@
 import os
 import re
+from cStringIO import StringIO
 from Cookie import BaseCookie, SimpleCookie, CookieError
 from pprint import pformat
 from urllib import urlencode
@@ -296,6 +297,107 @@
 class BadHeaderError(ValueError):
     pass
 
+def iter_string_chunked(s, chunk_size):
+    for i in range(0, len(s), chunk_size):
+        yield s[i:i+chunk_size]
+
+class ResponseContent(object):
+    charset = None
+
+    def __init__(self, value, charset):
+        self.charset = charset
+        self.set_value(value)
+
+    def __unicode__(self):
+        raise TypeError('unicode conversion not supported by %r' % type(self))
+
+    def __str__(self):
+        raise TypeError(
+          'byte string conversion not supported by %r' % type(self)
+        )
+
+    def set_value(self, value):
+        raise NotImplementedError()
+
+    def is_readable(self):
+        raise NotImplementedError()
+
+    def close(self):
+        pass
+
+    def write(self, data):
+        raise TypeError('file-like interface not supported by %r' % type(self))
+
+    def flush(self):
+        raise TypeError('file-like interface not supported by %r' % type(self))
+
+    def tell(self):
+        raise TypeError('file-like interface not supported by %r' % type(self))
+
+class MutableResponseContent(ResponseContent):
+    _buffer = None
+    _chunk_size = 102400
+
+    def __init__(self, *args, **kwargs):
+        self._buffer = StringIO()
+        super(MutableResponseContent, self).__init__(*args, **kwargs)
+
+    def __unicode__(self):
+        return self._buffer.getvalue().decode(self.charset)
+
+    def __str__(self):
+        return self._buffer.getvalue()
+
+    def __iter__(self):
+        return iter_string_chunked(str(self), self._chunk_size)
+
+    def set_value(self, value):
+        if not isinstance(value, basestring):
+            raise TypeError(type(value))
+        if isinstance(value, unicode):
+            value = value.encode(self.charset)
+        self._buffer.seek(0)
+        self._buffer.truncate()
+        self._buffer.write(value)
+
+    def is_readable(self):
+        return True
+
+    def write(self, data):
+        if isinstance(data, unicode):
+            data = data.encode(self.charset)
+        return self._buffer.write(data)
+
+    def flush(self):
+        return self._buffer.flush()
+
+    def tell(self):
+        return self._buffer.tell()
+
+class IteratorResponseContent(ResponseContent):
+    _iterator = None
+    _evaluated = None
+
+    def __iter__(self):
+        if self._evaluated:
+            raise AssertionError('iterator can only be evaluated once')
+        self._evaluated = True
+        for chunk in self._iterator:
+            if isinstance(chunk, unicode) and (self.charset is not None):
+                chunk = chunk.encode(self.charset)
+            yield chunk
+
+    def set_value(self, value):
+        if callable(getattr(value, 'next', None)):
+            self._iterator = value
+        elif callable(getattr(value, '__iter__', None)):
+            self._iterator = iter(value)
+        else:
+            raise TypeError(type(value))
+
+    def is_readable(self):
+        return False
+
 class HttpResponse(object):
     """A basic HTTP response, with content and dictionary-accessed headers."""
 
@@ -303,19 +405,20 @@
 
     def __init__(self, content='', mimetype=None, status=None,
             content_type=None):
+
         from django.conf import settings
-        self._charset = settings.DEFAULT_CHARSET
+
+        charset = settings.DEFAULT_CHARSET
+        try:
+            self.content_obj = MutableResponseContent(content, charset)
+        except TypeError:
+            self.content_obj = IteratorResponseContent(content, charset)
+
         if mimetype:
             content_type = mimetype     # For backwards compatibility
         if not content_type:
             content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
                     settings.DEFAULT_CHARSET)
-        if not isinstance(content, basestring) and hasattr(content, '__iter__'):
-            self._container = content
-            self._is_string = False
-        else:
-            self._container = [content]
-            self._is_string = True
         self.cookies = CompatCookie()
         if status:
             self.status_code = status
@@ -390,44 +493,44 @@
                         expires='Thu, 01-Jan-1970 00:00:00 GMT')
 
     def _get_content(self):
+        # XXX: I'd like to just do this, but I'm not sure if that would be
+        # backwards incompatible in a subtle way.
+        #return str(self.content_obj)
         if self.has_header('Content-Encoding'):
-            return ''.join(self._container)
-        return smart_str(''.join(self._container), self._charset)
+            return str(self.content_obj)
+        return smart_str(self.content_obj, self.content_obj.charset)
 
     def _set_content(self, value):
-        self._container = [value]
-        self._is_string = True
+        if isinstance(self.content_obj, MutableResponseContent):
+            self.content_obj.set_value(value)
+        else:
+            from django.conf import settings
+            charset = settings.DEFAULT_CHARSET
+            self.content_obj = MutableResponseContent(value, charset)
 
     content = property(_get_content, _set_content)
 
     def __iter__(self):
-        self._iterator = iter(self._container)
-        return self
+        return iter(self.content_obj)
 
     def next(self):
-        chunk = self._iterator.next()
-        if isinstance(chunk, unicode):
-            chunk = chunk.encode(self._charset)
-        return str(chunk)
+        if getattr(self, '_iterator', None) is None:
+            self._iterator = iter(self)
+        return self._iterator.next()
 
     def close(self):
-        if hasattr(self._container, 'close'):
-            self._container.close()
+        return self.content_obj.close()
 
     # The remaining methods partially implement the file-like object interface.
     # See http://docs.python.org/lib/bltin-file-objects.html
     def write(self, content):
-        if not self._is_string:
-            raise Exception("This %s instance is not writable" % self.__class__)
-        self._container.append(content)
+        return self.content_obj.write(content)
 
     def flush(self):
-        pass
+        return self.content_obj.flush()
 
     def tell(self):
-        if not self._is_string:
-            raise Exception("This %s instance cannot tell its position" % self.__class__)
-        return sum([len(chunk) for chunk in self._container])
+        return self.content_obj.tell()
 
 class HttpResponseRedirect(HttpResponse):
     status_code = 302

=== modified file 'django/middleware/cache.py'
--- django/middleware/cache.py	2008-09-30 02:58:09 +0000
+++ django/middleware/cache.py	2010-02-15 20:24:31 +0000
@@ -89,7 +89,7 @@
             # max-age was set to 0, don't bother caching.
             return response
         patch_response_headers(response, timeout)
-        if timeout:
+        if response.content_obj.is_readable() and timeout:
             cache_key = learn_cache_key(request, response, timeout, self.key_prefix)
             cache.set(cache_key, response, timeout)
         return response

=== modified file 'django/middleware/common.py'
--- django/middleware/common.py	2010-01-10 17:37:48 +0000
+++ django/middleware/common.py	2010-02-15 20:24:31 +0000
@@ -101,13 +101,17 @@
             if response.has_header('ETag'):
                 etag = response['ETag']
             else:
-                etag = '"%s"' % md5_constructor(response.content).hexdigest()
-            if response.status_code >= 200 and response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag:
-                cookies = response.cookies
-                response = http.HttpResponseNotModified()
-                response.cookies = cookies
-            else:
-                response['ETag'] = etag
+                if response.content_obj.is_readable():
+                    etag = '"%s"' % md5_constructor(response.content).hexdigest()
+                else:
+                    etag = None
+            if etag is not None:
+                if response.status_code >= 200 and response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag:
+                    cookies = response.cookies
+                    response = http.HttpResponseNotModified()
+                    response.cookies = cookies
+                else:
+                    response['ETag'] = etag
 
         return response
 

=== modified file 'django/middleware/csrf.py'
--- django/middleware/csrf.py	2009-10-27 20:31:20 +0000
+++ django/middleware/csrf.py	2010-02-15 20:24:31 +0000
@@ -202,6 +202,9 @@
         )
 
     def process_response(self, request, response):
+        if not response.content_obj.is_readable():
+            return response
+
         if getattr(response, 'csrf_exempt', False):
             return response
 
@@ -223,16 +226,20 @@
                 "' /></div>")
 
             # Modify any POST forms
-            response.content, n = _POST_FORM_RE.subn(add_csrf_field, response.content)
+            response.content, n = _POST_FORM_RE.subn(
+              add_csrf_field,
+              response.content,
+            )
             if n > 0:
                 # Content varies with the CSRF cookie, so set the Vary header.
                 patch_vary_headers(response, ('Cookie',))
 
                 # Since the content has been modified, any Etag will now be
                 # incorrect.  We could recalculate, but only if we assume that
-                # the Etag was set by CommonMiddleware. The safest thing is just
-                # to delete. See bug #9163
+                # the Etag was set by CommonMiddleware. The safest thing is
+                # just to delete. See bug #9163
                 del response['ETag']
+
         return response
 
 class CsrfMiddleware(object):

=== modified file 'django/middleware/gzip.py'
--- django/middleware/gzip.py	2009-04-12 03:14:23 +0000
+++ django/middleware/gzip.py	2010-02-15 20:24:31 +0000
@@ -12,6 +12,9 @@
     on the Accept-Encoding header.
     """
     def process_response(self, request, response):
+        if not response.content_obj.is_readable():
+            return response
+
         # It's not worth compressing non-OK or really short responses.
         if response.status_code != 200 or len(response.content) < 200:
             return response

=== modified file 'django/middleware/http.py'
--- django/middleware/http.py	2009-07-29 04:35:51 +0000
+++ django/middleware/http.py	2010-02-15 20:24:31 +0000
@@ -12,7 +12,8 @@
     def process_response(self, request, response):
         response['Date'] = http_date()
         if not response.has_header('Content-Length'):
-            response['Content-Length'] = str(len(response.content))
+            if response.content_obj.is_readable():
+                response['Content-Length'] = str(len(response.content))
 
         if response.has_header('ETag'):
             if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None)
@@ -48,4 +49,4 @@
         warnings.warn("SetRemoteAddrFromForwardedFor has been removed. "
                       "See the Django 1.1 release notes for details.",
                       category=DeprecationWarning)
-        raise MiddlewareNotUsed()
\ No newline at end of file
+        raise MiddlewareNotUsed()

=== modified file 'django/test/testcases.py'
--- django/test/testcases.py	2010-02-13 11:59:09 +0000
+++ django/test/testcases.py	2010-02-15 18:57:11 +0000
@@ -359,7 +359,7 @@
         self.assertEqual(response.status_code, status_code,
             msg_prefix + "Couldn't retrieve page: Response code was %d"
             " (expected %d)" % (response.status_code, status_code))
-        text = smart_str(text, response._charset)
+        text = smart_str(text, response.content_obj.charset)
         real_count = response.content.count(text)
         if count is not None:
             self.assertEqual(real_count, count,
@@ -382,7 +382,7 @@
         self.assertEqual(response.status_code, status_code,
             msg_prefix + "Couldn't retrieve page: Response code was %d"
             " (expected %d)" % (response.status_code, status_code))
-        text = smart_str(text, response._charset)
+        text = smart_str(text, response.content_obj.charset)
         self.assertEqual(response.content.count(text), 0,
             msg_prefix + "Response should not contain '%s'" % text)
 

=== modified file 'django/utils/cache.py'
--- django/utils/cache.py	2009-04-01 17:19:32 +0000
+++ django/utils/cache.py	2010-02-15 20:24:31 +0000
@@ -105,7 +105,8 @@
     if cache_timeout < 0:
         cache_timeout = 0 # Can't have max-age negative
     if not response.has_header('ETag'):
-        response['ETag'] = '"%s"' % md5_constructor(response.content).hexdigest()
+        if response.content_obj.is_readable():
+            response['ETag'] = '"%s"' % md5_constructor(response.content).hexdigest()
     if not response.has_header('Last-Modified'):
         response['Last-Modified'] = http_date()
     if not response.has_header('Expires'):

=== modified file 'tests/regressiontests/admin_views/tests.py'
--- tests/regressiontests/admin_views/tests.py	2010-02-01 14:14:56 +0000
+++ tests/regressiontests/admin_views/tests.py	2010-02-15 18:57:11 +0000
@@ -1370,7 +1370,7 @@
             "pictures-1-image": "",
         }
         response = self.client.post('/test_admin/%s/admin_views/gallery/1/' % self.urlbit, post_data)
-        self.failUnless(response._container[0].find("Currently:") > -1)
+        self.failUnless(response.content.find("Currently:") > -1)
 
 
 class AdminInlineTests(TestCase):

Attachment: signature.asc
Description: Digital signature

Reply via email to