Hi, Django allows an iterator to be passed as response content when instantiating an HttpResponse. However, doing so causes problems with the following classes and functions:
UpdateCacheMiddleware: Caches the response object using the configured cache backend, but most iterators cannot be pickled. CommonMiddleware: Calculates a value for the ETag header, causing the iterator to be drained prematurely. GzipMiddleware: Calculates the response content length, causing the iterator to be drained prematurely. ConditionalGetMiddleware Calculates the response content length and sets the Content-Length header, causing the iterator to be drained prematurely. patch_response_headers Calculates a value for the ETag header, causing the iterator to be drained prematurely. Some of these problems are discussed in the following tickets: #6027: FileWrapper iterator drained by GzipMiddleware before content can be returned #6527: A bug in HttpResponse with iterators #12214: never_cache decorator breaks HttpResponse with iterator content Attached is a proof-of-concept patch illustrating how I'd like to tackle this issue. Roughly, my approach is as follows: * Introduce a ResponseContent class with subclasses MutableResponseContent and IteratorResponseContent to limit operations on the response content depending on the type of the value provided (string type or iterator). * Forbid premature draining of the content iterator via response.content by only evaluating the content iterator if accessed via iter(response) and raising an exception if it is accessed via response.content. * Modify middleware/view decorator code that accesses response.content to check response.content_obj.is_readable() before trying to read the content. This approach should be backwards compatible, with the exception that if a middleware or view decorator does inappropriately access response.content, a TypeError will be raised. Previously, the content iterator would be prematurely drained and an empty response would be returned. Since the previous behavior was not usable, it seems unlikely anyone would be depending on it. I have a few questions: * Does this approach make sense to others? If so, I'll add a few tests and look at how the docs might need updating. * Is this type of change too invasive for 1.2? * Should I create a "master" ticket to deal with this issue and continue development work there? The other tickets focus on specific symptoms of this problem rather than the root cause more generally. Any comments are appreciated. Thanks, Forest -- Forest Bond http://www.alittletooquiet.net http://www.pytagsfs.org
=== modified file 'django/http/__init__.py' --- django/http/__init__.py 2010-01-23 23:13:00 +0000 +++ django/http/__init__.py 2010-02-15 20:24:31 +0000 @@ -1,5 +1,6 @@ import os import re +from cStringIO import StringIO from Cookie import BaseCookie, SimpleCookie, CookieError from pprint import pformat from urllib import urlencode @@ -296,6 +297,107 @@ class BadHeaderError(ValueError): pass +def iter_string_chunked(s, chunk_size): + for i in range(0, len(s), chunk_size): + yield s[i:i+chunk_size] + +class ResponseContent(object): + charset = None + + def __init__(self, value, charset): + self.charset = charset + self.set_value(value) + + def __unicode__(self): + raise TypeError('unicode conversion not supported by %r' % type(self)) + + def __str__(self): + raise TypeError( + 'byte string conversion not supported by %r' % type(self) + ) + + def set_value(self, value): + raise NotImplementedError() + + def is_readable(self): + raise NotImplementedError() + + def close(self): + pass + + def write(self, data): + raise TypeError('file-like interface not supported by %r' % type(self)) + + def flush(self): + raise TypeError('file-like interface not supported by %r' % type(self)) + + def tell(self): + raise TypeError('file-like interface not supported by %r' % type(self)) + +class MutableResponseContent(ResponseContent): + _buffer = None + _chunk_size = 102400 + + def __init__(self, *args, **kwargs): + self._buffer = StringIO() + super(MutableResponseContent, self).__init__(*args, **kwargs) + + def __unicode__(self): + return self._buffer.getvalue().decode(self.charset) + + def __str__(self): + return self._buffer.getvalue() + + def __iter__(self): + return iter_string_chunked(str(self), self._chunk_size) + + def set_value(self, value): + if not isinstance(value, basestring): + raise TypeError(type(value)) + if isinstance(value, unicode): + value = value.encode(self.charset) + self._buffer.seek(0) + self._buffer.truncate() + self._buffer.write(value) + + def is_readable(self): + return True + + def write(self, data): + if isinstance(data, unicode): + data = data.encode(self.charset) + return self._buffer.write(data) + + def flush(self): + return self._buffer.flush() + + def tell(self): + return self._buffer.tell() + +class IteratorResponseContent(ResponseContent): + _iterator = None + _evaluated = None + + def __iter__(self): + if self._evaluated: + raise AssertionError('iterator can only be evaluated once') + self._evaluated = True + for chunk in self._iterator: + if isinstance(chunk, unicode) and (self.charset is not None): + chunk = chunk.encode(self.charset) + yield chunk + + def set_value(self, value): + if callable(getattr(value, 'next', None)): + self._iterator = value + elif callable(getattr(value, '__iter__', None)): + self._iterator = iter(value) + else: + raise TypeError(type(value)) + + def is_readable(self): + return False + class HttpResponse(object): """A basic HTTP response, with content and dictionary-accessed headers.""" @@ -303,19 +405,20 @@ def __init__(self, content='', mimetype=None, status=None, content_type=None): + from django.conf import settings - self._charset = settings.DEFAULT_CHARSET + + charset = settings.DEFAULT_CHARSET + try: + self.content_obj = MutableResponseContent(content, charset) + except TypeError: + self.content_obj = IteratorResponseContent(content, charset) + if mimetype: content_type = mimetype # For backwards compatibility if not content_type: content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE, settings.DEFAULT_CHARSET) - if not isinstance(content, basestring) and hasattr(content, '__iter__'): - self._container = content - self._is_string = False - else: - self._container = [content] - self._is_string = True self.cookies = CompatCookie() if status: self.status_code = status @@ -390,44 +493,44 @@ expires='Thu, 01-Jan-1970 00:00:00 GMT') def _get_content(self): + # XXX: I'd like to just do this, but I'm not sure if that would be + # backwards incompatible in a subtle way. + #return str(self.content_obj) if self.has_header('Content-Encoding'): - return ''.join(self._container) - return smart_str(''.join(self._container), self._charset) + return str(self.content_obj) + return smart_str(self.content_obj, self.content_obj.charset) def _set_content(self, value): - self._container = [value] - self._is_string = True + if isinstance(self.content_obj, MutableResponseContent): + self.content_obj.set_value(value) + else: + from django.conf import settings + charset = settings.DEFAULT_CHARSET + self.content_obj = MutableResponseContent(value, charset) content = property(_get_content, _set_content) def __iter__(self): - self._iterator = iter(self._container) - return self + return iter(self.content_obj) def next(self): - chunk = self._iterator.next() - if isinstance(chunk, unicode): - chunk = chunk.encode(self._charset) - return str(chunk) + if getattr(self, '_iterator', None) is None: + self._iterator = iter(self) + return self._iterator.next() def close(self): - if hasattr(self._container, 'close'): - self._container.close() + return self.content_obj.close() # The remaining methods partially implement the file-like object interface. # See http://docs.python.org/lib/bltin-file-objects.html def write(self, content): - if not self._is_string: - raise Exception("This %s instance is not writable" % self.__class__) - self._container.append(content) + return self.content_obj.write(content) def flush(self): - pass + return self.content_obj.flush() def tell(self): - if not self._is_string: - raise Exception("This %s instance cannot tell its position" % self.__class__) - return sum([len(chunk) for chunk in self._container]) + return self.content_obj.tell() class HttpResponseRedirect(HttpResponse): status_code = 302 === modified file 'django/middleware/cache.py' --- django/middleware/cache.py 2008-09-30 02:58:09 +0000 +++ django/middleware/cache.py 2010-02-15 20:24:31 +0000 @@ -89,7 +89,7 @@ # max-age was set to 0, don't bother caching. return response patch_response_headers(response, timeout) - if timeout: + if response.content_obj.is_readable() and timeout: cache_key = learn_cache_key(request, response, timeout, self.key_prefix) cache.set(cache_key, response, timeout) return response === modified file 'django/middleware/common.py' --- django/middleware/common.py 2010-01-10 17:37:48 +0000 +++ django/middleware/common.py 2010-02-15 20:24:31 +0000 @@ -101,13 +101,17 @@ if response.has_header('ETag'): etag = response['ETag'] else: - etag = '"%s"' % md5_constructor(response.content).hexdigest() - if response.status_code >= 200 and response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag: - cookies = response.cookies - response = http.HttpResponseNotModified() - response.cookies = cookies - else: - response['ETag'] = etag + if response.content_obj.is_readable(): + etag = '"%s"' % md5_constructor(response.content).hexdigest() + else: + etag = None + if etag is not None: + if response.status_code >= 200 and response.status_code < 300 and request.META.get('HTTP_IF_NONE_MATCH') == etag: + cookies = response.cookies + response = http.HttpResponseNotModified() + response.cookies = cookies + else: + response['ETag'] = etag return response === modified file 'django/middleware/csrf.py' --- django/middleware/csrf.py 2009-10-27 20:31:20 +0000 +++ django/middleware/csrf.py 2010-02-15 20:24:31 +0000 @@ -202,6 +202,9 @@ ) def process_response(self, request, response): + if not response.content_obj.is_readable(): + return response + if getattr(response, 'csrf_exempt', False): return response @@ -223,16 +226,20 @@ "' /></div>") # Modify any POST forms - response.content, n = _POST_FORM_RE.subn(add_csrf_field, response.content) + response.content, n = _POST_FORM_RE.subn( + add_csrf_field, + response.content, + ) if n > 0: # Content varies with the CSRF cookie, so set the Vary header. patch_vary_headers(response, ('Cookie',)) # Since the content has been modified, any Etag will now be # incorrect. We could recalculate, but only if we assume that - # the Etag was set by CommonMiddleware. The safest thing is just - # to delete. See bug #9163 + # the Etag was set by CommonMiddleware. The safest thing is + # just to delete. See bug #9163 del response['ETag'] + return response class CsrfMiddleware(object): === modified file 'django/middleware/gzip.py' --- django/middleware/gzip.py 2009-04-12 03:14:23 +0000 +++ django/middleware/gzip.py 2010-02-15 20:24:31 +0000 @@ -12,6 +12,9 @@ on the Accept-Encoding header. """ def process_response(self, request, response): + if not response.content_obj.is_readable(): + return response + # It's not worth compressing non-OK or really short responses. if response.status_code != 200 or len(response.content) < 200: return response === modified file 'django/middleware/http.py' --- django/middleware/http.py 2009-07-29 04:35:51 +0000 +++ django/middleware/http.py 2010-02-15 20:24:31 +0000 @@ -12,7 +12,8 @@ def process_response(self, request, response): response['Date'] = http_date() if not response.has_header('Content-Length'): - response['Content-Length'] = str(len(response.content)) + if response.content_obj.is_readable(): + response['Content-Length'] = str(len(response.content)) if response.has_header('ETag'): if_none_match = request.META.get('HTTP_IF_NONE_MATCH', None) @@ -48,4 +49,4 @@ warnings.warn("SetRemoteAddrFromForwardedFor has been removed. " "See the Django 1.1 release notes for details.", category=DeprecationWarning) - raise MiddlewareNotUsed() \ No newline at end of file + raise MiddlewareNotUsed() === modified file 'django/test/testcases.py' --- django/test/testcases.py 2010-02-13 11:59:09 +0000 +++ django/test/testcases.py 2010-02-15 18:57:11 +0000 @@ -359,7 +359,7 @@ self.assertEqual(response.status_code, status_code, msg_prefix + "Couldn't retrieve page: Response code was %d" " (expected %d)" % (response.status_code, status_code)) - text = smart_str(text, response._charset) + text = smart_str(text, response.content_obj.charset) real_count = response.content.count(text) if count is not None: self.assertEqual(real_count, count, @@ -382,7 +382,7 @@ self.assertEqual(response.status_code, status_code, msg_prefix + "Couldn't retrieve page: Response code was %d" " (expected %d)" % (response.status_code, status_code)) - text = smart_str(text, response._charset) + text = smart_str(text, response.content_obj.charset) self.assertEqual(response.content.count(text), 0, msg_prefix + "Response should not contain '%s'" % text) === modified file 'django/utils/cache.py' --- django/utils/cache.py 2009-04-01 17:19:32 +0000 +++ django/utils/cache.py 2010-02-15 20:24:31 +0000 @@ -105,7 +105,8 @@ if cache_timeout < 0: cache_timeout = 0 # Can't have max-age negative if not response.has_header('ETag'): - response['ETag'] = '"%s"' % md5_constructor(response.content).hexdigest() + if response.content_obj.is_readable(): + response['ETag'] = '"%s"' % md5_constructor(response.content).hexdigest() if not response.has_header('Last-Modified'): response['Last-Modified'] = http_date() if not response.has_header('Expires'): === modified file 'tests/regressiontests/admin_views/tests.py' --- tests/regressiontests/admin_views/tests.py 2010-02-01 14:14:56 +0000 +++ tests/regressiontests/admin_views/tests.py 2010-02-15 18:57:11 +0000 @@ -1370,7 +1370,7 @@ "pictures-1-image": "", } response = self.client.post('/test_admin/%s/admin_views/gallery/1/' % self.urlbit, post_data) - self.failUnless(response._container[0].find("Currently:") > -1) + self.failUnless(response.content.find("Currently:") > -1) class AdminInlineTests(TestCase):
signature.asc
Description: Digital signature