Author: ccahoon
Date: 2009-06-17 15:42:15 -0500 (Wed, 17 Jun 2009)
New Revision: 11030
Modified: django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py Log: [soc2009/http-wsgi-improvements] Added more tests for #10190, changed logic to pass them. http.charsets.determine_charset now takes the accept_charset header instead of the request. Passes the test suite, including the extensive tests on HttpResponse's detection of Accept-Charset and finding the codec from content_type. However, it does not test that the codec encodes properly. Modified: django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py =================================================================== --- django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py 2009-06-17 20:02:17 UTC (rev 11029) +++ django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py 2009-06-17 20:42:15 UTC (rev 11030) @@ -13,7 +13,7 @@ from django.utils.datastructures import MultiValueDict, ImmutableList from django.utils.encoding import smart_str, iri_to_uri, force_unicode from django.http.multipartparser import MultiPartParser -from django.http.charsets import determine_charset +from django.http.charsets import determine_charset, get_codec from django.conf import settings from django.core.files import uploadhandler from utils import * @@ -273,13 +273,20 @@ status_code = 200 def __init__(self, content='', mimetype=None, status=None, - content_type=None, origin_request=None): + content_type=None, request=None): from django.conf import settings self._charset = settings.DEFAULT_CHARSET + accept_charset = None if mimetype: content_type = mimetype # 
Mimetype is an alias for content-type - if origin_request or content_type: - self._charset, self._codec = determine_charset(content_type, origin_request) + if request: + accept_charset = request.META.get("ACCEPT_CHARSET") + if accept_charset or content_type: + charset, codec = determine_charset(content_type, accept_charset) + if charset: + self._charset = charset + if codec: + self._codec = codec if not content_type: content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE, self._charset) @@ -365,7 +372,10 @@ def _get_content(self): if self.has_header('Content-Encoding'): return ''.join(self._container) - return smart_str(''.join(self._container), self._charset) + + if not self._codec: + self._codec = get_codec(self._charset) + return smart_str(''.join(self._container), self._codec.name) def _set_content(self, value): self._container = [value] @@ -379,8 +389,10 @@ def next(self): chunk = self._iterator.next() + if not self._codec: + self._codec = get_codec(self._charset) if isinstance(chunk, unicode): - chunk = chunk.encode(self._charset) + chunk = chunk.encode(self._codec.name) return str(chunk) def close(self): Modified: django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py =================================================================== --- django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py 2009-06-17 20:02:17 UTC (rev 11029) +++ django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py 2009-06-17 20:42:15 UTC (rev 11030) @@ -252,7 +252,7 @@ CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') ACCEPT_CHARSET_RE = re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?') -def determine_charset(content_type, request): +def determine_charset(content_type, accept_charset_header): """ Searches request headers from clients and mimetype settings (which may be set by users) for indicators of which charset and encoding the response should use. 
@@ -270,7 +270,6 @@ """ codec = None charset = None - # Attempt to get the codec from a content-type, and verify that the charset is valid. if content_type: match = CONTENT_TYPE_RE.match(content_type) @@ -279,14 +278,19 @@ codec = get_codec(charset) if not codec: # Unsupported charset # we should throw an exception here - print "No CODEC ON MIMETYPE" + # print "No CODEC ON MIMETYPE" + pass + # If we don't match a content-type header WITH charset, we give the default + else: + charset = settings.DEFAULT_CHARSET + codec = get_codec(settings.DEFAULT_CHARSET) # Handle Accept-Charset (which we only do if we do not deal with content_type). else: - if request and "ACCEPT_CHARSET" in request.META: + if accept_charset_header: # Get list of matches for Accepted-Charsets. # [{ charset : q }, { charset : q }] - match_iterator = ACCEPT_CHARSET_RE.finditer(request.META["ACCEPT_CHARSET"]) + match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header) accept_charset = [m.groupdict() for m in match_iterator] else: accept_charset = [] # use settings.DEFAULT_CHARSET Modified: django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py =================================================================== --- django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py 2009-06-17 20:02:17 UTC (rev 11029) +++ django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py 2009-06-17 20:42:15 UTC (rev 11030) @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. 
Modified: django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py =================================================================== --- django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py 2009-06-17 20:02:17 UTC (rev 11029) +++ django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py 2009-06-17 20:42:15 UTC (rev 11030) @@ -1,10 +1,11 @@ +import re + from django.test import Client, TestCase -import re from django.conf import settings +from django.http.charsets import determine_charset, get_codec + CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?') - - def get_charset(response): match = CONTENT_TYPE_RE.match(response.get("content-type","")) if match: @@ -14,6 +15,7 @@ return charset class ClientTest(TestCase): + urls = 'regressiontests.charsets.urls' def test_good_accept_charset(self): "Use Accept-Charset" @@ -21,40 +23,65 @@ # anyway. response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii,utf-8;q=0") + + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "ascii") - + + def test_good_accept_charset2(self): # us is an alias for ascii response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.9") + + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) + + def test_good_accept_charset3(self): + response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7") - response = self.client.post('/accept_charset/', ACCEPT_CHARSET="us;q=0.8,*;q=0.7") + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "us") + + def test_good_accept_charset4(self): + response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9") - response = self.client.post('/accept_charset/', ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9") + self.assertEqual(response.status_code, 200) 
self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET) + + def test_good_accept_charset5(self): + response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0") - response = self.client.post('/accept_charset/', ACCEPT_CHARSET="utf-8;q=0") + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "ISO-8859-1") - + def test_bad_accept_charset(self): "Do not use a malformed Accept-Charset" # The data is ignored, but let's check it doesn't crash the system # anyway. response = self.client.post('/accept_charset/', ACCEPT_CHARSET="this_is_junk") + + self.assertEqual(response.status_code, 200) self.assertEqual(get_charset(response), "utf-8") def test_good_content_type(self): - "Use content-type" + "Use good content-type" # The data is ignored, but let's check it doesn't crash the system # anyway. response = self.client.post('/good_content_type/') - self.assertEqual(get_charset(response), "us") + self.assertEqual(response.status_code, 200) def test_bad_content_type(self): - "Use content-type" - # The data is ignored, but let's check it doesn't crash the system - # anyway. 
+ "Use bad content-type" - response = self.client.post('/bad_content_type/') \ No newline at end of file + response = self.client.post('/bad_content_type/') + self.assertEqual(response.status_code, 200) + self.assertEqual(get_codec(get_charset(response)), None) + + def test_content_type_no_charset(self): + response = self.client.post('/content_type_no_charset/') + self.assertEqual(get_charset(response), None) + + def test_determine_charset(self): + content_type, codec = determine_charset("", "utf-8;q=0.8,*;q=0.9") + self.assertEqual(codec, get_codec("ISO-8859-1")) + \ No newline at end of file Modified: django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py =================================================================== --- django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py 2009-06-17 20:02:17 UTC (rev 11029) +++ django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py 2009-06-17 20:42:15 UTC (rev 11030) @@ -1,9 +1,22 @@ from django.conf.urls.defaults import * - import views +# Uncomment the next two lines to enable the admin: +# from django.contrib import admin +# admin.autodiscover() + urlpatterns = patterns('', + # Example: + # (r'^tutu/', include('tutu.foo.urls')), + + # Uncomment the admin/doc line below and add 'django.contrib.admindocs' + # to INSTALLED_APPS to enable admin documentation: + # (r'^admin/doc/', include('django.contrib.admindocs.urls')), + + # Uncomment the next line to enable the admin: + # (r'^admin/', include(admin.site.urls)), (r'^accept_charset/', views.accept_charset), (r'^good_content_type/', views.good_content_type), (r'^bad_content_type/', views.bad_content_type), + (r'^content_type_no_charset/', views.content_type_no_charset), ) Modified: django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py =================================================================== --- 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py 2009-06-17 20:02:17 UTC (rev 11029) +++ django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py 2009-06-17 20:42:15 UTC (rev 11030) @@ -2,10 +2,16 @@ from django.shortcuts import render_to_response def accept_charset(request): - return HttpResponse("ASCII.", origin_request=request) + return HttpResponse("ASCII.", request=request) def good_content_type(request): return HttpResponse("ASCII.", content_type="text/html; charset=us") def bad_content_type(request): - return HttpResponse("ASCII.", content_type="text/html; charset=this_should_be_junk") + return HttpResponse("UTF-8", content_type="text/html; charset=this_should_be_junk") + +def content_type_no_charset(request): + return HttpResponse("UTF-8", content_type="text/html") + +def encode_response(request): + return HttpResponse(u"\ue863", content_type="text/html; charset=GBK") \ No newline at end of file --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Django updates" group. To post to this group, send email to django-updates@googlegroups.com To unsubscribe from this group, send email to django-updates+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/django-updates?hl=en -~----------~----~----~----~------~----~------~--~---