charsets

noreply Wed, 17 Jun 2009 13:42:23 -0700

Author: ccahoon
Date: 2009-06-17 15:42:15 -0500 (Wed, 17 Jun 2009)
New Revision: 11030


Modified:
   django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py
   django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py
   
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py
   
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py
   
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py
   
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py
Log:
[soc2009/http-wsgi-improvements] Added more tests for #10190 and changed the 
logic so that they pass. http.charsets.determine_charset now takes the value 
of the Accept-Charset header instead of the request.

Passes the test suite, including the extensive tests of HttpResponse's 
detection of Accept-Charset and of finding the codec from content_type. 
However, the tests do not yet verify that the codec encodes content properly.

Modified: django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py
===================================================================
--- django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py      
2009-06-17 20:02:17 UTC (rev 11029)
+++ django/branches/soc2009/http-wsgi-improvements/django/http/__init__.py      
2009-06-17 20:42:15 UTC (rev 11030)
@@ -13,7 +13,7 @@
 from django.utils.datastructures import MultiValueDict, ImmutableList
 from django.utils.encoding import smart_str, iri_to_uri, force_unicode
 from django.http.multipartparser import MultiPartParser
-from django.http.charsets import determine_charset
+from django.http.charsets import determine_charset, get_codec
 from django.conf import settings
 from django.core.files import uploadhandler
 from utils import *
@@ -273,13 +273,20 @@
     status_code = 200
 
     def __init__(self, content='', mimetype=None, status=None,
-            content_type=None, origin_request=None):
+            content_type=None, request=None):
         from django.conf import settings
         self._charset = settings.DEFAULT_CHARSET
+        accept_charset = None
         if mimetype:
             content_type = mimetype     # Mimetype is an alias for 
content-type 
-        if origin_request or content_type:
-           self._charset, self._codec = determine_charset(content_type, 
origin_request)
+        if request:
+            accept_charset = request.META.get("ACCEPT_CHARSET")
+        if accept_charset or content_type:
+            charset, codec = determine_charset(content_type, accept_charset)
+            if charset:
+                self._charset = charset
+            if codec:
+                self._codec = codec
         if not content_type:
             content_type = "%s; charset=%s" % (settings.DEFAULT_CONTENT_TYPE,
                     self._charset)
@@ -365,7 +372,10 @@
     def _get_content(self):
         if self.has_header('Content-Encoding'):
             return ''.join(self._container)
-        return smart_str(''.join(self._container), self._charset)
+        
+        if not self._codec:
+            self._codec = get_codec(self._charset)
+        return smart_str(''.join(self._container), self._codec.name)
 
     def _set_content(self, value):
         self._container = [value]
@@ -379,8 +389,10 @@
 
     def next(self):
         chunk = self._iterator.next()
+        if not self._codec:
+            self._codec = get_codec(self._charset)
         if isinstance(chunk, unicode):
-            chunk = chunk.encode(self._charset)
+            chunk = chunk.encode(self._codec.name)
         return str(chunk)
 
     def close(self):

Modified: django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py
===================================================================
--- django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py      
2009-06-17 20:02:17 UTC (rev 11029)
+++ django/branches/soc2009/http-wsgi-improvements/django/http/charsets.py      
2009-06-17 20:42:15 UTC (rev 11030)
@@ -252,7 +252,7 @@
 
 CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
 ACCEPT_CHARSET_RE = 
re.compile('(?P<charset>([\w\d-]+)|(\*))(;q=(?P<q>[01](\.\d{1,3})?))?,?')
-def determine_charset(content_type, request):
+def determine_charset(content_type, accept_charset_header):
     """
     Searches request headers from clients and mimetype settings (which may be 
set 
     by users) for indicators of which charset and encoding the response should 
use.
@@ -270,7 +270,6 @@
     """
     codec = None
     charset = None
-    
     # Attempt to get the codec from a content-type, and verify that the 
charset is valid.
     if content_type:
         match = CONTENT_TYPE_RE.match(content_type)
@@ -279,14 +278,19 @@
             codec = get_codec(charset)
             if not codec:   # Unsupported charset
                 # we should throw an exception here
-                print "No CODEC ON MIMETYPE"
+                # print "No CODEC ON MIMETYPE"
+                pass
+        # If we don't match a content-type header WITH charset, we give the 
default
+        else:
+            charset = settings.DEFAULT_CHARSET
+            codec = get_codec(settings.DEFAULT_CHARSET)
     
     # Handle Accept-Charset (which we only do if we do not deal with 
content_type).
     else:
-        if request and "ACCEPT_CHARSET" in request.META:
+        if accept_charset_header:
             # Get list of matches for Accepted-Charsets.
             # [{ charset : q }, { charset : q }]
-            match_iterator = 
ACCEPT_CHARSET_RE.finditer(request.META["ACCEPT_CHARSET"])
+            match_iterator = ACCEPT_CHARSET_RE.finditer(accept_charset_header)
             accept_charset = [m.groupdict() for m in match_iterator]
         else:
             accept_charset = []    # use settings.DEFAULT_CHARSET

Modified: 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py
===================================================================
--- 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py
     2009-06-17 20:02:17 UTC (rev 11029)
+++ 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/models.py
     2009-06-17 20:42:15 UTC (rev 11030)
@@ -0,0 +1,3 @@
+from django.db import models
+
+# Create your models here.

Modified: 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py
===================================================================
--- 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py
      2009-06-17 20:02:17 UTC (rev 11029)
+++ 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/tests.py
      2009-06-17 20:42:15 UTC (rev 11030)
@@ -1,10 +1,11 @@
+import re
+
 from django.test import Client, TestCase
-import re
 from django.conf import settings
+from django.http.charsets import determine_charset, get_codec
 
+
 CONTENT_TYPE_RE = re.compile('.*; charset=([\w\d-]+);?')
-
-
 def get_charset(response):
     match = CONTENT_TYPE_RE.match(response.get("content-type",""))
     if match:
@@ -14,6 +15,7 @@
     return charset
 
 class ClientTest(TestCase):
+    urls = 'regressiontests.charsets.urls'
     
     def test_good_accept_charset(self):
         "Use Accept-Charset"
@@ -21,40 +23,65 @@
         # anyway.
         
         response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="ascii,utf-8;q=0")
+        
+        self.assertEqual(response.status_code, 200)
         self.assertEqual(get_charset(response), "ascii")
-         
+    
+    def test_good_accept_charset2(self):
         # us is an alias for ascii
         response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="us;q=0.8,*;q=0.9")
+        
+        self.assertEqual(response.status_code, 200)
         self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
+    
+    def test_good_accept_charset3(self):     
+        response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="us;q=0.8,*;q=0.7")
         
-        response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="us;q=0.8,*;q=0.7")
+        self.assertEqual(response.status_code, 200)
         self.assertEqual(get_charset(response), "us")
+    
+    def test_good_accept_charset4(self):
+        response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9")
         
-        response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="ascii;q=0.89,utf-8;q=.9")
+        self.assertEqual(response.status_code, 200)
         self.assertEqual(get_charset(response), settings.DEFAULT_CHARSET)
+    
+    def test_good_accept_charset5(self):    
+        response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="utf-8;q=0")
         
-        response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="utf-8;q=0")
+        self.assertEqual(response.status_code, 200)
         self.assertEqual(get_charset(response), "ISO-8859-1")  
-    
+        
     def test_bad_accept_charset(self):
         "Do not use a malformed Accept-Charset"
         # The data is ignored, but let's check it doesn't crash the system
         # anyway.
         
         response = self.client.post('/accept_charset/', 
ACCEPT_CHARSET="this_is_junk")
+        
+        self.assertEqual(response.status_code, 200)
         self.assertEqual(get_charset(response), "utf-8")
         
     def test_good_content_type(self):
-        "Use content-type"
+        "Use good content-type"
         # The data is ignored, but let's check it doesn't crash the system
         # anyway.
         
         response = self.client.post('/good_content_type/')
-        self.assertEqual(get_charset(response), "us")
+        self.assertEqual(response.status_code, 200)
         
     def test_bad_content_type(self):
-        "Use content-type"
-        # The data is ignored, but let's check it doesn't crash the system
-        # anyway.
+        "Use bad content-type"
         
-        response = self.client.post('/bad_content_type/')
\ No newline at end of file
+        response = self.client.post('/bad_content_type/')
+        self.assertEqual(response.status_code, 200)
+        self.assertEqual(get_codec(get_charset(response)), None)
+    
+    def test_content_type_no_charset(self):
+        response = self.client.post('/content_type_no_charset/')
+        self.assertEqual(get_charset(response), None)
+    
+    def test_determine_charset(self):
+        content_type, codec = determine_charset("", "utf-8;q=0.8,*;q=0.9")
+        self.assertEqual(codec, get_codec("ISO-8859-1"))
+        
\ No newline at end of file

Modified: 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py
===================================================================
--- 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py
       2009-06-17 20:02:17 UTC (rev 11029)
+++ 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/urls.py
       2009-06-17 20:42:15 UTC (rev 11030)
@@ -1,9 +1,22 @@
 from django.conf.urls.defaults import *
-
 import views
 
+# Uncomment the next two lines to enable the admin:
+# from django.contrib import admin
+# admin.autodiscover()
+
 urlpatterns = patterns('',
+    # Example:
+    # (r'^tutu/', include('tutu.foo.urls')),
+
+    # Uncomment the admin/doc line below and add 'django.contrib.admindocs' 
+    # to INSTALLED_APPS to enable admin documentation:
+    # (r'^admin/doc/', include('django.contrib.admindocs.urls')),
+
+    # Uncomment the next line to enable the admin:
+    # (r'^admin/', include(admin.site.urls)),
     (r'^accept_charset/', views.accept_charset),
     (r'^good_content_type/', views.good_content_type),
     (r'^bad_content_type/', views.bad_content_type),
+    (r'^content_type_no_charset/', views.content_type_no_charset),
 )

Modified: 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py
===================================================================
--- 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py
      2009-06-17 20:02:17 UTC (rev 11029)
+++ 
django/branches/soc2009/http-wsgi-improvements/tests/regressiontests/charsets/views.py
      2009-06-17 20:42:15 UTC (rev 11030)
@@ -2,10 +2,16 @@
 from django.shortcuts import render_to_response
 
 def accept_charset(request):
-    return HttpResponse("ASCII.", origin_request=request)
+    return HttpResponse("ASCII.", request=request)
 
 def good_content_type(request):
     return HttpResponse("ASCII.", content_type="text/html; charset=us")
 
 def bad_content_type(request):
-    return HttpResponse("ASCII.", content_type="text/html; 
charset=this_should_be_junk")
+    return HttpResponse("UTF-8", content_type="text/html; 
charset=this_should_be_junk")
+
+def content_type_no_charset(request):
+    return HttpResponse("UTF-8", content_type="text/html")
+
+def encode_response(request):
+    return HttpResponse(u"\ue863", content_type="text/html; charset=GBK")
\ No newline at end of file


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Django updates" group.
To post to this group, send email to django-updates@googlegroups.com
To unsubscribe from this group, send email to 
django-updates+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/django-updates?hl=en
-~----------~----~----~----~------~----~------~--~---

[Changeset] r11030 - in django/branches/soc2009/http-wsgi-improvements: django/http tests/regressiontests/charsets

Reply via email to