Log message for revision 71628: Merging /Zope/branches/ajung-zpt-encoding-fixes. This branch fixes several encoding issues in the ZopePageTemplate implementation as well as some WebDAV issues, and now uses unicode internally for ZPT instances (but not for the PageTemplate(File) classes).
Changed: U Zope/trunk/doc/CHANGES.txt U Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py U Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py U Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py A Zope/trunk/lib/python/Products/PageTemplates/utils.py U Zope/trunk/lib/python/Products/PageTemplates/www/default.html U Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt -=- Modified: Zope/trunk/doc/CHANGES.txt =================================================================== --- Zope/trunk/doc/CHANGES.txt 2006-12-20 16:46:11 UTC (rev 71627) +++ Zope/trunk/doc/CHANGES.txt 2006-12-20 16:54:06 UTC (rev 71628) @@ -39,6 +39,8 @@ until the late startup phase. This in in particular useful when running Zope behind a loadbalancer (patch by Patrick Gerken). + - the ZopePageTemplate implementation now uses unicode internally. + Bugs Fixed - Collector #2191: extended DateTime parser for better support Modified: Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py =================================================================== --- Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py 2006-12-20 16:46:11 UTC (rev 71627) +++ Zope/trunk/lib/python/Products/PageTemplates/PageTemplateFile.py 2006-12-20 16:54:06 UTC (rev 71628) @@ -32,6 +32,14 @@ LOG = getLogger('PageTemplateFile') def guess_type(filename, text): + + # check for XML ourself since guess_content_type can't + # detect text/xml if 'filename' won't end with .xml + # XXX: fix this in zope.contenttype + + if text.startswith('<?xml'): + return 'text/xml' + content_type, dummy = guess_content_type(filename, text) if content_type in ('text/html', 'text/xml'): return content_type Modified: Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py =================================================================== --- Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py 2006-12-20 16:46:11 UTC (rev 71627) +++ 
Zope/trunk/lib/python/Products/PageTemplates/ZopePageTemplate.py 2006-12-20 16:54:06 UTC (rev 71628) @@ -40,20 +40,14 @@ from Products.PageTemplates.PageTemplateFile import guess_type from Products.PageTemplates.Expressions import SecureModuleImporter -# regular expression to extract the encoding from the XML preamble -encoding_reg = re.compile('<\?xml.*?encoding="(.*?)".*?\?>', re.M) +from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv + preferred_encodings = ['utf-8', 'iso-8859-15'] if os.environ.has_key('ZPT_PREFERRED_ENCODING'): preferred_encodings.insert(0, os.environ['ZPT_PREFERRED_ENCODING']) + -def sniffEncoding(text, default_encoding='utf-8'): - """Try to determine the encoding from html or xml""" - if text.startswith('<?xml'): - mo = encoding_reg.search(text) - if mo: - return mo.group(1) - return default_encoding class Src(Acquisition.Explicit): """ I am scary code """ @@ -79,7 +73,6 @@ func_defaults = None func_code = FuncCode((), 0) - strict = False _default_bindings = {'name_subpath': 'traverse_subpath'} _default_content_fn = os.path.join(package_home(globals()), @@ -108,22 +101,68 @@ security.declareProtected(view_management_screens, 'read', 'ZScriptHTML_tryForm') - def __init__(self, id, text=None, content_type=None, encoding='utf-8', - strict=False): + def __init__(self, id, text=None, content_type=None, strict=True, output_encoding='utf-8'): self.id = id self.expand = 0 - self.strict = strict self.ZBindings_edit(self._default_bindings) + self.output_encoding = output_encoding + + # default content if not text: text = open(self._default_content_fn).read() - encoding = 'utf-8' content_type = 'text/html' - self.pt_edit(text, content_type, encoding) + self.pt_edit(text, content_type) + security.declareProtected(change_page_templates, 'pt_edit') - def pt_edit(self, text, content_type, encoding='utf-8'): + def pt_edit(self, text, content_type, keep_output_encoding=False): + text = text.strip() - if self.strict and not 
isinstance(text, unicode): + + is_unicode = isinstance(text, unicode) + encoding = None + output_encoding = None + + if content_type == 'text/xml': + + if is_unicode: + encoding = None + output_encoding = 'utf-8' + else: + encoding = encodingFromXMLPreamble(text) + output_encoding = 'utf-8' + + + elif content_type == 'text/html': + + charset = charsetFromMetaEquiv(text) + + if is_unicode: + + if charset: + encoding = None + output_encoding = charset + else: + encoding = None + output_encoding = 'iso-8859-15' + + else: + + if charset: + encoding = charset + output_encoding = charset + else: + encoding = 'iso-8859-15' + output_encoding = 'iso-8859-15' + + else: + raise ValueError('Unsupported content-type %s' % content_type) + + # for content updated through WebDAV, FTP + if not keep_output_encoding: + self.output_encoding = output_encoding + + if not is_unicode: text = unicode(text, encoding) self.ZCacheable_invalidate() @@ -137,16 +176,16 @@ source_dot_xml = Src() security.declareProtected(change_page_templates, 'pt_editAction') - def pt_editAction(self, REQUEST, title, text, content_type, encoding, expand): + def pt_editAction(self, REQUEST, title, text, content_type, expand): """Change the title and document.""" if self.wl_isLocked(): raise ResourceLockedError("File is locked via WebDAV") self.expand = expand - self.pt_setTitle(title, encoding) + self.pt_setTitle(title, self.output_encoding) - self.pt_edit(text, content_type, encoding) + self.pt_edit(text, content_type, True) REQUEST.set('text', self.read()) # May not equal 'text'! REQUEST.set('title', self.title) message = "Saved changes." 
@@ -157,7 +196,7 @@ security.declareProtected(change_page_templates, 'pt_setTitle') def pt_setTitle(self, title, encoding='utf-8'): - if self.strict and not isinstance(title, unicode): + if not isinstance(title, unicode): title = unicode(title, encoding) self._setPropValue('title', title) @@ -186,8 +225,7 @@ if not content_type in ('text/html', 'text/xml'): raise ValueError('Unsupported mimetype: %s' % content_type) - encoding = sniffEncoding(text, encoding) - self.pt_edit(text, content_type, encoding) + self.pt_edit(text, content_type) return self.pt_editForm(manage_tabs_message='Saved changes') security.declareProtected(change_page_templates, 'pt_changePrefs') @@ -240,6 +278,8 @@ return c def write(self, text): + if not isinstance(text, unicode): + raise TypeError("'text' parameter must be unicode") self.ZCacheable_invalidate() ZopePageTemplate.inheritedAttribute('write')(self, text) @@ -291,8 +331,9 @@ """ Handle HTTP PUT requests """ self.dav__init(REQUEST, RESPONSE) self.dav__simpleifhandler(REQUEST, RESPONSE, refresh=1) - ## XXX this should be unicode or we must pass an encoding - self.pt_edit(REQUEST.get('BODY', '')) + text = REQUEST.get('BODY', '') + content_type = guess_type('', text) + self.pt_edit(text, content_type) RESPONSE.setStatus(204) return RESPONSE @@ -303,8 +344,8 @@ security.declareProtected(ftp_access, 'manage_FTPget') def manage_FTPget(self): "Get source for FTP download" - self.REQUEST.RESPONSE.setHeader('Content-Type', self.content_type) - return self.read() + result = self.pt_render() + return result.encode(self.output_encoding) security.declareProtected(view_management_screens, 'html') def html(self): @@ -353,6 +394,12 @@ # acquisition context, so we don't know where it is. 
:-( return None + def pt_render(self, source=False, extra_context={}): + result = PageTemplate.pt_render(self, source, extra_context) + assert isinstance(result, unicode) + return result + + def wl_isLocked(self): return 0 @@ -407,8 +454,8 @@ content_type = headers['content_type'] else: content_type = guess_type(filename, text) - encoding = sniffEncoding(text, encoding) + else: if hasattr(text, 'read'): filename = getattr(text, 'filename', '') @@ -418,9 +465,14 @@ content_type = headers['content_type'] else: content_type = guess_type(filename, text) - encoding = sniffEncoding(text, encoding) - zpt = ZopePageTemplate(id, text, content_type, encoding) + # ensure that we pass unicode to the constructor to + # avoid further hassles with pt_edit() + + if not isinstance(text, unicode): + text = unicode(text, encoding) + + zpt = ZopePageTemplate(id, text, content_type, output_encoding=encoding) zpt.pt_setTitle(title, encoding) self._setObject(id, zpt) zpt = getattr(self, id) Modified: Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py =================================================================== --- Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py 2006-12-20 16:46:11 UTC (rev 71627) +++ Zope/trunk/lib/python/Products/PageTemplates/tests/testZopePageTemplate.py 2006-12-20 16:54:06 UTC (rev 71628) @@ -1,3 +1,5 @@ +# -*- encoding: iso-8859-15 -*- + """ZopePageTemplate regression tests. Ensures that adding a page template works correctly. 
@@ -6,14 +8,163 @@ """ - import unittest import Zope2 import transaction import zope.component.testing from zope.traversing.adapters import DefaultTraversable from Testing.makerequest import makerequest +from Testing.ZopeTestCase import ZopeTestCase, installProduct +from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate, manage_addPageTemplate +from Products.PageTemplates.utils import encodingFromXMLPreamble, charsetFromMetaEquiv + +ascii_str = '<html><body>hello world</body></html>' +iso885915_str = '<html><body>üöäÜÖÄß</body></html>' +utf8_str = unicode(iso885915_str, 'iso-8859-15').encode('utf-8') + +xml_template = '''<?xml vesion="1.0" encoding="%s"?> +<foo> +üöäÜÖÄß +</foo> +''' + +xml_iso_8859_15 = xml_template % 'iso-8859-15' +xml_utf8 = unicode(xml_template, 'iso-8859-15').encode('utf-8') % 'utf-8' + +html_template_w_header = ''' +<html> + <head> + <META http-equiv="content-type" content="text/html; charset=%s"> + </hed> + <body> + test üöäÜÖÄß + </body> +</html> +''' + +html_iso_8859_15_w_header = html_template_w_header % 'iso-8859-15' +html_utf8_w_header = unicode(html_template_w_header, 'iso-8859-15').encode('utf-8') % 'utf-8' + +html_template_wo_header = ''' +<html> + <body> + test üöäÜÖÄß + </body> +</html> +''' + +html_iso_8859_15_wo_header = html_template_wo_header +html_utf8_wo_header = unicode(html_template_wo_header, 'iso-8859-15').encode('utf-8') + + +installProduct('PageTemplates') + +class ZPTUtilsTests(unittest.TestCase): + + def testExtractEncodingFromXMLPreamble(self): + extract = encodingFromXMLPreamble + self.assertEqual(extract('<?xml version="1.0" ?>'), 'utf-8') + self.assertEqual(extract('<?xml encoding="utf-8" version="1.0" ?>'), 'utf-8') + self.assertEqual(extract('<?xml encoding="UTF-8" version="1.0" ?>'), 'utf-8') + self.assertEqual(extract('<?xml encoding="ISO-8859-15" version="1.0" ?>'), 'iso-8859-15') + self.assertEqual(extract('<?xml encoding="iso-8859-15" version="1.0" ?>'), 'iso-8859-15') + + def 
testExtractCharsetFromMetaHTTPEquivTag(self): + extract = charsetFromMetaEquiv + self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=UTF-8"></html>'), 'utf-8') + self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html; charset=iso-8859-15"></html>'), 'iso-8859-15') + self.assertEqual(extract('<html><META http-equiv="content-type" content="text/html"></html>'), None) + self.assertEqual(extract('<html>...<html>'), None) + + +class ZopePageTemplateFileTests(ZopeTestCase): + + def testPT_RenderWithAscii(self): + manage_addPageTemplate(self.app, 'test', text=ascii_str, encoding='ascii') + zpt = self.app['test'] + result = zpt.pt_render() + # use startswith() because the renderer appends a trailing \n + self.assertEqual(result.encode('ascii').startswith(ascii_str), True) + self.assertEqual(zpt.output_encoding, 'iso-8859-15') + + def testPT_RenderWithISO885915(self): + manage_addPageTemplate(self.app, 'test', text=iso885915_str, encoding='iso-8859-15') + zpt = self.app['test'] + result = zpt.pt_render() + # use startswith() because the renderer appends a trailing \n + self.assertEqual(result.encode('iso-8859-15').startswith(iso885915_str), True) + self.assertEqual(zpt.output_encoding, 'iso-8859-15') + + def testPT_RenderWithUTF8(self): + manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8') + zpt = self.app['test'] + result = zpt.pt_render() + # use startswith() because the renderer appends a trailing \n + self.assertEqual(result.encode('utf-8').startswith(utf8_str), True) + self.assertEqual(zpt.output_encoding, 'iso-8859-15') + + def testWriteAcceptsUnicode(self): + manage_addPageTemplate(self.app, 'test', '', encoding='utf-8') + zpt = self.app['test'] + s = u'this is unicode' + zpt.write(s) + self.assertEqual(zpt.read(), s) + self.assertEqual(isinstance(zpt.read(), unicode), True) + + def testWriteWontAcceptsNonUnicode(self): + manage_addPageTemplate(self.app, 'test', '', 
encoding='utf-8') + zpt = self.app['test'] + self.assertRaises(TypeError, zpt.write, 'this is not unicode') + + + def _createZPT(self): + manage_addPageTemplate(self.app, 'test', text=utf8_str, encoding='utf-8') + zpt = self.app['test'] + return zpt + + def _makePUTRequest(self, body): + return {'BODY' : body} + + def _put(self, text): + zpt = self._createZPT() + REQUEST = self.app.REQUEST + REQUEST.set('BODY', text) + zpt.PUT(REQUEST, REQUEST.RESPONSE) + return zpt + + def testPutHTMLIso8859_15WithCharsetInfo(self): + zpt = self._put(html_iso_8859_15_w_header) + self.assertEqual(zpt.output_encoding, 'iso-8859-15') + self.assertEqual(zpt.content_type, 'text/html') + + def testPutHTMLUTF8_WithCharsetInfo(self): + zpt = self._put(html_utf8_w_header) + self.assertEqual(zpt.output_encoding, 'utf-8') + self.assertEqual(zpt.content_type, 'text/html') + + def testPutHTMLIso8859_15WithoutCharsetInfo(self): + zpt = self._put(html_iso_8859_15_wo_header) + self.assertEqual(zpt.output_encoding, 'iso-8859-15') + self.assertEqual(zpt.content_type, 'text/html') + + def testPutHTMLUTF8_WithoutCharsetInfo(self): + zpt = self._put(html_utf8_wo_header) + self.assertEqual(zpt.output_encoding, 'iso-8859-15') + self.assertEqual(zpt.content_type, 'text/html') + + def testPutXMLIso8859_15(self): + """ XML: use always UTF-8 als output encoding """ + zpt = self._put(xml_iso_8859_15) + self.assertEqual(zpt.output_encoding, 'utf-8') + self.assertEqual(zpt.content_type, 'text/xml') + + def testPutXMLUTF8(self): + """ XML: use always UTF-8 als output encoding """ + zpt = self._put(xml_utf8) + self.assertEqual(zpt.output_encoding, 'utf-8') + self.assertEqual(zpt.content_type, 'text/xml') + class ZPTRegressions(unittest.TestCase): def setUp(self): @@ -58,14 +209,7 @@ pt = self.app.pt1 self.assertEqual(pt.document_src(), self.text) - def test_BBB_for_strict_attribute(self): - # Collector 2213: old templates don't have 'strict' attribute. 
- from Products.PageTemplates.ZopePageTemplate import ZopePageTemplate - zpt = ZopePageTemplate('issue_2213') - del zpt.strict # simulate old templates - self.assertEqual(zpt.strict, False) - class ZPTMacros(zope.component.testing.PlacelessSetup, unittest.TestCase): def setUp(self): @@ -132,7 +276,9 @@ def test_suite(): suite = unittest.makeSuite(ZPTRegressions) + suite.addTests(unittest.makeSuite(ZPTUtilsTests)) suite.addTests(unittest.makeSuite(ZPTMacros)) + suite.addTests(unittest.makeSuite(ZopePageTemplateFileTests)) return suite if __name__ == '__main__': Copied: Zope/trunk/lib/python/Products/PageTemplates/utils.py (from rev 71627, Zope/branches/ajung-zpt-encoding-fixes/lib/python/Products/PageTemplates/utils.py) Modified: Zope/trunk/lib/python/Products/PageTemplates/www/default.html =================================================================== --- Zope/trunk/lib/python/Products/PageTemplates/www/default.html 2006-12-20 16:46:11 UTC (rev 71627) +++ Zope/trunk/lib/python/Products/PageTemplates/www/default.html 2006-12-20 16:54:06 UTC (rev 71628) @@ -1,6 +1,7 @@ <html> <head> <title tal:content="template/title">The title</title> + <meta http-equiv="content-type" content="text/html;charset=utf-8"> </head> <body> Modified: Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt =================================================================== --- Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt 2006-12-20 16:46:11 UTC (rev 71627) +++ Zope/trunk/lib/python/Products/PageTemplates/www/ptEdit.zpt 2006-12-20 16:54:06 UTC (rev 71628) @@ -1,4 +1,4 @@ -<h1 tal:replace="structure python:context.manage_page_header(management_page_charset='utf-8')">Header</h1> +<h1 tal:replace="structure python:context.manage_page_header(management_page_charset=context.output_encoding)">Header</h1> <h2 tal:define="manage_tabs_message options/manage_tabs_message | nothing" tal:replace="structure context/manage_tabs">Tabs</h2> @@ -33,7 +33,7 @@ 
tal:content="python:context.bobobase_modification_time().strftime('%Y-%m-%d %I:%M %p')">1/1/2000 </div> </td> - <td align="left" valign="top" colspan="2"> + <td align="left" valign="top" colspan="2" rowspan="2"> <a href="source.html" tal:condition="context/html">Browse HTML source</a> <a href="source.xml" tal:condition="not:context/html">Browse XML source</a> <br /> @@ -44,6 +44,17 @@ </td> </tr> + <tr> + <td align="left" valign="middle"> + <div class="form-label">Output encoding</div> + </td> + <td align="left" valign="middle"> + <div class="form-text" + tal:content="context/output_encoding" + /> + </td> + </tr> + <tr tal:define="errors context/pt_errors" tal:condition="errors"> <tal:block define="global body python:context.document_src({'raw':1})" /> <td align="left" valign="middle" class="form-label">Errors</td> _______________________________________________ Zope-Checkins maillist - Zope-Checkins@zope.org http://mail.zope.org/mailman/listinfo/zope-checkins