Hello all, I'm a fairly new Zope developer and came across a "bug" in my application: <meta http-equiv="content-type" content="text/html;charset=UTF-8" /> tags are being stripped out of ZPT templates. Is there a reason for this? The stripping is done in the _prepare_html method of zope.pagetemplate.pagetemplatefile.PageTemplateFile. My application produces XHTML containing non-ASCII characters that is then consumed by other applications, so the content type needs to be set in the document itself in addition to the HTTP headers.
Secondly, the meta tag is found and stripped using a regular expression, so simply changing the order of the attributes on the <meta> tag causes the regex not to match. Attached is a patch that uses HTMLParser instead of a regex to find the content-type meta tag, and that leaves the tag in the template text rather than stripping it. It stops parsing the HTML as soon as it encounters the required meta tag. Miano
Index: src/zope/pagetemplate/pagetemplatefile.py =================================================================== --- src/zope/pagetemplate/pagetemplatefile.py (revision 124430) +++ src/zope/pagetemplate/pagetemplatefile.py (working copy) @@ -23,19 +23,49 @@ import re import logging +from HTMLParser import HTMLParser, HTMLParseError + from zope.pagetemplate.pagetemplate import PageTemplate DEFAULT_ENCODING = "utf-8" -meta_pattern = re.compile( - r'\s*<meta\s+http-equiv=["\']?Content-Type["\']?' - r'\s+content=["\']?([^;]+);\s*charset=([^"\']+)["\']?\s*/?\s*>\s*', +meta_pattern = re.compile(r'\s*["\']?([^;]+);\s*charset=([^"\']+)', re.IGNORECASE) + def package_home(gdict): filename = gdict["__file__"] return os.path.dirname(filename) + +class FoundMetaContentTypeTag(Exception): + def __init__(self, value): + self.parameter = value + def __str__(self): + return repr(self.parameter) + + +class FindMetaContentTypeHTMLParser(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.content_type = None + self.encoding = DEFAULT_ENCODING + + def handle_startendtag(self, tag, attrs): + if tag == "meta": + http_equiv = [a[1] for a in attrs if a[0] == "http-equiv"] + if http_equiv and http_equiv[0].lower() == "content-type": + content = [a[1] for a in attrs if a[0] == "content"] + if content: + match = meta_pattern.search(content[0]) + if match is not None: + self.content_type, self.encoding = match.groups() + raise FoundMetaContentTypeTag("Content Type Meta tag found") + + def get_params(self): + return self.content_type, self.encoding + + class PageTemplateFile(PageTemplate): "Zope wrapper for filesystem Page Template using TAL, TALES, and METAL" @@ -57,16 +87,16 @@ return path def _prepare_html(self, text): - match = meta_pattern.search(text) - if match is not None: - type_, encoding = match.groups() - # TODO: Shouldn't <meta>/<?xml?> stripping - # be in PageTemplate.__call__()? 
- text = meta_pattern.sub("", text) - else: - type_ = None - encoding = DEFAULT_ENCODING - return unicode(text, encoding), type_ + parser = FindMetaContentTypeHTMLParser() + content_type = None + encoding = DEFAULT_ENCODING + try: + parser.feed(text) + except FoundMetaContentTypeTag: + content_type, encoding = parser.get_params() + except HTMLParseError: + pass + return unicode(text, encoding), content_type def _read_file(self): __traceback_info__ = self.filename Index: src/zope/pagetemplate/tests/test_ptfile.py =================================================================== --- src/zope/pagetemplate/tests/test_ptfile.py (revision 124430) +++ src/zope/pagetemplate/tests/test_ptfile.py (working copy) @@ -161,7 +161,9 @@ self.failUnlessEqual(rendered.strip(), u"<html><head><title>" u"\u0422\u0435\u0441\u0442" - u"</title></head></html>") + u'</title><meta http-equiv="Content-Type"' + u' content="text/html; charset=windows-1251" />' + u"</head></html>") def test_xhtml(self): pt = self.get_pt( @@ -176,7 +178,9 @@ self.failUnlessEqual(rendered.strip(), u"<html><head><title>" u"\u0422\u0435\u0441\u0442" - u"</title></head></html>") + u'</title><meta http-equiv="Content-Type"' + u' content="text/html; charset=windows-1251" />' + u"</head></html>")
_______________________________________________ Zope-Dev maillist - Zope-Dev@zope.org https://mail.zope.org/mailman/listinfo/zope-dev ** No cross posts or HTML encoding! ** (Related lists - https://mail.zope.org/mailman/listinfo/zope-announce https://mail.zope.org/mailman/listinfo/zope )