Hi, here is my problem: my local encoding is iso-8859-15, and I use utf8-encoded XML files which use a DTD, which in turn uses a common DTD:
== sample.xml <?xml version="1.0" encoding="utf-8"?> <!DOCTYPE sample SYSTEM "sample.dtd"> <sample/> == == sample.dtd <!ENTITY % COMMON_DTD SYSTEM "common.dtd"> %COMMON_DTD; <!ELEMENT sample EMPTY> == == common.dtd <!ENTITY % sample_entity "sample"> == I put them in a non-ascii directory : /home/panard/tests/yéyé/ for example. And now, I want to parse sample.xml using its absolute path name : == test1.py import codecs import locale import os import xml.sax import xml.sax.handler def locale_from_unicode( s ) : return codecs.getencoder( locale.getpreferredencoding() )( s, 'replace' )[ 0 ] class EntityResolver : def resolveEntity( self, publicId, systemId ) : return locale_from_unicode( systemId ) parser = xml.sax.make_parser() er = EntityResolver() parser.setEntityResolver( er ) parser.parse( os.path.join( os.getcwd(), "sample.xml" ) ) == which results in : Traceback (most recent call last): File "test1.py", line 18, in ? parser.parse( os.path.join( os.getcwd(), "sample.xml" ) ) File "/usr/lib/python2.4/xml/sax/expatreader.py", line 107, in parse xmlreader.IncrementalParser.parse(self, source) File "/usr/lib/python2.4/xml/sax/xmlreader.py", line 123, in parse self.feed(buffer) File "/usr/lib/python2.4/xml/sax/expatreader.py", line 211, in feed self._err_handler.fatalError(exc) File "/usr/lib/python2.4/xml/sax/handler.py", line 38, in fatalError raise exception xml.sax._exceptions.SAXParseException: /home/panard/tests/yéyé/sample.dtd:2:0: error in processing external entity reference I've tried to change the ExternalEntityRefHandler to know where the problem really happens : == test.py import codecs import locale import os import traceback import xml.sax from xml.sax import saxutils, xmlreader import xml.sax.handler def locale_from_unicode( s ) : return codecs.getencoder( locale.getpreferredencoding() )( s, 'replace' )[ 0 ] class EntityResolver : def resolveEntity( self, publicId, systemId ) : return locale_from_unicode( systemId ) # from 
/usr/lib/python2.4/xml/sax/expatreader.py line 373 def external_entity_ref( context, base, sysid, pubid): print [ sysid ] ## modified self = parser ## modified if not self._external_ges: return 1 source = self._ent_handler.resolveEntity(pubid, sysid) source = saxutils.prepare_input_source(source, self._source.getSystemId() or "") self._entity_stack.append((self._parser, self._source)) self._parser = self._parser.ExternalEntityParserCreate(context) self._source = source try: xmlreader.IncrementalParser.parse(self, source) except: traceback.print_exc() ## modified return 0 # FIXME: save error info here? (self._parser, self._source) = self._entity_stack[-1] del self._entity_stack[-1] return 1 parser = xml.sax.make_parser() er = EntityResolver() parser.setEntityResolver( er ) parser.external_entity_ref = external_entity_ref parser.parse( os.path.join( os.getcwd(), "sample.xml" ) ) == the backtrace is now : [EMAIL PROTECTED] ~/tests/yéyé $ python test.py [u'sample.dtd'] Traceback (most recent call last): File "test.py", line 35, in external_entity_ref xmlreader.IncrementalParser.parse(self, source) File "/usr/lib/python2.4/xml/sax/xmlreader.py", line 123, in parse self.feed(buffer) File "/usr/lib/python2.4/xml/sax/expatreader.py", line 207, in feed self._parser.Parse(data, isFinal) UnicodeDecodeError: 'utf8' codec can't decode bytes in position 20-22: invalid data ... and the preceding backtrace ... So my question is : is there a nice way to resolve my problem ? For now, I've disabled external_entity_ref feature :( Thanks, Panard -- HomePage : http://dev.inzenet.org/~panard Qomics : http://qomics.inzenet.org YZis editor - http://www.yzis.org Smileys: http://smileys.inzenet.org
pgpJiy9SaLfpr.pgp
Description: PGP signature
_______________________________________________ Tutor maillist - Tutor@python.org http://mail.python.org/mailman/listinfo/tutor