Serhiy Storchaka <storch...@gmail.com> added the comment:

> Serhiy, note that _SimpleElementPath is now gone in 3.3, since ElementPath.py 
> is always there in stdlib. Could you update the patch to reflect this?

Don't worry, _SimpleElementPath is not used in the changes.

> Another thing. I'm trying really hard to phase out the doctest tests of 
> etree, replacing them with unittest-based tests as much as possible. The 
> doctests are causing all kinds of trouble with parametrized testing for both 
> the Python and the C implementations. Please don't add new doctests. If you 
> add tests, add them to existing TestCase classes, or create new ones.

Done. I replaced the encoding doctest with unittest-based tests and merged
them with StringIOTest and the user IO tests into one IOTest class. Also
added a test for StringIO writing.

Also, I've improved support for unbuffered file objects (as in
issue1470548).

----------
Added file: http://bugs.python.org/file26300/etree_write_utf16_3.patch

_______________________________________
Python tracker <rep...@bugs.python.org>
<http://bugs.python.org/issue1767933>
_______________________________________
diff -r d03dbc324b60 Lib/test/test_xml_etree.py
--- a/Lib/test/test_xml_etree.py        Sat Jul 07 22:15:22 2012 +1000
+++ b/Lib/test/test_xml_etree.py        Sat Jul 07 17:23:00 2012 +0300
@@ -888,65 +888,6 @@
     """
     ET.XML("<?xml version='1.0' encoding='%s'?><xml />" % encoding)
 
-def encoding():
-    r"""
-    Test encoding issues.
-
-    >>> elem = ET.Element("tag")
-    >>> elem.text = "abc"
-    >>> serialize(elem)
-    '<tag>abc</tag>'
-    >>> serialize(elem, encoding="utf-8")
-    b'<tag>abc</tag>'
-    >>> serialize(elem, encoding="us-ascii")
-    b'<tag>abc</tag>'
-    >>> serialize(elem, encoding="iso-8859-1")
-    b"<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>abc</tag>"
-
-    >>> elem.text = "<&\"\'>"
-    >>> serialize(elem)
-    '<tag>&lt;&amp;"\'&gt;</tag>'
-    >>> serialize(elem, encoding="utf-8")
-    b'<tag>&lt;&amp;"\'&gt;</tag>'
-    >>> serialize(elem, encoding="us-ascii") # cdata characters
-    b'<tag>&lt;&amp;"\'&gt;</tag>'
-    >>> serialize(elem, encoding="iso-8859-1")
-    b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag>&lt;&amp;"\'&gt;</tag>'
-
-    >>> elem.attrib["key"] = "<&\"\'>"
-    >>> elem.text = None
-    >>> serialize(elem)
-    '<tag key="&lt;&amp;&quot;\'&gt;" />'
-    >>> serialize(elem, encoding="utf-8")
-    b'<tag key="&lt;&amp;&quot;\'&gt;" />'
-    >>> serialize(elem, encoding="us-ascii")
-    b'<tag key="&lt;&amp;&quot;\'&gt;" />'
-    >>> serialize(elem, encoding="iso-8859-1")
-    b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="&lt;&amp;&quot;\'&gt;" />'
-
-    >>> elem.text = '\xe5\xf6\xf6<>'
-    >>> elem.attrib.clear()
-    >>> serialize(elem)
-    '<tag>\xe5\xf6\xf6&lt;&gt;</tag>'
-    >>> serialize(elem, encoding="utf-8")
-    b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>'
-    >>> serialize(elem, encoding="us-ascii")
-    b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>'
-    >>> serialize(elem, encoding="iso-8859-1")
-    b"<?xml version='1.0' encoding='iso-8859-1'?>\n<tag>\xe5\xf6\xf6&lt;&gt;</tag>"
-
-    >>> elem.attrib["key"] = '\xe5\xf6\xf6<>'
-    >>> elem.text = None
-    >>> serialize(elem)
-    '<tag key="\xe5\xf6\xf6&lt;&gt;" />'
-    >>> serialize(elem, encoding="utf-8")
-    b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />'
-    >>> serialize(elem, encoding="us-ascii")
-    b'<tag key="&#229;&#246;&#246;&lt;&gt;" />'
-    >>> serialize(elem, encoding="iso-8859-1")
-    b'<?xml version=\'1.0\' encoding=\'iso-8859-1\'?>\n<tag key="\xe5\xf6\xf6&lt;&gt;" />'
-    """
-
 def methods():
     r"""
     Test serialization methods.
@@ -2166,16 +2107,129 @@
         self.assertEqual(self._subelem_tags(e), ['a1'])
 
 
-class StringIOTest(unittest.TestCase):
+class IOTest(unittest.TestCase):
+    def test_encoding(self):
+        # Test encoding issues.
+        elem = ET.Element("tag")
+        elem.text = "abc"
+        self.assertEqual(serialize(elem), '<tag>abc</tag>')
+        self.assertEqual(serialize(elem, encoding="utf-8"),
+                b'<tag>abc</tag>')
+        self.assertEqual(serialize(elem, encoding="us-ascii"),
+                b'<tag>abc</tag>')
+        for enc in ("iso-8859-1", "utf-16", "utf-32"):
+            self.assertEqual(serialize(elem, encoding=enc),
+                    ("<?xml version='1.0' encoding='%s'?>\n"
+                     "<tag>abc</tag>" % enc).encode(enc))
+
+        elem = ET.Element("tag")
+        elem.text = "<&\"\'>"
+        self.assertEqual(serialize(elem), '<tag>&lt;&amp;"\'&gt;</tag>')
+        self.assertEqual(serialize(elem, encoding="utf-8"),
+                b'<tag>&lt;&amp;"\'&gt;</tag>')
+        self.assertEqual(serialize(elem, encoding="us-ascii"),
+                b'<tag>&lt;&amp;"\'&gt;</tag>')
+        for enc in ("iso-8859-1", "utf-16", "utf-32"):
+            self.assertEqual(serialize(elem, encoding=enc),
+                    ("<?xml version='1.0' encoding='%s'?>\n"
+                     "<tag>&lt;&amp;\"'&gt;</tag>" % enc).encode(enc))
+
+        elem = ET.Element("tag")
+        elem.attrib["key"] = "<&\"\'>"
+        self.assertEqual(serialize(elem), '<tag key="&lt;&amp;&quot;\'&gt;" />')
+        self.assertEqual(serialize(elem, encoding="utf-8"),
+                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
+        self.assertEqual(serialize(elem, encoding="us-ascii"),
+                b'<tag key="&lt;&amp;&quot;\'&gt;" />')
+        for enc in ("iso-8859-1", "utf-16", "utf-32"):
+            self.assertEqual(serialize(elem, encoding=enc),
+                    ("<?xml version='1.0' encoding='%s'?>\n"
+                     "<tag key=\"&lt;&amp;&quot;'&gt;\" />" % enc).encode(enc))
+
+        elem = ET.Element("tag")
+        elem.text = '\xe5\xf6\xf6<>'
+        self.assertEqual(serialize(elem), '<tag>\xe5\xf6\xf6&lt;&gt;</tag>')
+        self.assertEqual(serialize(elem, encoding="utf-8"),
+                b'<tag>\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;</tag>')
+        self.assertEqual(serialize(elem, encoding="us-ascii"),
+                b'<tag>&#229;&#246;&#246;&lt;&gt;</tag>')
+        for enc in ("iso-8859-1", "utf-16", "utf-32"):
+            self.assertEqual(serialize(elem, encoding=enc),
+                    ("<?xml version='1.0' encoding='%s'?>\n"
+                     "<tag>åöö&lt;&gt;</tag>" % enc).encode(enc))
+
+        elem = ET.Element("tag")
+        elem.attrib["key"] = '\xe5\xf6\xf6<>'
+        self.assertEqual(serialize(elem), '<tag key="\xe5\xf6\xf6&lt;&gt;" />')
+        self.assertEqual(serialize(elem, encoding="utf-8"),
+                b'<tag key="\xc3\xa5\xc3\xb6\xc3\xb6&lt;&gt;" />')
+        self.assertEqual(serialize(elem, encoding="us-ascii"),
+                b'<tag key="&#229;&#246;&#246;&lt;&gt;" />')
+        for enc in ("iso-8859-1", "utf-16", "utf-32"):
+            self.assertEqual(serialize(elem, encoding=enc),
+                    ("<?xml version='1.0' encoding='%s'?>\n"
+                     "<tag key=\"åöö&lt;&gt;\" />" % enc).encode(enc))
+
     def test_read_from_stringio(self):
         tree = ET.ElementTree()
+        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
+        tree.parse(stream)
+        self.assertEqual(tree.getroot().tag, 'site')
+
+    def test_write_to_stringio(self):
         stream = io.StringIO()
-        stream.write('''<?xml version="1.0"?><site></site>''')
-        stream.seek(0)
-        tree.parse(stream)
+        tree = ET.ElementTree(ET.XML('''<site />'''))
+        tree.write(stream, encoding='unicode')
+        self.assertEqual(stream.getvalue(), '''<site />''')
 
+    def test_read_from_bytesio(self):
+        tree = ET.ElementTree()
+        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
+        tree.parse(raw)
         self.assertEqual(tree.getroot().tag, 'site')
 
+    def test_write_to_bytesio(self):
+        raw = io.BytesIO()
+        tree = ET.ElementTree(ET.XML('''<site />'''))
+        tree.write(raw)
+        self.assertEqual(raw.getvalue(), b'''<site />''')
+
+    class dummy:
+        pass
+
+    def test_read_from_user_text_reader(self):
+        stream = io.StringIO('''<?xml version="1.0"?><site></site>''')
+        reader = self.dummy()
+        reader.read = stream.read
+        tree = ET.ElementTree()
+        tree.parse(reader)
+        self.assertEqual(tree.getroot().tag, 'site')
+
+    def test_write_to_user_text_writer(self):
+        stream = io.StringIO()
+        writer = self.dummy()
+        writer.write = stream.write
+        tree = ET.ElementTree(ET.XML('''<site />'''))
+        tree.write(writer, encoding='unicode')
+        self.assertEqual(stream.getvalue(), '''<site />''')
+
+    def test_read_from_user_binary_reader(self):
+        raw = io.BytesIO(b'''<?xml version="1.0"?><site></site>''')
+        reader = self.dummy()
+        reader.read = raw.read
+        tree = ET.ElementTree()
+        tree.parse(reader)
+        self.assertEqual(tree.getroot().tag, 'site')
+        tree = ET.ElementTree()
+
+    def test_write_to_user_binary_writer(self):
+        raw = io.BytesIO()
+        writer = self.dummy()
+        writer.write = raw.write
+        tree = ET.ElementTree(ET.XML('''<site />'''))
+        tree.write(writer)
+        self.assertEqual(raw.getvalue(), b'''<site />''')
+
 
 class ParseErrorTest(unittest.TestCase):
     def test_subclass(self):
@@ -2299,7 +2353,7 @@
     test_classes = [
         ElementSlicingTest,
         BasicElementTest,
-        StringIOTest,
+        IOTest,
         ParseErrorTest,
         XincludeTest,
         ElementTreeTest,
diff -r d03dbc324b60 Lib/xml/etree/ElementTree.py
--- a/Lib/xml/etree/ElementTree.py      Sat Jul 07 22:15:22 2012 +1000
+++ b/Lib/xml/etree/ElementTree.py      Sat Jul 07 17:23:00 2012 +0300
@@ -100,6 +100,7 @@
 import sys
 import re
 import warnings
+import io
 
 from . import ElementPath
 
@@ -814,20 +815,32 @@
             encoding = encoding.lower()
         if hasattr(file_or_filename, "write"):
             file = file_or_filename
+            if encoding != "unicode":
+                if not isinstance(file, io.BufferedIOBase):
+                    if isinstance(file, io.RawIOBase):
+                        file = io.BufferedWriter(file)
+                    else:
+                        file = io.BufferedIOBase()
+                        file.writable = lambda: True
+                        file.write = file_or_filename.write
+                        try:
+                            # Required to write BOM
+                            file.seekable = file_or_filename.seekable
+                            file.tell = file_or_filename.tell
+                        except AttributeError:
+                            pass
+                file = io.TextIOWrapper(file, encoding=encoding,
+                                        errors="xmlcharrefreplace",
+                                        newline="\n")
+            close_file = False
         else:
             if encoding != "unicode":
-                file = open(file_or_filename, "wb")
+                file = open(file_or_filename, "w", encoding=encoding,
+                            errors="xmlcharrefreplace")
             else:
                 file = open(file_or_filename, "w")
-        if encoding != "unicode":
-            def write(text):
-                try:
-                    return file.write(text.encode(encoding,
-                                                  "xmlcharrefreplace"))
-                except (TypeError, AttributeError):
-                    _raise_serialization_error(text)
-        else:
-            write = file.write
+            close_file = True
+        write = file.write
         if method == "xml" and (xml_declaration or
                 (xml_declaration is None and
                  encoding not in ("utf-8", "us-ascii", "unicode"))):
@@ -843,8 +856,11 @@
             qnames, namespaces = _namespaces(self._root, default_namespace)
             serialize = _serialize[method]
             serialize(write, self._root, qnames, namespaces)
-        if file_or_filename is not file:
+        if close_file:
             file.close()
+        elif file_or_filename is not file:
+            file.flush()
+            file.detach()
 
     def write_c14n(self, file):
         # lxml.etree compatibility.  use output method instead
@@ -1134,10 +1150,9 @@
 # @defreturn string
 
 def tostring(element, encoding=None, method=None):
-    class dummy:
-        pass
     data = []
-    file = dummy()
+    file = io.BufferedIOBase()
+    file.writable = lambda: True
     file.write = data.append
     ElementTree(element).write(file, encoding, method=method)
     if encoding in (str, "unicode"):
@@ -1161,10 +1176,9 @@
 # @since 1.3
 
 def tostringlist(element, encoding=None, method=None):
-    class dummy:
-        pass
     data = []
-    file = dummy()
+    file = io.BufferedIOBase()
+    file.writable = lambda: True
     file.write = data.append
     ElementTree(element).write(file, encoding, method=method)
     # FIXME: merge small fragments into larger parts
_______________________________________________
Python-bugs-list mailing list
Unsubscribe: 
http://mail.python.org/mailman/options/python-bugs-list/archive%40mail-archive.com

Reply via email to