commit:     75b897d91874c505e14dc2c0808c4cad4268ad76
Author:     Brian Dolbec <dolsen <AT> gentoo <DOT> org>
AuthorDate: Tue May  3 07:18:05 2016 +0000
Commit:     Brian Dolbec <dolsen <AT> gentoo <DOT> org>
CommitDate: Tue May  3 09:12:30 2016 +0000
URL:        https://gitweb.gentoo.org/proj/portage.git/commit/?id=75b897d9

repoman: Use lxml for parsing of metadata

Note that we no longer throw a QA error for a missing XML prolog, as long as
the encoding matches the default ('UTF-8'; lowercase is also allowed).

 pym/repoman/modules/scan/metadata/pkgmetadata.py | 44 +++++++-----------------
 1 file changed, 12 insertions(+), 32 deletions(-)

diff --git a/pym/repoman/modules/scan/metadata/pkgmetadata.py 
b/pym/repoman/modules/scan/metadata/pkgmetadata.py
index e8db92f..22afddf 100644
--- a/pym/repoman/modules/scan/metadata/pkgmetadata.py
+++ b/pym/repoman/modules/scan/metadata/pkgmetadata.py
@@ -7,8 +7,8 @@ import sys
 from itertools import chain
 
 try:
-       import xml.etree.ElementTree
-       from xml.parsers.expat import ExpatError
+       from lxml import etree
+       from lxml.etree import ParserError
 except (SystemExit, KeyboardInterrupt):
        raise
 except (ImportError, SystemError, RuntimeError, Exception):
@@ -26,12 +26,11 @@ from repoman._portage import portage
 from repoman.metadata import metadata_dtd_uri
 from repoman.checks.herds.herdbase import get_herd_base
 from repoman.checks.herds.metadata import check_metadata, UnknownHerdsError
-from repoman._xml import _XMLParser, _MetadataTreeBuilder, XmlLint
+from repoman._xml import XmlLint
 from repoman.modules.scan.scanbase import ScanBase
 
 from portage.exception import InvalidAtom
 from portage import os
-from portage import _encodings, _unicode_encode
 from portage import exception
 from portage.dep import Atom
 
@@ -141,50 +140,31 @@ class PkgMetadata(ScanBase, USEFlagChecks):
 
                # metadata.xml parse check
                metadata_bad = False
-               xml_info = {}
-               xml_parser = _XMLParser(xml_info, target=_MetadataTreeBuilder())
 
                # read metadata.xml into memory
                try:
-                       _metadata_xml = xml.etree.ElementTree.parse(
-                               _unicode_encode(
-                                       os.path.join(checkdir, "metadata.xml"),
-                                       encoding=_encodings['fs'], 
errors='strict'),
-                               parser=xml_parser)
-               except (ExpatError, SyntaxError, EnvironmentError) as e:
+                       _metadata_xml = etree.parse(os.path.join(checkdir, 
'metadata.xml'))
+               except (ParserError, SyntaxError, EnvironmentError) as e:
                        metadata_bad = True
                        self.qatracker.add_error("metadata.bad", 
"%s/metadata.xml: %s" % (xpkg, e))
                        del e
                        self.muselist = frozenset(self.musedict)
                        return False
 
-               if "XML_DECLARATION" not in xml_info:
+               xml_encoding = _metadata_xml.docinfo.encoding
+               if xml_encoding.upper() != metadata_xml_encoding:
                        self.qatracker.add_error(
                                "metadata.bad", "%s/metadata.xml: "
-                               "xml declaration is missing on first line, "
-                               "should be '%s'" % (xpkg, 
metadata_xml_declaration))
-               else:
-                       xml_version, xml_encoding, xml_standalone = \
-                               xml_info["XML_DECLARATION"]
-                       if xml_encoding is None or \
-                               xml_encoding.upper() != metadata_xml_encoding:
-                               if xml_encoding is None:
-                                       encoding_problem = "but it is undefined"
-                               else:
-                                       encoding_problem = "not '%s'" % 
xml_encoding
-                               self.qatracker.add_error(
-                                       "metadata.bad", "%s/metadata.xml: "
-                                       "xml declaration encoding should be 
'%s', %s" %
-                                       (xpkg, metadata_xml_encoding, 
encoding_problem))
+                               "xml declaration encoding should be '%s', not 
'%s'" %
+                               (xpkg, metadata_xml_encoding, xml_encoding))
 
-               if "DOCTYPE" not in xml_info:
+               if not _metadata_xml.docinfo:
                        metadata_bad = True
                        self.qatracker.add_error(
                                "metadata.bad",
                                "%s/metadata.xml: %s" % (xpkg, "DOCTYPE is 
missing"))
                else:
-                       doctype_name, doctype_system, doctype_pubid = \
-                               xml_info["DOCTYPE"]
+                       doctype_system = _metadata_xml.docinfo.system_url
                        if doctype_system != metadata_dtd_uri:
                                if doctype_system is None:
                                        system_problem = "but it is undefined"
@@ -194,7 +174,7 @@ class PkgMetadata(ScanBase, USEFlagChecks):
                                        "metadata.bad", "%s/metadata.xml: "
                                        "DOCTYPE: SYSTEM should refer to '%s', 
%s" %
                                        (xpkg, metadata_dtd_uri, 
system_problem))
-
+                       doctype_name = _metadata_xml.docinfo.doctype.split(' 
')[1]
                        if doctype_name != metadata_doctype_name:
                                self.qatracker.add_error(
                                        "metadata.bad", "%s/metadata.xml: "

Reply via email to