commit:     fe715a306754a4df2d05a3a24034d015c16377bf
Author:     Peter Levine <plevine457 <AT> gmail <DOT> com>
AuthorDate: Mon Jul 24 22:40:02 2023 +0000
Commit:     Sam James <sam <AT> gentoo <DOT> org>
CommitDate: Sun Aug  6 23:28:34 2023 +0000
URL:        https://gitweb.gentoo.org/proj/mirrorselect.git/commit/?id=fe715a30

extractor.py: parse proto from the uri

The protocol can be parsed from the URI so we can get rid of the
protocol tag altogether.

Bug: https://bugs.gentoo.org/911183
Suggested-by: Florian Schmaus <flow <AT> gentoo.org>
Suggested-by: Sam James <sam <AT> gentoo.org>
Signed-off-by: Peter Levine <plevine457 <AT> gmail.com>
Signed-off-by: Sam James <sam <AT> gentoo.org>

 mirrorselect/mirrorparser3.py | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/mirrorselect/mirrorparser3.py b/mirrorselect/mirrorparser3.py
index 089f949..444bc11 100644
--- a/mirrorselect/mirrorparser3.py
+++ b/mirrorselect/mirrorparser3.py
@@ -44,6 +44,22 @@ class MirrorParser3:
        def _reset(self):
                self._dict = {}
 
+       def _get_proto(self, uri=None):
+               if not uri: # Don't parse if empty
+                       return None;
+               try:
+                       import sys;
+                       if sys.version_info[0] >= 3:
+                               from urllib.parse import urlparse
+                               return urlparse(uri).scheme
+                       else:
+                               from urllib2 import Request
+                               return Request(uri).get_type()
+               except Exception as e: # Add general exception to catch errors
+                       from mirrorselect.output import Output
+                       Output.write(('_get_proto(): Exception while parsing the protocol '
+                               'for URI %s: %s\n')% (uri, e), 2)
+
        def parse(self, text):
                self._reset()
                for mirrorgroup in ET.XML(text):
@@ -60,7 +76,7 @@ class MirrorParser3:
                                                        "region": mirrorgroup.get("region"),
                                                        "ipv4": e.get("ipv4"),
                                                        "ipv6": e.get("ipv6"),
-                                                       "proto": e.get("protocol"),
+                                                       "proto": e.get("protocol") or self._get_proto(uri),
                                                        }
 
        def tuples(self):

Reply via email to