Author: sebb
Date: Wed Jun 18 22:42:59 2025
New Revision: 1926561

URL: http://svn.apache.org/viewvc?rev=1926561&view=rev
Log:
Validate location entry

Modified:
    comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py

Modified: comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py
URL: http://svn.apache.org/viewvc/comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py?rev=1926561&r1=1926560&r2=1926561&view=diff
==============================================================================
--- comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py (original)
+++ comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py Wed Jun 18 22:42:59 2025
@@ -91,12 +91,14 @@ print("Extracting PMC DOAP file data for
 for loc in xmldoc.getElementsByTagName('location') :
     url = loc.childNodes[0].data
     try:
-        if url.startswith('http'):
+        if re.match(r'https?://', url):
             rdf = URLopen(url).read()
-        else:
+        elif re.match(r'committees/[a-z][-a-z0-9]+\.rdf$', url):
             with open("../../data/%s" % url, 'r', encoding='utf-8') as f:
                 rdf = f.read()
             url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/trunk/data/%s" % url
+        else:
+            print(f"ERROR: Unexpected location: {url}")
         rdfxml = ET.fromstring(rdf)
         rdfdata = rdfxml[0]
         expected = '{http://projects.apache.org/ns/asfext#}pmc'


Reply via email to