Author: sebb
Date: Wed Jun 18 22:42:59 2025
New Revision: 1926561
URL: http://svn.apache.org/viewvc?rev=1926561&view=rev
Log:
Validate location entry
Modified:
comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py
Modified:
comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py
URL:
http://svn.apache.org/viewvc/comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py?rev=1926561&r1=1926560&r2=1926561&view=diff
==============================================================================
--- comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py (original)
+++ comdev/projects.apache.org/trunk/scripts/cronjobs/parsecommitteeinfo.py Wed Jun 18 22:42:59 2025
@@ -91,12 +91,14 @@ print("Extracting PMC DOAP file data for
for loc in xmldoc.getElementsByTagName('location') :
url = loc.childNodes[0].data
try:
- if url.startswith('http'):
+ if re.match(r'https?://', url):
rdf = URLopen(url).read()
- else:
+ elif re.match(r'committees/[a-z][-a-z0-9]+\.rdf$', url):
with open("../../data/%s" % url, 'r', encoding='utf-8') as f:
rdf = f.read()
url = "https://svn.apache.org/repos/asf/comdev/projects.apache.org/trunk/data/%s" % url
+ else:
+ print(f"ERROR: Unexpected location: {url}")
rdfxml = ET.fromstring(rdf)
rdfdata = rdfxml[0]
expected = '{http://projects.apache.org/ns/asfext#}pmc'