Author: humbedooh
Date: Tue Aug 6 23:16:23 2019
New Revision: 1864571

URL: http://svn.apache.org/viewvc?rev=1864571&view=rev
Log:
Start work on a WSGI edition of getjson.py, for maximum speed.
Added:
    comdev/reporter.apache.org/trunk/scripts/pdata.py   (with props)
    comdev/reporter.apache.org/trunk/scripts/wsgi.py

Added: comdev/reporter.apache.org/trunk/scripts/pdata.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/scripts/pdata.py?rev=1864571&view=auto
==============================================================================
--- comdev/reporter.apache.org/trunk/scripts/pdata.py (added)
+++ comdev/reporter.apache.org/trunk/scripts/pdata.py Tue Aug 6 23:16:23 2019
@@ -0,0 +1,405 @@
+#!/usr/bin/env python
+"""
+    WSGI script to return data to the reporter.a.o wizard
+
+    It also populates various json files from JIRA if they are stale:
+    data/JIRA/jira_projects.json - list of all JIRA projects
+    data/JIRA/%s.json - one file for each JIRA project
+
+    Reads the following:
+    data/JIRA/jira_projects.json
+    data/JIRA/%s.json
+    data/health.json
+    data/releases/%s.json
+    data/pmcs.json
+    data/projects.json
+    data/mailinglists.json
+    data/maildata_extended.json
+    https://whimsy.apache.org/public/member-info.json
+    https://whimsy.apache.org/public/public_ldap_projects.json
+    data/cache/checker.json
+
+"""
+
+import os, sys, re, json, subprocess, time
+import base64, requests
+
+CACHE_TIMEOUT = 3600
+
+import committee_info
+from urlutils import UrlCache
+
+# This script may be called frequently, so don't just rely on IfNewer checks
+uc = UrlCache(cachedir='../data/cache', interval=CACHE_TIMEOUT, silent=True)
+
+# Path to the reporter.apache.org home directory (absolute)
+RAOHOME_FULL = '/var/www/reporter.apache.org/'
+RAOHOME = RAOHOME_FULL
+
+COMMITTER_INFO = 'https://whimsy.apache.org/public/public_ldap_people.json'
+MEMBER_INFO = 'https://whimsy.apache.org/public/member-info.json'
+PROJECTS = 'https://whimsy.apache.org/public/public_ldap_projects.json'
+DESCRIPTIONS = 'https://projects.apache.org/json/foundation/committees.json'
+
+jmap = {
+    'trafficserver': ['TS'],
+    'cordova': ['CB'],
+    'corinthia': ['COR']
+}
+
+pmap = { # convert mailing list name to PMC name
+    'community': 'comdev',
+    'ws': 'webservices',
+    'hc': 'httpcomponents',
+    'whimsical': 'whimsy',
+    'empire': 'empire-db'
+}
+
+ldapmap = {
+    'webservices': 'ws'
+}
+
+jirapass = ""
+with open(RAOHOME + "data/jirapass.txt", "r") as f:
+    jirapass = f.read().strip()
+    f.close()
+
+def readJson(filename, *default):
+    """Read a JSON file. If the read fails, return the default (if any), otherwise re-raise the exception"""
+    data = {}
+    try:
+        with open(filename, "r") as f:
+            data = json.load(f)
+            f.close()
+    except:
+        if not default:
+            raise
+        else:
+            return default[0] # only want first arg
+    return data
+
+def loadJson(url):
+    resp = uc.get(url, name=None, encoding='utf-8', errors=None)
+    j = json.load(resp)
+    resp.close()
+    return j
+
+projects = loadJson(PROJECTS)['projects']
+members = loadJson(MEMBER_INFO)['members']
+committers = loadJson(COMMITTER_INFO)['people']
+charters = loadJson(DESCRIPTIONS)
+
+def getPMCs(uid):
+    """Returns the array of LDAP committee groups to which the uid belongs.
+    Excludes incubator"""
+    groups = []
+    for group in projects:
+        if group != "incubator" and 'pmc' in projects[group]:
+            if uid in projects[group]['owners']:
+                groups.append(group)
+    groups = [pmap.get(x, x) for x in groups]
+    return groups
+
+
+def isASFMember(uid):
+    """Determine if the uid is a member of the ASF"""
+    return uid in members
+
+def getJIRAProjects(project, tlpid):
+    """Reads data/JIRA/jira_projects.json (re-creating it if it is stale)
+    Returns the list of JIRA projects for the project argument
+    Assumes that the project names match or the project category matches
+    (after trimming "Apache " and spaces and lower-casing)"""
+    project = project.replace("Apache ", "").strip().lower()
+    refresh = True
+    x = {}
+    jiras = []
+    try:
+        mtime = 0
+        try:
+            st = os.stat(RAOHOME + "data/JIRA/jira_projects.json")
+            mtime = st.st_mtime
+        except:
+            pass
+        if mtime >= (time.time() - 86400):
+            refresh = False
+            x = readJson(RAOHOME + "data/JIRA/jira_projects.json")
+        else:
+            if sys.version_info >= (3, 0):
+                base64string = base64.encodestring(('%s:%s' % ('githubbot', jirapass)).encode('ascii'))[:-1]
+            else:
+                base64string = base64.encodestring('%s:%s' % ('githubbot', jirapass))[:-1]
+
+            try:
+                x = requests.get("https://issues.apache.org/jira/rest/api/2/project.json", headers = {"Authorization": "Basic %s" % base64string}).json()
+                with open(RAOHOME + "data/JIRA/jira_projects.json", "w") as f:
+                    json.dump(x, f, indent=1)
+                    f.close()
+            except:
+                pass
+    except:
+        pass
+
+    for entry in x:
+        # Check if this is actually a TLP that is not ours
+        mayuse = True
+        for xtlp in charters:
+            if fixProjectCategory(xtlp['name']) == fixProjectCategory(entry['name']) and xtlp['id'] != tlpid:
+                mayuse = False
+                break
+            elif fixProjectCategory(xtlp['name']) == fixProjectCategory(entry['name']) and xtlp['id'] == tlpid:
+                jiras.append(entry['key'])
+                mayuse = False
+                break
+        if mayuse and 'projectCategory' in entry and fixProjectCategory(entry['projectCategory']['name']) == project:
+            jiras.append(entry['key'])
+    return jiras
+
+def fixProjectCategory(cat):
+    return cat.replace("Apache ", "").replace(" Framework", "").strip().lower()
+
+def getJIRAS(project):
+    """Reads data/JIRA/%s.json % (project), re-creating it if it is stale
+       with the number of issues created and resolved in the last 91 days.
+       Returns array of [created, resolved, project]
+    """
+    refresh = True
+    try:
+        st = os.stat(RAOHOME + "data/JIRA/%s.json" % project)
+        mtime = st.st_mtime
+        if mtime >= (time.time() - (2*86400)):
+            refresh = False
+            x = readJson(RAOHOME + "data/JIRA/%s.json" % project)
+            return x[0], x[1], x[2]
+    except:
+        pass
+
+    if refresh:
+        if sys.version_info >= (3, 0):
+            base64string = base64.encodestring(('%s:%s' % ('githubbot', jirapass)).encode('ascii'))[:-1]
+        else:
+            base64string = base64.encodestring('%s:%s' % ('githubbot', jirapass))[:-1]
+
+        try:
+            headers = {"Authorization": "Basic %s" % base64string}
+            req = requests.get("""https://issues.apache.org/jira/rest/api/2/search?jql=project%20=%20'""" + project + """'%20AND%20created%20%3E=%20-91d""", headers = headers)
+            cdata = req.json()
+            req = requests.get("""https://issues.apache.org/jira/rest/api/2/search?jql=project%20=%20'""" + project + """'%20AND%20resolved%20%3E=%20-91d""", headers = headers)
+            rdata = req.json()
+            with open(RAOHOME + "data/JIRA/%s.json" % project, "w") as f:
+                json.dump([cdata['total'], rdata['total'], project], f, indent=1)
+                f.close()
+            return cdata['total'], rdata['total'], project
+        except Exception as err:
+            # Don't create an empty file if the request fails.
+            # The likely cause is that the project does not use JIRA,
+            # or getjson has been invoked with an invalid pmc name. Invalid files will cause the refresh script to
+            # retry the requests unnecessarily.
+            # Furthermore, if there is a temporary issue, creating an empty file will prevent a retry for 48 hours.
+#            with open(RAOHOME + "data/JIRA/%s.json" % project, "w") as f:
+#                json.dump([0, 0, None], f, indent=1)
+#                f.close()
+            return 0, 0, None
+"""
+Reads:
+    - committee_info.PMCsummary()
+    - data/health.json
+
+@return:
+    - dict containing the pmc name & chair extracted from committee_info.PMCsummary()
+    - list of project names
+    - health entry from data/health.json
+"""
+
+lastPSummary = 0
+
+def getProjectData(project = None):
+    global lastPSummary
+    if lastPSummary < (time.time() - CACHE_TIMEOUT):
+        global pmcSummary
+        pmcSummary = committee_info.PMCsummary()
+        lastPSummary = time.time()
+    x = {}
+    y = []
+    z = {}
+    for xproject in pmcSummary:
+        y.append(xproject)
+        if xproject == project:
+            x['name'] = pmcSummary[project]['name']
+            x['chair'] = pmcSummary[project]['chair']
+    if project:
+        for entry in dataHealth:
+            if entry['group'] == project:
+                z = entry
+        for xtlp in charters:
+            if xtlp.get('id') == project:
+                x['charter'] = xtlp.get('charter', '')
+    return x, y, z
+
+def getReleaseData(project):
+    """Reads data/releases/%s.json and returns the contents"""
+    return readJson(RAOHOME + "data/releases/%s.json" % project, {})
+
+groups = []
+
+pmcSummary = None
+dataHealth = None
+pchanges = None
+cchanges = None
+bugzillastats = None
+lastRead = 0
+ml = None
+mld = None
+pmcdates = None
+checker_json = None
+
+def generate(user, project, runkibble):
+    global lastRead
+    if re.match(r"^[-a-zA-Z0-9_.]+$", user):
+        isMember = isASFMember(user)
+
+        groups = getPMCs(user)
+
+        # Check if we need to re-read json inputs...
+        if lastRead < (time.time() - CACHE_TIMEOUT):
+            global pmcSummary, dataHealth, pchanges, cchanges, bugzillastats, ml, mld, pmcdates, checker_json
+            pmcSummary = committee_info.PMCsummary()
+            dataHealth = readJson(RAOHOME + "data/health.json", [])
+            pchanges = readJson(RAOHOME + "data/pmcs.json")
+            cchanges = readJson(RAOHOME + "data/projects.json")
+            bugzillastats = readJson(RAOHOME + "data/bugzillastats.json", {})
+            mld = readJson(RAOHOME + "data/maildata_extended.json")
+            ml = readJson(RAOHOME + "data/mailinglists.json")
+            pmcdates = readJson(RAOHOME + "data/pmcdates.json")
+            # fetch checker_json from checker.apache.org ; use cache as fallback
+            try:
+                checker_json = requests.get("https://checker.apache.org/json/", timeout = 1.0).json()
+            except:
+                checker_json = readJson(RAOHOME + "data/cache/checker.json", None)
+            lastRead = time.time()
+
+        emails = {}
+
+        for entry in mld: # e.g. hc-dev, ant-users, ws-dev
+            tlp = entry.split("-")[0]
+            nentry = entry
+            if tlp == "empire":
+                tlp = "empire-db"
+                nentry = entry.replace("empire-", "empire-db-")
+            if tlp in pmap: # convert ml prefix to PMC internal name
+                tlp = pmap[tlp]
+            if tlp == project:
+                emails[nentry] = mld[entry]
+
+        dates = {}
+        bdata = {} # bugzilla data
+        jdata = {}
+        cdata = {}
+        ddata = {}
+        rdata = {}
+        allpmcs = []
+        keys = {}
+        count = {}
+        health = {}
+
+        checker = {}
+
+        group = project
+
+        jiras = []
+        count = [0, 0]
+        xgroup = group
+        if group in ldapmap:
+            xgroup = ldapmap[group]
+        if xgroup in pchanges:
+            count[0] = len(pchanges[xgroup])
+        if xgroup in cchanges:
+            count[1] = len(cchanges[xgroup])
+        jdata = [0, 0, None]
+        ddata, allpmcs, health = getProjectData(group)
+        rdata = getReleaseData(group)
+        if group in bugzillastats:
+            bdata = bugzillastats[group]
+        else:
+            bdata = [0, 0, {}]
+        # a PMC may have projects using Bugzilla *and* JIRA - e.g. Tomcat - (or neither)
+        jiraname = group.upper()
+        if group in jmap:
+            keys = []
+            jdata[2] = []
+            for jiraname in jmap[group]:
+                x, y, p = getJIRAS(jiraname)
+                jdata[0] += x
+                jdata[1] += y
+                if x > 0 or y > 0:
+                    jdata[2].append(p)
+                keys.append(jiraname)
+        elif group in ddata and 'name' in ddata:
+            jiras = getJIRAProjects(ddata['name'], group)
+            keys = jiras
+            jdata[2] = []
+            for jiraname in jiras:
+                x, y, p = getJIRAS(jiraname)
+                jdata[0] += x
+                jdata[1] += y
+                if x > 0 or y > 0:
+                    jdata[2].append(p)
+        elif jiraname:
+            keys = [jiraname]
+            x, y, p = getJIRAS(jiraname)
+            jdata[0] += x
+            jdata[1] += y
+            jdata[2] = p
+
+        cdata = cdata[xgroup] if xgroup in cdata else {'pmc': {}, 'committer': {}}
+        for pmc in pchanges:
+            if pmc == xgroup:
+                for member in pchanges[pmc]:
+                    if pchanges[pmc][member][1] > 0:
+                        cdata['pmc'][member] = pchanges[pmc][member]
+        for pmc in cchanges:
+            if pmc == xgroup:
+                for member in cchanges[pmc]:
+                    if cchanges[pmc][member][1] > 0:
+                        cdata['committer'][member] = cchanges[pmc][member]
+        if group in pmcdates: # Make sure we have this PMC in the JSON, so as to not bork
+            dates = pmcdates[group] # only send the groups we want
+        if checker_json and 'meta' in checker_json and 'projects' in checker_json:
+            meta = checker_json['meta']
+            prjs = checker_json['projects']
+            checker = prjs[group] if group in prjs else {'errors': 0}
+            checker['meta'] = meta
+
+        # Add in kibble data if called with only= OR only one project...
+        kibble = None
+        if runkibble:
+            try:
+                xenv = os.environ.copy()
+                if 'SCRIPT_NAME' in xenv:
+                    del xenv['SCRIPT_NAME']
+                cmd = ('%s/site/wizard/kibble.py' % RAOHOME_FULL, project)
+                if jdata and jdata[2]:
+                    cmd += tuple(jdata[2])
+                txt = subprocess.check_output(cmd, env = xenv)
+                kibble = json.loads(txt)
+            except subprocess.CalledProcessError as e:
+                return None
+
+
+        output = {
+            'count': count,
+            'delivery': emails,
+            'jira': jdata,
+            'bugzilla': bdata,
+            'changes': cdata,
+            'pmcdates': dates,
+            'pdata': ddata,
+            'releases': rdata,
+            'keys': keys,
+            'health': health,
+            'checker': checker,
+            'you': committers[user],
+            'kibble': kibble,
+        }
+
+        return output
+    else:
+        return {'okay': False, 'error': "Invalid user credentials!"}

Propchange: comdev/reporter.apache.org/trunk/scripts/pdata.py
------------------------------------------------------------------------------
    svn:executable = *

Added: comdev/reporter.apache.org/trunk/scripts/wsgi.py
URL: http://svn.apache.org/viewvc/comdev/reporter.apache.org/trunk/scripts/wsgi.py?rev=1864571&view=auto
==============================================================================
--- comdev/reporter.apache.org/trunk/scripts/wsgi.py (added)
+++ comdev/reporter.apache.org/trunk/scripts/wsgi.py Tue Aug 6 23:16:23 2019
@@ -0,0 +1,57 @@
+#!/usr/bin/env python2.7
+import os
+import cgi
+import json
+import pdata
+import time
+import re
+
+CACHE_TIMEOUT = 3600
+
+
+def app(environ, start_fn):
+    committers = pdata.loadJson(pdata.COMMITTER_INFO)['people']
+    project = environ.get('QUERY_STRING')
+    user = environ.get('HTTP_X_AUTHENTICATED_USER')
+
+    output = {'okay': False, 'error': 'Unknown user ID provided!'}
+
+    dumps = {}
+    groups = []
+    if user:
+        groups = pdata.getPMCs(user)
+    if project and user and re.match(r"[-a-z0-9]+", project):
+        groups = [project]
+
+    for xproject in groups:
+
+        # Try cache first? (max 1 hour old, see CACHE_TIMEOUT)
+        wanted_file = "/tmp/pdata-%s.json" % xproject
+        if xproject == project:
+            wanted_file = "/tmp/pdata-kibbled-%s.json" % xproject
+        if (os.path.exists(wanted_file) and os.path.getmtime(wanted_file) > (time.time() - CACHE_TIMEOUT)):
+            mpdata = json.load(open(wanted_file, "r"))
+        # If the cache missed, generate from scratch
+        else:
+            mpdata = pdata.generate(user, xproject, xproject == project)
+            open(wanted_file, "w").write(json.dumps(mpdata))
+        # Weave results into combined object, mindful of kibble data
+        for k, v in mpdata.items():
+            if k not in dumps:
+                dumps[k] = {}
+            if (k != 'kibble'):
+                dumps[k][xproject] = v
+            if k == 'kibble' and v:
+                dumps['kibble'] = v
+
+    # Set personalized vars, dump
+    if dumps and user:
+        ddata, allpmcs, health = pdata.getProjectData()
+        dumps['you'] = committers[user]
+        dumps['all'] = allpmcs
+        dumps['pmcs'] = groups
+        output = dumps
+
+    start_fn('200 OK', [('Content-Type', 'application/json')])
+
+    return [json.dumps(output, indent = 2).encode('ascii')]
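For anyone wanting to smoke-test the new app() callable without httpd in front of it, a minimal sketch along the following lines should work. This is illustrative only and not part of the commit: it assumes it is run from the scripts/ directory (so that pdata, committee_info and the data/ files resolve), and it fakes the X-Authenticated-User header that the production setup would normally inject, using a placeholder uid and the standard-library wsgiref server.

#!/usr/bin/env python2.7
# Illustrative test harness, not part of this revision.
from wsgiref.simple_server import make_server

import wsgi  # the wsgi.py added in this revision


def fake_auth(application, user):
    # Middleware that injects the X-Authenticated-User header which httpd
    # would normally set after authenticating the request.
    def wrapped(environ, start_fn):
        environ['HTTP_X_AUTHENTICATED_USER'] = user
        return application(environ, start_fn)
    return wrapped


if __name__ == '__main__':
    # 'someuser' is a placeholder; use any valid committer uid.
    httpd = make_server('127.0.0.1', 8000, fake_auth(wsgi.app, 'someuser'))
    print("Serving on http://127.0.0.1:8000/")
    httpd.serve_forever()

A request such as curl 'http://127.0.0.1:8000/?comdev' should then return the combined JSON object for that project, the same shape the reporter wizard fetches.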