[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: fe8d9eedef4fa5b406f304c83e064d62860d35df Author: Gilles Dartiguelongue gentoo org> AuthorDate: Mon Jan 23 00:06:46 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Mon Jan 23 00:06:46 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=fe8d9eed sync: fix a missing .items to iterate on dict backend/lib/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 25b6ea0..c3ed83c 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -242,7 +242,7 @@ def sync_versions(): db.session.delete(kwd_obj) # 3.2 cleanup dead revisions -for version, ver_obj in pkg_versions: +for version, ver_obj in pkg_versions.items(): if version not in pkg['versions']: db.session.delete(ver_obj)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: edc09cb3b2f3862e6fc5d5277041fbce091d3281 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 17:45:56 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 17:45:56 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=edc09cb3 sync: add version and keyword synchronization backend/lib/sync.py | 42 ++ 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 22008ea..25b6ea0 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -5,7 +5,7 @@ from datetime import datetime import requests from .. import app, db -from .models import Category, Maintainer, Package, PackageVersion +from .models import Category, Keyword, Maintainer, Package, PackageVersion SYNC_BUFFER_SECS = 60*60 #1 hour proj_url = "https://api.gentoo.org/metastructure/projects.xml"; @@ -165,6 +165,8 @@ def sync_versions(): for maintainer in Maintainer.query.all(): existing_maintainers[maintainer.email] = maintainer +all_keywords = {kwd.name: kwd for kwd in Keyword.query.all()} + packages_to_sync = Package.query.filter(Package.last_sync_ts < ts).order_by(Package.last_sync_ts).all() print("Going to sync %d packages%s" % (len(packages_to_sync), (" (oldest sync UTC timestamp: %s)" % packages_to_sync[0].last_sync_ts if len(packages_to_sync) else ""))) @@ -183,7 +185,7 @@ def sync_versions(): if 'description' in pkg: package.description = pkg['description'] - # 2. refresh maintainers +# 2. refresh maintainers maintainers = [] for maint in pkg.get('maintainers', []): if 'email' not in maint or 'type' not in maint: @@ -208,9 +210,41 @@ def sync_versions(): # Intentionally outside if 'maintainers' in pkg, because if there are no maintainers in JSON, it's falled to maintainer-needed and we need to clean out old maintainer entries package.maintainers = maintainers # TODO: Retain order to know who is primary; retain description associated with the maintainership -# TODO: 3. refresh versions +# 3.1. 
refresh versions +pkg_versions = {pkgver.version: pkgver for pkgver in package.versions} +for version in pkg['versions']: +if version['version'] not in pkg_versions: +pkgver = PackageVersion(version=version['version'], +package=package) +db.session.add(pkgver) +else: +pkgver = pkg_versions[version['version']] + +pkg_keywords = {kwd.name: kwd for kwd in pkgver.keywords} + +# 4.1. synchronize new keywords +for keyword in version['keywords']: +if keyword in pkg_keywords: +continue + +# TODO: keywords should be initialized earlier to not have to +# worry about their existence here +if keyword not in all_keywords: +kwd = Keyword(name=keyword) +db.session.add(kwd) +all_keywords[keyword] = kwd + +pkgver.keywords.append(all_keywords[keyword]) + +# 4.2. cleanup removed keywords +for keyword, kwd_obj in pkg_keywords.items(): +if keyword not in version['keywords']: +db.session.delete(kwd_obj) -# TODO: 4. refresh keywords +# 3.2 cleanup dead revisions +for version, ver_obj in pkg_versions: +if version not in pkg['versions']: +db.session.delete(ver_obj) # 5. mark package as refreshed package.last_sync_ts = now
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 01fe45522776507f8b9e5d973c2982f66d78b6db Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 17:12:53 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 17:12:53 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=01fe4552 sync: add detail points to sync_versions backend/lib/sync.py | 15 +++ 1 file changed, 15 insertions(+) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 02e1116..22008ea 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -152,6 +152,12 @@ def sync_packages(): db.session.commit() def sync_versions(): +"""Synchronize packages version data from packages.gentoo.org. + +For each package that has not been updated in the last SYNC_BUFFER_SECS, +pull package information and refresh its description, maintainers, +versions and keywords. +""" cnt = 0 ts = datetime.utcfromtimestamp(time.time() - SYNC_BUFFER_SECS) now = datetime.utcnow() @@ -172,9 +178,12 @@ def sync_versions(): pkg = data.json() print ("Updating package: %s" % package.full_name) + +# 1. refresh description if 'description' in pkg: package.description = pkg['description'] + # 2. refresh maintainers maintainers = [] for maint in pkg.get('maintainers', []): if 'email' not in maint or 'type' not in maint: @@ -198,6 +207,12 @@ def sync_versions(): # Intentionally outside if 'maintainers' in pkg, because if there are no maintainers in JSON, it's falled to maintainer-needed and we need to clean out old maintainer entries package.maintainers = maintainers # TODO: Retain order to know who is primary; retain description associated with the maintainership + +# TODO: 3. refresh versions + +# TODO: 4. refresh keywords + +# 5. mark package as refreshed package.last_sync_ts = now if not cnt % 100:
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: b888c93b7892c532385626c9d2a55a8b11661e99 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 12:35:17 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:35:17 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=b888c93b sync: use dict facilities for key retrieval with a default backend/lib/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 4cbfe1b..723c3af 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -192,7 +192,7 @@ def sync_versions(): if maint['type'] == 'project': is_project = True print("Adding %s maintainer %s" % ("project" if is_project else "individual", email)) -new_maintainer = Maintainer(email=email, is_project=is_project, name=maint['name'] if 'name' in maint else None) +new_maintainer = Maintainer(email=email, is_project=is_project, name=maint.get('name')) db.session.add(new_maintainer) existing_maintainers[email] = new_maintainer maintainers.append(new_maintainer)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: f969ccffe04df2d1eeb014dfe67d58177da476fb Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 12:34:13 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:34:13 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=f969ccff sync: reduce unneeded conditional evaluation tags cannot be evaluated to go through these branches after the first if so switch to elif. backend/lib/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 429d14b..4cbfe1b 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -36,7 +36,7 @@ def get_project_data(): tag = elem.tag.lower() if tag in ['email']: proj[tag] = elem.text.lower() -if tag in ['name', 'url', 'description']: +elif tag in ['name', 'url', 'description']: proj[tag] = elem.text elif tag == 'member': member = {} @@ -46,7 +46,7 @@ def get_project_data(): member_tag = member_elem.tag.lower() if member_tag in ['email']: member[member_tag] = member_elem.text.lower() -if member_tag in ['name', 'role']: +elif member_tag in ['name', 'role']: member[member_tag] = member_elem.text if 'email' in member: proj['members'].append(member)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: c71c75d3fbf28528c844f8280e0ef499dacb1819 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 12:35:58 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:35:58 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=c71c75d3 sync: use dict facilities for key retrieval with a default backend/lib/sync.py | 39 +++ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 723c3af..02e1116 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -176,26 +176,25 @@ def sync_versions(): package.description = pkg['description'] maintainers = [] -if 'maintainers' in pkg: -for maint in pkg['maintainers']: -if 'email' not in maint or 'type' not in maint: -raise ValueError( -"Package %s maintainer %s entry not GLEP 67 valid" % -(package.full_name, maint) -) - -email = maint['email'].lower() -if email in existing_maintainers: -maintainers.append(existing_maintainers[email]) -else: -is_project = False -if maint['type'] == 'project': -is_project = True -print("Adding %s maintainer %s" % ("project" if is_project else "individual", email)) -new_maintainer = Maintainer(email=email, is_project=is_project, name=maint.get('name')) -db.session.add(new_maintainer) -existing_maintainers[email] = new_maintainer -maintainers.append(new_maintainer) +for maint in pkg.get('maintainers', []): +if 'email' not in maint or 'type' not in maint: +raise ValueError( +"Package %s maintainer %s entry not GLEP 67 valid" % +(package.full_name, maint) +) + +email = maint['email'].lower() +if email in existing_maintainers: +maintainers.append(existing_maintainers[email]) +else: +is_project = False +if maint['type'] == 'project': +is_project = True +print("Adding %s maintainer %s" % ("project" if is_project else "individual", email)) +new_maintainer = Maintainer(email=email, is_project=is_project, name=maint.get('name')) +db.session.add(new_maintainer) +existing_maintainers[email] = new_maintainer 
+maintainers.append(new_maintainer) # Intentionally outside if 'maintainers' in pkg, because if there are no maintainers in JSON, it's falled to maintainer-needed and we need to clean out old maintainer entries package.maintainers = maintainers # TODO: Retain order to know who is primary; retain description associated with the maintainership
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: e8f79bda15a675e5802b0daad41144b082d20247 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 12:07:52 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:23:56 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=e8f79bda sync: sort imports according to PEP8 backend/lib/sync.py | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index c837c23..5e8240d 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,7 +1,9 @@ -import xml.etree.ElementTree as ET -import requests import time +import xml.etree.ElementTree as ET from datetime import datetime + +import requests + from .. import app, db from .models import Category, Maintainer, Package, PackageVersion @@ -10,6 +12,7 @@ proj_url = "https://api.gentoo.org/metastructure/projects.xml"; pkg_url_base = "https://packages.gentoo.org/"; http_session = requests.session() + def get_project_data(): projects = {} data = http_session.get(proj_url)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 29a6bea1536dd23adbc84454aacb2c81d0499f82 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 12:21:36 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:23:56 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=29a6bea1 sync: replace assert with ValueError raise Simpler expression, probably here to stay. backend/lib/sync.py | 9 + 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 0aab3bc..429d14b 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -178,10 +178,11 @@ def sync_versions(): maintainers = [] if 'maintainers' in pkg: for maint in pkg['maintainers']: -assert ( -'email' in maint and 'type' in maint, -"Package %s maintainer %s entry not GLEP 67 valid" % (package.full_name, maint) -) +if 'email' not in maint or 'type' not in maint: +raise ValueError( +"Package %s maintainer %s entry not GLEP 67 valid" % +(package.full_name, maint) +) email = maint['email'].lower() if email in existing_maintainers:
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 5e7347647516660603dddeedcf570d0cfef27b1a Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 12:18:00 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:23:56 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5e734764 sync: define project keys default values Costs less than checking for it in each loop iteration and does no harm later to loop on empty lists. backend/lib/sync.py | 64 +++-- 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 5e8240d..0aab3bc 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -28,7 +28,10 @@ def get_project_data(): if proj_elem.tag.lower() != 'project': print("Skipping unknown subtag <%s>" % proj_elem.tag) continue -proj = {} +proj = { +'members': [], +'subprojects': [], +} for elem in proj_elem: tag = elem.tag.lower() if tag in ['email']: @@ -46,14 +49,11 @@ def get_project_data(): if member_tag in ['name', 'role']: member[member_tag] = member_elem.text if 'email' in member: -if 'members' not in proj: -proj['members'] = [] proj['members'].append(member) elif tag == 'subproject': if 'ref' in elem.attrib: -if 'subprojects' not in proj: -proj['subprojects'] = [] -# subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is True or False. TODO: Might change if sync code will want it differently +# subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is True or False. 
+# TODO: Might change if sync code will want it differently proj['subprojects'].append((elem.attrib['ref'].lower(), True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') else False)) else: print("Invalid tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "") @@ -86,32 +86,34 @@ def sync_projects(): new_maintainer = Maintainer(email=email, is_project=True, description=data['description'], name=data['name'], url=data['url']) db.session.add(new_maintainer) existing_maintainers[email] = new_maintainer + members = [] -if 'subprojects' in data: -for subproject_email, inherit_members in data['subprojects']: -# TODO: How should we handle inherit_members? -if subproject_email in existing_maintainers: -members.append(existing_maintainers[subproject_email]) -else: -print("Creating new project entry for subproject: %s" % subproject_email) -new_subproject = Maintainer(email=subproject_email, is_project=True) -db.session.add(new_subproject) -existing_maintainers[subproject_email] = new_subproject -members.append(new_subproject) -if 'members' in data: -for member in data['members']: -if member['email'] in existing_maintainers: -# TODO: Stop overwriting the name from master data, if/once we have a proper sync source for individual maintainers (Gentoo LDAP?) -if 'name' in member: -existing_maintainers[member['email']].name = member['name'] -members.append(existing_maintainers[member['email']]) -else: -print("Adding individual maintainer %s" % member['email']) -new_maintainer = Maintainer(email=member['email'], is_project=False, name=member['name'] if 'name' in member else None) -db.session.add(new_maintainer) -existing_maintainers[member['email']] = new_maintainer -members.append(new_maintainer) -# TODO: Include role information in the association? + +for subproject_email, inherit_members in data['subprojects']: +# TODO: How should we handle inherit_members? 
+if subproject_email in existing_maintainers: +members.append(existing_maintainers[subproject_email]) +else: +print("Creating new project entry for subproject: %s" % subproject_email) +new_subproject = Maintainer(email=subproject_email, is_project=True) +db.session.add(new_subproject) +existing_maintainers[subproject_email] = new_subproject +members.app
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: ed727d30df105b6852f5118baa5a454965b6f4ba Author: Mart Raudsepp gentoo org> AuthorDate: Sun Jan 22 12:07:48 2017 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Jan 22 12:07:48 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=ed727d30 sync: Use dict comprehension in sync_categories as well backend/lib/sync.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index dbb44c2..c837c23 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -117,10 +117,8 @@ def sync_categories(): data = http_session.get(url) # TODO: Handle response error (if not data) categories = data.json() -existing_categories = {} # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5 -for cat in Category.query.all(): -existing_categories[cat.name] = cat +existing_categories = {cat.name: cat for cat in Category.query.all()} for category in categories: if category['name'] in existing_categories: existing_categories[category['name']].description = category['description']
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 24047d7602bbdbaae60f88e6811dc8570227161f Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 11:58:33 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:00:24 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=24047d76 sync: use ORM magics in sync_packages ORM knows how to map objects to ids through relationships so skip the details and focus on the thing you want to do. backend/lib/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index ba31477..dbb44c2 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -144,7 +144,7 @@ def sync_packages(): if package['name'] in existing_packages: continue # TODO: Update description once we keep that in DB else: -new_pkg = Package(category_id=category.id, name=package['name']) +new_pkg = Package(category=category, name=package['name']) db.session.add(new_pkg) db.session.commit()
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 793722996da7f8c9120c678b16350363d30c6bf1 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 11:39:41 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 12:00:20 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=79372299 sync: use assert for GLEP67 compliance check Should never be raised actually but who knows. backend/lib/sync.py | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 7c499b5..ba31477 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -175,15 +175,17 @@ def sync_versions(): maintainers = [] if 'maintainers' in pkg: for maint in pkg['maintainers']: -if 'email' not in maint: -print("WARNING: Package %s was told to have a maintainer without an e-mail identifier" % package.full_name) -continue +assert ( +'email' in maint and 'type' in maint, +"Package %s maintainer %s entry not GLEP 67 valid" % (package.full_name, maint) +) + email = maint['email'].lower() if email in existing_maintainers: maintainers.append(existing_maintainers[email]) else: is_project = False -if 'type' in maint and maint['type'] == 'project': +if maint['type'] == 'project': is_project = True print("Adding %s maintainer %s" % ("project" if is_project else "individual", email)) new_maintainer = Maintainer(email=email, is_project=is_project, name=maint['name'] if 'name' in maint else None)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: fab9c6f0ce09830aa95fc3bdfe09c03663094660 Author: Mart Raudsepp gentoo org> AuthorDate: Sun Jan 22 11:57:24 2017 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Jan 22 11:57:24 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=fab9c6f0 sync: Fix pkg sync for packages that have a same named pkg in another category Also fixes an InstrumentedList issue due to change from the categories.packages relationship from dynamic loading to select in commit 8d90fa1009 having broken that earlier backend/lib/sync.py | 8 +--- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index d292291..7c499b5 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -131,12 +131,6 @@ def sync_categories(): def sync_packages(): for category in Category.query.all(): -if not category.packages: -print('Category %s has no packages' % category.name) -existing_packages = [] -else: -existing_packages = category.packages.all() - data = http_session.get(pkg_url_base + "categories/" + category.name + ".json") if not data: print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone? @@ -144,7 +138,7 @@ def sync_packages(): packages = data.json()['packages'] # TODO: Use UPSERT instead (on_conflict_do_update) -existing_packages = {pkg.name: pkg for pkg in Package.query.all()} +existing_packages = {pkg.name: pkg for pkg in category.packages} for package in packages: if package['name'] in existing_packages:
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: cd3166150bd42dc8b516e2776d4093418b19d423 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 11:03:03 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 11:04:36 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=cd316615 sync: fix broken sync_packages I think there is a problem in the logic here but at least this gets me past the initial sync. backend/lib/sync.py | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 744811b..48629cc 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -131,7 +131,12 @@ def sync_categories(): def sync_packages(): for category in Category.query.all(): -existing_packages = category.packages.all() +if not category.packages: +print('Category %s has no packages' % category.name) +existing_packages = [] +else: +existing_packages = category.packages.all() + data = http_session.get(pkg_url_base + "categories/" + category.name + ".json") if not data: print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 5f53c4b92b93e9206089a15ff3851925ed3b8952 Author: Gilles Dartiguelongue gentoo org> AuthorDate: Sun Jan 22 11:04:12 2017 + Commit: Gilles Dartiguelongue gentoo org> CommitDate: Sun Jan 22 11:04:40 2017 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5f53c4b9 sync: use dict-comprehension in sync_packages backend/lib/sync.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 48629cc..d292291 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -143,9 +143,9 @@ def sync_packages(): continue packages = data.json()['packages'] # TODO: Use UPSERT instead (on_conflict_do_update) -existing_packages = {} -for pkg in Package.query.all(): -existing_packages[pkg.name] = pkg + +existing_packages = {pkg.name: pkg for pkg in Package.query.all()} + for package in packages: if package['name'] in existing_packages: continue # TODO: Update description once we keep that in DB
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 8d90fa100941d73a026a7270f64d16fbe65dc8a5 Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 07:09:52 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 07:09:52 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=8d90fa10 models: Add preliminary model and fields for keyword and p.mask storage backend/lib/models.py | 24 ++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/backend/lib/models.py b/backend/lib/models.py index 2eb9e8c..010d58f 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -2,6 +2,18 @@ from datetime import datetime from .. import db +class Keyword(db.Model): +id = db.Column(db.Integer, primary_key=True) +# current longest entries would be of length 16 with "~sparc64-freebsd" and "~sparc64-solaris" +name = db.Column(db.Unicode(20), unique=True, nullable=False) # TODO: Force lower case? + +@property +def stable(self): +return not self.name.startswith('~') + +def __repr__(self): +return "" % self.name + class Category(db.Model): id = db.Column(db.Integer, primary_key=True) name = db.Column(db.Unicode(30), unique=True, nullable=False) @@ -19,12 +31,13 @@ class Package(db.Model): id = db.Column(db.Integer, primary_key=True) name = db.Column(db.Unicode(128), nullable=False) category_id = db.Column(db.Integer, db.ForeignKey('category.id'), nullable=False) -category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) +category = db.relationship('Category', backref=db.backref('packages', lazy='select')) description = db.Column(db.Unicode(500)) last_sync_ts = db.Column(db.TIMESTAMP, nullable=False, default=datetime.utcfromtimestamp(0)) maintainers = db.relationship("Maintainer", secondary=package_maintainer_rel_table, backref='directly_maintained_packages') +# versions backref @property def full_name(self): @@ -33,11 +46,18 @@ class Package(db.Model): def __repr__(self): return "" % (self.category.name, self.name) +package_version_keywords_rel_table = 
db.Table('package_version_keywords_rel', +db.Column('package_version_id', db.Integer, db.ForeignKey('package_version.id')), +db.Column('keyword_id', db.Integer, db.ForeignKey('keyword.id')), +) + class PackageVersion(db.Model): id = db.Column(db.Integer, primary_key=True) version = db.Column(db.Unicode(128), nullable=False) package_id = db.Column(db.Integer, db.ForeignKey('package.id'), nullable=False) -package = db.relationship('Package', backref=db.backref('versions', lazy='dynamic')) +package = db.relationship('Package', backref=db.backref('versions', lazy='select')) +keywords = db.relationship("Keyword", secondary=package_version_keywords_rel_table) +masks = db.Column(db.UnicodeText, nullable=True) # Concatenated mask reasons if p.masked, NULL if not a masked version. TODO: arch specific masks def __repr__(self): return "" % (self.package.category.name, self.package.name, self.version)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 5f3073d21e0748a9414fbd516c3e032d0456ab35 Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 04:41:46 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 04:41:46 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5f3073d2 sync: Always handle e-mails in lower case to not end up with duplicates Suggested-by: Doug Freed mtu.edu> backend/lib/models.py | 1 + backend/lib/sync.py | 24 ++-- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/backend/lib/models.py b/backend/lib/models.py index ba20622..2eb9e8c 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -50,6 +50,7 @@ maintainer_project_membership_rel_table = db.Table('maintainer_project_membershi class Maintainer(db.Model): id = db.Column(db.Integer, primary_key=True) +# TODO: This has to be unique case insensitive. Currently we have to always force lower() to guarantee this and find the proper maintainer entry; later we might want to use some sort of NOCASE collate rules here to keep the capitalization as preferred per master data email = db.Column(db.Unicode(50), nullable=False, unique=True) is_project = db.Column(db.Boolean, nullable=False, server_default='f', default=False) name = db.Column(db.Unicode(128)) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 7ba583d..744811b 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -28,7 +28,9 @@ def get_project_data(): proj = {} for elem in proj_elem: tag = elem.tag.lower() -if tag in ['email', 'name', 'url', 'description']: +if tag in ['email']: +proj[tag] = elem.text.lower() +if tag in ['name', 'url', 'description']: proj[tag] = elem.text elif tag == 'member': member = {} @@ -36,19 +38,20 @@ def get_project_data(): member['is_lead'] = True for member_elem in elem: member_tag = member_elem.tag.lower() -if member_tag in ['email', 'name', 'role']: +if member_tag in ['email']: +member[member_tag] = member_elem.text.lower() +if member_tag in ['name', 'role']: member[member_tag] = 
member_elem.text if 'email' in member: if 'members' not in proj: proj['members'] = [] proj['members'].append(member) -pass elif tag == 'subproject': if 'ref' in elem.attrib: if 'subprojects' not in proj: proj['subprojects'] = [] # subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is True or False. TODO: Might change if sync code will want it differently -proj['subprojects'].append((elem.attrib['ref'], True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') else False)) +proj['subprojects'].append((elem.attrib['ref'].lower(), True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') else False)) else: print("Invalid tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "") else: @@ -77,7 +80,7 @@ def sync_projects(): existing_maintainers[email].url = data['url'] else: print ("Adding project %s" % email) -new_maintainer = Maintainer(email=data['email'], is_project=True, description=data['description'], name=data['name'], url=data['url']) +new_maintainer = Maintainer(email=email, is_project=True, description=data['description'], name=data['name'], url=data['url']) db.session.add(new_maintainer) existing_maintainers[email] = new_maintainer members = [] @@ -176,16 +179,17 @@ def sync_versions(): if 'email' not in maint: print("WARNING: Package %s was told to have a maintainer without an e-mail identifier" % package.full_name) continue -if maint['email'] in existing_maintainers: # FIXME: Some proxy-maintainers are using mixed case e-mail address, right now we'd be creating duplicates right now if the case is different across different packages -maintainers.append(existing_maintainers[maint['email']]) +email = maint['email'].lower() +if email in existing_maintainers: +maintainers.append(existing_maintainers[email]) else: is_project = False if 'type' in maint and maint['type'] == 'project': is_project = True
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 0522c4ccf0f4ca737572b8164cde6bb9c498ba7f Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 02:52:48 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 02:52:48 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=0522c4cc sync: Increase the sync delta to 1 hour and print the sync count and oldest TS at start backend/lib/sync.py | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 8c687c6..7ba583d 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -5,7 +5,7 @@ from datetime import datetime from .. import app, db from .models import Category, Maintainer, Package, PackageVersion -SYNC_BUFFER_SECS = 30*60 +SYNC_BUFFER_SECS = 60*60 #1 hour proj_url = "https://api.gentoo.org/metastructure/projects.xml"; pkg_url_base = "https://packages.gentoo.org/"; http_session = requests.session() @@ -154,7 +154,10 @@ def sync_versions(): for maintainer in Maintainer.query.all(): existing_maintainers[maintainer.email] = maintainer -for package in Package.query.filter(Package.last_sync_ts < ts).order_by(Package.last_sync_ts).all(): +packages_to_sync = Package.query.filter(Package.last_sync_ts < ts).order_by(Package.last_sync_ts).all() +print("Going to sync %d packages%s" % (len(packages_to_sync), (" (oldest sync UTC timestamp: %s)" % packages_to_sync[0].last_sync_ts if len(packages_to_sync) else ""))) + +for package in packages_to_sync: cnt += 1 data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json") if not data:
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: c6f4ea5ccc10c9441345f83d9ea6b0d2a121ede4 Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 02:39:40 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 02:39:40 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=c6f4ea5c sync: Don't forget to commit db transaction after all packages are synced Sometimes don't need to cancel out, so save the updates after the last modulo 100 to DB too :) backend/lib/sync.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 0250fba..8c687c6 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -193,3 +193,5 @@ def sync_versions(): print("%d packages updated, committing DB transaction" % cnt) db.session.commit() now = datetime.utcnow() + +db.session.commit()
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 32483c9459bcfc4f7e3848b3c0e3dc6c1c41829d Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 02:08:03 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 02:08:03 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=32483c94 sync: Order package details syncing based on how old the last sync was This way if we got stuck and re-run much later (or it has exceeded the buffer time constant), we'll at least sync the oldest ones first, so we always end up being less out of date with the oldest sync ts. backend/lib/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 567da2d..0250fba 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -154,7 +154,7 @@ def sync_versions(): for maintainer in Maintainer.query.all(): existing_maintainers[maintainer.email] = maintainer -for package in Package.query.filter(Package.last_sync_ts < ts).all(): +for package in Package.query.filter(Package.last_sync_ts < ts).order_by(Package.last_sync_ts).all(): cnt += 1 data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json") if not data:
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: dde4a3a9c8fbe76897219886f21d046392d65730 Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 01:56:00 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 01:56:00 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=dde4a3a9 sync: Add package description and maintainers sync Maintains a sync timestamp to skip recently synced packages, so if a previous run got stuck, we can skip re-doing it too soon. Saves the DB transaction after every 100 packages, because packages.g.o seems to rate-limit us, so at least we will have things saved into DB periodically to cancel out when we get stuck and restart. backend/lib/sync.py | 49 + 1 file changed, 45 insertions(+), 4 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index e53fa9b..567da2d 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,8 +1,11 @@ import xml.etree.ElementTree as ET import requests +import time +from datetime import datetime from .. import app, db from .models import Category, Maintainer, Package, PackageVersion +SYNC_BUFFER_SECS = 30*60 proj_url = "https://api.gentoo.org/metastructure/projects.xml"; pkg_url_base = "https://packages.gentoo.org/"; http_session = requests.session() @@ -144,11 +147,49 @@ def sync_packages(): db.session.commit() def sync_versions(): -for package in Package.query.all(): +cnt = 0 +ts = datetime.utcfromtimestamp(time.time() - SYNC_BUFFER_SECS) +now = datetime.utcnow() +existing_maintainers = {} +for maintainer in Maintainer.query.all(): +existing_maintainers[maintainer.email] = maintainer + +for package in Package.query.filter(Package.last_sync_ts < ts).all(): +cnt += 1 data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json") if not data: print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update continue -from pprint import pprint -pprint(data.json()) -break + +pkg = data.json() + +print ("Updating package: %s" % 
package.full_name) +if 'description' in pkg: +package.description = pkg['description'] + +maintainers = [] +if 'maintainers' in pkg: +for maint in pkg['maintainers']: +if 'email' not in maint: +print("WARNING: Package %s was told to have a maintainer without an e-mail identifier" % package.full_name) +continue +if maint['email'] in existing_maintainers: # FIXME: Some proxy-maintainers are using mixed case e-mail address, right now we'd be creating duplicates right now if the case is different across different packages +maintainers.append(existing_maintainers[maint['email']]) +else: +is_project = False +if 'type' in maint and maint['type'] == 'project': +is_project = True +print("Adding %s maintainer %s" % ("project" if is_project else "individual", maint['email'])) +new_maintainer = Maintainer(email=maint['email'], is_project=is_project, name=maint['name'] if 'name' in maint else None) +db.session.add(new_maintainer) +existing_maintainers[maint['email']] = new_maintainer +maintainers.append(new_maintainer) + +# Intentionally outside if 'maintainers' in pkg, because if there are no maintainers in JSON, it's falled to maintainer-needed and we need to clean out old maintainer entries +package.maintainers = maintainers # TODO: Retain order to know who is primary; retain description associated with the maintainership +package.last_sync_ts = now + +if not cnt % 100: +print("%d packages updated, committing DB transaction" % cnt) +db.session.commit() +now = datetime.utcnow()
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: ed46487bc107c4f404d23e6429e0e4050616459b Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 01:55:18 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 01:55:18 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=ed46487b models: Add package maintainers relationship table and ORM relationships backend/lib/models.py | 9 + 1 file changed, 9 insertions(+) diff --git a/backend/lib/models.py b/backend/lib/models.py index e06dcf8..ba20622 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -10,6 +10,11 @@ class Category(db.Model): def __repr__(self): return "" % self.name +package_maintainer_rel_table = db.Table('package_maintainer_rel', +db.Column('package_id', db.Integer, db.ForeignKey('package.id')), +db.Column('maintainer_id', db.Integer, db.ForeignKey('maintainer.id')), +) + class Package(db.Model): id = db.Column(db.Integer, primary_key=True) name = db.Column(db.Unicode(128), nullable=False) @@ -17,6 +22,9 @@ class Package(db.Model): category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) description = db.Column(db.Unicode(500)) last_sync_ts = db.Column(db.TIMESTAMP, nullable=False, default=datetime.utcfromtimestamp(0)) +maintainers = db.relationship("Maintainer", +secondary=package_maintainer_rel_table, +backref='directly_maintained_packages') @property def full_name(self): @@ -54,6 +62,7 @@ class Maintainer(db.Model): secondaryjoin=id==maintainer_project_membership_rel_table.c.maintainer_id, backref='projects') # projects relationship backref ^^ +# directly_maintained_packages backref - list of packages maintained directly by given project or individual maintainer (as opposed to a bigger list that includes packages maintained by parent/child projects or projects the given individual maintainer is part of) def __repr__(self): return "" % ("project" if self.is_project else "individual", self.email)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: f1a5e9bb01bb7fd802e7cf87b4e9dd675e910140 Author: Mart Raudsepp gentoo org> AuthorDate: Wed Dec 7 00:30:06 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Dec 7 00:30:06 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=f1a5e9bb models: Add description and last_sync_ts columns for Package description we will get from package.g.o per-package detailed json, last_sync_ts will be used to record when that detailed json was last pulled, so that we can rate-limit as-needed. If still using sqlite, can DROP TABLE package; and re-create with ./manage.py init or add the columns manually ALTER TABLE package ADD COLUMN description VARCHAR(500); ALTER TABLE package ADD COLUMN last_sync_ts TIMESTAMP NOT NULL; though that NOT NULL vs default on sqlalchemy's side for now might pose an issue, solving of which is an easy exercise for those that care instead of recreating. backend/lib/models.py | 4 1 file changed, 4 insertions(+) diff --git a/backend/lib/models.py b/backend/lib/models.py index f842a8a..e06dcf8 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -1,3 +1,4 @@ +from datetime import datetime from .. import db @@ -14,6 +15,8 @@ class Package(db.Model): name = db.Column(db.Unicode(128), nullable=False) category_id = db.Column(db.Integer, db.ForeignKey('category.id'), nullable=False) category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) +description = db.Column(db.Unicode(500)) +last_sync_ts = db.Column(db.TIMESTAMP, nullable=False, default=datetime.utcfromtimestamp(0)) @property def full_name(self): @@ -31,6 +34,7 @@ class PackageVersion(db.Model): def __repr__(self): return "" % (self.package.category.name, self.package.name, self.version) + maintainer_project_membership_rel_table = db.Table('maintainer_project_membership_rel', db.Column('project_id', db.Integer, db.ForeignKey('maintainer.id')), db.Column('maintainer_id', db.Integer, db.ForeignKey('maintainer.id')),
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 8c264ac120faebd8463f9b6fadde65f40df2ddb0 Author: Mart Raudsepp gentoo org> AuthorDate: Mon Dec 5 17:44:25 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Mon Dec 5 17:44:25 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=8c264ac1 sync: return empty dict on projects retrieval error, so the caller won't error backend/lib/sync.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 2d6244c..e53fa9b 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -8,16 +8,16 @@ pkg_url_base = "https://packages.gentoo.org/"; http_session = requests.session() def get_project_data(): +projects = {} data = http_session.get(proj_url) if not data: print("Failed retrieving projects.xml") -return +return projects root = ET.fromstring(data.content) -projects = {} # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 if root.tag.lower() != 'projects': print("Downloaded projects.xml root tag isn't 'projects'") -return +return projects for proj_elem in root: if proj_elem.tag.lower() != 'project': print("Skipping unknown subtag <%s>" % proj_elem.tag)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 9664464413b7cd59f861eff01148454974e23030 Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 08:02:10 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 08:02:10 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=96644644 sync: use requests response.json() directly instead of json.loads This should ensure requests will handle UTF-8 fully correctly for us Suggested-by: Doug Freed mtu.edu> backend/lib/sync.py | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 22419bf..2d6244c 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,5 +1,4 @@ import xml.etree.ElementTree as ET -from flask import json import requests from .. import app, db from .models import Category, Maintainer, Package, PackageVersion @@ -111,7 +110,7 @@ def sync_categories(): url = pkg_url_base + "categories.json" data = http_session.get(url) # TODO: Handle response error (if not data) -categories = json.loads(data.text) +categories = data.json() existing_categories = {} # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5 for cat in Category.query.all(): @@ -131,7 +130,7 @@ def sync_packages(): if not data: print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone? continue -packages = json.loads(data.text)['packages'] +packages = data.json()['packages'] # TODO: Use UPSERT instead (on_conflict_do_update) existing_packages = {} for pkg in Package.query.all(): @@ -151,5 +150,5 @@ def sync_versions(): print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update continue from pprint import pprint -pprint(json.loads(data.text)) +pprint(data.json()) break
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: dac532df96cb16626f4f1656b5aa2f82b8383c8d Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 07:59:39 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 07:59:39 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=dac532df sync: Fix UTF-8 handling for projects.xml import Need to feed response.content bytestring into ElementTree, not response.text. With the latter ET seems to figure it's already decoded and goes all latin-1 on us. From response.content bytestream it notices the UTF-8 encoding XML markup and does things right. Diagnosed-by: Doug Freed mtu.edu> backend/lib/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 4894315..22419bf 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -13,7 +13,7 @@ def get_project_data(): if not data: print("Failed retrieving projects.xml") return -root = ET.fromstring(data.text) +root = ET.fromstring(data.content) projects = {} # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 if root.tag.lower() != 'projects':
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 080e857b7081db90f874c73fd271d8bd699195d6 Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 07:43:13 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 07:43:13 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=080e857b sync: Update individual maintainer names during projects sync for the time being ... until we don't have master data for this that we shouldn't overwrite. Also remove a now done TODO item and tweak a debug string I messed up pre-commit. backend/lib/sync.py | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 57a7cb1..4894315 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -37,7 +37,6 @@ def get_project_data(): if member_tag in ['email', 'name', 'role']: member[member_tag] = member_elem.text if 'email' in member: -# TODO: Sync the members (it's valid as email is given) - maybe at the end, after we have synced the project data, so we can add him to the project directly if 'members' not in proj: proj['members'] = [] proj['members'].append(member) @@ -94,9 +93,12 @@ def sync_projects(): if 'members' in data: for member in data['members']: if member['email'] in existing_maintainers: +# TODO: Stop overwriting the name from master data, if/once we have a proper sync source for individual maintainers (Gentoo LDAP?) +if 'name' in member: +existing_maintainers[member['email']].name = member['name'] members.append(existing_maintainers[member['email']]) else: -print("Adding individual%s" % member['email']) +print("Adding individual maintainer %s" % member['email']) new_maintainer = Maintainer(email=member['email'], is_project=False, name=member['name'] if 'name' in member else None) db.session.add(new_maintainer) existing_maintainers[member['email']] = new_maintainer
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: a0e5f8b3559f243236d9dd1170a00d4405042631 Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 06:24:39 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 06:24:39 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=a0e5f8b3 models: Add association table and ORM relationship between projects and members backend/lib/models.py | 12 1 file changed, 12 insertions(+) diff --git a/backend/lib/models.py b/backend/lib/models.py index bc6cd20..f842a8a 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -31,6 +31,11 @@ class PackageVersion(db.Model): def __repr__(self): return "" % (self.package.category.name, self.package.name, self.version) +maintainer_project_membership_rel_table = db.Table('maintainer_project_membership_rel', +db.Column('project_id', db.Integer, db.ForeignKey('maintainer.id')), +db.Column('maintainer_id', db.Integer, db.ForeignKey('maintainer.id')), +) + class Maintainer(db.Model): id = db.Column(db.Integer, primary_key=True) email = db.Column(db.Unicode(50), nullable=False, unique=True) @@ -39,5 +44,12 @@ class Maintainer(db.Model): url = db.Column(db.Unicode()) description = db.Column(db.Unicode(500)) +members = db.relationship("Maintainer", +secondary=maintainer_project_membership_rel_table, +primaryjoin=id==maintainer_project_membership_rel_table.c.project_id, + secondaryjoin=id==maintainer_project_membership_rel_table.c.maintainer_id, +backref='projects') +# projects relationship backref ^^ + def __repr__(self): return "" % ("project" if self.is_project else "individual", self.email)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: d1965a898e3f92f94accb630d4daf68d156a0d0c Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 06:26:47 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 06:26:47 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d1965a89 sync: Project members and subprojects syncing to DB backend/lib/sync.py | 26 +- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 6ed8e01..57a7cb1 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -78,9 +78,33 @@ def sync_projects(): print ("Adding project %s" % email) new_maintainer = Maintainer(email=data['email'], is_project=True, description=data['description'], name=data['name'], url=data['url']) db.session.add(new_maintainer) +existing_maintainers[email] = new_maintainer +members = [] +if 'subprojects' in data: +for subproject_email, inherit_members in data['subprojects']: +# TODO: How should we handle inherit_members? +if subproject_email in existing_maintainers: +members.append(existing_maintainers[subproject_email]) +else: +print("Creating new project entry for subproject: %s" % subproject_email) +new_subproject = Maintainer(email=subproject_email, is_project=True) +db.session.add(new_subproject) +existing_maintainers[subproject_email] = new_subproject +members.append(new_subproject) +if 'members' in data: +for member in data['members']: +if member['email'] in existing_maintainers: +members.append(existing_maintainers[member['email']]) +else: +print("Adding individual%s" % member['email']) +new_maintainer = Maintainer(email=member['email'], is_project=False, name=member['name'] if 'name' in member else None) +db.session.add(new_maintainer) +existing_maintainers[member['email']] = new_maintainer +members.append(new_maintainer) +# TODO: Include role information in the association? 
+existing_maintainers[email].members = members db.session.commit() - def sync_categories(): url = pkg_url_base + "categories.json" data = http_session.get(url)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: df4ddb601efbef157147fcfd6057afd01636acab Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 05:26:10 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 05:26:10 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=df4ddb60 sync: Initial projects syncing to DB without members backend/lib/sync.py | 35 --- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index fbc653a..6ed8e01 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -2,19 +2,19 @@ import xml.etree.ElementTree as ET from flask import json import requests from .. import app, db -from .models import Category, Package, PackageVersion +from .models import Category, Maintainer, Package, PackageVersion proj_url = "https://api.gentoo.org/metastructure/projects.xml"; pkg_url_base = "https://packages.gentoo.org/"; http_session = requests.session() -def sync_projects(): +def get_project_data(): data = http_session.get(proj_url) if not data: print("Failed retrieving projects.xml") return root = ET.fromstring(data.text) -projects = [] +projects = {} # Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 if root.tag.lower() != 'projects': print("Downloaded projects.xml root tag isn't 'projects'") @@ -53,12 +53,33 @@ def sync_projects(): else: print("Skipping unknown subtag <%s>" % tag) if 'email' in proj: -projects.append(proj) +projects[proj['email']] = proj else: print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,)) -from pprint import pprint -print("Found the following projects and data:") -pprint(projects) +return projects + +def sync_projects(): +projects = get_project_data() +existing_maintainers = {} +# TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5 +for maintainer in Maintainer.query.all(): +existing_maintainers[maintainer.email] = maintainer +for email, data in projects.items(): +if email in existing_maintainers: 
+print ("Updating project %s" % email) +existing_maintainers[email].is_project = True +if 'description' in data: +existing_maintainers[email].description = data['description'] +if 'name' in data: +existing_maintainers[email].name = data['name'] +if 'url' in data: +existing_maintainers[email].url = data['url'] +else: +print ("Adding project %s" % email) +new_maintainer = Maintainer(email=data['email'], is_project=True, description=data['description'], name=data['name'], url=data['url']) +db.session.add(new_maintainer) +db.session.commit() + def sync_categories(): url = pkg_url_base + "categories.json"
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: a46c779bf33cf558d287f8bcf11a5e483046bb17 Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 05:24:45 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 05:25:29 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=a46c779b models: Add Maintainer model As this is a new table, just re-doing "./manage.py init" should add it to db, while keeping old data. backend/lib/models.py | 11 +++ 1 file changed, 11 insertions(+) diff --git a/backend/lib/models.py b/backend/lib/models.py index 57f3e64..bc6cd20 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -30,3 +30,14 @@ class PackageVersion(db.Model): def __repr__(self): return "" % (self.package.category.name, self.package.name, self.version) + +class Maintainer(db.Model): +id = db.Column(db.Integer, primary_key=True) +email = db.Column(db.Unicode(50), nullable=False, unique=True) +is_project = db.Column(db.Boolean, nullable=False, server_default='f', default=False) +name = db.Column(db.Unicode(128)) +url = db.Column(db.Unicode()) +description = db.Column(db.Unicode(500)) + +def __repr__(self): +return "" % ("project" if self.is_project else "individual", self.email)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 20275e6f354929fe3d702fb9b296f828704eb5a1 Author: Mart Raudsepp gentoo org> AuthorDate: Sun Dec 4 04:48:07 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sun Dec 4 04:48:07 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=20275e6f models: Use sqlalchemy Unicode columns instead of String backend/lib/models.py | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/backend/lib/models.py b/backend/lib/models.py index 8f7637d..57f3e64 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -3,15 +3,15 @@ from .. import db class Category(db.Model): id = db.Column(db.Integer, primary_key=True) -name = db.Column(db.String(30), unique=True, nullable=False) -description = db.Column(db.String(500)) +name = db.Column(db.Unicode(30), unique=True, nullable=False) +description = db.Column(db.Unicode(500)) def __repr__(self): return "" % self.name class Package(db.Model): id = db.Column(db.Integer, primary_key=True) -name = db.Column(db.String(128), nullable=False) +name = db.Column(db.Unicode(128), nullable=False) category_id = db.Column(db.Integer, db.ForeignKey('category.id'), nullable=False) category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) @@ -24,7 +24,7 @@ class Package(db.Model): class PackageVersion(db.Model): id = db.Column(db.Integer, primary_key=True) -version = db.Column(db.String(128), nullable=False) +version = db.Column(db.Unicode(128), nullable=False) package_id = db.Column(db.Integer, db.ForeignKey('package.id'), nullable=False) package = db.relationship('Package', backref=db.backref('versions', lazy='dynamic'))
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 5972da09a9d9faaa7dbf45929a6c09a0d07d0691 Author: Mart Raudsepp gentoo org> AuthorDate: Fri Nov 11 01:22:04 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Fri Nov 11 01:22:04 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=5972da09 Add parsed project members to the result dict backend/lib/sync.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 291d701..fbc653a 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -38,6 +38,9 @@ def sync_projects(): member[member_tag] = member_elem.text if 'email' in member: # TODO: Sync the members (it's valid as email is given) - maybe at the end, after we have synced the project data, so we can add him to the project directly +if 'members' not in proj: +proj['members'] = [] +proj['members'].append(member) pass elif tag == 'subproject': if 'ref' in elem.attrib:
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: c11a83cc5a9e9b0ce885caddef5a3b593fc4 Author: Mart Raudsepp gentoo org> AuthorDate: Thu Nov 10 15:50:27 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Thu Nov 10 15:50:27 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=c11a8333 Normalize subproject inherit-members to True or False during parsing backend/lib/sync.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 7139119..291d701 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -43,8 +43,8 @@ def sync_projects(): if 'ref' in elem.attrib: if 'subprojects' not in proj: proj['subprojects'] = [] -# subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is followed). TODO: Might change if sync code will want it differently -proj['subprojects'].append((elem.attrib['ref'], elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None)) +# subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is True or False. TODO: Might change if sync code will want it differently +proj['subprojects'].append((elem.attrib['ref'], True if ('inherit-members' in elem.attrib and elem.attrib['inherit-members'] == '1') else False)) else: print("Invalid tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "") else:
[gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
commit: d7dbfa3ba07dcd2cbc1f0be9f9575c436c9a82e3 Author: Mart Raudsepp gentoo org> AuthorDate: Thu Nov 10 15:43:16 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Thu Nov 10 15:43:40 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d7dbfa3b Initial projects.xml parsing code with debug printout backend/lib/sync.py | 59 + manage.py | 8 +++- 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index ce54937..7139119 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,13 +1,64 @@ +import xml.etree.ElementTree as ET from flask import json import requests from .. import app, db from .models import Category, Package, PackageVersion -url_base = "https://packages.gentoo.org/"; +proj_url = "https://api.gentoo.org/metastructure/projects.xml"; +pkg_url_base = "https://packages.gentoo.org/"; http_session = requests.session() +def sync_projects(): +data = http_session.get(proj_url) +if not data: +print("Failed retrieving projects.xml") +return +root = ET.fromstring(data.text) +projects = [] +# Parsing is based on http://www.gentoo.org/dtd/projects.dtd as of 2016-11-10 +if root.tag.lower() != 'projects': +print("Downloaded projects.xml root tag isn't 'projects'") +return +for proj_elem in root: +if proj_elem.tag.lower() != 'project': +print("Skipping unknown subtag <%s>" % proj_elem.tag) +continue +proj = {} +for elem in proj_elem: +tag = elem.tag.lower() +if tag in ['email', 'name', 'url', 'description']: +proj[tag] = elem.text +elif tag == 'member': +member = {} +if 'is-lead' in elem.attrib and elem.attrib['is-lead'] == '1': +member['is_lead'] = True +for member_elem in elem: +member_tag = member_elem.tag.lower() +if member_tag in ['email', 'name', 'role']: +member[member_tag] = member_elem.text +if 'email' in member: +# TODO: Sync the members (it's valid as email is given) - maybe at the end, after we have synced the project data, so we can add him to the project directly +pass +elif tag == 
'subproject': +if 'ref' in elem.attrib: +if 'subprojects' not in proj: +proj['subprojects'] = [] +# subprojects will be a list of (subproject_email, inherit-members) tuples where inherit-members is None, 0 or 1 (if dtd is followed). TODO: Might change if sync code will want it differently +proj['subprojects'].append((elem.attrib['ref'], elem.attrib['inherit-members'] if 'inherit-members' in elem.attrib else None)) +else: +print("Invalid tag inside project %s - required 'ref' attribute missing" % proj['email'] if 'email' in proj else "") +else: +print("Skipping unknown subtag <%s>" % tag) +if 'email' in proj: +projects.append(proj) +else: +print("Skipping incomplete project data due to lack of required email identifier: %s" % (proj,)) +from pprint import pprint +print("Found the following projects and data:") +pprint(projects) + def sync_categories(): -url = url_base + "categories.json" +url = pkg_url_base + "categories.json" data = http_session.get(url) # TODO: Handle response error (if not data) categories = json.loads(data.text) @@ -26,7 +77,7 @@ def sync_categories(): def sync_packages(): for category in Category.query.all(): existing_packages = category.packages.all() -data = http_session.get(url_base + "categories/" + category.name + ".json") +data = http_session.get(pkg_url_base + "categories/" + category.name + ".json") if not data: print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone? 
continue @@ -45,7 +96,7 @@ def sync_packages(): def sync_versions(): for package in Package.query.all(): -data = http_session.get(url_base + "packages/" + package.full_name + ".json") +data = http_session.get(pkg_url_base + "packages/" + package.full_name + ".json") if not data: print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update continue diff --git a/manage.py b/manage.py index 359c63a..a31b96c 100755 --- a/manage.py +++ b/manage.py @@ -21,12 +21,18 @@ def init(): @manager.command def sync_gentoo(): -"""Synchronize Gentoo data from packages.gentoo.org API""" +"""Synchr
[gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
commit: d584775a6820f23561c5b8922a46644920bbf2e6 Author: Mart Raudsepp gentoo org> AuthorDate: Thu Nov 10 09:09:42 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Thu Nov 10 09:09:42 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=d584775a Add dirty sync_versions debug code This just prints the first packages versions JSON data out and exits, so just some initial debug code out of the way to sync in projects.xml first, as sync_versions will need to reference projects and maintainers, so better to finish projects.xml sync first. backend/lib/sync.py | 12 +++- manage.py | 2 -- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index a6aef23..ce54937 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,7 +1,7 @@ from flask import json import requests from .. import app, db -from .models import Category, Package +from .models import Category, Package, PackageVersion url_base = "https://packages.gentoo.org/"; http_session = requests.session() @@ -42,3 +42,13 @@ def sync_packages(): new_pkg = Package(category_id=category.id, name=package['name']) db.session.add(new_pkg) db.session.commit() + +def sync_versions(): +for package in Package.query.all(): +data = http_session.get(url_base + "packages/" + package.full_name + ".json") +if not data: +print("No JSON data for package %s" % package.full_name) # FIXME: Handle better; e.g mark the package as removed if no pkgmove update +continue +from pprint import pprint +pprint(json.loads(data.text)) +break diff --git a/manage.py b/manage.py index 4634518..359c63a 100755 --- a/manage.py +++ b/manage.py @@ -36,12 +36,10 @@ def sync_packages(): """Synchronize only Gentoo packages base data (without details)""" sync.sync_packages() -''' @manager.command def sync_versions(): """Synchronize only Gentoo package details""" sync.sync_versions() -''' if __name__ == '__main__': manager.run()
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 6113941adc9693cac0a4aa12cdac82f75c7921bd Author: Mart Raudsepp gentoo org> AuthorDate: Sat Sep 24 07:01:30 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Sat Sep 24 07:01:30 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=6113941a Add a full_name property to package and remove some debug spam on sync backend/lib/models.py | 4 backend/lib/sync.py | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/lib/models.py b/backend/lib/models.py index 8e47d56..8f7637d 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -15,6 +15,10 @@ class Package(db.Model): category_id = db.Column(db.Integer, db.ForeignKey('category.id'), nullable=False) category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) +@property +def full_name(self): +return "%s/%s" % (self.category.name, self.name) + def __repr__(self): return "" % (self.category.name, self.name) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 6dcb6b9..a6aef23 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -26,7 +26,6 @@ def sync_categories(): def sync_packages(): for category in Category.query.all(): existing_packages = category.packages.all() -print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,)) data = http_session.get(url_base + "categories/" + category.name + ".json") if not data: print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone?
[gentoo-commits] proj/grumpy:master commit in: /, backend/lib/
commit: 1e826829e42b0524365770dd329af5217a5f6b54 Author: Mart Raudsepp gentoo org> AuthorDate: Wed Sep 7 20:20:20 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Wed Sep 7 20:20:20 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=1e826829 Add syncing of packages in categories from packages.g.o (just name) Also add manage.py commands to call the sync steps individually for testing backend/lib/sync.py | 28 ++-- manage.py | 25 ++--- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 3cfb746..6dcb6b9 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,15 +1,18 @@ from flask import json import requests from .. import app, db -from .models import Category +from .models import Category, Package +url_base = "https://packages.gentoo.org/"; http_session = requests.session() def sync_categories(): -url = "https://packages.gentoo.org/categories.json"; +url = url_base + "categories.json" data = http_session.get(url) +# TODO: Handle response error (if not data) categories = json.loads(data.text) existing_categories = {} +# TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5 for cat in Category.query.all(): existing_categories[cat.name] = cat for category in categories: @@ -19,3 +22,24 @@ def sync_categories(): new_cat = Category(name=category['name'], description=category['description']) db.session.add(new_cat) db.session.commit() + +def sync_packages(): +for category in Category.query.all(): +existing_packages = category.packages.all() +print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,)) +data = http_session.get(url_base + "categories/" + category.name + ".json") +if not data: +print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone? 
+continue +packages = json.loads(data.text)['packages'] +# TODO: Use UPSERT instead (on_conflict_do_update) +existing_packages = {} +for pkg in Package.query.all(): +existing_packages[pkg.name] = pkg +for package in packages: +if package['name'] in existing_packages: +continue # TODO: Update description once we keep that in DB +else: +new_pkg = Package(category_id=category.id, name=package['name']) +db.session.add(new_pkg) +db.session.commit() diff --git a/manage.py b/manage.py index 4f123aa..4634518 100755 --- a/manage.py +++ b/manage.py @@ -4,7 +4,7 @@ from flask_script import Manager, Shell from backend import app, db -from backend.lib.sync import sync_categories +from backend.lib import sync manager = Manager(app) @@ -21,8 +21,27 @@ def init(): @manager.command def sync_gentoo(): -"""Syncronize Gentoo data from packages.gentoo.org API""" -sync_categories() +"""Synchronize Gentoo data from packages.gentoo.org API""" +sync.sync_categories() +sync.sync_packages() +#sync_versions() + +@manager.command +def sync_categories(): +"""Synchronize only Gentoo categories data""" +sync.sync_categories() + +@manager.command +def sync_packages(): +"""Synchronize only Gentoo packages base data (without details)""" +sync.sync_packages() + +''' +@manager.command +def sync_versions(): +"""Synchronize only Gentoo package details""" +sync.sync_versions() +''' if __name__ == '__main__': manager.run()
[gentoo-commits] proj/grumpy:master commit in: backend/lib/
commit: 724bb757e8b08382dcbdd460cbef533b91e6338f Author: Mart Raudsepp gentoo org> AuthorDate: Wed Sep 7 20:17:51 2016 +0000 Commit: Mart Raudsepp gentoo org> CommitDate: Wed Sep 7 20:17:51 2016 +0000 URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=724bb757 Don't double-quote debug output for full atoms from %r usage backend/lib/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/lib/models.py b/backend/lib/models.py index 5088e3e..8e47d56 100644 --- a/backend/lib/models.py +++ b/backend/lib/models.py @@ -16,7 +16,7 @@ class Package(db.Model): category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) def __repr__(self): -return "" % (self.category.name, self.name) +return "" % (self.category.name, self.name) class PackageVersion(db.Model): id = db.Column(db.Integer, primary_key=True) @@ -25,4 +25,4 @@ class PackageVersion(db.Model): package = db.relationship('Package', backref=db.backref('versions', lazy='dynamic')) def __repr__(self): -return "" % (self.package.category.name, self.package.name, self.version) +return "" % (self.package.category.name, self.package.name, self.version)
[gentoo-commits] proj/grumpy:master commit in: backend/lib/, /, backend/
commit: 6d5b0a5ba688677a127d1df1439080482c9709d1 Author: Mart Raudsepp gentoo org> AuthorDate: Tue Sep 6 21:19:29 2016 + Commit: Mart Raudsepp gentoo org> CommitDate: Tue Sep 6 21:20:02 2016 + URL:https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=6d5b0a5b Add initial code to sync categories from packages.g.o with associated plumbing Now this should make http://localhost:5000 show the available categories: ./manage.py init ./manage.py sync_gentoo ./manage.py runserver backend/__init__.py | 15 ++- backend/lib/models.py | 28 backend/lib/sync.py | 21 + manage.py | 15 +-- requirements.txt | 2 ++ 5 files changed, 78 insertions(+), 3 deletions(-) diff --git a/backend/__init__.py b/backend/__init__.py index 81ca7eb..b03432b 100644 --- a/backend/__init__.py +++ b/backend/__init__.py @@ -1,7 +1,20 @@ from flask import Flask +from flask_sqlalchemy import SQLAlchemy app = Flask(__name__) +app.config['SQLALCHEMY_DATABASE_URI'] = "sqlite:///grumpy.db" # FIXME: configuration support +db = SQLAlchemy(app) + +from .lib import models + @app.route("/") def hello_world(): -return "Hello World!" +categories = models.Category.query.all() +text = "" +for cat in categories: +text += "%s: %s" % (cat.name, cat.description) +return "Hello World! These are the package categories I know about:%s" % text + + +__all__ = ["app", "db"] diff --git a/backend/lib/models.py b/backend/lib/models.py new file mode 100644 index 000..5088e3e --- /dev/null +++ b/backend/lib/models.py @@ -0,0 +1,28 @@ +from .. 
import db + + +class Category(db.Model): +id = db.Column(db.Integer, primary_key=True) +name = db.Column(db.String(30), unique=True, nullable=False) +description = db.Column(db.String(500)) + +def __repr__(self): +return "" % self.name + +class Package(db.Model): +id = db.Column(db.Integer, primary_key=True) +name = db.Column(db.String(128), nullable=False) +category_id = db.Column(db.Integer, db.ForeignKey('category.id'), nullable=False) +category = db.relationship('Category', backref=db.backref('packages', lazy='dynamic')) + +def __repr__(self): +return "" % (self.category.name, self.name) + +class PackageVersion(db.Model): +id = db.Column(db.Integer, primary_key=True) +version = db.Column(db.String(128), nullable=False) +package_id = db.Column(db.Integer, db.ForeignKey('package.id'), nullable=False) +package = db.relationship('Package', backref=db.backref('versions', lazy='dynamic')) + +def __repr__(self): +return "" % (self.package.category.name, self.package.name, self.version) diff --git a/backend/lib/sync.py b/backend/lib/sync.py new file mode 100644 index 000..3cfb746 --- /dev/null +++ b/backend/lib/sync.py @@ -0,0 +1,21 @@ +from flask import json +import requests +from .. 
import app, db +from .models import Category + +http_session = requests.session() + +def sync_categories(): +url = "https://packages.gentoo.org/categories.json"; +data = http_session.get(url) +categories = json.loads(data.text) +existing_categories = {} +for cat in Category.query.all(): +existing_categories[cat.name] = cat +for category in categories: +if category['name'] in existing_categories: +existing_categories[category['name']].description = category['description'] +else: +new_cat = Category(name=category['name'], description=category['description']) +db.session.add(new_cat) +db.session.commit() diff --git a/manage.py b/manage.py index b28d93a..4f123aa 100755 --- a/manage.py +++ b/manage.py @@ -3,15 +3,26 @@ from flask_script import Manager, Shell -from backend import app +from backend import app, db +from backend.lib.sync import sync_categories manager = Manager(app) def shell_context(): -return dict(app=manager.app) +return dict(app=manager.app, db=db) manager.add_command('shell', Shell(make_context=shell_context)) +@manager.command +def init(): +"""Initialize empty database with tables""" +db.create_all() + +@manager.command +def sync_gentoo(): +"""Syncronize Gentoo data from packages.gentoo.org API""" +sync_categories() + if __name__ == '__main__': manager.run() diff --git a/requirements.txt b/requirements.txt index eaf59ef..78e4b2b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,4 @@ Flask +Flask-SQLAlchemy Flask-Script #manage.py +requests