commit: 1e826829e42b0524365770dd329af5217a5f6b54
Author: Mart Raudsepp <leio <AT> gentoo <DOT> org>
AuthorDate: Wed Sep 7 20:20:20 2016 +0000
Commit: Mart Raudsepp <leio <AT> gentoo <DOT> org>
CommitDate: Wed Sep 7 20:20:20 2016 +0000
URL: https://gitweb.gentoo.org/proj/grumpy.git/commit/?id=1e826829
Add syncing of packages in categories from packages.g.o (just name) Also add manage.py commands to call the sync steps individually for testing backend/lib/sync.py | 28 ++++++++++++++++++++++++++-- manage.py | 25 ++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/backend/lib/sync.py b/backend/lib/sync.py index 3cfb746..6dcb6b9 100644 --- a/backend/lib/sync.py +++ b/backend/lib/sync.py @@ -1,15 +1,18 @@ from flask import json import requests from .. import app, db -from .models import Category +from .models import Category, Package +url_base = "https://packages.gentoo.org/" http_session = requests.session() def sync_categories(): - url = "https://packages.gentoo.org/categories.json" + url = url_base + "categories.json" data = http_session.get(url) + # TODO: Handle response error (if not data) categories = json.loads(data.text) existing_categories = {} + # TODO: Use UPSERT instead (on_conflict_do_update) if we can rely on postgresql:9.5 for cat in Category.query.all(): existing_categories[cat.name] = cat for category in categories: @@ -19,3 +22,24 @@ def sync_categories(): new_cat = Category(name=category['name'], description=category['description']) db.session.add(new_cat) db.session.commit() + +def sync_packages(): + for category in Category.query.all(): + existing_packages = category.packages.all() + print("Existing packages in DB for category %s: %s" % (category.name, existing_packages,)) + data = http_session.get(url_base + "categories/" + category.name + ".json") + if not data: + print("No JSON data for category %s" % category.name) # FIXME: Better handling; mark category as inactive/gone? 
+ continue + packages = json.loads(data.text)['packages'] + # TODO: Use UPSERT instead (on_conflict_do_update) + existing_packages = {} + for pkg in Package.query.all(): + existing_packages[pkg.name] = pkg + for package in packages: + if package['name'] in existing_packages: + continue # TODO: Update description once we keep that in DB + else: + new_pkg = Package(category_id=category.id, name=package['name']) + db.session.add(new_pkg) + db.session.commit() diff --git a/manage.py b/manage.py index 4f123aa..4634518 100755 --- a/manage.py +++ b/manage.py @@ -4,7 +4,7 @@ from flask_script import Manager, Shell from backend import app, db -from backend.lib.sync import sync_categories +from backend.lib import sync manager = Manager(app) @@ -21,8 +21,27 @@ def init(): @manager.command def sync_gentoo(): - """Syncronize Gentoo data from packages.gentoo.org API""" - sync_categories() + """Synchronize Gentoo data from packages.gentoo.org API""" + sync.sync_categories() + sync.sync_packages() + #sync_versions() + +@manager.command +def sync_categories(): + """Synchronize only Gentoo categories data""" + sync.sync_categories() + +@manager.command +def sync_packages(): + """Synchronize only Gentoo packages base data (without details)""" + sync.sync_packages() + +''' +@manager.command +def sync_versions(): + """Synchronize only Gentoo package details""" + sync.sync_versions() +''' if __name__ == '__main__': manager.run()