Hi, is there any update on this patch?
---------- Forwarded message ---------- From: <[email protected]> Date: Sat, Aug 29, 2015 at 12:39 AM Subject: [PATCH] Modify blends_prospective_gatherer.py : Avoid duplicate entries in table blends_prospectivepackages. Insert in UDD the package which has the latest chlog_date To: [email protected] Cc: [email protected], Akshita Jha <[email protected]> From: Akshita Jha <[email protected]> --- udd/blends_prospective_gatherer.py | 60 +++++++++++++++++++++++++++++++++----- 1 file changed, 52 insertions(+), 8 deletions(-) diff --git a/udd/blends_prospective_gatherer.py b/udd/blends_prospective_gatherer.py index 69652e8..ddf81d9 100644 --- a/udd/blends_prospective_gatherer.py +++ b/udd/blends_prospective_gatherer.py @@ -19,6 +19,7 @@ from debian import deb822 import email.Utils from bibref_gatherer import upstream_reader +from datetime import datetime debug=0 @@ -414,14 +415,57 @@ class blends_prospective_gatherer(gatherer): %(vcs_type)s, %(vcs_url)s, %(vcs_browser)s, %(wnpp)s, %(wnpp_type)s, %(wnpp_desc)s, %(license)s, %(chlog_date)s, %(chlog_version)s)""" - try: - cur.executemany(pkgquery, pkgs) - except ProgrammingError: - print "Error while inserting packages" - raise - except KeyError, err: - print "Error while inserting packages", err - raise + + + dup_pkgs = [] # List containing packages which violate Primary Key Condition + for p in pkgs: + try: + cur.execute(pkgquery, p) + #cur.executemany(pkgquery, pkgs) + except ProgrammingError: + print "Error while inserting packages" + raise + except KeyError, err: + print "Error while inserting packages", err + raise + except IntegrityError as err: + dup_pkgs.append(p) # If IntegrityError => package already exists in UDD + # Append this duplicate record to dup_pkgs + print "Duplicate Key Error while inserting packages", err + self.connection.rollback() + else: + self.connection.commit() + + # Once all the prospective packages are inserted into UDD, make sure that + # these packages are the ones that have the latest 
chlog_date . + # If the package inserted in UDD, has a chlog_date earlier than its duplicate, + # Delete the record of this package from UDD and insert its duplicate in UDD. + for d in dup_pkgs: + + dup_query = "SELECT package, chlog_date FROM %s WHERE package='%s'" %(my_config['table'], d['package']) + + cur.execute(dup_query) + c = cur.fetchone() + + # chlog_date of package inserted in UDD + udd_date = " ".join(c[1].split()[:-1]) + udd_date = datetime.strptime(udd_date, '%a, %d %b %Y %H:%M:%S') + + # chlog_date for duplicated package not in UDD + dup_date = " ".join(d['chlog_date'].split()[:-1]) + dup_date = datetime.strptime(dup_date, '%a, %d %b %Y %H:%M:%S') + + # compare the chlog_date of the UDD package and its duplicate + # if udd_date > dup_date -> do nothing + # else: delete the udd package and insert its duplicate + if udd_date < dup_date: + del_query = "DELETE FROM ONLY %s WHERE package='%s'" %(my_config['table'], d['package']) + cur.execute(del_query) + cur.execute(pkgquery, d) + + # test_query = "SELECT package, chlog_date FROM %s WHERE package='%s'" %(my_config['table'], d['package']) + # cur.execute(test_query) + # c = cur.fetchone() cur.execute("DEALLOCATE package_insert") -- 1.9.1 -- Akshita Jha
