On Tue, Apr 05, 2011 at 11:57:46PM -0700, elij wrote: > - remove need to use mysql for generating the sql > - just consider categories an integer range, specified to the size > of that in the aur-schema. > - use the logging module instead of writing directly to stderr > this makes the code cleaner as it removes the numerous tests for the value > of DBUG, yet allows devs to control the level of output verbosity. > --- > support/schema/gendummydata.py | 106 +++++++++------------------------------ > 1 files changed, 25 insertions(+), 81 deletions(-) >
I agree with both changes, but please split this patch into two separate patches. > diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py > index 7b1d0cf..8ed9f69 100755 > --- a/support/schema/gendummydata.py > +++ b/support/schema/gendummydata.py > @@ -15,9 +15,9 @@ import os > import sys > import cStringIO > import commands > +import logging > > - > -DBUG = 1 > +log_level = logging.DEBUG # logging level. set to logging.INFO to reduce > output I'm not a Python coder, but is there any reason to use lowercase here whereas we use uppercase for all other constants? > SEED_FILE = "/usr/share/dict/words" > DB_HOST = os.getenv("DB_HOST", "localhost") > DB_NAME = os.getenv("DB_NAME", "AUR") > @@ -33,6 +33,7 @@ PKG_FILES = (8, 30) # min/max number of files in a > package > PKG_DEPS = (1, 5) # min/max depends a package has > PKG_SRC = (1, 3) # min/max sources a package has > PKG_CMNTS = (1, 5) # min/max number of comments a package has > +CATEGORIES_COUNT = 17 # the number of categories from aur-schema > VOTING = (0, .30) # percentage range for package voting > RANDOM_PATHS = ( # random path locations for package files > "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", > @@ -45,44 +46,25 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", > "ftp://") > RANDOM_LOCS = ("pub", "release", "files", "downloads", "src") > FORTUNE_CMD = "/usr/bin/fortune -l" > > +# setup logging > +logformat = "%(levelname)s: %(message)s" > +logging.basicConfig(format=logformat, level=log_level) > +log = logging.getLogger() > > if len(sys.argv) != 2: > - sys.stderr.write("Missing output filename argument"); > + log.error("Missing output filename argument") > raise SystemExit > > # make sure the seed file exists > # > if not os.path.exists(SEED_FILE): > - sys.stderr.write("Please install the 'words' Arch package\n"); > - raise SystemExit > - > -# Make sure database access will be available > -# > -try: > - import MySQLdb > -except: > - 
sys.stderr.write("Please install the 'mysql-python' Arch package\n"); > - raise SystemExit > - > -# try to connect to database > -# > -try: > - db = MySQLdb.connect(host = DB_HOST, user = DB_USER, > - db = DB_NAME, passwd = DB_PASS) > - dbc = db.cursor() > -except: > - sys.stderr.write("Could not connect to database\n"); > + log.error("Please install the 'words' Arch package") > raise SystemExit Shouldn't we use "sys.exit(1)" here instead of raising a bare SystemExit exception? A bare SystemExit exits with status 0, so with sys.exit(1) we'd also get a proper (non-zero) exit status on errors. This might be something to include in the debugging/error handling patch. > > -esc = db.escape_string > - > - > # track what users/package names have been used > # > seen_users = {} > seen_pkgs = {} > -categories = {} > -category_keys = [] > user_keys = [] > > # some functions to generate random data > @@ -95,14 +77,14 @@ def genVersion(): > ver.append("%d" % random.randrange(0,100)) > return ".".join(ver) + "-u%d" % random.randrange(1,11) > def genCategory(): > - return categories[category_keys[random.randrange(0,len(category_keys))]] > + return random.randrange(0,CATEGORIES_COUNT) > def genUID(): > return seen_users[user_keys[random.randrange(0,len(user_keys))]] > > > # load the words, and make sure there are enough words for users/pkgs > # > -if DBUG: print "Grabbing words from seed file..." > +log.debug("Grabbing words from seed file...") > fp = open(SEED_FILE, "r") > contents = fp.readlines() > fp.close() > @@ -117,7 +99,7 @@ else: > > # select random usernames > # > -if DBUG: print "Generating random user names..." > +log.debug("Generating random user names...") > user_id = USER_ID > while len(seen_users) < MAX_USERS: > user = random.randrange(0, len(contents)) > @@ -130,7 +112,7 @@ user_keys = seen_users.keys() > > # select random package names > # > -if DBUG: print "Generating random package names..." 
> +log.debug("Generating random package names...") > num_pkgs = PKG_ID > while len(seen_pkgs) < MAX_PKGS: > pkg = random.randrange(0, len(contents)) > @@ -149,22 +131,6 @@ while len(seen_pkgs) < MAX_PKGS: > # > contents = None > > -# Load package categories from database > -# > -if DBUG: print "Loading package categories..." > -q = "SELECT * FROM PackageCategories" > -dbc.execute(q) > -row = dbc.fetchone() > -while row: > - categories[row[1]] = row[0] > - row = dbc.fetchone() > -category_keys = categories.keys() > - > -# done with the database > -# > -dbc.close() > -db.close() > - > # developer/tu IDs > # > developers = [] > @@ -179,8 +145,7 @@ out.write("BEGIN;\n") > > # Begin by creating the User statements > # > -if DBUG: print "Creating SQL statements for users.", > -count = 0 > +log.debug("Creating SQL statements for users.") > for u in user_keys: > account_type = 1 # default to normal user > if not has_devs or not has_tus: > @@ -201,22 +166,18 @@ for u in user_keys: > # a normal user account > # > pass > - > + > s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) > VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], > account_type, u, u, u) > out.write(s) > - if count % 10 == 0: > - if DBUG: print ".", > - count += 1 > -if DBUG: print "." 
> -if DBUG: > - print "Number of developers:", len(developers) > - print "Number of trusted users:", len(trustedusers) > - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers)) > - print "Number of packages:", MAX_PKGS > + > +log.debug("Number of developers: %d" % len(developers)) > +log.debug("Number of trusted users: %d" % len(trustedusers)) > +log.debug("Number of users: %d" % > (MAX_USERS-len(developers)-len(trustedusers))) > +log.debug("Number of packages: %d" % MAX_PKGS) > > # Create the package statements > # > -if DBUG: print "Creating SQL statements for packages.", > +log.debug("Creating SQL statements for packages.") > count = 0 > for p in seen_pkgs.keys(): > NOW = int(time.time()) > @@ -237,26 +198,21 @@ for p in seen_pkgs.keys(): > genCategory(), NOW, uuid, muid) > > out.write(s) > - if count % 100 == 0: > - if DBUG: print ".", > count += 1 > > # create random comments for this package > # > num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) > for i in range(0, num_comments): > - fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'","")) > + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") Why did you drop escape_string() here? > now = NOW + random.randrange(400, 86400*3) > s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, > CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, > now) > out.write(s) > > -if DBUG: print "." 
> - > # Cast votes > # > track_votes = {} > -if DBUG: print "Casting votes for packages.", > -count = 0 > +log.debug("Casting votes for packages.") > for u in user_keys: > num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), > int(len(seen_pkgs)*VOTING[1])) > @@ -270,9 +226,6 @@ for u in user_keys: > track_votes[pkg] = 0 > track_votes[pkg] += 1 > out.write(s) > - if count % 100 == 0: > - if DBUG: print ".", > - count += 1 > > # Update statements for package votes > # > @@ -282,8 +235,7 @@ for p in track_votes.keys(): > > # Create package dependencies and sources > # > -if DBUG: print "."; print "Creating statements for package depends/sources.", > -count = 0 > +log.debug("Creating statements for package depends/sources.") > for p in seen_pkgs.keys(): > num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) > this_deps = {} > @@ -307,17 +259,9 @@ for p in seen_pkgs.keys(): > seen_pkgs[p], src) > out.write(s) > > - if count % 100 == 0: > - if DBUG: print ".", > - count += 1 > - > - > # close output file > # > out.write("COMMIT;\n") > out.write("\n") > out.close() > - > -if DBUG: print "." > -if DBUG: print "Done." > - > +log.debug("Done.") > -- > 1.7.4.1