On Tue 05 April 2011 at 17:54 -0700, elij wrote: > - no need to use mysql > - just considering categories as an integer range, specified to the size > of that in the aur-schema.
So does this produce valid SQL commands? Why don't you escape the strings anymore? > - use logging module instead of writing directly to stderr > this makes the code cleaner as there is only one test for the value of > DBUG. Why is this in the same patch? And I don't really see the point of using the logging module here: it seems to spam the user with dozens of "DEBUG: working..." where the previous little dots actually looked nice. > --- > support/schema/gendummydata.py | 100 > +++++++++++++--------------------------- > 1 files changed, 32 insertions(+), 68 deletions(-) > > diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py > index 7b1d0cf..47d9bd5 100755 > --- a/support/schema/gendummydata.py > +++ b/support/schema/gendummydata.py > @@ -15,7 +15,8 @@ import os > import sys > import cStringIO > import commands > - > +import logging > +import re Where is the re module used? > > DBUG = 1 > SEED_FILE = "/usr/share/dict/words" > @@ -33,6 +34,7 @@ PKG_FILES = (8, 30) # min/max number of files in a > package > PKG_DEPS = (1, 5) # min/max depends a package has > PKG_SRC = (1, 3) # min/max sources a package has > PKG_CMNTS = (1, 5) # min/max number of comments a package has > +CATEGORIES_COUNT = 17 # the number of categories from aur-schema I wonder whether counting the matching lines in aur-schema.sql would be a better idea than hard-coding the number here.
> VOTING = (0, .30) # percentage range for package voting > RANDOM_PATHS = ( # random path locations for package files > "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib", > @@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", > "ftp://") > RANDOM_LOCS = ("pub", "release", "files", "downloads", "src") > FORTUNE_CMD = "/usr/bin/fortune -l" > > +# setup logging > +logformat = "%(levelname)s: %(message)s" > +if DBUG != 0: > + level = logging.DEBUG > +else: > + level = logging.INFO > +logging.basicConfig(format=logformat, level=level) > +log = logging.getLogger() > > if len(sys.argv) != 2: > - sys.stderr.write("Missing output filename argument"); > + log.error("Missing output filename argument") > raise SystemExit > > # make sure the seed file exists > # > if not os.path.exists(SEED_FILE): > - sys.stderr.write("Please install the 'words' Arch package\n"); > - raise SystemExit > - > -# Make sure database access will be available > -# > -try: > - import MySQLdb > -except: > - sys.stderr.write("Please install the 'mysql-python' Arch package\n"); > + log.error("Please install the 'words' Arch package") > raise SystemExit > > -# try to connect to database > -# > -try: > - db = MySQLdb.connect(host = DB_HOST, user = DB_USER, > - db = DB_NAME, passwd = DB_PASS) > - dbc = db.cursor() > -except: > - sys.stderr.write("Could not connect to database\n"); > - raise SystemExit > - > -esc = db.escape_string > - > - > # track what users/package names have been used > # > seen_users = {} > seen_pkgs = {} > -categories = {} > -category_keys = [] > user_keys = [] > > # some functions to generate random data > @@ -95,14 +82,14 @@ def genVersion(): > ver.append("%d" % random.randrange(0,100)) > return ".".join(ver) + "-u%d" % random.randrange(1,11) > def genCategory(): > - return categories[category_keys[random.randrange(0,len(category_keys))]] > + return random.randrange(0,CATEGORIES_COUNT) > def genUID(): > return 
seen_users[user_keys[random.randrange(0,len(user_keys))]] > > > # load the words, and make sure there are enough words for users/pkgs > # > -if DBUG: print "Grabbing words from seed file..." > +log.debug("Grabbing words from seed file...") > fp = open(SEED_FILE, "r") > contents = fp.readlines() > fp.close() > @@ -117,7 +104,7 @@ else: > > # select random usernames > # > -if DBUG: print "Generating random user names..." > +log.debug("Generating random user names...") > user_id = USER_ID > while len(seen_users) < MAX_USERS: > user = random.randrange(0, len(contents)) > @@ -130,7 +117,7 @@ user_keys = seen_users.keys() > > # select random package names > # > -if DBUG: print "Generating random package names..." > +log.debug("Generating random package names...") > num_pkgs = PKG_ID > while len(seen_pkgs) < MAX_PKGS: > pkg = random.randrange(0, len(contents)) > @@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS: > # > contents = None > > -# Load package categories from database > -# > -if DBUG: print "Loading package categories..." 
> -q = "SELECT * FROM PackageCategories" > -dbc.execute(q) > -row = dbc.fetchone() > -while row: > - categories[row[1]] = row[0] > - row = dbc.fetchone() > -category_keys = categories.keys() > - > -# done with the database > -# > -dbc.close() > -db.close() > - > # developer/tu IDs > # > developers = [] > @@ -179,7 +150,7 @@ out.write("BEGIN;\n") > > # Begin by creating the User statements > # > -if DBUG: print "Creating SQL statements for users.", > +log.debug("Creating SQL statements for users.") > count = 0 > for u in user_keys: > account_type = 1 # default to normal user > @@ -201,22 +172,20 @@ for u in user_keys: > # a normal user account > # > pass > - > + > s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) > VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], > account_type, u, u, u) > out.write(s) > if count % 10 == 0: > - if DBUG: print ".", > + log.debug("working...") > count += 1 > -if DBUG: print "." > -if DBUG: > - print "Number of developers:", len(developers) > - print "Number of trusted users:", len(trustedusers) > - print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers)) > - print "Number of packages:", MAX_PKGS > +log.debug("Number of developers: %d" % len(developers)) > +log.debug("Number of trusted users: %d" % len(trustedusers)) > +log.debug("Number of users: %d" % > (MAX_USERS-len(developers)-len(trustedusers))) > +log.debug("Number of packages: %d" % MAX_PKGS) > > # Create the package statements > # > -if DBUG: print "Creating SQL statements for packages.", > +log.debug("Creating SQL statements for packages.") > count = 0 > for p in seen_pkgs.keys(): > NOW = int(time.time()) > @@ -238,24 +207,22 @@ for p in seen_pkgs.keys(): > > out.write(s) > if count % 100 == 0: > - if DBUG: print ".", > + log.debug("working...") > count += 1 > > # create random comments for this package > # > num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1]) > for i in range(0, num_comments): > - fortune =
esc(commands.getoutput(FORTUNE_CMD).replace("'","")) > + fortune = commands.getoutput(FORTUNE_CMD).replace("'","") > now = NOW + random.randrange(400, 86400*3) > s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, > CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, > now) > out.write(s) > > -if DBUG: print "." > - > # Cast votes > # > track_votes = {} > -if DBUG: print "Casting votes for packages.", > +log.debug("Casting votes for packages.") > count = 0 > for u in user_keys: > num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]), > @@ -271,7 +238,7 @@ for u in user_keys: > track_votes[pkg] += 1 > out.write(s) > if count % 100 == 0: > - if DBUG: print ".", > + log.debug("working...") > count += 1 > > # Update statements for package votes > @@ -282,7 +249,7 @@ for p in track_votes.keys(): > > # Create package dependencies and sources > # > -if DBUG: print "."; print "Creating statements for package depends/sources.", > +log.debug("Creating statements for package depends/sources.") > count = 0 > for p in seen_pkgs.keys(): > num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1]) > @@ -308,7 +275,7 @@ for p in seen_pkgs.keys(): > out.write(s) > > if count % 100 == 0: > - if DBUG: print ".", > + log.debug("working...") > count += 1 > > > @@ -317,7 +284,4 @@ for p in seen_pkgs.keys(): > out.write("COMMIT;\n") > out.write("\n") > out.close() > - > -if DBUG: print "." > -if DBUG: print "Done." > - > +log.debug("Done.") > -- > 1.7.4.1 >