- no need to use mysql: the script no longer connects to the database
- treat categories as a plain integer range, sized to match the number
  of categories defined in aur-schema
- use the logging module instead of writing directly to stderr; this
  makes the code cleaner, as the value of DBUG is tested in only one
  place
---
 support/schema/gendummydata.py |  100 +++++++++++++---------------------------
 1 files changed, 32 insertions(+), 68 deletions(-)

diff --git a/support/schema/gendummydata.py b/support/schema/gendummydata.py
index 7b1d0cf..47d9bd5 100755
--- a/support/schema/gendummydata.py
+++ b/support/schema/gendummydata.py
@@ -15,7 +15,8 @@ import os
 import sys
 import cStringIO
 import commands
-
+import logging
+import re
 
 DBUG      = 1
 SEED_FILE = "/usr/share/dict/words"
@@ -33,6 +34,7 @@ PKG_FILES = (8, 30)    # min/max number of files in a package
 PKG_DEPS  = (1, 5)     # min/max depends a package has
 PKG_SRC   = (1, 3)     # min/max sources a package has
 PKG_CMNTS = (1, 5)     # min/max number of comments a package has
+CATEGORIES_COUNT = 17  # the number of categories from aur-schema
 VOTING    = (0, .30)   # percentage range for package voting
 RANDOM_PATHS = (       # random path locations for package files
        "/usr/bin", "/usr/lib", "/etc", "/etc/rc.d", "/usr/share", "/lib",
@@ -45,44 +47,29 @@ RANDOM_URL = ("http://www.", "ftp://ftp.", "http://", "ftp://")
 RANDOM_LOCS = ("pub", "release", "files", "downloads", "src")
 FORTUNE_CMD = "/usr/bin/fortune -l"
 
+# setup logging
+logformat = "%(levelname)s: %(message)s"
+if DBUG != 0:
+    level = logging.DEBUG
+else:
+    level = logging.INFO
+logging.basicConfig(format=logformat, level=level)
+log = logging.getLogger()
 
 if len(sys.argv) != 2:
-       sys.stderr.write("Missing output filename argument");
+       log.error("Missing output filename argument")
        raise SystemExit
 
 # make sure the seed file exists
 #
 if not os.path.exists(SEED_FILE):
-       sys.stderr.write("Please install the 'words' Arch package\n");
-       raise SystemExit
-
-# Make sure database access will be available
-#
-try:
-       import MySQLdb
-except:
-       sys.stderr.write("Please install the 'mysql-python' Arch package\n");
+       log.error("Please install the 'words' Arch package")
        raise SystemExit
 
-# try to connect to database
-#
-try:
-       db = MySQLdb.connect(host = DB_HOST, user = DB_USER,
-                       db = DB_NAME, passwd = DB_PASS)
-       dbc = db.cursor()
-except:
-       sys.stderr.write("Could not connect to database\n");
-       raise SystemExit
-
-esc = db.escape_string
-
-
 # track what users/package names have been used
 #
 seen_users = {}
 seen_pkgs = {}
-categories = {}
-category_keys = []
 user_keys = []
 
 # some functions to generate random data
@@ -95,14 +82,14 @@ def genVersion():
                ver.append("%d" % random.randrange(0,100))
        return ".".join(ver) + "-u%d" % random.randrange(1,11)
 def genCategory():
-       return categories[category_keys[random.randrange(0,len(category_keys))]]
+       return random.randrange(0,CATEGORIES_COUNT)
 def genUID():
        return seen_users[user_keys[random.randrange(0,len(user_keys))]]
 
 
 # load the words, and make sure there are enough words for users/pkgs
 #
-if DBUG: print "Grabbing words from seed file..."
+log.debug("Grabbing words from seed file...")
 fp = open(SEED_FILE, "r")
 contents = fp.readlines()
 fp.close()
@@ -117,7 +104,7 @@ else:
 
 # select random usernames
 #
-if DBUG: print "Generating random user names..."
+log.debug("Generating random user names...")
 user_id = USER_ID
 while len(seen_users) < MAX_USERS:
        user = random.randrange(0, len(contents))
@@ -130,7 +117,7 @@ user_keys = seen_users.keys()
 
 # select random package names
 #
-if DBUG: print "Generating random package names..."
+log.debug("Generating random package names...")
 num_pkgs = PKG_ID
 while len(seen_pkgs) < MAX_PKGS:
        pkg = random.randrange(0, len(contents))
@@ -149,22 +136,6 @@ while len(seen_pkgs) < MAX_PKGS:
 #
 contents = None
 
-# Load package categories from database
-#
-if DBUG: print "Loading package categories..."
-q = "SELECT * FROM PackageCategories"
-dbc.execute(q)
-row = dbc.fetchone()
-while row:
-       categories[row[1]] = row[0]
-       row = dbc.fetchone()
-category_keys = categories.keys()
-
-# done with the database
-#
-dbc.close()
-db.close()
-
 # developer/tu IDs
 #
 developers = []
@@ -179,7 +150,7 @@ out.write("BEGIN;\n")
 
 # Begin by creating the User statements
 #
-if DBUG: print "Creating SQL statements for users.",
+log.debug("Creating SQL statements for users.")
 count = 0
 for u in user_keys:
        account_type = 1  # default to normal user
@@ -201,22 +172,20 @@ for u in user_keys:
                        # a normal user account
                        #
                        pass
-       
+
        s = "INSERT INTO Users (ID, AccountTypeID, Username, Email, Passwd) VALUES (%d, %d, '%s', '%s@example.com', MD5('%s'));\n" % (seen_users[u], account_type, u, u, u)
        out.write(s)
        if count % 10 == 0:
-               if DBUG: print ".",
+               log.debug("working...")
        count += 1
-if DBUG: print "."
-if DBUG:
-       print "Number of developers:", len(developers)
-       print "Number of trusted users:", len(trustedusers)
-       print "Number of users:", (MAX_USERS-len(developers)-len(trustedusers))
-       print "Number of packages:", MAX_PKGS
+log.debug("Number of developers: %d" % len(developers))
+log.debug("Number of trusted users: %d" % len(trustedusers))
+log.debug("Number of users: %d" % (MAX_USERS-len(developers)-len(trustedusers)))
+log.debug("Number of packages: %d" % MAX_PKGS)
 
 # Create the package statements
 #
-if DBUG: print "Creating SQL statements for packages.",
+log.debug("Creating SQL statements for packages.")
 count = 0
 for p in seen_pkgs.keys():
        NOW = int(time.time())
@@ -238,24 +207,22 @@ for p in seen_pkgs.keys():
 
        out.write(s)
        if count % 100 == 0:
-               if DBUG: print ".",
+               log.debug("working...")
        count += 1
 
        # create random comments for this package
        #
        num_comments = random.randrange(PKG_CMNTS[0], PKG_CMNTS[1])
        for i in range(0, num_comments):
-               fortune = esc(commands.getoutput(FORTUNE_CMD).replace("'",""))
+               fortune = commands.getoutput(FORTUNE_CMD).replace("'","")
                now = NOW + random.randrange(400, 86400*3)
                s = "INSERT INTO PackageComments (PackageID, UsersID, Comments, CommentTS) VALUES (%d, %d, '%s', %d);\n" % (seen_pkgs[p], genUID(), fortune, now)
                out.write(s)
 
-if DBUG: print "."
-
 # Cast votes
 #
 track_votes = {}
-if DBUG: print "Casting votes for packages.",
+log.debug("Casting votes for packages.")
 count = 0
 for u in user_keys:
        num_votes = random.randrange(int(len(seen_pkgs)*VOTING[0]),
@@ -271,7 +238,7 @@ for u in user_keys:
                        track_votes[pkg] += 1
                        out.write(s)
                        if count % 100 == 0:
-                               if DBUG: print ".",
+                               log.debug("working...")
                        count += 1
 
 # Update statements for package votes
@@ -282,7 +249,7 @@ for p in track_votes.keys():
 
 # Create package dependencies and sources
 #
-if DBUG: print "."; print "Creating statements for package depends/sources.",
+log.debug("Creating statements for package depends/sources.")
 count = 0
 for p in seen_pkgs.keys():
        num_deps = random.randrange(PKG_DEPS[0], PKG_DEPS[1])
@@ -308,7 +275,7 @@ for p in seen_pkgs.keys():
                out.write(s)
 
        if count % 100 == 0:
-               if DBUG: print ".",
+               log.debug("working...")
        count += 1
 
 
@@ -317,7 +284,4 @@ for p in seen_pkgs.keys():
 out.write("COMMIT;\n")
 out.write("\n")
 out.close()
-
-if DBUG: print "."
-if DBUG: print "Done."
-
+log.debug("Done.")
-- 
1.7.4.1

Reply via email to