2007/10/26, smitty1e <[EMAIL PROTECTED]>: > Disclaimer(s): the author is nobody's pythonista. This could probably > be done more elegantly. > The driver for the effort is to get PyMacs to work with new-style > classes. > This rendering stage stands alone, and might be used for other > purposes. > A subsequent post will show using the resulting file to produce (I > think valid) .el trampoline > signatures for PyMacs. > If nothing else, it shows some python internals in an interesting way. > Tested against version 2.5.1 > Maybe "lumberjack.py" would be a better name, since "It cuts down > trees, goes real slow, and uses disk galore. Wishes it'd been > webfoot[1], just like its dear author". > Cheers, > Chris > > [1] Author was born in Oregon. > > #A sample file: > class sample( object ): > """fairly trivial sample class for demonstration purposes. > """ > def __init__( self > , some_string ): > self.hold_it = some_string > > def show( self ): > print self.hold_it > > #Invocation: > # ./pysqlrender.py -f sample.py -o output > > #Script: > #!/usr/bin/python > > """Script to dump the parse tree of an input file to a SQLite > database. 
> """ > > from optparse import OptionParser > import os > import parser > import pprint > import re > import sqlite3 > import symbol > import token > import types > > from types import ListType \ > , TupleType > > target_table = """CREATE TABLE tbl_parse_tree ( > parse_tree_id INTEGER PRIMARY KEY > AUTOINCREMENT > , parse_tree_symbol_id > , parse_tree_indent > , parse_tree_value );""" > > target_insert = """INSERT INTO tbl_parse_tree ( > parse_tree_symbol_id > , parse_tree_indent > , parse_tree_value ) > VALUES (%s, %s, '%s' );""" > > symbol_table = """CREATE TABLE tlp_parse_tree_symbol ( > parse_tree_symbol_id INTEGER PRIMARY KEY > , parse_tree_symbol_val );""" > symbol_insert = """INSERT INTO tlp_parse_tree_symbol ( > parse_tree_symbol_id > , parse_tree_symbol_val ) > VALUES ( %s, '%s' );""" > > class symbol_manager( object ): > """ Class to merge symbols and tokens for ease of use. > """ > def __init__( self > , c ): > for k in symbol.sym_name: > sql = symbol_insert % ( k, symbol.sym_name[k] ) > try: > c.execute( sql ) > except sqlite3.IntegrityError: > pass > for k in token.tok_name: > sql = symbol_insert % ( k, token.tok_name[k] ) > try: > c.execute( sql ) > except sqlite3.IntegrityError: > pass > > def get_symbol( self > , key ): > ret = -1 > if symbol.sym_name.has_key(key): ret = symbol.sym_name[key] > elif token.tok_name.has_key(key) : ret = token.tok_name[ key] > return ret > > def recurse_it( self, tester ): > """Check to see if dump_tup should recurse > """ > if self.get_symbol(tester) > 0: > return True > return False > > class stocker( object ): > """Remembers the depth of the tree and effects the INSERTs > into the output file. > """ > def __init__( self ): > self.cur_indent = 0 > > def do_symbol( self > , c > , symbol_value > , val = "" ): > """Stuff something from the parse tree into the database > table. 
> """ > if symbol_value==5: self.cur_indent += 1 > elif symbol_value==6: self.cur_indent -= 1 > > try: > sql = target_insert \ > % ( symbol_value > , self.cur_indent > , re.sub( "'", "`", str(val) )) > c.execute( sql ) > except AttributeError: > print "connection bad in lexer" > except sqlite3.OperationalError: > print "suckage at indent of %s for %s" \ > % (self.cur_indent, sql) > > def dump_tup( tup > , sym > , c > , stok ): > """Recursive function to descend TUP and analyze its elements. > tup parse tree of a file, rendered as a tuple > sym dictionary rendered from symbol module > c live database cursor > stok output object effect token storage > """ > for node in tup: > typ = type( node ) > r = getattr( typ > , "__repr__" > , None ) > > if (issubclass(typ, tuple) and r is tuple.__repr__): > > if token.tok_name.has_key( node[0] ): > stok.do_symbol( c > , node[0] > , node[1] ) > elif sym.recurse_it( node[0] ): > stok.do_symbol( c > , node[0] > , '__py__' ) #If you say node[1] here, > # the sqlite file is fat > # and instructive > for node2 in node[1:]: > dump_tup( node2 > , sym > , c > , stok ) > else: > stok.do_symbol( c > , node[0] > , node[1] ) > dump_tup( node[1] > , sym > , c > , stok ) > else: > stok.do_symbol( c > , 0 > , node ) > > > def convert_python_source_tree_to_table( file_name > , target_name ): > """Retrieve information from the parse tree of a source file. > Create an output database file in sqlite. > Make a table in there, and then procede to stuff the flattened > input parse tree into it. > > file_name Name of the file to read Python source code from. 
> target_name Name for the sqlite database > """ > x = open( file_name ).readlines() > y = [] > [y.append( line.replace("\r\n","") ) for line in x] > > ast = parser.suite( "\n".join(y) ) > conn = sqlite3.connect( target_name ) > conn.isolation_level = None > c = conn.cursor() > c.execute( target_table ) > c.execute( symbol_table ) > sym = symbol_manager( c ) > stok = stocker() > > #pprint.pprint( ast.totuple() ) > dump_tup( ast.totuple() > , sym > , c > , stok ) > > def main(): > usage = "usage: %prog [options] arg" > parser = OptionParser(usage) > parser.add_option("-f", "--file", dest="filename" > , action="store", type="string" > , help ="read python source from FILENAME") > #TODO: test for existence of output file, eject if exists > parser.add_option("-o", "--output",dest="output" > , action="store", type="string" > , help ="name of sqlite output file") > (options, args) = parser.parse_args() > > convert_python_source_tree_to_table( options.filename > , options.output ) > > if __name__ == "__main__": > main() > > -- > http://mail.python.org/mailman/listinfo/python-list >
Hello, I took a look at that script and made some changes. Before posting my version, let me comment on a few things. First, you probably noticed that it gets slow as you run the script on "larger" files. All of the "wasted" time is because you set the isolation level to None, so there are a lot of commits, and commits make this slow. Removing that isolation level setting and doing only one commit after dump_tup eliminates basically all of the wasted time. Second, please don't use "%s" to insert values into your SQL query string. For sqlite you should replace those with "?" placeholders and pass the values as a tuple. Third, don't use "yourdict.has_key(key)"; use "key in yourdict". I have read the disclaimer at the top, but maybe you wanted to hear something. Fourth, it could be the email client, but did you use 3 spaces for indentation? :/ Fifth, other observations are left to the reader as an exercise. My version: #!/usr/bin/env python """Script to dump the parse tree of an input file to a SQLite database. """ import token import parser import symbol import sqlite3 from optparse import OptionParser TARGET_TABLE = """CREATE TABLE tbl_parse_tree ( parse_tree_id INTEGER PRIMARY KEY AUTOINCREMENT, parse_tree_symbol_id, parse_tree_indent, parse_tree_value)""" TARGET_INSERT = """INSERT INTO tbl_parse_tree (parse_tree_symbol_id, parse_tree_indent, parse_tree_value) VALUES (?, ?, ?)""" SYMBOL_TABLE = """CREATE TABLE tlp_parse_tree_symbol ( parse_tree_symbol_id INTEGER PRIMARY KEY, parse_tree_symbol_val)""" SYMBOL_INSERT = """INSERT INTO tlp_parse_tree_symbol (parse_tree_symbol_id, parse_tree_symbol_val) VALUES (?, ?)""" class SymbolManager(object): """Class to merge symbols and tokens for ease of use.""" def __init__(self, c): self.to_merge = token.tok_name.copy() self.to_merge.update(symbol.sym_name) for k, v in self.to_merge.iteritems(): c.execute(SYMBOL_INSERT, (k, v)) def get_symbol(self, key): return self.to_merge[key] if key in self.to_merge else -1 def recurse_it(self, tester): """Check to see if dump_tup 
should recurse""" if self.get_symbol(tester) > 0: return True return False class Stocker(object): """Remembers the depth of the tree and effects the INSERTs into the output file. """ def __init__(self): self.cur_indent = 0 def do_symbol(self, c, symbol_value, val=""): """Stuff something from the parse tree into the database table.""" if symbol_value == 5: self.cur_indent += 1 elif symbol_value==6: self.cur_indent -= 1 c.execute(TARGET_INSERT, (symbol_value, self.cur_indent, str(val).replace("'", "`"))) def dump_tup(tup, sym, c, stok): """Recursive function to descend TUP and analyze its elements. tup parse tree of a file, rendered as a tuple sym dictionary rendered from symbol module c live database cursor stok output object effect token storage """ for node in tup: typ = type(node) r = getattr(typ, "__repr__", None) if (issubclass(typ, tuple) and r is tuple.__repr__): if node[0] in token.tok_name: stok.do_symbol(c, node[0], node[1]) elif sym.recurse_it(node[0]): #If you say node[1] here, the sqlite file is fat # and instructive stok.do_symbol(c, node[0], '__py__' ) for node2 in node[1:]: dump_tup(node2, sym, c, stok) else: stok.do_symbol(c, node[0], node[1]) dump_tup(node[1], sym, c, stok) else: stok.do_symbol(c, 0, node) def python_source_tree_to_db(file_name, target_name): """Retrieve information from the parse tree of a source file. Create an output database file in sqlite. Make a table in there, and then procede to stuff the flattened input parse tree into it. file_name Name of the file to read Python source code from. 
target_name Name for the sqlite database """ conn = sqlite3.connect(target_name) c = conn.cursor() c.execute(TARGET_TABLE) c.execute(SYMBOL_TABLE) ast = parser.suite(''.join(open(file_name, 'rU').readlines())) sym = SymbolManager(c) stok = Stocker() #pprint.pprint(ast.totuple()) dump_tup(ast.totuple(), sym, c, stok) conn.commit() def main(): oparser = OptionParser("usage: %prog [options] arg") oparser.add_option("-f", "--file", dest="filename", help="read python source from FILENAME") oparser.add_option("-o", "--output", dest="output", help="name of sqlite output file") (options, _) = oparser.parse_args() if not options.filename or not options.output: oparser.print_help() else: try: open(options.output) print "Output file exists, chose another one." except IOError: python_source_tree_to_db(options.filename, options.output) if __name__ == "__main__": main() -- -- Guilherme H. Polo Goncalves -- http://mail.python.org/mailman/listinfo/python-list