On Nov 27, 2010, at 12:27 PM, Luis Díaz wrote:
> Thanks, and no worries,
> it was only a comment.
> I was also thinking about the size of line 3,
>
> and I did read it;
> I like to read the code.
# Here's the code with line 3 truncated:
"""Generate pseudo-random text and fill a table with dummy records.

``Learner`` keeps word -> next-word frequency counts and can emit random
text from them; ``populate`` uses it, together with ``da_du_ma``, to insert
generated rows into a table object.
"""
import datetime
import random
import re

try:
    import cPickle
except ImportError:  # Python 3: the C pickler lives in the plain module
    import pickle as cPickle

try:
    _STRING_TYPES = (str, unicode)
except NameError:  # Python 3 has no separate ``unicode`` type
    _STRING_TYPES = (str,)

# NOTE: this literal was cut off in the posting (its closing brace was
# missing); only the entries that were visible are kept here.
IUP = {'shoebill': {'a': 1, '187': 1}, 'trout-like': {'parr': 1}}

_SYLLABLES = ('da', 'du', 'ma', 'mo', 'ce', 'co',
              'pa', 'po', 'sa', 'so', 'ta', 'to')


class Learner:
    """Word-pair frequency table that can learn from and generate text.

    ``self.db`` maps each word to a dict of ``{following word: count}``.
    """

    # Substitutions run in exactly this order; they were previously stored
    # in a dict, whose iteration order is not guaranteed on Python 2.
    _LEARN_SUBS = (
        (r'[^a-zA-Z0-9\.;:\-]', ' '),
        (r'\s+', ' '),
        (', ', ' , '),
        (r'\. ', ' . '),
        (': ', ' : '),
        ('; ', ' ; '),
    )
    _OUTPUT_SUBS = (
        (' ,', ','),
        (r' \.', '.\n'),
        (' :', ':'),
        (' ;', ';'),
        (r'\n\s+', '\n'),
    )
    _LINKABLE = re.compile('[a-z]+')

    def __init__(self):
        self.db = {}

    def learn(self, text):
        """Add the consecutive word pairs found in *text* to ``self.db``.

        The text is normalised with ``_LEARN_SUBS``, split on single spaces
        and lower-cased before the pairs are counted.
        """
        for pattern, replacement in self._LEARN_SUBS:
            text = re.sub(pattern, replacement, text)
        items = [item.lower() for item in text.split(' ')]
        for item, nextitem in zip(items, items[1:]):
            followers = self.db.setdefault(item, {})
            followers[nextitem] = followers.get(nextitem, 0) + 1

    def save(self, filename):
        """Pickle ``self.db`` into the file named *filename*."""
        with open(filename, 'wb') as stream:
            cPickle.dump(self.db, stream)

    def load(self, filename):
        """Replace ``self.db`` with the table pickled in *filename*.

        SECURITY: unpickling can execute arbitrary code; only load files
        that come from a trusted source.
        """
        with open(filename, 'rb') as stream:
            self.loadd(cPickle.load(stream))

    def loadd(self, db):
        """Use *db* (``{word: {next word: count}}``) as the frequency table."""
        self.db = db

    def generate(self, length=10000, prefix=False):
        """Return random text of at most ``length + 1`` words.

        Starting from a random known word, each following word is drawn
        with probability proportional to its count.  Generation stops early
        when the current word has no recorded followers.

        length -- maximum number of words appended after the first one.
        prefix -- when truthy, a word containing a lowercase letter has a
                  1% chance of being wrapped in an ``<a>`` tag whose href is
                  ``prefix`` followed by the word.

        Raises ValueError when the frequency table is empty.
        """
        keys = list(self.db)
        if not keys:
            raise ValueError('cannot generate text from an empty table')
        current = random.choice(keys)
        words = [current.capitalize()]
        for _ in range(length):
            followers = self.db.get(current)
            if not followers:
                break  # dead end: nothing was ever seen after this word
            total = sum(followers.values())
            if total <= 0:
                break
            previous = current
            # Weighted draw: walk the counts until the ticket is used up.
            ticket = random.randint(0, total - 1)
            for current, weight in followers.items():
                if ticket < weight:
                    break
                ticket -= weight
            # A word that follows a full stop starts a new sentence.
            shown = current.capitalize() if previous == '.' else current
            if prefix and self._LINKABLE.search(shown) and \
                    random.random() < 0.01:
                shown = '<a href="%s%s">%s</a>' % (prefix, shown, shown)
            words.append(shown)
        text = ' '.join(words)
        for pattern, replacement in self._OUTPUT_SUBS:
            text = re.sub(pattern, replacement, text)
        return text + '.\n'


def da_du_ma(n=4):
    """Return a nonsense word made of *n* random two-letter syllables."""
    return ''.join([random.choice(_SYLLABLES) for _ in range(n)])


def _random_option(options):
    """Return the first element of a randomly chosen entry of *options*."""
    return random.choice(options)[0]


def _random_option_list(options):
    """Return a random-length list of first elements drawn from *options*."""
    count = random.randint(0, len(options) - 1) // 2
    return [random.choice(options)[0] for _ in range(count)]


def _referenced_ids(table, tablename, cache):
    """Return (and memoise in *cache*) the ids of all rows of *tablename*."""
    if tablename not in cache:
        db = table._db
        cache[tablename] = [row.id for row in
                            db(db[tablename].id > 0).select()]
    return cache[tablename]


def populate(table, n, default=True):
    """Insert *n* generated records into *table*.

    table   -- object exposing ``fields``, item access by field name,
               ``insert(**record)`` and, for reference fields, ``_db``.
    n       -- number of records to insert.
    default -- when true, a field with a truthy ``default`` gets that value
               instead of a generated one.

    Fields whose ``type`` is not a string, and ``id`` fields, are left out
    of the record.  A field whose ``requires.options()`` is empty is left
    out as well.
    """
    ell = Learner()
    # The table could instead be built with ell.learn(<corpus text>) or
    # read back with ell.load(<pickle file>); the built-in one is used.
    ell.loadd(IUP)
    ids = {}
    for _ in range(n):
        record = {}
        for fieldname in table.fields:
            field = table[fieldname]
            ftype = field.type
            if not isinstance(ftype, _STRING_TYPES):
                continue
            elif ftype == 'id':
                continue
            elif default and field.default:
                record[fieldname] = field.default
            elif ftype == 'text':
                record[fieldname] = ell.generate(random.randint(10, 100),
                                                 prefix=None)
            elif ftype == 'boolean':
                record[fieldname] = random.random() > 0.5
            elif ftype in ['datetime', 'date']:
                record[fieldname] = \
                    datetime.datetime(2009, 1, 1) - \
                    datetime.timedelta(days=random.randint(0, 10000))
            elif ftype == 'time':
                hour = random.randint(0, 23)
                minute = 15 * random.randint(0, 3)
                record[fieldname] = datetime.time(hour, minute, 0)
            elif ftype == 'password':
                record[fieldname] = ''
            elif ftype == 'upload':
                record[fieldname] = None
            elif ftype == 'integer' and hasattr(field.requires, 'options'):
                options = field.requires.options()
                if options:
                    record[fieldname] = _random_option(options)
            elif ftype == 'list:integer' and \
                    hasattr(field.requires, 'options'):
                options = field.requires.options()
                if len(options) > 0:
                    record[fieldname] = _random_option_list(options)
            elif ftype in ['integer', 'double'] or \
                    str(ftype).startswith('decimal'):
                try:
                    record[fieldname] = random.randint(
                        field.requires.minimum, field.requires.maximum - 1)
                except (AttributeError, TypeError, ValueError):
                    # No usable minimum/maximum on the validator.
                    record[fieldname] = random.randint(0, 1000)
            elif ftype[:10] == 'reference ':
                known = _referenced_ids(table, ftype[10:], ids)
                if known:
                    record[fieldname] = random.choice(known)
                else:
                    record[fieldname] = 0
            elif ftype[:15] == 'list:reference ':
                known = _referenced_ids(table, ftype[15:], ids)
                if known:
                    count = random.randint(0, len(known) - 1) // 2
                    record[fieldname] = [random.choice(known)
                                         for _ in range(count)]
                else:
                    record[fieldname] = 0
            elif ftype == 'list:string' and \
                    hasattr(field.requires, 'options'):
                options = field.requires.options()
                if len(options) > 0:
                    record[fieldname] = _random_option_list(options)
            elif ftype == 'string' and hasattr(field.requires, 'options'):
                options = field.requires.options()
                if options:
                    record[fieldname] = _random_option(options)
            elif ftype == 'string' and fieldname.find('url') >= 0:
                record[fieldname] = 'http://%s.example.com' % da_du_ma(4)
            elif ftype == 'string' and fieldname.find('email') >= 0:
                record[fieldname] = '%s@example.com' % da_du_ma(4)
            elif ftype == 'string' and fieldname.find('name') >= 0:
                record[fieldname] = da_du_ma(4).capitalize()
            elif ftype == 'string':
                record[fieldname] = ell.generate(
                    10, prefix=False)[:field.length].replace('\n', ' ')
        table.insert(**record)


if __name__ == '__main__':
    ell = Learner()
    # IUP is already a dict; passing it through eval() raised TypeError.
    ell.loadd(IUP)
    print(ell.generate(1000, prefix=None))