Hi there. What this section does is take a comma-separated data file and import it — there is a unique ID in the first field and a code in the second field that corresponds to a certain section of information. What I need from this is for the process to roll up, against each unique ID, all section holdings without duplicates, report on section combinations, and report overall section counts. In addition, I need the ability to assign a page-count value to these sections, and the ability to upload a translation file in case a section is identified by multiple values that need to be normalized to a single unique value.
import sys
import os  # retained from the original posting; not used below
from collections import defaultdict

# Sentinel meaning "no distribution cut-off was requested".
_NO_LIMIT = 1000000000


class Analysis:
    """Interactive roll-up tool for unit/section holdings.

    Reads a two-column CSV file (unique unit ID, section code), rolls up
    the distinct section codes held by each unit ID, and writes three
    tab-separated reports into the current directory:

    * ``Sectiondist.txt``    - how many unit IDs hold 1, 2, 3, ... sections
    * ``Sectionqty.txt``     - how many unit IDs hold each section
    * ``SectionCombqty.txt`` - count and page totals per section combination

    Optionally: a translation file normalizes multiple section codes to one
    canonical code; a page-count file assigns pages per section; a "max
    distributions" cut-off lumps larger holdings together as a "Full Book".
    """

    def __init__(self):
        # The constructor drives the whole interactive session.
        print('***Analysis Tool***')
        self.datafile = input('data file name:')
        self.parsefile()

    @staticmethod
    def _read_csv_pairs(filename):
        """Return a list of (first, second) field tuples read from *filename*.

        Records are whitespace-separated (matching the original code, which
        split the file with ``str.split()``), each of the form "first,second".
        Raises OSError (IOError) if the file cannot be opened.
        """
        with open(filename, 'r') as fh:
            return [tuple(record.split(',')) for record in fh.read().split()]

    def parsefile(self):
        """Load the data file and run the interactive report pipeline.

        Builds ``self.uhdata``: {unit ID: sorted list of distinct sections},
        then asks about translation, page counts, and the distribution
        cut-off before kicking off the report chain.
        """
        # Re-prompt on a bad file name instead of crashing: the original
        # handler called an undefined name, analysis(), which raised
        # NameError rather than restarting.
        while True:
            try:
                pairs = self._read_csv_pairs(self.datafile)
                break
            except (IOError, OSError):
                print('file not found check file name')
                self.datafile = input('data file name:')

        self.uhdata = {}
        for unit_id, section in pairs:
            holdings = self.uhdata.setdefault(unit_id, [])
            if section not in holdings:  # roll up without duplicates
                holdings.append(section)
        for holdings in self.uhdata.values():
            holdings.sort()

        if input('would you like to translate section codes? (y/n):') == 'y':
            self.transFn()

        if input('would you like to assign section page counts? (y/n):') == 'y':
            self.setPageCounts()
        else:
            # No page-count file supplied: default every known section
            # to 0 pages so the combination report still adds up.
            self.pgcounts = {s: 0
                             for holdings in self.uhdata.values()
                             for s in holdings}

        if input('would you like to define max section distribution cut off? '
                 '(y/n):') == 'y':
            self.fdistmax = int(
                input('what is the max distributions before a full book?:'))
        else:
            self.fdistmax = _NO_LIMIT
        self.Sectiondistmax()
        # Note: the original ended with sys.exit(1), i.e. an *error* status
        # on the success path; a normal return is the right way to finish.

    def Sectiondistmax(self):
        """Write Sectiondist.txt: QTY of unit IDs per number of sections held.

        Holdings larger than the cut-off are lumped into one "Full Book"
        row. Chains into Sectionqty().
        """
        distribution = defaultdict(int)
        for holdings in self.uhdata.values():
            distribution[len(holdings)] += 1
        overflow = 0
        with open('Sectiondist.txt', 'w') as out:
            out.write('SectionDistributions\tQTY\n')
            for size, qty in sorted(distribution.items()):
                if size <= self.fdistmax:
                    out.write('%s\t%s\n' % (size, qty))
                else:
                    overflow += qty
            if self.fdistmax != _NO_LIMIT:
                out.write('Full Book\t%s\n' % overflow)
        self.Sectionqty()

    def Sectionqty(self):
        """Write Sectionqty.txt: QTY of unit IDs holding each section.

        Unit IDs whose holdings exceed the cut-off are excluded here (they
        are reported as "Full Book" elsewhere). Chains into SectionCombqty().
        """
        counts = defaultdict(int)
        for holdings in self.uhdata.values():
            # When no cut-off was set, fdistmax is the huge sentinel and
            # this test always passes — one loop covers both original
            # branches.
            if len(holdings) <= self.fdistmax:
                for section in holdings:
                    counts[section] += 1
        with open('Sectionqty.txt', 'w') as out:
            out.write('Section\tQTY\n')
            for section, qty in sorted(counts.items()):
                out.write('%s\t%s\n' % (section, qty))
        self.SectionCombqty()

    def SectionCombqty(self):
        """Write SectionCombqty.txt: per-combination counts and page totals.

        One row per distinct (sorted) holdings combination, ordered by
        descending count; combinations beyond the cut-off collapse into a
        single "Full Book" row.
        """
        combos = defaultdict(int)
        for holdings in self.uhdata.values():
            combos[tuple(holdings)] += 1
        overflow = 0
        with open('SectionCombqty.txt', 'w') as out:
            out.write('Combination Qty\tNumber of Sections\tCombination\t'
                      'CombinationPageCount\tTotalPages\n')
            for count, items in sorted(((v, k) for k, v in combos.items()),
                                       reverse=True):
                if len(items) > self.fdistmax:
                    overflow += count
                    continue
                # Bug fix: the original used pgcounts.get(i, i), which made
                # sum() fail with TypeError on a str whenever a section had
                # no page count; default to 0 pages instead.
                pages = sum(self.pgcounts.get(s, 0) for s in items)
                out.write('%s\t%s\t%s\t%s\t%s\n'
                          % (count, len(items), ','.join(items),
                             pages, count * pages))
            if self.fdistmax != _NO_LIMIT:
                out.write('%s\t>%s\tFull Book\tFull Book\tFull Book\n'
                          % (overflow, self.fdistmax))

    def setPageCounts(self):
        """Load a "section,pagecount" CSV into self.pgcounts ({str: int}).

        Re-prompts on a bad file name (the original called the undefined
        analysis() here as well).
        """
        while True:
            self.pagecountfile = input('page count file name:')
            try:
                pairs = self._read_csv_pairs(self.pagecountfile)
                break
            except (IOError, OSError):
                print('file not found check file name')
        self.pgcounts = {section: int(count) for section, count in pairs}

    def transFn(self):
        """Normalize section codes in self.uhdata via a translation file.

        The file holds "alias,canonical" records (e.g. parent and multiple
        child section codes); every alias in a unit's holdings is replaced,
        then holdings are de-duplicated and re-sorted.
        """
        while True:
            self.transfile = input('Section translate file name:')
            try:
                translations = dict(self._read_csv_pairs(self.transfile))
                break
            except (IOError, OSError):
                print('file not found check file name')
        for unit_id, holdings in self.uhdata.items():
            self.uhdata[unit_id] = sorted(
                {translations.get(s, s) for s in holdings})


# Backward-compatible aliases for the names used in the original posting.
__analysis = Analysis
analysis = Analysis


if __name__ == '__main__':
    Analysis()
_______________________________________________ Tutor maillist - Tutor@python.org http://mail.python.org/mailman/listinfo/tutor