I think that I found a solution to my thread issues; however, I know it is not the most efficient method possible. Just to give you a little information on what this project is all about: I have 3 lists of email addresses. (1) "host email address" = contains a list of all of my email addresses (around 150,000 users). (2) "email addresses" = contains a list of email addresses that I have to match with the file "host email address". If there are any matches, then I have to print them out to a file (this could be up to 8 million users). (3) "domain addresses" = contains a list of email domains that I have to match with the "host email address" file. If there are any matches, then I have to print them out to a file (could be 2000 or more domains).
When running the application, you will have the "host email address" and can have either one or both of the other files running at the same time. My problem was that when the application ran, it appeared to stall. I decided to use threads for (1) the processing of data and (2) the progress bar. The solution I found that enabled the two threads to communicate was the use of global variables. I know this is this is not the most efficient method but, using this solution, I do not see the stalling issue that I found before (which is a good thing). I am still not happy with it, because I know it is not efficient, but I found this to be the best solution for my needs. Thoughts? The code is below. Before you see the code, I must thank everyone who helped me with this project (including the open source coders). =================== #! /usr/bin/env python import difflib, sys, thread, re, os, time import Tkinter from Tkinter import * from sets import Set import tkFileDialog, tkMessageBox from tkFileDialog import * listName = ['','',''] threadStat = 0 mailsrch = re.compile(r'[EMAIL PROTECTED],4}') domsrch = re.compile(r"@(\S+)") statusVar = 0.0 # for the progress bar startProgress = 0 ################################################################ class Meter(Tkinter.Frame): def __init__(self, master, width=300, height=20, bg='black', fillcolor='cyan',\ value=0.0, text=None, font=None, textcolor='white', *args, **kw): Tkinter.Frame.__init__(self, master, bg=bg, width=width, height=height, *args, **kw) self._value = value self._canv = Tkinter.Canvas(self, bg=self['bg'], width=self['width'], height=self['height'],\ highlightthickness=0, relief='flat', bd=0) self._canv.pack(fill='both', expand=1) self._rect = self._canv.create_rectangle(0, 0, 0, self._canv.winfo_reqheight(), fill=fillcolor,\ width=0) self._text = self._canv.create_text(self._canv.winfo_reqwidth()/2, self._canv.winfo_reqheight()/2,\ text='', fill=textcolor) if font: self._canv.itemconfigure(self._text, font=font) 
self.set(value, text) self.bind('<Configure>', self._update_coords) def _update_coords(self, event): '''Updates the position of the text and rectangle inside the canvas when the size of the widget gets changed.''' self._canv.update_idletasks() self._canv.coords(self._text, self._canv.winfo_width()/2, self._canv.winfo_height()/2) self._canv.coords(self._rect, 0, 0, self._canv.winfo_width()*self._value, self._canv.winfo_height()) self._canv.update_idletasks() def get(self): return self._value, self._canv.itemcget(self._text, 'text') def set(self, value=0.0, text=None): #make the value failsafe: if value < 0.0: value = 0.0 elif value > 1.0: value = 1.0 self._value = value if text == None: #if no text is specified use the default percentage string: text = str(int(round(100 * value))) + ' %' self._canv.coords(self._rect, 0, 0, self._canv.winfo_width()*value, self._canv.winfo_height()) self._canv.itemconfigure(self._text, text=text) self._canv.update_idletasks() ########################################################## def fail(msg): out = sys.stderr.write out(msg + "\n\n") out(__doc__) return 0 ################################################################ def fopen(fname): try: return open(fname, 'U') except IOError, detail: return fail("couldn't open " + fname + ": " + str(detail)) ################################################################ def fetchFiles(file1,file2,file3): #file1: host list file2 = email list; file3=domain; method= method = '' print file1 print file2 print file3 f1 = fopen(file1) a = f1.readlines(); f1.close() d1 = {} for c in a: for m in mailsrch.findall(c): d1[m.lower()] = None print "starting list 2" thread.start_new_thread(showProcessing, ()) #DOMAIN COMPARISON if file2 == '': domain(d1,file3) #EMAIL COMPARISON elif file3 == '': email(d1,file2) #BOTH else: both(d1,file2,file3) ############################################################### def domain (d1,file3): f3 = fopen(file3) domains = f3.readlines(); f3.close() print len(domains) 
totalLen = len(domains) print totalLen try: progressInc = abs(1.0/totalLen) except: tkMessageBox.showerror ( "What are you doing?", "One of your files had no information. I cannot process this, I QUIT." ) global threadStat threadStat = 1 progressInc = 1 print progressInc global statusVar utp = open("data/emailMatch.txt","w") domainList = [] for domain in domains: domainList.extend(domsrch.findall(domain.lower())) domainsSet = set(domainList) for key in d1: name, domain = key.split("@",1) if domain.lower() in domainsSet: utp.write(key + '\n') statusVar += progressInc utp.close() endProc() ############################################################### def email (d1, file2): f2 = fopen(file2) method = 'email' emails = f2.readlines(); f2.close() totalLen = len(emails) print totalLen try: progressInc = abs(1.0/totalLen) except: tkMessageBox.showerror ( "What are you doing?", "One of your files had no information. I cannot process this, I QUIT." ) global threadStat threadStat = 1 progressInc = 1 print progressInc global statusVar utp = open("data/emailMatch.txt","w") for email in emails: for n in mailsrch.findall(email.lower()): if d1.has_key( n ): utp.write(n + '\n') statusVar += progressInc utp.close() print "I am done with email comparison" endProc() ############################################################### def both (d1, file2, file3): #doing the Domains first f3 = fopen(file3) domains = f3.readlines(); f3.close() f2 = fopen(file2) method = 'email' emails = f2.readlines(); f2.close() totalLen = len(domains) + len(emails) print totalLen try: progressInc = abs(1.0/totalLen) except: tkMessageBox.showerror ( "What are you doing?", "One of your files had no information. I cannot process this, I QUIT." 
) global threadStat threadStat = 1 progressInc = 1 print progressInc global statusVar finList = [] domainList = [] for domain in domains: domainList.extend(domsrch.findall(domain.lower())) domainsSet = set(domainList) for key in d1: name, domain = key.split("@",1) if domain.lower() in domainsSet: finList.append(key) statusVar += progressInc print "I am done with domain comparison" #Next do email addresses for email in emails: for n in mailsrch.findall(email.lower()): if d1.has_key( n ): finList.append(n) statusVar += progressInc print "I am done with email comparison" print "removing duplication" #removeDups(finList) dupFreeList = removeDups(finList) dupFreeList.sort() utp = open("data/emailMatch.txt","w") for emails in dupFreeList: utp.write(emails + '\n') utp.close() print "i am done doing both" endProc() ############################################################### def removeDups(s): n = len(s) if n == 0: return [] u = {} try: for x in s: u[x] = 1 except TypeError: del u # move on to the next method else: return u.keys() try: t = list(s) t.sort() except TypeError: del t # move on to the next method else: assert n > 0 last = t[0] lasti = i = 1 while i < n: if t[i] != last: t[lasti] = last = t[i] lasti += 1 i += 1 return t[:lasti] # Brute force is all that's left. u = [] for x in s: if x not in u: u.append(x) return u ############################################################### def endProc(): global threadStat threadStat = 1 thread.exit() ############################################################### def showProcessing(): mroot = Tkinter.Tk(className='Worker Bee') metric = Meter(mroot, relief='ridge', bd=3) metric.pack(fill='x') setInc = 0.1 global statusVar global threadStat while threadStat == 0: if statusVar < 0.3: message = "YAWN. Have any coffee" elif statusVar < 0.5 and statusVar > 0.3: message = "Im working, so you dont have to." 
elif statusVar < 0.7 and statusVar > 0.5: message = "I hope you sold something, to pay me off" else: message = "Almost there chief." metric.set(statusVar, message) time.sleep(10) metric.set(1.0, 'WOOT WOOT WOOT. DONE') print threadStat ############################################################### def startProc(): noisy = 1 qseen = rseen = 0 #print listName f1name = listName[0] f2name = listName[1] f3name = listName[2] if f1name == '': tkMessageBox.showerror ( "Open file", "You must upload host email list." ) method = 'failed' print "ERROR! You need to upload host email address" elif f2name =='' and f3name == '': tkMessageBox.showerror ( "Open file", "You must upload another document to compare host list." ) method = 'failed' print "ERROR! You need to upload another file" else: thread.start_new_thread(fetchFiles, (f1name,f2name,f3name,)) global threadStat while threadStat == 0: pass ############################################################### def openMax(): a = tkFileDialog.askopenfilename() listName[0] = a def openEmail(): b = tkFileDialog.askopenfilename() listName[1] = b def openDomain(): c = tkFileDialog.askopenfilename() listName[2] = c ############################################################### main = Tk() bframe = Frame(main) main.title("Suppression Utility") b1 = Button(bframe,text='Host Email List',command=openMax) b2 = Button(bframe,text='Email List',command=openEmail) b3 = Button(bframe,text='Domain List',command=openDomain) b4 = Button(text='Start Processing',command=startProc) bframe.pack(side=TOP) b1.pack(side=LEFT) b3.pack(side=RIGHT) b2.pack(side=RIGHT) b4.pack(side=BOTTOM) main.mainloop() ####################################### ======================== Dave Huggins On 2/18/07, Gabriel Genellina <[EMAIL PROTECTED]> wrote:
En Sun, 18 Feb 2007 23:37:02 -0300, Sick Monkey <[EMAIL PROTECTED]> escribió: > Well if this cannot be done, can a thread call a function in the main > method? > I have been trying and have not been successive. Perhaps I am using > thread > incorrectly. The safe way to pass information between threads is to use Queue. From inside the working thread, you put() an item with enough state information. On the main (GUI) thread, you use after() to check for any data in the queue, and then update the interface accordingly. I think there is a recipe in the Python Cookbook http://aspn.activestate.com/ASPN/Cookbook/Python -- Gabriel Genellina -- http://mail.python.org/mailman/listinfo/python-list
-- http://mail.python.org/mailman/listinfo/python-list