I think that I found a solution to my thread issues, however I know it is
not the most efficient method possible.
Just to give you a little information on what this project is all about....
  I have 3 lists of email addresses.
     (1)  "host email address" =  contains a list of all of my email
addresses (around 150,000 users)
     (2)  "email addresses"  =  contains a list of email addresses that I
have to match with the file "host email address".  If there are any matches,
then I have to print them out to a file. (this could be up to 8 million
users)
     (3)  "domain addresses" =  contains a list of email domains that I
have to match with the "host email address" file.  If there are any matches,
then I have to print them out to a file. (could be 2000 or more domains)

 When running the application, you will have the "host email address" and
can have either one or both of the other files running at the same time.

My problem was that when the application ran, it appeared to stall.  I
decided to use threads for (1) the processing of data and (2) the progress
bar.  The solution I found that enabled the two threads to communicate was
the use of global variables.

I know this is not the most efficient method but, using this
solution, I do not see the stalling issue that I found before (which is a
good thing).  I am still not happy with it, because I know it is not
efficient, but I found this to be the best solution for my needs.

Thoughts?

The code is below.  Before you see the code, I must thank everyone who
helped me with this project (including the open source coders).
===================
#! /usr/bin/env python
import difflib
import os
import re
import sys
import thread
import time
import Tkinter
import tkFileDialog
import tkMessageBox
from sets import Set
from Tkinter import *
from tkFileDialog import *
# Paths picked in the GUI, by index: 0 = host list, 1 = email list,
# 2 = domain list.  An empty string means "nothing chosen yet".
listName = ['', '', '']
# 0 while a job is running (or none has been started); endProc() sets it to 1.
threadStat = 0
# NOTE(review): the pattern in the posted source had been overwritten by the
# mailing-list archive's address scrubber (it read '[EMAIL PROTECTED],4}') and
# could never match an address.  The pattern below is the widely circulated
# address-extraction expression that ends in the same ',4}' -- confirm it
# against the author's own copy.
mailsrch = re.compile(r'[\w\-][\w\-\.]+@[\w\-][\w\-\.]+[a-zA-Z]{1,4}')
# Captures the run of non-whitespace characters that follows an '@'.
domsrch = re.compile(r"@(\S+)")
statusVar = 0.0  # for the progress bar: fraction done, 0.0 .. 1.0
# Not referenced by any code visible in this file.
startProgress = 0
################################################################
class Meter(Tkinter.Frame):
    """Progress bar: a filled rectangle on a canvas with a centered caption.

    The fill fraction lives in ``self._value``; ``set()`` clamps it to the
    range 0.0 .. 1.0 and redraws the bar.
    """

    def __init__(self, master, width=300, height=20, bg='black',
                 fillcolor='cyan', value=0.0, text=None, font=None,
                 textcolor='white', *args, **kw):
        """Create the frame, its canvas, the bar rectangle and the caption.

        Extra positional/keyword arguments are handed to Tkinter.Frame.
        """
        Tkinter.Frame.__init__(self, master, bg=bg, width=width,
                               height=height, *args, **kw)
        self._value = value

        canvas = Tkinter.Canvas(self, bg=self['bg'], width=self['width'],
                                height=self['height'], highlightthickness=0,
                                relief='flat', bd=0)
        self._canv = canvas
        canvas.pack(fill='both', expand=1)

        # The bar starts with zero width; set() stretches it below.
        self._rect = canvas.create_rectangle(
            0, 0, 0, canvas.winfo_reqheight(), fill=fillcolor, width=0)
        self._text = canvas.create_text(
            canvas.winfo_reqwidth()/2, canvas.winfo_reqheight()/2,
            text='', fill=textcolor)
        if font:
            canvas.itemconfigure(self._text, font=font)

        self.set(value, text)
        # Keep bar and caption in place when the widget is resized.
        self.bind('<Configure>', self._update_coords)

    def _update_coords(self, event):
        """Re-center the caption and rescale the bar after a resize."""
        canvas = self._canv
        canvas.update_idletasks()
        wide = canvas.winfo_width()
        high = canvas.winfo_height()
        canvas.coords(self._text, wide/2, high/2)
        canvas.coords(self._rect, 0, 0, wide*self._value, high)
        canvas.update_idletasks()

    def get(self):
        """Return ``(fraction, caption)`` as currently displayed."""
        caption = self._canv.itemcget(self._text, 'text')
        return self._value, caption

    def set(self, value=0.0, text=None):
        """Show *value* (clamped to 0.0 .. 1.0) with caption *text*.

        With no caption given, a rounded percentage such as '42 %' is used.
        """
        value = 0.0 if value < 0.0 else (1.0 if value > 1.0 else value)
        self._value = value
        if text is None:
            text = str(int(round(100 * value))) + ' %'
        canvas = self._canv
        canvas.coords(self._rect, 0, 0,
                      canvas.winfo_width()*value, canvas.winfo_height())
        canvas.itemconfigure(self._text, text=text)
        canvas.update_idletasks()

##########################################################
def fail(msg):
    """Report *msg* on stderr and return 0.

    The module docstring is appended as usage text when the module has one.
    This file defines none, so ``__doc__`` is None and writing it
    unconditionally raised TypeError instead of reporting the error.
    """
    out = sys.stderr.write
    out(msg + "\n\n")
    if __doc__:
        out(__doc__)
    return 0
################################################################
def fopen(fname):
    """Open *fname* for reading in universal-newline mode.

    Returns the open file object.  If the open raises IOError, the reason is
    passed to fail() and fail()'s return value (0) is returned instead, so
    callers must check the result before using it as a file.
    """
    try:
        handle = open(fname, 'U')
    except IOError as detail:
        return fail("couldn't open " + fname + ": " + str(detail))
    return handle
################################################################
def fetchFiles(file1, file2, file3):
    """Load the host list, then run the comparison the chosen files call for.

    file1 -- path of the host address list (required)
    file2 -- path of the email list, or '' if none was chosen
    file3 -- path of the domain list, or '' if none was chosen

    Every address found in file1 is lower-cased and stored as a key of a
    dict (values are unused), which is handed to domain(), email() or
    both() depending on which of file2/file3 is empty.
    """
    print(file1)
    print(file2)
    print(file3)

    f1 = fopen(file1)
    if not f1:
        # fopen() returns 0 when the file cannot be opened.  Mark the job as
        # finished so startProc()'s wait loop is released instead of
        # spinning forever after this thread dies on an AttributeError.
        endProc()
        return
    try:
        a = f1.readlines()
    finally:
        f1.close()

    d1 = {}
    for c in a:
        for m in mailsrch.findall(c):
            d1[m.lower()] = None

    print("starting list 2")
    # NOTE(review): this starts a second Tk root from a non-GUI thread.
    # Tkinter is not thread-safe; the usual pattern is to post progress to a
    # Queue and poll it from the main thread with after().
    thread.start_new_thread(showProcessing, ())

    if file2 == '':
        # DOMAIN COMPARISON
        domain(d1, file3)
    elif file3 == '':
        # EMAIL COMPARISON
        email(d1, file2)
    else:
        # BOTH
        both(d1, file2, file3)
###############################################################
def domain(d1, file3):
    """Write every host address whose domain is listed in *file3*.

    d1    -- dict whose keys are lower-cased host addresses
    file3 -- path of the domain list; on each line only the text that
             follows an '@' is taken as a domain (see domsrch)

    Matches go to data/emailMatch.txt, one per line.  statusVar is advanced
    once per host address, and endProc() is called when done.
    """
    global threadStat, statusVar

    f3 = fopen(file3)
    if not f3:
        # fopen() returned 0: nothing to compare against, release the GUI.
        endProc()
        return
    try:
        domains = f3.readlines()
    finally:
        f3.close()
    print(len(domains))

    if not domains:
        # NOTE(review): this dialog is raised from the worker thread;
        # Tkinter calls are only safe from the thread that owns the GUI.
        tkMessageBox.showerror(
            "What are you doing?",
            "One of your files had no information.  I cannot process this, I QUIT."
        )
        threadStat = 1

    # The loop below runs once per host address, so that count -- not the
    # number of domains -- is what one progress step must be derived from.
    if d1:
        progressInc = 1.0 / len(d1)
    else:
        progressInc = 1
    print(progressInc)

    domainsSet = set()
    for line in domains:
        domainsSet.update(domsrch.findall(line.lower()))

    utp = open("data/emailMatch.txt", "w")
    try:
        for key in d1:
            hostDomain = key.split("@", 1)[1]
            if hostDomain.lower() in domainsSet:
                utp.write(key + '\n')
            statusVar += progressInc
    finally:
        utp.close()
    endProc()
###############################################################
def email(d1, file2):
    """Write every address in *file2* that is also a host address.

    d1    -- dict whose keys are lower-cased host addresses
    file2 -- path of the email list to check against d1

    Matches go to data/emailMatch.txt, one per line (an address occurring
    several times in file2 is written each time).  statusVar is advanced
    once per input line, and endProc() is called when done.
    """
    global threadStat, statusVar

    f2 = fopen(file2)
    if not f2:
        # fopen() returned 0: nothing to compare, release the GUI.
        endProc()
        return
    try:
        emails = f2.readlines()
    finally:
        f2.close()

    totalLen = len(emails)
    print(totalLen)
    if totalLen:
        progressInc = 1.0 / totalLen
    else:
        # NOTE(review): this dialog is raised from the worker thread;
        # Tkinter calls are only safe from the thread that owns the GUI.
        tkMessageBox.showerror(
            "What are you doing?",
            "One of your files had no information.  I cannot process this, I QUIT."
        )
        threadStat = 1
        progressInc = 1
    print(progressInc)

    utp = open("data/emailMatch.txt", "w")
    try:
        for line in emails:
            for n in mailsrch.findall(line.lower()):
                if n in d1:
                    utp.write(n + '\n')
            statusVar += progressInc
    finally:
        utp.close()
    print("I am done with email comparison")
    endProc()
###############################################################
def both(d1, file2, file3):
    """Run the domain and the email comparison and write the merged result.

    d1    -- dict whose keys are lower-cased host addresses
    file2 -- path of the email list
    file3 -- path of the domain list

    A host address is kept when its domain is listed in file3 or when the
    address itself occurs in file2.  The distinct matches are written to
    data/emailMatch.txt in sorted order, one per line, and endProc() is
    called when done.
    """
    global threadStat, statusVar

    # doing the Domains first
    f3 = fopen(file3)
    if not f3:
        # fopen() returned 0: release the GUI instead of crashing.
        endProc()
        return
    try:
        domains = f3.readlines()
    finally:
        f3.close()

    f2 = fopen(file2)
    if not f2:
        endProc()
        return
    try:
        emails = f2.readlines()
    finally:
        f2.close()

    if not domains and not emails:
        # NOTE(review): this dialog is raised from the worker thread;
        # Tkinter calls are only safe from the thread that owns the GUI.
        tkMessageBox.showerror(
            "What are you doing?",
            "One of your files had no information.  I cannot process this, I QUIT."
        )
        threadStat = 1

    # One progress step per loop iteration below: the first loop walks the
    # host addresses, the second walks the lines of the email list.
    totalLen = len(d1) + len(emails)
    print(totalLen)
    if totalLen:
        progressInc = 1.0 / totalLen
    else:
        progressInc = 1
    print(progressInc)

    matches = set()  # a set, so an address found by both passes is kept once

    domainsSet = set()
    for line in domains:
        domainsSet.update(domsrch.findall(line.lower()))
    for key in d1:
        hostDomain = key.split("@", 1)[1]
        if hostDomain.lower() in domainsSet:
            matches.add(key)
        statusVar += progressInc
    print("I am done with domain comparison")

    # Next do email addresses
    for line in emails:
        for n in mailsrch.findall(line.lower()):
            if n in d1:
                matches.add(n)
        statusVar += progressInc
    print("I am done with email comparison")

    print("removing duplication")
    dupFreeList = sorted(matches)

    utp = open("data/emailMatch.txt", "w")
    try:
        for address in dupFreeList:
            utp.write(address + '\n')
    finally:
        utp.close()
    print("i am done doing both")
    endProc()

###############################################################
def removeDups(s):
    """Return a list holding each distinct element of sequence *s* once.

    Three strategies are tried, cheapest first:
      1. hashable elements  -- de-duplicate through dict keys;
      2. orderable elements -- sort a copy and drop equal neighbours;
      3. anything else      -- quadratic membership scan.
    The order of the result is not part of the contract; sort it if an
    order is needed.  An empty input gives [].
    """
    if len(s) == 0:
        return []

    # 1. Hashable elements.
    try:
        return list(dict.fromkeys(s))
    except TypeError:
        pass  # some element is unhashable -- try the next method

    # 2. Unhashable but orderable elements.
    try:
        ordered = sorted(s)
    except TypeError:
        pass  # elements cannot be ordered either -- try the next method
    else:
        unique = [ordered[0]]
        for item in ordered[1:]:
            if item != unique[-1]:
                unique.append(item)
        return unique

    # 3. Brute force is all that's left.
    unique = []
    for item in s:
        if item not in unique:
            unique.append(item)
    return unique

###############################################################
def endProc():
    """Flag the job as finished, then terminate the calling thread.

    Sets the module-level threadStat to 1 and calls thread.exit(), so no
    statement placed after a call to this function is executed.
    """
    global threadStat
    threadStat = 1
    thread.exit()
###############################################################
def showProcessing():
    """Show a progress window and refresh it until threadStat leaves 0.

    Reads the module-level statusVar (fraction done) every 10 seconds,
    picks a caption for it and redraws the Meter; once threadStat is no
    longer 0 the bar is set to 100 % with a final caption.
    """
    # NOTE(review): this creates a second Tk root and drives it from a
    # worker thread.  Tkinter is not thread-safe; the usual pattern is to
    # post progress to a Queue and poll it from the main thread with after().
    mroot = Tkinter.Tk(className='Worker Bee')
    metric = Meter(mroot, relief='ridge', bd=3)
    metric.pack(fill='x')

    while threadStat == 0:
        progress = statusVar  # read once so caption and bar agree
        # Plain upper bounds: the earlier "< 0.5 and > 0.3" style tests left
        # exactly 0.3 and exactly 0.5 to fall through to the last caption.
        if progress < 0.3:
            message = "YAWN.  Have any coffee"
        elif progress < 0.5:
            message = "Im working, so you dont have to."
        elif progress < 0.7:
            message = "I hope you sold something, to pay me off"
        else:
            message = "Almost there chief."
        metric.set(progress, message)
        time.sleep(10)
    metric.set(1.0, 'WOOT WOOT WOOT.  DONE')
    print(threadStat)
###############################################################

def startProc():
    """Validate the chosen files, start the worker and wait for it.

    Requires the host list (listName[0]) and at least one of the email list
    (listName[1]) or the domain list (listName[2]); otherwise an error
    dialog is shown and nothing is started.
    """
    global threadStat, statusVar

    f1name = listName[0]
    f2name = listName[1]
    f3name = listName[2]

    if f1name == '':
        tkMessageBox.showerror(
            "Open file",
            "You must upload host email list."
        )
        print("ERROR!  You need to upload host email address")
        return

    if f2name == '' and f3name == '':
        tkMessageBox.showerror(
            "Open file",
            "You must upload another document to compare host list."
        )
        print("ERROR!  You need to upload another file")
        return

    # Reset the shared state; without this a second run would see the
    # previous run's threadStat == 1 and be reported as finished at once.
    threadStat = 0
    statusVar = 0.0
    thread.start_new_thread(fetchFiles, (f1name, f2name, f3name,))

    # Sleep between checks instead of spinning: a tight "pass" loop competes
    # with the worker thread for the interpreter.
    # NOTE(review): the GUI is still blocked while this loop runs; returning
    # here and polling with after() would keep the window responsive.
    while threadStat == 0:
        time.sleep(0.1)

###############################################################
def openMax():
    """Ask for the host email list and store its path in listName[0]."""
    listName[0] = tkFileDialog.askopenfilename()

def openEmail():
    """Ask for the email list and store its path in listName[1]."""
    listName[1] = tkFileDialog.askopenfilename()

def openDomain():
    """Ask for the domain list and store its path in listName[2]."""
    listName[2] = tkFileDialog.askopenfilename()

###############################################################

# Build the main window: a frame holding three file-picker buttons, with the
# start button below it.
main = Tk()
bframe = Frame(main)
main.title("Suppression Utility")

# Each picker stores the chosen path in listName via its callback.
b1 = Button(bframe,text='Host Email List',command=openMax)
b2 = Button(bframe,text='Email List',command=openEmail)
b3 = Button(bframe,text='Domain List',command=openDomain)
# b4 is given no parent, so it is attached to the default root window
# rather than to bframe.
b4 = Button(text='Start Processing',command=startProc)

# Pack order matters: b3 is packed RIGHT before b2, so the row reads
# Host Email List | Email List | Domain List from left to right.
bframe.pack(side=TOP)
b1.pack(side=LEFT)
b3.pack(side=RIGHT)
b2.pack(side=RIGHT)
b4.pack(side=BOTTOM)

# Hand control to Tk; this call returns only when the window is closed.
main.mainloop()
#######################################

========================
Dave Huggins


On 2/18/07, Gabriel Genellina <[EMAIL PROTECTED]> wrote:

En Sun, 18 Feb 2007 23:37:02 -0300, Sick Monkey <[EMAIL PROTECTED]>
escribió:

> Well if this cannot be done, can a thread call a function in the main
> method?
> I have been trying and have not been successive.  Perhaps I am using
> thread
> incorrectly.

The safe way to pass information between threads is to use Queue. From
inside the working thread, you put() an item with enough state
information. On the main (GUI) thread, you use after() to check for any
data in the queue, and then update the interface accordingly.
I think there is a recipe in the Python Cookbook
http://aspn.activestate.com/ASPN/Cookbook/Python

--
Gabriel Genellina

--
http://mail.python.org/mailman/listinfo/python-list

-- 
http://mail.python.org/mailman/listinfo/python-list

Reply via email to