I want to add a simple search function to my homepage. It needs to search through all the HTML files of my homepage (about 300 pages) and highlight the search words.
# I made some tests with HTMLParser; it works, but it is slow.
# So, my question is: how can I improve its speed?

import re

try:
    from html.parser import HTMLParser  # Python 3
except ImportError:
    from HTMLParser import HTMLParser  # Python 2

# Replacement template: wraps the whole match (group 0) in the highlight tag.
_HIGHLIGHT = r"<font color=red>\g<0></font>"


def _compile_words(words):
    """Return one compiled regex matching any non-empty word, or None.

    Empty strings are dropped because they would match at every position.
    Longer words are tried first so that a word is preferred over one of
    its own prefixes.  None is returned when nothing is left to search for.
    """
    candidates = sorted((w for w in words if w), key=len, reverse=True)
    if not candidates:
        return None
    return re.compile("|".join(re.escape(w) for w in candidates))


def _init_parser(parser):
    """Run HTMLParser.__init__ with character-reference decoding disabled.

    With decoding disabled, entity and character references are reported
    through handle_entityref/handle_charref, so text chunks stay raw and
    can be written back unchanged.  Parser versions whose __init__ does not
    accept ``convert_charrefs`` raise TypeError; they are initialised
    without the argument.
    """
    try:
        HTMLParser.__init__(parser, convert_charrefs=False)
    except TypeError:
        HTMLParser.__init__(parser)


class HightLightParser(HTMLParser):
    """Copy an HTML document to ``outfile``, highlighting the search words.

    Every occurrence of a word inside text data is wrapped in
    ``<font color=red>...</font>``.  Tags, entity/character references,
    comments, declarations and processing instructions are written back
    out so that the markup survives the round trip.

    Parameters:
        outfile: object with a ``write(str)`` method receiving the output.
        words: iterable of strings to highlight.  The pattern is compiled
            once here, so later changes to ``self.words`` have no effect.
    """

    def __init__(self, outfile, words):
        self.outfile = outfile
        self.words = words
        self.found = False
        self._pattern = _compile_words(words)
        _init_parser(self)

    def handle_starttag(self, tag, attrs):
        # Re-emit the tag exactly as it appeared in the input.
        self.outfile.write(self.get_starttag_text())

    def handle_startendtag(self, tag, attrs):
        # The inherited implementation calls handle_starttag and then
        # handle_endtag, which would turn "<br/>" into "<br/></br>".
        self.outfile.write(self.get_starttag_text())

    def handle_endtag(self, tag):
        self.outfile.write("</%s>" % tag)

    def handle_data(self, data):
        # A single substitution pass over the original text: markup
        # inserted for one word can never be matched by another word.
        if self._pattern is not None:
            data = self._pattern.sub(_HIGHLIGHT, data)
        self.outfile.write(data)

    def handle_entityref(self, name):
        # Always written with a trailing ';', even if the input omitted it.
        self.outfile.write("&%s;" % name)

    def handle_charref(self, name):
        self.outfile.write("&#%s;" % name)

    def handle_comment(self, data):
        self.outfile.write("<!--%s-->" % data)

    def handle_decl(self, decl):
        self.outfile.write("<!%s>" % decl)

    def handle_pi(self, data):
        self.outfile.write("<?%s>" % data)


class SearchParser(HTMLParser):
    """Set ``found`` to True if any search word occurs in the text data.

    Only text passed to handle_data is examined, so a word that appears
    solely in tag names or attribute values does not count as a hit.

    Parameters:
        words: iterable of strings to look for.  The pattern is compiled
            once here, so later changes to ``self.words`` have no effect.
    """

    def __init__(self, words):
        self.words = words
        self.found = False
        self._pattern = _compile_words(words)
        _init_parser(self)

    def handle_data(self, data):
        # Once a hit is recorded there is nothing left to learn.
        if self.found or self._pattern is None:
            return
        if self._pattern.search(data):
            self.found = True


def highlight_file(in_path, out_path, words):
    """Write a highlighted copy of ``in_path`` to ``out_path`` if it matches.

    Returns True when a word was found in the text and the output file was
    written, False otherwise (the output file is then left untouched).
    """
    with open(in_path) as infile:
        data = infile.read()

    # Cheap pre-filter: the text chunks the parsers see are pieces of the
    # raw file, so a word that is absent from the raw file cannot be found
    # by parsing.  This skips the parse for pages without any match.
    if not any(word and word in data for word in words):
        return False

    searcher = SearchParser(words)
    searcher.feed(data)
    searcher.close()  # process any input still buffered by the parser
    if not searcher.found:
        return False

    with open(out_path, "w") as outfile:
        highlighter = HightLightParser(outfile, words)
        highlighter.feed(data)
        highlighter.close()
    return True


words = ["the"]

if __name__ == "__main__":
    highlight_file("input.htm", "output.htm", words)

# -- http://mail.python.org/mailman/listinfo/python-list