Michael P. Reilly wrote: > Good. But one VERY important point to note is that you are not working > with "variables" here. You are working with members of a class instance. > This is a very different beast. You could just use getattr(), setattr() and > delattr() for these. > > But continuing... you might want to take a step back and think about this. Each > of the self.hN and self.in_hN have something in common and they all have the > same behavior. That sounds a lot like a job for "object oriented > programming", no? We can create a class that looks and acts like a list > (like hN), but is only active if we have set it (if in_hN is True). > > Actually, because of the structure of the SGML code, "BAD CODE1" isn't quite > the "bad code", the "handle_data" code is actually worse. The reason "BAD > CODE1" looks bad is not because of your code, but because SGMLParser forces > you to create so many methods in the subclass. There are no "start_hN" and > "end_hN" catch-all methods available. For this reason, I made only a minor > change to the "start_hN" and "end_hN" methods, but changed the reset and > handle_data methods quite a bit. > > class HeaderCapture: > def __init__(self, contents=[]): > self.contents = contents[:] # copy > self.deactivate() > def append(self, item): > # could raise an exception, but for now, ignore > if self.active: > self.contents.append(item) > def __len__(self): > return len(self.contents) > def __getitem__(self, idx): > return self.contents[idx] > def activate(self): > self.active = True > def deactivate(self): > self.active = False > ... 
> class Lister(SGMLParser): > > def reset(self): > SGMLParser.reset(self) > self.headers = { > 'h1': HeaderCapture(), > 'h2': HeaderCapture(), > 'h3': HeaderCapture(), > 'h4': HeaderCapture(), > 'h5': HeaderCapture(), > 'h6': HeaderCapture(), > } > > def handle_data(self, text): > # only one would be active, but legally, two could > for hc in self.headers.values(): > hc.append(text) # if not active, ignore > > def start_h1(self, attrs): > self.headers['h1'].activate() > def end_h1(self): > self.headers['h1'].deactivate() > def start_h2(self, attrs): > self.headers['h2'].activate() > def end_h2(self): > self.headers['h2'].deactivate() > def start_h3(self, attrs): > self.headers['h3'].activate() > def end_h3(self): > self.headers['h3'].deactivate() > def start_h4(self, attrs): > self.headers['h4'].activate() > def end_h4(self): > self.headers['h4'].deactivate() > def start_h5(self, attrs): > self.headers['h5'].activate() > def end_h5(self): > self.headers['h5'].deactivate() > def start_h6(self, attrs): > self.headers['h6'].activate() > def end_h6(self): > self.headers['h6'].deactivate() > To continue this, your "BAD CODE2" becomes
for tag in 'h1 h2 h3 h4 h5 h6'.split(): Show_step(tag) for i in parser.headers[tag]: print i Kent > On 7/15/06, Сергій <[EMAIL PROTECTED]> wrote: > >> But again, like others have suggested, you should rethink your problem >> >>> and your solution before starting down your path. What are you really >>> capturing? >>> >>> >> Rethink problem... >> I try to use sgmllib - get all info tagged in "h1"... "h6" >> I've created file lister.py: >> >> "from sgmllib import SGMLParser >> >> class Lister(SGMLParser): >> >> def reset(self): >> SGMLParser.reset(self) >> self.h1 = [] >> self.h2 = [] >> self.h3 = [] >> self.h4 = [] >> self.h5 = [] >> self.h6 = [] >> >> self.in_h1 = False >> self.in_h2 = False >> self.in_h3 = False >> self.in_h4 = False >> self.in_h5 = False >> self.in_h6 = False >> >> def handle_data(self, text): >> if self.in_h1 == True: >> self.h1.append(text) >> elif self.in_h2 == True: >> self.h2.append(text) >> elif self.in_h3 == True: >> self.h3.append(text) >> elif self.in_h4 == True: >> self.h4.append(text) >> elif self.in_h5 == True: >> self.h5.append(text) >> elif self.in_h6 == True: >> self.h6.append(text) >> >> #AND NOW "BAD CODE1": >> >> def start_h1(self, attrs): >> self.in_h1 = True >> >> def end_h1(self): >> self.in_h1 = False >> >> def start_h2(self, attrs): >> self.in_h2 = True >> >> def end_h2(self): >> self.in_h2 = False >> >> def start_h3(self, attrs): >> self.in_h3 = True >> >> def end_h3(self): >> self.in_h3 = False >> >> def start_h4(self, attrs): >> self.in_h4 = True >> >> def end_h4(self): >> self.in_h4 = False >> >> def start_h5(self, attrs): >> self.in_h5 = True >> >> def end_h5(self): >> self.in_h5 = False >> >> def start_h6(self, attrs): >> self.in_h6 = True >> >> def end_h6(self): >> self.in_h6 = False >> >> " >> >> And now I want to print all text in this tags. 
>> >> file use_lister.py: >> >> " >> >> import urllib, lister >> >> f = open('_1.html', 'r') >> text = f.read() >> f.close() >> >> parser = urllister.Lister() >> parser.feed(text) >> parser.close() >> >> #AND NOW "BAD CODE2": >> >> Show_step('h1') >> for i in parser.h1: >> print i >> >> Show_step('h2') >> for i in parser.h2: >> print i >> >> Show_step('h3') >> for i in parser.h3: >> print i >> >> Show_step('h4') >> for i in parser.h4: >> print i >> >> Show_step('h5') >> for i in parser.h5: >> print i >> >> Show_step('h6') >> for i in parser.h6: >> print i >> >> " >> >> >> >> And I don't like this "BAD CODE1" and "BAD CODE2" >> >> How to rewrite bad codes??? >> >> _______________________________________________ >> Tutor maillist - Tutor@python.org >> http://mail.python.org/mailman/listinfo/tutor >> >> >> >> > > > > ------------------------------------------------------------------------ > > _______________________________________________ > Tutor maillist - Tutor@python.org > http://mail.python.org/mailman/listinfo/tutor > _______________________________________________ Tutor maillist - Tutor@python.org http://mail.python.org/mailman/listinfo/tutor