> Try Beautiful Soup, or if your input is simple enough, the re module.
# Hi Gabriel, I first tried "HTMLParser" and wrote this short script:

try:  # Python 3 module names
    from html.parser import HTMLParser
    from html.entities import entitydefs
except ImportError:  # Python 2 module names, as used in the original post
    from HTMLParser import HTMLParser
    from htmlentitydefs import entitydefs


class MyDocParser(HTMLParser):
    """Collect the character data found inside ``<parameter>`` elements.

    Every chunk of text reported while a ``<parameter>`` element is open is
    appended to ``self.paths``.  Text outside such elements is ignored.

    Note: each ``handle_data`` call appends one entry, so a value that
    contains entity/character references is stored as several entries
    whenever the parser reports those references separately (that is, when
    ``handle_entityref`` / ``handle_charref`` are invoked).  Python 3's
    parser converts references itself by default (``convert_charrefs=True``)
    and then does not call those two handlers at all.
    """

    def __init__(self):
        self.paths = []         # collected text chunks, in document order
        self.readingpaths = 0   # flag: non-zero while inside <parameter>
        HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Start collecting text when a ``<parameter>`` tag opens."""
        if tag == 'parameter':
            self.readingpaths = 1

    def handle_endtag(self, tag):
        """Stop collecting text when a ``</parameter>`` tag closes."""
        if tag == 'parameter':
            self.readingpaths = 0

    def handle_data(self, data):
        """Record *data* if it occurs inside a ``<parameter>`` element."""
        if self.readingpaths:
            self.paths.append(data)

    def handle_entityref(self, name):
        """Handle named references, e.g. the ``&amp;`` in 'Home &amp; Products'.

        Known entity names are replaced by their text from ``entitydefs``;
        unknown names are passed through unchanged as ``&name;``.
        """
        if name in entitydefs:
            self.handle_data(entitydefs[name])
        else:
            self.handle_data('&' + name + ';')

    def handle_charref(self, name):
        """Handle numeric references, e.g. ``&#174;`` or ``&#xAE;``.

        *name* is the text between ``&#`` and ``;``; a leading ``x``/``X``
        marks a hexadecimal value.  Invalid references, and code points
        outside 1..255, are ignored.
        """
        try:
            if name[:1] in ('x', 'X'):
                charnum = int(name[1:], 16)
            else:
                charnum = int(name)
        except ValueError:
            return
        if charnum < 1 or charnum > 255:
            return
        # The original script stopped here, so the decoded character was
        # silently dropped; forward it like any other character data.
        self.handle_data(chr(charnum))

    def get_paths(self):
        """Return the list of text chunks collected so far."""
        return self.paths


def parse_content(content):
    """Parse *content* and return the text found in its ``<parameter>`` tags.

    Returns a list of strings in document order (empty if none were found).
    """
    parser = MyDocParser()
    parser.feed(content)
    # close() forces processing of any data the parser is still buffering.
    parser.close()
    return parser.get_paths()

# /end

# This works as long as there are no other <parameter> tags in the content
# that I parse.
#
# Nico
# --
# http://mail.python.org/mailman/listinfo/python-list