Jay wrote:
> Yes, I know; I did check out a couple, but I could never understand them.
> They were confusing for me, and I wasn't hoping for a full typed
> tutorial, just some help with exactly what I'm trying to do, not
> the whole module... but whatever, thanks a lot for the feedback.
>
Well, I don't want to hold this up as an example of best practice (it was a quick hack to get some book graphics for my web site), but this example shows you how you can extract stuff from XML, in this case returned from Amazon's web services module.
# Sorry about any wrapping that mangles the code.
#
# regards
#  Steve

#!/usr/bin/python
#
# getbooks.py: download book details from Amazon.com
#
# hwBuild: database-driven web content management system
# Copyright (C) 2005 Steve Holden - [EMAIL PROTECTED]
#
# This program is free software; you can redistribute it
# and/or modify it under the terms of the GNU General
# Public License as published by the Free Software
# Foundation; either version 2 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
# PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public
# License along with this program; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
#
import urllib
import urlparse
import os
import re
from xml.parsers import expat

from config import Config

# Directory that receives the downloaded images and the saved XML
# response.  NOTE: every entry already in it is unlinked each time this
# module is loaded, before anything is downloaded.
picindir = os.path.join(Config['datadir'], "pybooks")
for f in os.listdir(picindir):
    os.unlink(os.path.join(picindir, f))

# Leading run of digits in an image's file name; it becomes the base
# name of the saved .jpg file.
filpat = re.compile(r"\d+")


class myParser:
    """Expat-based parser that downloads every MediumImage URL it meets.

    ``processing`` records where the parser currently is:
    0 - outside any MediumImage element,
    1 - inside a MediumImage element,
    2 - inside the URL element of a MediumImage.
    ``count`` is the number of images saved so far.
    """

    def __init__(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.start_element
        self.parser.EndElementHandler = self.end_element
        self.parser.CharacterDataHandler = self.character_data
        self.processing = 0
        self.count = 0
        # Accumulates the text of the URL element being read.
        self.imgname = ""

    def parse(self, f):
        """Parse the open file object *f*; return the number of images saved."""
        self.parser.ParseFile(f)
        return self.count

    def start_element(self, name, attrs):
        """Track entry into MediumImage and its URL child (*attrs* is unused)."""
        if name == "MediumImage":
            self.processing = 1
            self.imgname = ""
        if self.processing == 1 and name == "URL":
            self.processing = 2

    def end_element(self, name):
        """Download the image when a MediumImage URL closes; track nesting."""
        if self.processing == 2 and name == "URL":
            self.processing = 1
            self._save_image(self.imgname)
        if self.processing == 1 and name == "MediumImage":
            self.processing = 0

    def character_data(self, data):
        """Collect URL text; expat may deliver it in several pieces."""
        if self.processing == 2:
            self.imgname += data

    def _save_image(self, url):
        """Fetch *url* and store it in picindir as '<leading digits>.jpg'.

        An image whose file name does not begin with digits is skipped
        (and not counted) instead of aborting the whole parse.
        """
        print("Getting: %s" % (url,))
        # Index 2 of the urlparse() result is the path component.
        path = urlparse.urlparse(url)[2]
        itemno = filpat.match(os.path.basename(path))
        if itemno is None:
            print("Skipping (no item number in file name): %s" % (url,))
            return
        u = urllib.urlopen(url)
        try:
            img = u.read()
        finally:
            u.close()
        outfile = open(os.path.join(picindir, "%s.jpg" % itemno.group()), "wb")
        try:
            outfile.write(img)
        finally:
            outfile.close()
        self.count += 1


def main(search=None):
    """Download book images for result pages 1-4 and return the total saved.

    *search* is the keyword string; when it is None or empty,
    Config['book-search'] is used instead.  Each page's XML response is
    written to 'bookdata.txt' in picindir and parsed from that file.
    Paging stops at the first page that yields no images.
    """
    print("Search: %s" % (search,))
    url_template = (
        "http://webservices.amazon.com/onca/xml"
        "?Service=AWSECommerceService"
        "&AWSAccessKeyId=XXXXXXXXXXXXXXXXXXXX"
        "&t=steveholden-20"
        "&SearchIndex=Books"
        "&Operation=ItemSearch"
        "&Keywords=%s"
        "&ItemPage=%d"
        "&ResponseGroup=Images"
        "&type=lite"
        "&Version=2004-11-10"
        "&f=xml"
    )
    keywords = urllib.quote(search or Config['book-search'])
    fnam = os.path.join(picindir, "bookdata.txt")
    count = 0
    for pageNum in range(1, 5):
        response = urllib.urlopen(url_template % (keywords, pageNum))
        try:
            xml_text = response.read()
        finally:
            response.close()

        # Close the file explicitly so the data is on disk before it is
        # reopened for parsing.
        out = open(fnam, "w")
        try:
            out.write(xml_text)
        finally:
            out.close()

        f = open(fnam, "r")
        try:
            n = myParser().parse(f)
        finally:
            f.close()

        if n == 0:
            break
        count += n
    return count


if __name__ == "__main__":
    import sys
    search = None
    if len(sys.argv) > 1:
        search = sys.argv[1]
    n = main(search)
    print("Pictures found: %s" % (n,))

# --
# Steve Holden       +44 150 684 7255  +1 800 494 3119
# Holden Web LLC                     www.holdenweb.com
# PyCon TX 2006                  www.python.org/pycon/
# --
# http://mail.python.org/mailman/listinfo/python-list