This script takes a column of Digi-Key part numbers and retrieves the manufacturer (mfg), the manufacturer part number (mfg#), and the price breakdowns from the Digi-Key website. I might write some more scripts like this to get part info from other websites such as Mouser and Farnell.
# Input:  file named digikey.csv that has 1 column of Digi-Key part numbers
#         (this can be copy/pasted from gattrib if digikey# is stored in the symbols)
# Output: file named digikeyinfo.csv with the columns digikey#, mfg, mfg#,
#         followed by quantity-price pairs
#         (rows will probably have different numbers of columns, because parts
#         have different numbers of quantity-price breaks)
# by Josh Jordan josh.outersp...@gmail.com
import sys
try:
    import urllib2
except ImportError:
    # Python 3 folded urllib2 into urllib.request; keep the old name so the
    # rest of the script reads the same on either interpreter.
    import urllib.request as urllib2
import re
import string

# Base URL of the part-detail page; the Digi-Key part number is appended.
DKS = "http://search.digikey.com/scripts/DkSearch/dksus.dll?Detail&name="

# Placeholder written when a field cannot be found on the page.
NOT_FOUND = "NF"

# Patterns are compiled once here instead of once per part / per line.
MFGNUM_RE = re.compile(
    '<tr><th align=right>Manufacturer Part Number</th><td>(.*?)</td></tr>')
MFG_RE = re.compile(
    '<tr><th align=right>Manufacturer</th><td><a href="(?:.*?)">(.*?)(?:<.*?></a>|</a>)</td></tr>')
# Header line that immediately precedes the price-break rows.
PRICE_HEADER = "<tr><th>Price Break</th><th>Unit Price</th><th>Extended Price</th></tr>"
PRICE_ROW_RE = re.compile(
    '<tr><td align=center>(.*?)</td><td align=right>(.*?)</td><td align=right>(?:.*?)</td></tr>')


def extract_mfgnum(pstring):
    """Return the manufacturer part number found in *pstring*, or "NF"."""
    mat = MFGNUM_RE.search(pstring)
    return mat.group(1) if mat is not None else NOT_FOUND


def extract_mfg(pstring):
    """Return the manufacturer name found in *pstring*, or "NF"."""
    mat = MFG_RE.search(pstring)
    return mat.group(1) if mat is not None else NOT_FOUND


def extract_price_breaks(pstring):
    """Return the price table of *pstring* as a list of (quantity, price).

    Both items are strings; thousands separators are stripped from the
    quantity.  Rows are read from the line after the price-table header
    until the first line that is not a price row, whose quantity is not an
    integer, or whose quantity is lower than the previous one.  An empty
    list is returned when the header line is not present.
    """
    plines = re.split("\n+", pstring)
    try:
        first_row = plines.index(PRICE_HEADER) + 1
    except ValueError:
        return []

    breaks = []
    prevpb = 0
    for row in plines[first_row:]:
        mat = PRICE_ROW_RE.match(row)
        if mat is None:
            break
        pbreak = mat.group(1).replace(",", "")
        price = mat.group(2)
        try:
            quantity = int(pbreak)
        except ValueError:
            break
        if quantity < prevpb:
            # Quantities must not decrease; a lower one means this row is
            # not part of the same price table, so report it and stop.
            print("pbreak wrong- %d < %d" % (quantity, prevpb))
            break
        prevpb = quantity
        breaks.append((pbreak, price))
    return breaks


def format_row(part, mfg, mfgnum, breaks):
    """Return one output line: part, mfg, mfg# and the quantity/price pairs."""
    fields = [part, mfg, mfgnum]
    for pbreak, price in breaks:
        fields.append(pbreak)
        fields.append(price)
    return ", ".join(fields) + "\n"


def fetch_page(url):
    """Download *url* and return its body as text.

    The response is always closed.  On Python 3 the body arrives as bytes
    and is decoded so the text patterns above can be applied; on Python 2
    the body is already a str and is returned unchanged.
    """
    page = urllib2.urlopen(url)
    try:
        pstring = page.read()
    finally:
        page.close()
    if not isinstance(pstring, str):
        # NOTE(review): the page encoding is assumed to be UTF-8; undecodable
        # bytes are replaced rather than aborting the run.
        pstring = pstring.decode("utf-8", "replace")
    return pstring


def main():
    """Read digikey.csv and write one row per part to digikeyinfo.csv.

    Lines starting with "digikey" or "unknown" are not looked up; a fixed
    row is written for them instead.  Every other line is treated as a
    Digi-Key part number ("-ND" is appended when missing), its detail page
    is downloaded, and the resulting row is both printed and written to the
    output file.
    """
    with open("digikey.csv", "r") as ifile:
        with open("digikeyinfo.csv", "w") as ofile:
            for line in ifile:
                # skipping certain csv lines and adding -ND to any digikey#
                # without it
                part = line.rstrip()
                if part.startswith("digikey"):
                    ofile.write("digikey,mfg,mfgnum\n")
                    continue
                if part.startswith("unknown"):
                    ofile.write("unknown,unknown\n")
                    continue
                if not part.endswith("-ND"):
                    part += "-ND"

                # this is the url to the page of digikey part information
                pstring = fetch_page(DKS + part)
                # Debugging aid: if a regex stops matching, write pstring to
                # a file (e.g. test.html) and inspect the page markup.

                output = format_row(
                    part,
                    extract_mfg(pstring),
                    extract_mfgnum(pstring),
                    extract_price_breaks(pstring),
                )
                print(output)
                ofile.write(output)
                # Flush per part so finished rows survive if a later
                # download fails.
                ofile.flush()


if __name__ == "__main__":
    main()
_______________________________________________ geda-user mailing list geda-user@moria.seul.org http://www.seul.org/cgi-bin/mailman/listinfo/geda-user