Valid point! Here's the test Python script, ugly as it is:
#!/usr/bin/python
#
# test.py
#
# scrapes/extracts the basic data for the college
#
# the app gets/stores
#   name
#   url
#   address (street/city/state)
#   phone
#
######################################################################
# test python script

import re
import libxml2dom
import urllib
import urllib2
import sys
import string
from mechanize import Browser
import mechanize
#import tidy
import os.path
import cookielib
from libxml2dom import Node
from libxml2dom import NodeList
import subprocess
import MySQLdb
#import mysql_config
import time

########################
#
# Parse pricegrabber.com
# NOTE(review): this banner looks like a leftover from another script --
# the URLs below point at university schedule sites. Confirm and update.
########################

urlopen = urllib2.urlopen
##cj = urllib2.cookielib.LWPCookieJar()
Request = urllib2.Request
br = Browser()
br2 = Browser()

user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
values1 = {
    'name': 'Michael Foord',
    'location': 'Northampton',
    'language': 'Python',
}
headers = {'User-Agent': user_agent}

# Page to scrape.  The original script assigned the Berkeley URL first and
# then immediately overwrote it, so only the PSU URL was ever used.
#url = "http://schedule.berkeley.edu/"
url = "http://schedule.psu.edu/"


def main():
    """Fetch ``url``, parse it as HTML and print the nodes the XPath matches.

    Prints the requested URL, the raw XPath result, and then the
    ``nodeValue``, ``textContent`` and ``toString()`` of the first match.
    Exits with an error message if the query matches nothing, and with the
    default exit status otherwise.
    """
    #----------------------------
    # get the kentucky test pages
    # NOTE(review): "kentucky" does not match the URL above -- stale comment?
    #br.set_cookiejar(cj)
    br.set_handle_redirect(True)
    br.set_handle_referer(True)
    br.set_handle_robots(False)
    br.addheaders = [('User-Agent', 'Firefox')]

    murl = url
    # The parenthesised single-string form prints the same text as the
    # original Python 2 ``print a, b`` statements and also parses on Python 3.
    print("url = %s" % (murl,))

    br.open(murl)
    #cj.save(COOKIEFILE)  # resave cookies
    res = br.response()  # this is a copy of response
    s = res.read()

    # s contains HTML not XML text
    d = libxml2dom.parseString(s, html=1)

    # get the input/text dialogs
    #tn1 = "//[EMAIL PROTECTED]'main_content']/form[1]/input[position()=1]/@name"
    # Earlier experiment, overwritten before use in the original script:
    #q = "//img/parent::*/attribute::href"
    #
    # NOTE(review): the mailing-list archive's address obfuscation replaced
    # part of this expression with "[EMAIL PROTECTED]".  It is reproduced
    # as posted and must be restored before the script can run; presumably it
    # was something like //form[@name='cos_search1']/@action -- confirm
    # against the page source.
    q = "//[EMAIL PROTECTED]'cos_search1']/@action"
    t1 = d.xpath(q)
    print("href =  %s" % (t1,))

    # Indexing t1[0] on an empty result would raise a bare IndexError; report
    # the failed query instead.
    if not t1:
        sys.exit("no nodes matched xpath: %s" % (q,))

    first = t1[0]
    print("hnode = %s" % (first.nodeValue,))
    print("htest = %s" % (first.textContent,))
    print("htesttt = %s" % (first.toString(),))
    sys.exit()


#=======================================
if __name__ == "__main__":
    # main app
    main()

# thanks!!
-----Original Message----- From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED]] On Behalf Of Fredrik Lundh Sent: Saturday, August 23, 2008 5:58 AM To: python-list@python.org Subject: Re: xpath questions... bruce wrote: > Regarding the xpath question I've posed, some have said that it shouldn't be > here on the mailing list. Given that I'm writing the test scripts/apps in > python, using the python libs, where else should it be posted? > > I mean, I could post the entire sample script so you can see that it's using > python, but I simplified the issue. there was zero Python content left after the simplification. maybe you should at least mention what library you're using to "play around with xpath and the html dom" ? </F> -- http://mail.python.org/mailman/listinfo/python-list -- http://mail.python.org/mailman/listinfo/python-list