for some reason my code is ending prematurly and I cant figure out why. It happens every time it hits a certain part of the page I'm parsing, but that part has the same format as all the others. Any clue why this is happening?
(page source can be found in the url in the code thats not commented) require "watir" require 'win32ole' require 'rubygems' require 'hpricot' browser = Watir::IE.new # opens a new browser numlinks = browser.links.length numlinks = numlinks - 1 foundtitle = 0 reset = 0 gottitle = 0 crn = "" prof = "" #browser.goto("http://www.umbc.edu/AboutUMBC/Schedule/spring2009/") browser.goto("http://www.umbc.edu/AboutUMBC/Schedule/spring2009/ EDUC.html") ##### Go to each link on the above page ########### #links = browser.links.collect {|link| link.href if link.href =~ / html/} #links.compact.each_with_index do |link, i| # browser.goto(link) rawhtml = browser.html # puts all of the pages html into rawhtml rawhtml.each do |line| # for each line of html if (gottitle == 1) foundtitle = 0 gottitle = 0 end if (foundtitle == 1) if (/<\/B>/.match(line)) gottitle = 1 #puts $desig +" | "+ $title else $title = $title + " " + line.strip $title.sub!('&', '&') $title.sub!(' : ', ': ') end end if (/<B><A \w+=[\w]+><\/A><A.+">(.+)<\/A> (.+)\s+/.match(line)) if /\d-\d/.match(line) $desig, $title = line.match(/<B><A \w+=[\w"]+><\/A><A.+">(. +)<\/A> (.+)\d-\d credits/).captures $title.strip! foundtitle = 1 else if (/No credit/.match(line)) $desig, $title = line.match(/<B><A \w+=[\w"]+><\/A><A.+">(. +)<\/A> (.+)No credit/).captures $title.strip! foundtitle = 1 else $desig, $title = line.match(/<B><A \w+=[\w"]+><\/A><A.+">(. +)<\/A> (.+)\w+ credit/).captures $title.strip! foundtitle = 1 end end else if (/\[<A \w+=".+">(\d+)<\/A>\]/.match(line)) if ((/\[<A \w+=".+">(\d+)<\/A>\].+\([\w\s]+\)\s(.+)/).match (line)) crn, prof = line.match(/\[<A \w+=".+">(\d+)<\/A>\].+\([\w \s]+\)\s(.+)/).captures end if ((/\[<A \w+=".+">(\d+)<\/A>\].+arranged\s(.+)$/).match (line)) crn, prof = line.match(/\[<A \w+=".+">(\d+)<\/A>\]. +arranged\s(.+)/).captures end crn.strip! prof.strip! puts crn puts $title puts prof puts $desig #puts crn + " | " + $title + " | " + prof + " | " + $desig end end #puts line end # for each line of html #end # end of links loop --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "Watir General" group. To post to this group, send email to watir-general@googlegroups.com Before posting, please read the following guidelines: http://wiki.openqa.org/display/WTR/Support To unsubscribe from this group, send email to watir-general-unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/watir-general -~----------~----~----~----~------~----~------~--~---