# Script: http://paste.debian.net/1199271/
#
# Original question (python-list):
#   It mostly works, but line 78 of the paste is supposed to extract
#       <span class="price-unit">100 pieces / lot</span>
#   No matter what I try it fails and I don't know why. It's a simple
#   div.classname match. Could someone take a look and figure it out?
#   I'm stuck.
#
# Review notes (why the lookup failed):
#   * The lookup passed 'item-price-row packaging-sale' to a class-name
#     locator. That is two classes; Selenium's class-name strategy takes
#     exactly one and rejects compound names. A CSS selector is used now.
#   * find_elements*() returns a list. The old code wrote str(list) for
#     the lot cell and called .text on the list for the price cells, so
#     none of them could ever contain the element text.
# --------------------------------------------------------
"""Search AliExpress with Selenium/Firefox and dump the hits to an HTML table.

Usage: run with exactly one command-line argument, the search term.
"""

import re
import sys
import time

try:
    from html import escape  # Python 3
except ImportError:
    from cgi import escape  # Python 2

from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

url = 'https://www.aliexpress.com'

# Copy before mutating: DesiredCapabilities.FIREFOX is a class-level dict
# shared by everything that imports selenium.
caps = DesiredCapabilities.FIREFOX.copy()
caps["pageLoadStrategy"] = 'eager'

ignored_exceptions = (NoSuchElementException, StaleElementReferenceException,)

LOG_PATH = '/tmp/log.html'
WAIT_TIMEOUT = 3600      # seconds to wait for each page landmark
KEY_PAUSE = 1            # seconds between simulated key presses
PAGE_DOWN_PRESSES = 9    # how far to scroll the result list

# Multiplier applied to every dollar amount found in a price field.
# NOTE(review): presumably a USD -> local currency rate; confirm and update.
EXCHANGE_RATE = 72.4

_AMOUNT_RE = re.compile(r'[0-9]+\.[0-9]+')

# Removed from price text, in this order, before amounts are converted.
_PRICE_NOISE = ('+', '$', 'US', 'Shipping:')


def convert(m):
    """Return the matched decimal amount multiplied by EXCHANGE_RATE.

    m is a regex match object whose full match is a decimal number. The
    result is rounded to 3 places and returned as a string, so the function
    can be used directly as an re.sub() replacement callback.
    """
    return str(round(float(m.group()) * EXCHANGE_RATE, 3))


def process_fields(txt):
    """Convert the dollar amounts in a price/shipping string.

    Text without a '$' is returned unchanged. Otherwise the currency
    markers are stripped and every decimal amount is replaced by its
    converted value; the text between amounts is kept.
    """
    if '$' not in txt:
        return txt
    for noise in _PRICE_NOISE:
        txt = txt.replace(noise, '')
    # The pattern deliberately excludes surrounding whitespace so that a
    # range such as "1.50 - 2.00" keeps its separator intact.
    return _AMOUNT_RE.sub(convert, txt).strip()


# One report column per entry: (CSS selector relative to a result item,
# optional post-processing function for the element text).
# NOTE(review): '.price-unit' follows the <span class="price-unit"> quoted
# in the question; the other class names are taken from the original
# script. Verify all of them against the live page markup.
FIELDS = (
    ('.price-current', process_fields),
    ('span.price-unit', None),
    ('.shipping-value', process_fields),
    ('.store-name', None),
)


def _cell_text(item, selector, postprocess=None):
    """Return the text of the first element under item matching selector.

    Returns '' when nothing matches. A WebDriver error is reported as its
    message so that one bad cell does not abort the whole report.
    """
    try:
        matches = item.find_elements(By.CSS_SELECTOR, selector)
        text = matches[0].text if matches else ''
    except WebDriverException as exc:
        return str(exc)
    return postprocess(text) if postprocess else text


def _scroll_results():
    """Page down through the result list from the top of the page."""
    body = driver.find_element(By.XPATH, '//body')
    body.send_keys(Keys.HOME)
    for _ in range(PAGE_DOWN_PRESSES):
        print('send PAGE_DOWN')
        time.sleep(KEY_PAUSE)
        body.send_keys(Keys.PAGE_DOWN)
        time.sleep(KEY_PAUSE)


def _write_report(items, path=LOG_PATH):
    """Write one HTML table row per result item to path."""
    with open(path, 'w') as fh:
        fh.write('<!doctype html> <title>parts</title><body>\n<table>\n')
        for item in items:
            cells = [_cell_text(item, selector, post)
                     for selector, post in FIELDS]
            # Scraped text is untrusted; escape it before embedding in HTML.
            row = ''.join('<td>' + escape(cell) + '</td>' for cell in cells)
            fh.write('<tr>' + row + '</tr>\n')
        fh.write('\n</table></body>')


def ali_search(url, txt):
    """Search the site at url for txt and write the hits to LOG_PATH.

    Uses the module-level ``driver`` that part_lookup() creates, so it must
    be called while that browser session is open.

    Raises RuntimeError if the loaded page does not look like AliExpress,
    and selenium's TimeoutException if an expected page landmark never
    appears within WAIT_TIMEOUT seconds.
    """
    driver.get(url)
    if 'AliExpress' not in driver.title:
        raise RuntimeError('unexpected page title: %r' % (driver.title,))

    wait = WebDriverWait(driver, WAIT_TIMEOUT,
                         ignored_exceptions=ignored_exceptions)

    wait.until(EC.presence_of_element_located(
        (By.XPATH, '//div[@class="search-key-box"]')))
    print('search')
    search_box = driver.find_element(By.ID, 'search-key')
    if 'input' in search_box.tag_name:
        print('success')

    # The term is typed one character at a time with a pause, as in the
    # original script.
    for char in txt:
        time.sleep(KEY_PAUSE)
        search_box.send_keys(char)
    search_box.send_keys(Keys.RETURN)

    wait.until(EC.presence_of_element_located(
        (By.XPATH, '//div[@class="product-container"]')))
    print('product-container')
    _scroll_results()

    # An XPath starting with '//' searches the whole document even when
    # called on an element, so query the driver directly.
    items = driver.find_elements(
        By.XPATH, '//li[@class="list-item packaging_sale"]')
    _write_report(items)


def part_lookup():
    """Launch Firefox, run the search given on the command line, then idle.

    Expects exactly one command-line argument (the search term); otherwise
    prints a usage line to stderr and returns without starting a browser.
    After the search the browser stays open for an hour before the session
    is closed.
    """
    global driver
    if len(sys.argv) != 2:
        sys.stderr.write('usage: %s SEARCH_TERM\n' % sys.argv[0])
        return
    with webdriver.Firefox(executable_path=r'/mnt/sdb1/root/geckodriver',
                           firefox_binary='/mnt/sdb1/firefox/firefox-bin',
                           capabilities=caps) as driver:
        ali_search(url, sys.argv[1])
        time.sleep(3600)


if __name__ == '__main__':
    part_lookup()

# --
# https://mail.python.org/mailman/listinfo/python-list