Hi,

Maybe you did not commit the transaction. After an INSERT or UPDATE from Python you should call commit() on the database connection, otherwise the change is never made permanent (MySQLdb does not autocommit by default).
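For example, with a plain (blocking) MySQLdb connection the pattern would look roughly like this; the connection parameters below are only illustrative, I just reused your table and column names:

    import MySQLdb

    # illustrative credentials only
    db = MySQLdb.connect(host='localhost', user='user', passwd='secret',
                         db='mydb', charset='utf8')
    cursor = db.cursor()
    cursor.execute("UPDATE tblHandicaps SET Handicap = %s WHERE Player = %s",
                   ('12', 'Some Player'))
    # Without this call the UPDATE is never made permanent, because
    # MySQLdb leaves autocommit switched off by default.
    db.commit()
    cursor.close()
    db.close()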
2013/12/18 Shaun Marchant <[email protected]>

> Hi Group,
>
> I've managed to get a pipeline working that is INSERTING my scraped data
> into a MySQL database, but I am unable to update records as it appears the
> update logic is not working correctly... any suggestions as to why it's not
> triggering? I have tested the SQL statement directly and it works fine.
>
> Pipeline.py:
>
> import sys
> import MySQLdb
> import hashlib
> from datetime import datetime
> from twisted.enterprise import adbapi
> from scrapy import log
> from scrapy.exceptions import DropItem
> from scrapy.http import Request
>
> class EmptyItemPipeline(object):
>
>     def process_item(self, item, spider):
>         if item['handicap']:
>             return item
>         else:
>             item['handicap'] = '99'
>             return item
>
> class MySQLStorePipeline(object):
>
>     def __init__(self, dbpool):
>         self.dbpool = dbpool
>
>     @classmethod
>     def from_settings(cls, settings):
>         dbargs = dict(
>             host=settings['localhost'],
>             db=settings['database'],
>             user=settings['username'],
>             passwd=settings['password'],
>             charset='utf8',
>             use_unicode=True,
>         )
>         dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
>         return cls(dbpool)
>
>     def process_item(self, item, spider):
>         # run db query in the thread pool
>         d = self.dbpool.runInteraction(self._do_upsert, item, spider)
>         d.addErrback(self._handle_error, item, spider)
>         # at the end return the item in case of success or failure
>         d.addBoth(lambda _: item)
>         # return the deferred instead of the item. This makes the engine
>         # process the next item (according to the CONCURRENT_ITEMS setting)
>         # after this operation (deferred) has finished.
>         return d
>
>     def _do_upsert(self, conn, item, spider):
>
>         conn.execute("""SELECT EXISTS(
>             SELECT 1 FROM tblHandicaps WHERE Player = %s)""",
>             (item['name'][0]))
>         ret = conn.fetchone()[0]
>
>         if ret:
>             conn.execute("""
>                 UPDATE tblHandicaps
>                 SET Player=%s, Handicap=%s, Exact=%s, Category=%s, Updated
>                 WHERE Player=%s
>             """, (item['name'][0], item['handicap'][0], item['exact'][0],
>                   item['category'][0], datetime.now(), item['name'][0]))
>             spider.log("Item updated in db: %s %r" % (item['name'][0], item))
>         else:
>             conn.execute("""INSERT INTO tblHandicaps (Player, Handicap,
>                 Exact, Category, Sex, Updated) VALUES (%s, %s, %s, %s, 'M', %s)""",
>                 (item['name'][0],
>                  item['handicap'][0],
>                  item['exact'][0],
>                  item['category'][0],
>                  datetime.now()))
>
>             spider.log("Item stored in db: %s %r" % (item['name'][0], item))
>
>     def _handle_error(self, failure, item, spider):
>         """Handle errors that occur during the db interaction."""
>         # do nothing, just log
>         log.err(failure)
>
>
> I don't see a specific error in the console log other than an SQL primary
> key error as it's trying to INSERT rather than UPDATE.
>
> Cheers
>
> Shaun
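On a side note: the primary-key error in your log suggests tblHandicaps already has a unique or primary key on Player. If that is the case, the whole SELECT-then-UPDATE-or-INSERT block could probably be collapsed into a single statement with MySQL's INSERT ... ON DUPLICATE KEY UPDATE. A rough, untested sketch reusing your item fields (it assumes the key really is on Player; datetime is already imported in your pipeline):

    def _do_upsert(self, conn, item, spider):
        # Assumes Player is a PRIMARY KEY or UNIQUE key on tblHandicaps,
        # which the duplicate-key error in the console log suggests it is.
        conn.execute("""
            INSERT INTO tblHandicaps (Player, Handicap, Exact, Category, Sex, Updated)
            VALUES (%s, %s, %s, %s, 'M', %s)
            ON DUPLICATE KEY UPDATE
                Handicap = VALUES(Handicap),
                Exact    = VALUES(Exact),
                Category = VALUES(Category),
                Updated  = VALUES(Updated)
        """, (item['name'][0], item['handicap'][0], item['exact'][0],
              item['category'][0], datetime.now()))
        spider.log("Item upserted in db: %s %r" % (item['name'][0], item))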
