If you have lots of data, write it to a database, e.g. SQLite. It will take 
care of sorting and de-duplication.
If not, store the items in memory in a dict. Then, on spider_closed 
<http://doc.scrapy.org/en/latest/topics/signals.html#scrapy.signals.spider_closed>,
write them to the relevant files. Here are, roughly, the changes needed.

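P.S. I think that when you read the rows back from your csv file and sort 
them in sortrows(), you sort according to the *text* of the date instead of 
the date itself. This produces the same order only when the dates are 
zero-padded and written year-first (e.g. %Y-%m-%d); for any other date 
format the text order and the chronological order differ.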

class MystocksPipeline(object):
    """Buffer scraped items per stock code and write them out on close.

    Items are collected in memory in ``my_little_dict`` (stock code -> list
    of items) while the spider runs. When the ``spider_closed`` signal fires,
    one ``<code>.csv`` file is written per stock code, with rows sorted by
    ``item['today']`` in descending order. Each file is written once, in
    ``'w'`` mode, so re-running the spider does not append duplicate rows.
    """

    @classmethod
    def from_crawler(cls, crawler, *args, **kwargs):
        """Create the pipeline and connect it to the ``spider_closed`` signal.

        :param crawler: object exposing ``signals.connect``; presumably the
            Scrapy crawler that builds the pipeline -- confirm with caller.
        :returns: the new pipeline instance, with an empty ``my_little_dict``.
        """
        # This class is a pipeline, not a spider: build an instance of this
        # class directly instead of delegating to CrawlSpider.from_crawler.
        pipeline = cls(*args, **kwargs)
        crawler.signals.connect(pipeline.spider_closed,
                                signal=signals.spider_closed)
        pipeline.my_little_dict = defaultdict(list)
        return pipeline

    def spider_closed(self):
        """Write one CSV file per stock code, most recent ``today`` first."""
        # .items() works on both Python 2 and Python 3 (iteritems is 2-only).
        for code, items in self.my_little_dict.items():
            filename = "%s.csv" % code
            with open(filename, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                # The sort key must be passed by keyword: the second
                # positional argument of sorted() is not the key function.
                for item in sorted(items, key=lambda item: item['today'],
                                   reverse=True):
                    writer.writerow([
                        item['today'],
                        item['previous'],
                        item['day_high'],
                        item['day_low'],
                        item['price'],
                        item['volume'],
                        item['adjustedPrice'],
                    ])

    def process_item(self, item, spider):
        """Clean up ``item``, remember it under its stock code, return it.

        Nothing is written to disk here; the rows are written in
        ``spider_closed``.
        """
        ...  # elided: field clean-up from the original process_item, which
        #      also computes ``today`` (time.strftime("%Y-%m-%d") there).
        item['today'] = today
        ...  # elided: remaining clean-up from the original process_item.

        self.my_little_dict[item['code']].append(item)

        return item




On Friday, March 18, 2016 at 1:09:08 PM UTC, njogu chege wrote:
>
> Hello fellow Scrapy users, I have written a simple spider to scrape 
> financial data from a website and return CSV files with the scraped data. 
> However, my application needs the rows to be sorted in descending order. I 
> have written a function in the pipelines file for this purpose; the problem 
> I am experiencing is that I need to run the spider twice to sort the rows, 
> leaving me with duplicate entries. Can anyone assist me with this? Here is 
> the code for pipelines.py:
>
> class MystocksPipeline(object):
>
>     def sortrows(self, myfile, mydata):
>         
>         sortedlist = sorted(mydata, key=operator.itemgetter(0), reverse =
> True)
>         with open(myfile, "wb") as f:
>             fileWriter = csv.writer(f, delimiter=',')
>             for row in sortedlist:
>                 fileWriter.writerow(row)
>
>     def process_item(self, item, spider):
>
>         today = time.strftime("%Y-%m-%d")
>
>         name = item['code']
>         filename = "%s.csv" % name
>
>
>         self.csvwriter = csv.writer(open(filename, 'a'),lineterminator=
> '\n')
>         
>         item['volume'] = item['volume'].replace('.','')
>         item['volume'] = item['volume'].replace('M','0000')
>         item['volume'] = item['volume'].replace('-','0')
>         item['volume'] = item['volume'].replace(',','')
>         item['date'] = item['date'].replace('Price list and trading 
> summary for Monday, ','')
>         item['date'] = item['date'].replace('Price list and trading 
> summary for Tuesday, ','')
>         item['date'] = item['date'].replace('Price list and trading 
> summary for Wednesday, ','')
>         item['date'] = item['date'].replace('Price list and trading 
> summary for Thursday, ','')
>         item['date'] = item['date'].replace('Price list and trading 
> summary for Friday, ','')
>         item['date'] = item['date'].replace('"','')
>
>         item['adjustedPrice'] = item['adjustedPrice'].replace('-','0')
>         item['percentChange'] = item['percentChange'].replace('-','0')
>         item['change'] = item['change'].replace('-','0')
>
>
>
>     
>         self.csvwriter.writerow([today, item['previous'], item['day_high'
> ], item['day_low'], item['price'], item['volume'], item['adjustedPrice']])
>         data = csv.reader(open(filename),delimiter=',')
>         self.sortrows(filename,data)
>
>
>
>         return item
>
>
>
>

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to