Hello fellow scrapy users, i have written a simple spider to scrape 
financial data from a website and return csv files with the scraped data. 
However my application needs the rows to be sorted in descending order. i 
have written a function in the pipelines file for this purpose, the problem 
i am experiencing is that i need to run the spider twice to sort the rows 
leaving me with duplicate entries. Can anyone assist me with this. here is 
the code for pipelines.py

class MystocksPipeline(object):

    def sortrows(self, myfile, mydata):
        
        sortedlist = sorted(mydata, key=operator.itemgetter(0), reverse =
True)
        with open(myfile, "wb") as f:
            fileWriter = csv.writer(f, delimiter=',')
            for row in sortedlist:
                fileWriter.writerow(row)

    def process_item(self, item, spider):

        today = time.strftime("%Y-%m-%d")

        name = item['code']
        filename = "%s.csv" % name


        self.csvwriter = csv.writer(open(filename, 'a'),lineterminator='\n')
        
        item['volume'] = item['volume'].replace('.','')
        item['volume'] = item['volume'].replace('M','0000')
        item['volume'] = item['volume'].replace('-','0')
        item['volume'] = item['volume'].replace(',','')
        item['date'] = item['date'].replace('Price list and trading summary 
for Monday, ','')
        item['date'] = item['date'].replace('Price list and trading summary 
for Tuesday, ','')
        item['date'] = item['date'].replace('Price list and trading summary 
for Wednesday, ','')
        item['date'] = item['date'].replace('Price list and trading summary 
for Thursday, ','')
        item['date'] = item['date'].replace('Price list and trading summary 
for Friday, ','')
        item['date'] = item['date'].replace('"','')

        item['adjustedPrice'] = item['adjustedPrice'].replace('-','0')
        item['percentChange'] = item['percentChange'].replace('-','0')
        item['change'] = item['change'].replace('-','0')



    
        self.csvwriter.writerow([today, item['previous'], item['day_high'], 
item['day_low'], item['price'], item['volume'], item['adjustedPrice']])
        data = csv.reader(open(filename),delimiter=',')
        self.sortrows(filename,data)



        return item



-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at https://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to