I'm trying to save the contents of my scrape to an XML file, and I'm having a 
hard time understanding the right way to achieve this. When I run my spider 
using the code below, it doesn't generate an XML file. I'm sure I must have 
to specify a location and name for the output file, but I don't know how. 
Any help would be appreciated:

*Command*

scrapy crawl site

*settings.py*

BOT_NAME = 'crawler'

SPIDER_MODULES = ['crawler.spiders']
NEWSPIDER_MODULE = 'crawler.spiders'

# An item exporter is not an item pipeline: it has no ``process_item`` method
# and its constructor needs an open file, so it must not be listed here.
# Exporting to a file is handled by the feed-export settings below.
ITEM_PIPELINES = {}

# Register the exporter through the user-level setting instead of replacing
# FEED_EXPORTERS_BASE, which would drop every other built-in format.
FEED_EXPORTERS = {
    'xml': 'scrapy.contrib.exporter.XmlItemExporter',
}

# Feed export: write every scraped item to this location in this format.
# The same result is available from the command line with
# ``scrapy crawl site -o items.xml -t xml``.
FEED_FORMAT = 'xml'
FEED_URI = 'items.xml'


*spider.py*


from scrapy.contrib.spiders import XMLFeedSpider
from crawler.items import CrawlerItem

class SiteSpider(XMLFeedSpider):
    """Spider named ``site`` that reads the w3schools sample ``note.xml`` feed."""

    name = 'site'
    allowed_domains = ['www.w3schools.com']
    start_urls = ['http://www.w3schools.com/xml/note.xml']
    itertag = 'note'

    def parse_node(self, response, node):
        """Build one ``CrawlerItem`` from a feed node.

        Each item field is filled with the extracted result of selecting the
        matching child element on ``node``. Presumably called by
        ``XMLFeedSpider`` once per ``itertag`` node -- confirm against the
        Scrapy version in use.
        """
        # (item field, selector query) pairs; the feed's ``from`` element is
        # stored under the ``who`` field.
        field_queries = (
            ('to', 'to'),
            ('who', 'from'),
            ('heading', 'heading'),
            ('body', 'body'),
        )
        item = CrawlerItem()
        for field, query in field_queries:
            item[field] = node.select(query).extract()
        return item



*pipeline.py*

class XmlItemExporter(BaseItemExporter):
    """Write items as an XML document through a SAX ``XMLGenerator``.

    The document has one root element containing one element per exported
    item; each item field becomes a child element named after the field.

    NOTE: constructing this class requires an output ``file`` and it defines
    no ``process_item`` method, so it is an exporter rather than an item
    pipeline.
    """

    def __init__(self, file, **kwargs):
        """Set up the exporter.

        :param file: object the generated XML is written to.
        :param kwargs: optional ``item_element`` (default ``'item'``) and
            ``root_element`` (default ``'items'``); the remaining options are
            handed to ``_configure``.
        """
        self.item_element = kwargs.pop('item_element', 'item')
        self.root_element = kwargs.pop('root_element', 'items')
        # Presumably sets ``self.encoding`` among other options -- defined on
        # the base class, not visible here.
        self._configure(kwargs)
        self.xg = XMLGenerator(file, encoding=self.encoding)

    def start_exporting(self):
        """Begin the document and open the root element."""
        self.xg.startDocument()
        self.xg.startElement(self.root_element, {})

    def export_item(self, item):
        """Write one item element containing a child element per field."""
        self.xg.startElement(self.item_element, {})
        for name, value in self._get_serialized_fields(item, default_value=''):
            self._export_xml_field(name, value)
        self.xg.endElement(self.item_element)

    def finish_exporting(self):
        """Close the root element and end the document."""
        self.xg.endElement(self.root_element)
        self.xg.endDocument()

    def _export_xml_field(self, name, serialized_value):
        """Write ``serialized_value`` inside an element called ``name``.

        Strings are written as character data. Any other iterable is
        expanded recursively, each member wrapped in a ``<value>`` element.
        """
        self.xg.startElement(name, {})
        # Strings must be checked first: on Python 3 ``str`` defines
        # ``__iter__`` and a one-character string iterates to itself, so the
        # iterable branch would recurse without end. ``(bytes, type(u''))``
        # covers str/unicode on Python 2 and bytes/str on Python 3.
        if isinstance(serialized_value, (bytes, type(u''))):
            self.xg.characters(serialized_value)
        elif hasattr(serialized_value, '__iter__'):
            for value in serialized_value:
                self._export_xml_field('value', value)
        else:
            self.xg.characters(serialized_value)
        self.xg.endElement(name)

-- 
You received this message because you are subscribed to the Google Groups 
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.

Reply via email to