I'm trying to save the contents of my scrape to an XML file, but I'm having a
hard time understanding the right way to achieve this. When I run
my spider using the code below, it doesn't generate an XML file. I'm sure I
have to specify an output location and file name, but I don't know how. Any help
would be appreciated:
*Command*
scrapy crawl site
*settings.py*
# Scrapy project settings for the 'crawler' bot.
BOT_NAME = 'crawler'

SPIDER_MODULES = ['crawler.spiders']
NEWSPIDER_MODULE = 'crawler.spiders'

# Item pipelines are components that expose process_item(); an item exporter
# is not one, so the exporter class is no longer registered here. Writing the
# scraped items to a file is done by the feed-export settings below.
ITEM_PIPELINES = {}

# Feed export: serialize every scraped item to an XML file.
FEED_FORMAT = 'xml'
FEED_URI = 'items.xml'  # output file; change to the desired location/name

# Register exporters through FEED_EXPORTERS rather than FEED_EXPORTERS_BASE:
# replacing the *_BASE dict would drop every other built-in format.
FEED_EXPORTERS = {
    'xml': 'scrapy.contrib.exporter.XmlItemExporter',
}
*spider.py*
from scrapy.contrib.spiders import XMLFeedSpider
from crawler.items import CrawlerItem
class SiteSpider(XMLFeedSpider):
    """XMLFeedSpider subclass for the w3schools sample ``note.xml`` feed.

    Configured with ``itertag = 'note'``; ``parse_node`` builds one
    ``CrawlerItem`` from each node it is given.
    """

    name = 'site'
    allowed_domains = ['www.w3schools.com']
    start_urls = ['http://www.w3schools.com/xml/note.xml']
    itertag = 'note'

    def parse_node(self, response, node):
        """Return a ``CrawlerItem`` populated from the child tags of *node*.

        Every field stores the ``extract()`` result of selecting the
        corresponding tag. The ``who`` field is filled from the ``from`` tag;
        the other fields share their name with the tag they are read from.
        """
        # (item field, source tag) pairs, in the order they are assigned.
        field_sources = (
            ('to', 'to'),
            ('who', 'from'),
            ('heading', 'heading'),
            ('body', 'body'),
        )
        item = CrawlerItem()
        for field, tag in field_sources:
            item[field] = node.select(tag).extract()
        return item
*pipeline.py*
class XmlItemExporter(BaseItemExporter):
    """Write items as XML through a SAX ``XMLGenerator``.

    The class exposes the exporter life cycle -- ``start_exporting``,
    ``export_item`` once per item, then ``finish_exporting`` -- and defines
    no ``process_item`` method, so it is an item exporter rather than an
    item pipeline.

    ``_configure``, ``_get_serialized_fields`` and ``self.encoding`` are not
    defined here; they are expected to come from ``BaseItemExporter``.
    """

    def __init__(self, file, **kwargs):
        """Prepare an XML writer on *file*.

        :param file: object handed to ``XMLGenerator`` as its output stream.
        :param kwargs: ``item_element`` (default ``'item'``) and
            ``root_element`` (default ``'items'``) are consumed here; the
            remaining options are passed to ``_configure``.
        """
        self.item_element = kwargs.pop('item_element', 'item')
        self.root_element = kwargs.pop('root_element', 'items')
        self._configure(kwargs)
        self.xg = XMLGenerator(file, encoding=self.encoding)

    def start_exporting(self):
        """Start the document and open the root element."""
        self.xg.startDocument()
        self.xg.startElement(self.root_element, {})

    def export_item(self, item):
        """Write *item* as one item element with a child per serialized field."""
        self.xg.startElement(self.item_element, {})
        for name, value in self._get_serialized_fields(item, default_value=''):
            self._export_xml_field(name, value)
        self.xg.endElement(self.item_element)

    def finish_exporting(self):
        """Close the root element and end the document."""
        self.xg.endElement(self.root_element)
        self.xg.endDocument()

    def _export_xml_field(self, name, serialized_value):
        """Write ``<name>...</name>`` for one serialized value.

        Iterable values are expanded into nested ``<value>`` children;
        anything else is written as character data.
        """
        self.xg.startElement(name, {})
        # Text has to be recognised before the iterability check: on Python 3
        # ``str`` defines ``__iter__`` and a one-character string yields
        # itself, which would recurse without end, while ``bytes`` would be
        # iterated as integers. ``type(u'')`` is ``unicode`` on Python 2 and
        # ``str`` on Python 3.
        is_text = isinstance(serialized_value, (bytes, type(u'')))
        if not is_text and hasattr(serialized_value, '__iter__'):
            for value in serialized_value:
                self._export_xml_field('value', value)
        else:
            self.xg.characters(serialized_value)
        self.xg.endElement(name)
--
You received this message because you are subscribed to the Google Groups
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.