Hi,
I am a beginner with Scrapy and also with Python. So far I have managed
to create some basic spiders based on the examples given in the Scrapy docs.
Now I would like to extend the default features to meet a custom
requirement. At the moment I am trying to change the default tag names in the output XML.
For example,
1) By default it generates XML with item_element='item' and
root_element='items'; instead I want to change them to item_element='ad' and
root_element='ads'. Also, how can I stop multi-valued fields from being
exported with each value wrapped inside its own <value> element?
Could you please send me sample source code for the above queries?
Thanks,
Sachin
--
You received this message because you are subscribed to the Google Groups
"scrapy-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
To post to this group, send email to [email protected].
Visit this group at http://groups.google.com/group/scrapy-users.
For more options, visit https://groups.google.com/d/optout.
#!/usr/bin/env python
# encoding=utf-8
from scrapy.contrib.exporter import XmlItemExporter
class CustomXmlItemExporter(XmlItemExporter):
    """XML item exporter that uses ``<Jobs>``/``<Job>`` as its default tag names.

    The root and per-item element names default to ``"Jobs"`` and ``"Job"``
    but can still be overridden by the caller through keyword arguments.
    """

    def __init__(self, file, **kwargs):
        """Initialise the exporter with job-specific element names.

        Args:
            file: Output file object, passed through to ``XmlItemExporter``.
            **kwargs: Extra options forwarded to ``XmlItemExporter``.
                ``root_element`` and ``item_element`` may be supplied here to
                override the ``"Jobs"`` / ``"Job"`` defaults.
        """
        # setdefault instead of hard-coded keyword arguments: passing
        # root_element/item_element explicitly *and* via **kwargs would raise
        # "got multiple values for keyword argument".
        kwargs.setdefault("root_element", "Jobs")
        kwargs.setdefault("item_element", "Job")
        super(CustomXmlItemExporter, self).__init__(file, **kwargs)

    def _export_xml_field(self, name, serialized_value=None):
        """Write one field as an XML element, recursing into containers.

        Args:
            name: Tag name of the element to write.
            serialized_value: Value to place inside the element.
                * mapping (anything with ``.items()``): one child element per
                  key, named after the key;
                * text (``str``/``bytes``): written as character data;
                * any other iterable: one ``<value>`` child per entry;
                * ``None``: the element is written empty;
                * anything else: handed to ``_xg_characters`` unchanged.
        """
        self.xg.startElement(name, {})
        if hasattr(serialized_value, 'items'):
            for subname, value in serialized_value.items():
                self._export_xml_field(subname, value)
        elif isinstance(serialized_value, (str, bytes)):
            # Text must be tested before the generic iterable branch: on
            # Python 3 ``str`` defines ``__iter__`` and a one-character string
            # iterates to itself, which would recurse without end.
            self._xg_characters(serialized_value)
        elif hasattr(serialized_value, '__iter__'):
            for value in serialized_value:
                self._export_xml_field('value', value)
        elif serialized_value is not None:
            self._xg_characters(serialized_value)
        # NOTE(review): None (the declared default) yields an empty element;
        # presumably the inherited text writer expects a string -- confirm.
        self.xg.endElement(name)
## def _export_xml_field(self, name, serialized_value, children_name=''):
## self.xg.startElement(name, {})
## if children_name:
## self._export_xml_field(children_name, serialized_value)
#### aa1 elif hasattr(serialized_value, 'items'):
#### for subname, value in serialized_value.items():
#### self._export_xml_field(subname, value)
#### elif hasattr(serialized_value, '__iter__'):
#### for value in serialized_value:
#### self._export_xml_field(name, value)
#### else:
#### self.xg.characters(serialized_value)
## self.xg.endElement(name)