python - Scrapy prints fields but doesn't populate XML file -
i have problem prints xml files correctly doesn't populate xml file content.
the output in terminal this:
[u'tove'] [u'jani'] [u'reminder'] [u"don't forget me weekend!"]
however output site_products.xml results in (which wrong, no data):
<?xml version="1.0" encoding="utf-8"?> <items></items>
spider.py
from scrapy.contrib.spiders import xmlfeedspider crawler.items import crawleritem class sitespider(xmlfeedspider): name = 'site' allowed_domains = ['www.w3schools.com'] start_urls = ['http://www.w3schools.com/xml/note.xml'] itertag = 'note' def parse_node(self, response, selector): = selector.xpath('//to/text()').extract() = selector.xpath('//from/text()').extract() heading = selector.xpath('//heading/text()').extract() body = selector.xpath('//body/text()').extract() return item
pipelines.py
from scrapy import signals scrapy.contrib.exporter import xmlitemexporter class xmlexportpipeline(object): def __init__(self): self.files = {} @classmethod def from_crawler(cls, crawler): pipeline = cls() crawler.signals.connect(pipeline.spider_opened, signals.spider_opened) crawler.signals.connect(pipeline.spider_closed, signals.spider_closed) return pipeline def spider_opened(self, spider): file = open('%s_products.xml' % spider.name, 'w+b') self.files[spider] = file self.exporter = xmlitemexporter(file) self.exporter.start_exporting() def spider_closed(self, spider): self.exporter.finish_exporting() file = self.files.pop(spider) file.close() def process_item(self, item, spider): self.exporter.export_item(item) return item
items.py
import scrapy class crawleritem(scrapy.item): = scrapy.field() = scrapy.field() heading = scrapy.field() body = scrapy.field() pass
settings.py
bot_name = 'crawler' spider_modules = ['crawler.spiders'] newspider_module = 'crawler.spiders' item_pipelines = {'crawler.pipelines.xmlexportpipeline': 300,}
any appreciated.
you need instantiate crawleritem
instance in parse_node()
method:
def parse_node(self, response, selector): item = crawleritem() item['to'] = selector.xpath('//to/text()').extract() item['who'] = selector.xpath('//from/text()').extract() item['heading'] = selector.xpath('//heading/text()').extract() item['body'] = selector.xpath('//body/text()').extract() return item
Comments
Post a Comment