Python实现从脚本里运行scrapy的方法
本文实例讲述了Python实现从脚本里运行scrapy的方法。分享给大家供大家参考。具体如下：
#!/usr/bin/python
# Run Scrapy spiders programmatically from a plain Python script instead of
# the `scrapy crawl` CLI. Uses the legacy (pre-1.0) Scrapy API:
# scrapy.conf.settings, scrapy.project, CrawlerProcess.install()/configure().
import os

# Must be set at the top, before any scrapy import reads the settings module.
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'project.settings')

from scrapy import log, signals, project
from scrapy.xlib.pydispatch import dispatcher
from scrapy.conf import settings
from scrapy.crawler import CrawlerProcess
from multiprocessing import Process, Queue


class CrawlerScript():
    """Drive Scrapy spiders from script code and collect their scraped items.

    Each call to :meth:`crawl` runs one spider in a child process, which
    sidesteps the "reactor cannot be restarted" limitation of Twisted and
    allows the same script to run several crawls sequentially.
    """

    def __init__(self):
        self.crawler = CrawlerProcess(settings)
        # install() registers this crawler as the project-wide singleton;
        # only do it once per process.
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()
        # Items scraped so far; appended by the item_passed signal handler.
        self.items = []
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        # Signal handler: record every item the running spider yields.
        self.items.append(item)

    def _crawl(self, queue, spider_name):
        """Run *spider_name* to completion (in the child process) and put the
        collected items on *queue* for the parent to retrieve."""
        spider = self.crawler.spiders.create(spider_name)
        if spider:
            self.crawler.queue.append_spider(spider)
        self.crawler.start()  # blocks until the crawl finishes
        self.crawler.stop()
        queue.put(self.items)

    def crawl(self, spider):
        """Run the named *spider* in a fresh child process and return the
        list of items it scraped."""
        queue = Queue()
        p = Process(target=self._crawl, args=(queue, spider,))
        p.start()
        p.join()
        # Block until the child has put its item list on the queue.
        return queue.get(True)


# Usage
if __name__ == "__main__":
    log.start()
    # This example runs spider1, and then spider2 three times.
    items = list()
    crawler = CrawlerScript()
    items.append(crawler.crawl('spider1'))
    for i in range(3):
        items.append(crawler.crawl('spider2'))
    print(items)
希望本文所述对大家的Python程序设计有所帮助。