Python实现从脚本里运行scrapy的方法
本文实例讲述了Python实现从脚本里运行scrapy的方法。分享给大家供大家参考。具体如下:
#!/usr/bin/python
import os

# Must be set at the top, before any scrapy imports, so Scrapy can locate
# the project settings module.
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'project.settings')

# NOTE(review): these imports target a legacy, pre-1.0 Scrapy API —
# scrapy.conf, scrapy.project and scrapy.xlib.pydispatch were removed in
# later Scrapy releases. Confirm the installed Scrapy version before use.
from scrapy import log, signals, project
from scrapy.xlib.pydispatch import dispatcher
from scrapy.conf import settings
from scrapy.crawler import CrawlerProcess
from multiprocessing import Process, Queue
class CrawlerScript():
    """Drive a Scrapy crawl from a plain script and collect the scraped items.

    Each call to :meth:`crawl` runs the spider in a child process so the
    Twisted reactor (which cannot be restarted within one process) always
    gets a fresh process per crawl.

    NOTE(review): relies on the legacy pre-1.0 Scrapy API
    (``CrawlerProcess(settings)``, ``crawler.install()``,
    ``crawler.queue.append_spider``) — verify against the installed version.
    """

    def __init__(self):
        self.crawler = CrawlerProcess(settings)
        # Install this crawler as the project-wide one only if none exists yet.
        if not hasattr(project, 'crawler'):
            self.crawler.install()
        self.crawler.configure()
        # Accumulates every item the engine reports via the item_passed signal.
        self.items = []
        dispatcher.connect(self._item_passed, signals.item_passed)

    def _item_passed(self, item):
        # Signal handler: record each item the spider yields.
        self.items.append(item)

    def _crawl(self, queue, spider_name):
        # Runs in the child process: perform the crawl, then push the
        # collected items back to the parent through the queue.
        spider = self.crawler.spiders.create(spider_name)
        if spider:
            self.crawler.queue.append_spider(spider)
        self.crawler.start()
        self.crawler.stop()
        queue.put(self.items)

    def crawl(self, spider):
        """Run the spider named *spider* in a subprocess.

        Blocks until the crawl finishes and returns the list of scraped
        items produced by that run.
        """
        queue = Queue()
        p = Process(target=self._crawl, args=(queue, spider,))
        p.start()
        p.join()
        # Blocking get: waits for the child to deliver its item list.
        return queue.get(True)
# Usage
if __name__ == "__main__":
    log.start()
    # This example runs spider1 once and then spider2 three times,
    # collecting the item lists from every run.
    items = list()
    crawler = CrawlerScript()
    items.append(crawler.crawl('spider1'))
    for i in range(3):
        items.append(crawler.crawl('spider2'))
    # Single-argument print works identically as a Py2 statement
    # (parenthesized expression) and the Py3 function.
    print(items)
希望本文所述对大家的Python程序设计有所帮助。