python爬取淘宝商品销量信息
这是一个用 Python 爬取淘宝商品销量的程序。运行程序后,输入想要爬取的商品关键词即可。代码中的 '###' 是商品标题过滤条件的占位符,可以把它替换成进一步约束商品属性的文字:比如要统计某位作者的书籍,就把 '###' 换成作者名字、时期等等,程序只会统计标题中包含该文字的商品。最后可以得到所要商品的总销量。
import requests
import bs4
import re
import json


def open(keywords, page, timeout=10):
    """Fetch one page of Taobao search results, sorted by sales descending.

    The name shadows the built-in ``open``; it is kept so existing callers
    keep working.

    Args:
        keywords: Search query, sent as the ``q`` parameter.
        page: 1-based page number; the ``s`` offset sent to the server
            advances by 44 per page.
        timeout: Seconds to wait for the server before ``requests`` raises.

    Returns:
        The ``requests.Response`` of the search request.
    """
    # Browser-like User-Agent. It has to be handed to requests.get explicitly;
    # otherwise requests sends its own default "python-requests/..." agent.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
        )
    }
    payload = {'q': keywords, 'sort': "sale-desc", 's': (page - 1) * 44}
    url = "https://s.taobao.com/search"
    return requests.get(url, params=payload, headers=headers, timeout=timeout)


def get_item(res):
    """Extract the item records embedded in a search result page.

    The page text is expected to contain a ``g_page_config = {...};``
    assignment whose JSON holds the items under
    ``mods -> itemlist -> data -> auctions``.

    Args:
        res: Response-like object whose ``text`` attribute is the page source.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``link``, ``price``,
        ``sale`` and ``shoper``. The list is empty when the page has no
        ``g_page_config`` assignment or no item list (for example a page
        past the last result).

    Raises:
        KeyError: If an item lacks one of the expected fields.
        json.JSONDecodeError: If the embedded configuration is not valid JSON.
    """
    # Whitespace around "=" is optional so both "g_page_config = {" and
    # "g_page_config={" are recognised.
    found = re.search(r'g_page_config\s*=\s*(.*?);\n', res.text)
    if found is None:
        return []

    page_config = json.loads(found.group(1))
    try:
        auctions = page_config['mods']['itemlist']['data']['auctions']
    except (KeyError, TypeError):
        # Some level of the nesting is missing or is not a mapping.
        return []

    # Keep only the fields we care about:
    # ID, title, link, price, sales text and seller.
    return [
        {
            'id': auction['nid'],
            'title': auction['title'],
            'link': auction['detail_url'],
            'price': auction['view_price'],
            'sale': auction['view_sales'],
            'shoper': auction['nick'],
        }
        for auction in auctions
    ]


def count_sales(items, keyword='###'):
    """Sum the sales figures of the items whose title contains ``keyword``.

    Args:
        items: Iterable of dicts with at least ``title`` and ``sale`` keys,
            as produced by ``get_item``.
        keyword: Substring a title must contain for the item to be counted.
            The default ``'###'`` is a placeholder meant to be replaced
            (for example with an author's name).

    Returns:
        The sum of the first run of digits found in each counted item's
        ``sale`` text. Items whose sales text has no digits add nothing.
    """
    count = 0
    for item in items:
        if keyword not in item['title']:
            continue
        digits = re.search(r'\d+', item['sale'])
        if digits is not None:
            count += int(digits.group())
    return count


def main():
    """Prompt for a search keyword and print the total sales over 10 pages."""
    keywords = input("请输入搜索关键词:")  # any product name
    length = 10  # number of result pages to fetch
    total = 0
    for page in range(1, length + 1):
        res = open(keywords, page)
        items = get_item(res)
        total += count_sales(items)  # running sales total
    print(total)


if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。