python爬取淘宝商品销量信息
这是一个用 Python 爬取淘宝商品销量的程序。运行程序后,输入想要爬取的商品关键词即可;代码中的‘###’处可以进一步约束商品的属性,比如要统计某位作者的书籍,可以在‘###’处填入作者名字、时期等信息。最后可以得到所要商品的总销量。
import json
import re

import bs4
import requests
def open(keywords, page):
    """Fetch one page of Taobao search results, sorted by sales (descending).

    NOTE: this name shadows the built-in ``open``; it is kept unchanged
    because other code in this file calls it by this name.

    Args:
        keywords: Search text, sent as the ``q`` query parameter.
        page: 1-based page number. It is converted to the ``s`` offset
            parameter as ``(page - 1) * 44``.

    Returns:
        The ``requests.Response`` returned by the search request.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the server does not answer within the timeout.
    """
    # Browser-like User-Agent so the request is not sent with the default
    # python-requests identifier.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36"
        )
    }
    payload = {'q': keywords, 'sort': "sale-desc", 's': (page - 1) * 44}
    url = "https://s.taobao.com/search"
    # The headers were previously built but never sent; pass them along and
    # bound the wait so a stalled connection cannot hang the script forever.
    res = requests.get(url, params=payload, headers=headers, timeout=10)
    return res
def get_item(res):
    """Extract the product listings embedded in a search-result page.

    The page text is searched for a ``g_page_config = {...};`` assignment
    whose right-hand side is parsed as JSON; the listings are read from
    ``['mods']['itemlist']['data']['auctions']``.

    Args:
        res: Any object with a ``text`` attribute holding the page source
            (for example a ``requests.Response``).

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``link``,
        ``price``, ``sale`` and ``shoper``. The list is empty when the page
        carries no ``g_page_config`` assignment or no auction list.

    Raises:
        json.JSONDecodeError: if the captured configuration is not valid JSON.
        KeyError: if a listing lacks one of the expected fields.
    """
    # Allow optional whitespace around '=' so both "g_page_config=..." and
    # "g_page_config = ..." are recognised.
    match = re.search(r'g_page_config\s*=\s*(.*?);\n', res.text)
    if match is None:
        # No embedded configuration on this page: nothing to report.
        return []

    page_config = json.loads(match.group(1))
    try:
        auctions = page_config['mods']['itemlist']['data']['auctions']
    except (KeyError, TypeError):
        # The configuration exists but contains no listings.
        return []

    # Keep only the fields we care about: ID, title, link, price, sales
    # text and seller.
    return [
        {
            'id': each['nid'],
            'title': each['title'],
            'link': each['detail_url'],
            'price': each['view_price'],
            'sale': each['view_sales'],
            'shoper': each['nick'],
        }
        for each in auctions
    ]
def count_sales(items, keyword='###'):
    """Sum the sales figures of the items whose title contains *keyword*.

    Args:
        items: Iterable of dicts with at least the keys ``title`` and
            ``sale`` (the shape produced by ``get_item``).
        keyword: Substring a title must contain for the item to be counted.
            The default ``'###'`` is a placeholder meant to be replaced
            with a real filter such as an author name.

    Returns:
        The integer total of the first run of digits found in each matching
        item's ``sale`` text. Items whose ``sale`` text contains no digits
        are skipped instead of raising.
    """
    # NOTE(review): only the first run of digits is used, so a value written
    # with a decimal point or a unit suffix would be truncated to its leading
    # integer part - confirm the sales-text format before relying on totals.
    count = 0
    for each in items:
        # Only count items whose title contains the filter text.
        if keyword not in (each['title'] or ''):
            continue
        digits = re.search(r'\d+', each['sale'] or '')
        if digits is not None:
            count += int(digits.group())
    return count
def main():
    """Prompt for a search keyword and print the summed sales figure.

    Fetches result pages 1 through ``length`` one after another, extracts
    the listings from each page, and adds up the sales counted per page.
    """
    keywords = input("请输入搜索关键词:")  # may be any product name
    length = 10  # number of result pages to scan
    # Pages are 1-based; each page is fetched, parsed and counted in turn.
    total = sum(
        count_sales(get_item(open(keywords, page_no)))
        for page_no in range(1, length + 1)
    )
    print(total)  # overall sales total
# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。