python爬取网易云音乐热歌榜实例代码
首先找到要下载的歌曲排行榜的链接,这里用的是:
https://music.163.com/discover/toplist?id=3778678
然后把代码中的保存目录改成你自己的目录。该目录必须事先建好,例如我保存在 D 盘的“360下载/网易云热歌榜”文件夹内,之后运行脚本即可完成下载。
如果文件夹没有提前建好,会报错 [Errno 2] No such file or directory。
代码实现:
import re
import time
from urllib import request

import requests
from bs4 import BeautifulSoup
class Music(object):
    """Download the songs listed on a NetEase Cloud Music chart page.

    The workflow is: fetch the chart page (``askurl``), pull the
    ``<textarea>`` elements out of it (``analysis``), extract song tuples
    from that text with regular expressions (``matching``) and download
    one ``.mp3`` per tuple (``save``).  ``main`` chains the four steps.
    """

    # NOTE(review): the tail ``[],"alias":[]`` of this pattern is parsed by
    # ``re`` as ONE character class (a ``]`` placed first inside ``[...]`` is
    # a literal), not as the literal text ``[],"alias":[]``.  The rule
    # therefore matches ``"tns":`` followed by a single character of that
    # set.  It is kept byte-for-byte because the second rule depends on the
    # spans it produces.
    _NAME_RULE = re.compile(r'"name":"(.*?)","tns":[],"alias":[]')
    _SONG_RULE = re.compile(r'jpg,(.*?),(.*?)","id":(\d*)')

    def __init__(self, baseurl, path):
        """Remember the chart URL, the output prefix and the request headers.

        Args:
            baseurl: URL of the chart page to scrape.
            path: Prefix prepended verbatim to every output file name.  It is
                concatenated, not joined, so it should end with a path
                separator and the directory must already exist.
        """
        head = {
            # The spaces inside the User-Agent were lost in the source text;
            # they are restored here to the usual Chrome format.
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
        }
        self.baseurl = baseurl
        self.headers = head
        self.path = path

    def main(self):
        """Run the whole pipeline: fetch, parse, match, download."""
        html = self.askurl()
        textarea_text = self.analysis(html)
        name1 = self.matching(textarea_text)
        self.save(name1)

    def askurl(self):
        """Fetch ``self.baseurl`` and return the body decoded as UTF-8."""
        req = request.Request(url=self.baseurl, headers=self.headers)
        # Context manager so the connection is closed even if read() fails.
        with request.urlopen(req) as response:
            html = response.read().decode("utf-8")
        return html

    def analysis(self, html):
        """Return the string form of every ``<textarea>`` element in *html*.

        The result is ``str()`` of the list returned by ``find_all``, i.e.
        the elements' markup wrapped in square brackets.
        """
        soup = BeautifulSoup(html, "html.parser")
        return str(soup.find_all("textarea"))

    def matching(self, bs4):
        """Extract ``(first, second, id)`` string tuples from *bs4*.

        Args:
            bs4: Text produced by ``analysis``.

        Returns:
            A list of 3-tuples of strings, one per match of the second rule;
            the third item is the run of digits following ``"id":``.  The
            list is empty when nothing matches.
        """
        fragments = self._NAME_RULE.findall(bs4)
        # Every fragment is prefixed with "," (including the first one): the
        # second rule relies on a fragment ending in "jpg" being followed by
        # that comma.
        joined = "".join("," + fragment for fragment in fragments)
        joined = joined.replace("\xa0", "")  # drop non-breaking spaces
        return self._SONG_RULE.findall(joined)

    @staticmethod
    def _safe_filename(name):
        """Replace characters that are not allowed in file names with ``_``."""
        return re.sub(r'[\\/:*?"<>|]', "_", name)

    def save(self, name1):
        """Download every entry of *name1* to ``<path><second>-<first>.mp3``.

        Args:
            name1: Iterable of ``(first, second, id)`` tuples as returned by
                ``matching``.  NOTE(review): presumably artist, song title
                and song id -- confirm against the live page data.
        """
        for first, second, song_id in name1:
            title = second + "-" + first
            print("正在下载:" + title + "...")
            url = "http://music.163.com/song/media/outer/url?id=" + song_id
            content = requests.get(url=url, headers=self.headers).content
            # Only the file name is sanitised; self.path is used verbatim.
            with open(self.path + self._safe_filename(title) + ".mp3", "wb") as f:
                f.write(content)
            print(title + "下载完毕。\n")
            time.sleep(0.5)  # short pause between two downloads
        return
# Script entry point: download the whole hot-song chart into a local folder.
if __name__ == "__main__":
    baseurl = "https://music.163.com/discover/toplist?id=3778678"  # chart page to scrape
    path = "D:/360下载/网易云热歌榜/"  # output directory prefix (keep the trailing slash)
    demo0 = Music(baseurl, path)
    demo0.main()
    print("下载完毕")
内容扩展:
Python3实战之爬虫抓取网易云音乐的热门评论
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import re
import urllib.request
import urllib.error
import urllib.parse
import json
def get_all_hotSong(url='http://music.163.com/discover/toplist?id=3778678'):
    """Return the names and ids of every song on a chart page.

    Args:
        url: Address of the chart page.  Defaults to the NetEase Cloud Music
            hot-song chart.

    Returns:
        A ``(hot_song_name, hot_song_id)`` tuple of two parallel lists of
        strings.  Both lists are empty when the page contains no song list.
    """
    # Close the connection deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        html = response.read().decode('utf8')

    # NOTE(review): the three regular expressions of the source were reduced
    # to '.* ', '(.*?) ' and '.*? ' (their markup appears to have been
    # stripped), which cannot extract any id.  The patterns below assume the
    # page lists its songs as
    #   <ul class="f-hide"><li><a href="/song?id=ID">NAME</a></li>...</ul>
    # -- confirm against the live page.
    song_list = re.search(r'<ul class="f-hide">(.*?)</ul>', html, re.S)
    if song_list is None:
        return [], []

    pairs = re.findall(r'<a href="/song\?id=(\d+)">(.*?)</a>', song_list.group(1), re.S)
    hot_song_name = [name for _, name in pairs]
    hot_song_id = [song_id for song_id, _ in pairs]
    return hot_song_name, hot_song_id
def get_hotComments(hot_song_name, hot_song_id):
    """Fetch the hot comments of one song and append them to ./song_comments.

    The section written for a song is: the song name followed by ``:``, one
    numbered line per comment, then a separator line.

    Args:
        hot_song_name: Song title, written as the section heading.
        hot_song_id: Song id, inserted into the comment URL.

    Raises:
        KeyError: If the response JSON has no ``hotComments`` entry.
    """
    url = 'http://music.163.com/weapi/v1/resource/comments/R_SO_4_' + str(hot_song_id) + '?csrf_token='  # comment endpoint
    header = {  # request headers
        # The spaces inside the User-Agent were lost in the source text;
        # they are restored here to the usual Chrome format.
        'User-Agent': 'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36'
    }
    # Form fields of the POST request; both values are sent as-is.
    data = {'params': 'zC7fzWBKxxsm6TZ3PiRjd056g9iGHtbtc8vjTpBXshKIboaPnUyAXKze+KNi9QiEz/IieyRnZfNztp7yvTFyBXOlVQP/JdYNZw2+GRQDg7grOR2ZjroqoOU2z0TNhy+qDHKSV8ZXOnxUF93w3DA51ADDQHB0IngL+v6N8KthdVZeZBe0d3EsUFS8ZJltNRUJ', 'encSecKey': '4801507e42c326dfc6b50539395a4fe417594f7cf122cf3d061d1447372ba3aa804541a8ae3b3811c081eb0f2b71827850af59af411a10a1795f7a16a5189d163bc9f67b3d1907f5e6fac652f7ef66e5a1f12d6949be851fcf4f39a0c2379580a040dc53b306d5c807bf313cc0e8f39bf7d35de691c497cda1d436b808549acc'}
    postdata = urllib.parse.urlencode(data).encode('utf8')  # url-encode the form
    request = urllib.request.Request(url, headers=header, data=postdata)
    with urllib.request.urlopen(request) as response:
        reponse = response.read().decode('utf8')
    json_dict = json.loads(reponse)
    hot_commit = json_dict['hotComments']  # list of hot-comment objects

    # An explicit encoding keeps the write from depending on the platform's
    # locale; `with` closes the file even when a write fails.
    with open('./song_comments', 'a', encoding='utf8') as fhandle:
        fhandle.write(hot_song_name + ':' + '\n')
        for num, item in enumerate(hot_commit, start=1):
            fhandle.write(str(num) + '.' + item['content'] + '\n')
        fhandle.write('\n==============================================\n\n')
hot_song_name, hot_song_id = get_all_hotSong()  # names and ids of every chart song

# NOTE(review): the source is cut off right after "while num"; the condition
# and the loop body below are a minimal reconstruction that fetches the hot
# comments of each song in turn -- confirm against the original script.
num = 0
while num < len(hot_song_name):
    get_hotComments(hot_song_name[num], hot_song_id[num])
    num += 1
以上就是python爬取网易云音乐热歌榜实例代码的详细内容,更多关于python爬取网易云音乐热歌榜的资料请关注毛票票其它相关文章!