Python 3 爬取数据至 MySQL 的方法
本文实例为大家分享了 Python 3 爬取数据并存入 MySQL 的具体代码,供大家参考,具体内容如下:
直接贴代码
#!/usr/local/bin/python3.5
# -*- coding: UTF-8 -*-
import datetime
import random
import re
from urllib.request import urlopen

import pymysql
from bs4 import BeautifulSoup
# Open the MySQL connection and cursor shared by the rest of the script.
# NOTE(review): host and credentials are hard-coded; move them to configuration
# or environment variables before using this outside a demo.
# NOTE(review): both `host` and `unix_socket` are supplied — presumably only
# one of them is actually used by the driver; confirm which one is intended.
connect = pymysql.connect(
    host='192.168.10.142',
    unix_socket='/tmp/mysql.sock',
    user='root',
    passwd='1234',
    db='scraping',
    charset='utf8',
)
cursor = connect.cursor()
# The USE keyword and the schema name must be separated by a space.
cursor.execute('USE scraping')
# Seed with the current time as a float: random.seed() no longer accepts
# arbitrary objects such as datetime instances on Python 3.11+.
random.seed(datetime.datetime.now().timestamp())
def store(title, content):
    """Insert a page into the ``pages`` table unless its title already exists.

    Uses the module-level ``cursor`` and commits through its connection.

    Args:
        title: Page title, matched against the ``title`` column.
        content: Text stored in the ``content`` column.
    """
    # Parameters go in a tuple so the driver escapes them; the probe only
    # needs to know whether at least one matching row exists.
    matches = cursor.execute(
        "SELECT 1 FROM pages WHERE `title` = %s LIMIT 1", (title,))
    if matches > 0:
        print('This content is already exist.')
        return

    cursor.execute(
        "INSERT INTO pages (`title`, `content`) VALUES (%s, %s)",
        (title, content))
    cursor.connection.commit()
def get_links(acticle_url):
    """Fetch a Wikipedia page, store its title and first paragraph, return its links.

    Args:
        acticle_url: Path appended to ``http://en.wikipedia.org``; an empty
            string requests the site root.

    Returns:
        The ``<a>`` tags inside ``div#bodyContent`` whose ``href`` starts with
        ``/wiki/``, or an empty list when that div is not present.
    """
    # Close the HTTP response as soon as the document has been parsed.
    with urlopen('http://en.wikipedia.org' + acticle_url) as response:
        soup = BeautifulSoup(response, 'html.parser')

    heading = soup.h1
    content_div = soup.find('div', {'id': 'mw-content-text'})
    first_paragraph = content_div.find('p') if content_div is not None else None
    # Any of these lookups can come back as None; skip storing such a page
    # instead of failing with AttributeError.
    if heading is not None and first_paragraph is not None:
        store(heading.get_text(), first_paragraph.get_text())

    link_area = soup.find('div', {'id': 'bodyContent'})
    if link_area is None:
        return []
    return link_area.find_all('a', href=re.compile(r"^(/wiki/)(.)*$"))
# Random walk over article links: store each visited page, then hop to a
# randomly chosen link from it until a page yields no links.
try:
    # The first fetch is inside the try block so the cursor and connection
    # are released even if it fails.
    links = get_links('')
    while links:
        new_article = random.choice(links).attrs['href']
        links = get_links(new_article)
        print(links)
finally:
    cursor.close()
    connect.close()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。