Python实现微信好友的数据分析
基于微信个人号接口的开源 Python 库 itchat,获取微信好友信息,并对好友的省份、性别和个性签名做数据分析。
效果:
直接上代码:先新建三个空文本文件 stopwords.txt、newdit.txt、unionWords.txt,并下载字体 simhei.ttf(或删除代码中指定字体的部分),即可直接运行。
# wxfriends.py 2018-07-09
"""Analyse the logged-in WeChat account's friend list.

The script fetches the friend list through ``itchat``, stores it as a CSV
file, draws bar charts of the gender and province distributions, prints the
most frequent words found in the friends' signatures and renders those words
as a word cloud.

Optional word-list files read from the working directory:

* ``stopwords.txt``  - one stop word per line.
* ``newdit.txt``     - user dictionary passed to ``jieba.load_userdict``.
* ``unionWords.txt`` - one ``canonical*alias1*alias2`` group per line.
"""
import sys
from collections import Counter
from os import path

import itchat
import jieba
import jieba.posseg as pseg
import matplotlib.pyplot as plt
import pandas as pd
from wordcloud import WordCloud

# NOTE: the original ``from scipy.misc import imread`` was dropped on purpose.
# ``scipy.misc.imread`` no longer exists in SciPy >= 1.2, and the image it
# loaded was only ever assigned to an unused variable (``mask=`` was commented
# out in the WordCloud call).

# Use a CJK-capable font so Chinese labels render in the charts, and keep the
# minus sign rendering correctly with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Translation table mapping every code point above U+FFFF to U+FFFD; the
# original author added it to work around encoding problems with such
# characters (for example emoji) in nicknames and signatures.
non_bmp_map = dict.fromkeys(range(0x10000, sys.maxunicode + 1), 0xfffd)

STOP_WORDS_FILE = 'stopwords.txt'
USER_DICT_FILE = "newdit.txt"
UNION_WORDS_FILE = 'unionWords.txt'

# Column order of the friend table (same order the dict keys are filled in).
FRIEND_COLUMNS = ['NickName', 'Sex', 'Province', 'City', 'Signature']

# Display labels for the numeric ``Sex`` codes handled by this script.
SEX_LABELS = {1: '男', 2: '女'}
UNKNOWN_SEX_LABEL = '雌雄同体'
UNKNOWN_PROVINCE_LABEL = '未知'

# Part-of-speech flags kept by segmentWords: nouns and English tokens.
KEPT_FLAGS = frozenset({'n', 'eng'})
# Tokens that are always dropped, as in the original filter.
MARKUP_WORDS = frozenset({'span', 'class'})


def getFriends():
    """Fetch the friend list and return it as a list of plain dicts.

    Each dict has the keys listed in ``FRIEND_COLUMNS``. ``Sex`` is converted
    to a display label and an empty ``Province`` is replaced by a placeholder.
    """
    # The ``[0:]`` slice keeps every entry returned by itchat, including the
    # first one.
    friends = itchat.get_friends(update=True)[0:]
    flists = []
    for friend in friends:
        flists.append({
            'NickName': friend['NickName'].translate(non_bmp_map),
            'Sex': SEX_LABELS.get(friend['Sex'], UNKNOWN_SEX_LABEL),
            'Province': friend['Province'] or UNKNOWN_PROVINCE_LABEL,
            'City': friend['City'],
            'Signature': friend['Signature'],
        })
    return flists


def saveCSV(lists):
    """Build a DataFrame from *lists*, write ``wxfriends.csv``, return the frame.

    Writing the file is best effort: a failure is printed and the DataFrame is
    still returned so the rest of the analysis can continue.
    """
    # Fixing the columns keeps the frame usable even when *lists* is empty.
    df = pd.DataFrame(lists, columns=FRIEND_COLUMNS)
    try:
        df.to_csv("wxfriends.csv", index=True, encoding='gb18030')
    except Exception as ret:  # deliberate best effort, see docstring
        print(ret)
    return df


def anysys(df):
    """Return ``(df_sex, df_province, df_signature)`` derived from *df*.

    ``df_sex`` holds the value counts of the ``Sex`` column, ``df_province``
    the first 15 entries of the ``Province`` value counts, and
    ``df_signature`` the ``Signature`` column as a one-column frame.
    """
    df_sex = pd.DataFrame(df['Sex'].value_counts())
    df_province = pd.DataFrame(df['Province'].value_counts()[:15])
    df_signature = pd.DataFrame(df['Signature'])
    return df_sex, df_province, df_signature


def draw_chart(df_list, x_feature):
    """Draw a bar chart of *df_list* and save it under the name *x_feature*.

    The frame's index supplies the bar labels and its values, flattened row by
    row, the bar heights. Failures are reported instead of raised so that one
    broken chart does not abort the whole run.
    """
    try:
        x = list(df_list.index)
        y = [value for row in df_list.values for value in row]
        plt.bar(x, y, label=x_feature)
        plt.legend()
        plt.savefig(x_feature)
        plt.close()
    except Exception as err:
        print("绘图失败", err)


def getSignList(signature):
    """Flatten the signature frame into a list of strings.

    Characters above U+FFFF are replaced via ``non_bmp_map``.
    """
    return [
        text.translate(non_bmp_map)
        for row in signature.values
        for text in row
    ]


def _read_lines(filename):
    """Return the stripped, non-empty lines of *filename* ([] if it is missing).

    The file is decoded as ``utf-8-sig`` so a leading BOM is discarded.
    """
    if not path.isfile(filename):
        return []
    with open(filename, encoding='utf-8-sig') as handle:
        stripped = (line.strip() for line in handle)
        return [line for line in stripped if line]


def segmentWords(txtlist):
    """Segment every text in *txtlist* and return the list of kept words.

    A word is kept when its part-of-speech flag is ``n`` or ``eng``, it is not
    a stop word and it is not one of ``span`` / ``class``. Afterwards each
    alias listed in ``unionWords.txt`` is replaced by the first word of its
    group.
    """
    stop_words = set(_read_lines(STOP_WORDS_FILE))
    if path.isfile(USER_DICT_FILE):
        jieba.load_userdict(USER_DICT_FILE)

    newslist = []
    for subject in txtlist:
        if subject.isspace():
            continue
        for word, flag in pseg.cut(subject):
            # The original condition mixed ``and``/``or`` without parentheses,
            # so stop words were never applied to English tokens.
            if (flag in KEPT_FLAGS
                    and word not in stop_words
                    and word not in MARKUP_WORDS):
                newslist.append(word)

    # Groups are applied one after another so that a later group can remap
    # the result of an earlier one, as in the original nested loops.
    for line in _read_lines(UNION_WORDS_FILE):
        canonical, *aliases = (part.strip() for part in line.split("*"))
        alias_set = {alias for alias in aliases if alias}
        if not alias_set:
            continue
        newslist = [
            canonical if word in alias_set else word for word in newslist
        ]
    return newslist


def countWords(newslist, top_n=100):
    """Print the *top_n* most frequent words of *newslist* as ``word:count``.

    Fewer lines are printed when there are fewer distinct words.
    """
    for word, count in Counter(newslist).most_common(top_n):
        print("{}:{}".format(word, count))


def drawPlant(newslist):
    """Render *newslist* as a word cloud, save ``wordcloud.jpg`` and show it.

    Nothing is drawn when *newslist* is empty.
    """
    if not newslist:
        print("没有可用的词语,跳过词云绘制")
        return
    # Words are joined with spaces so WordCloud sees them as separate tokens.
    content = " ".join(newslist)
    wordcloud = WordCloud(
        font_path='simhei.ttf',
        background_color="white",
        width=1300,
        height=620,
        max_words=200,
    ).generate(content)
    # Display the generated image:
    plt.imshow(wordcloud)
    plt.axis("off")
    wordcloud.to_file('wordcloud.jpg')
    plt.show()


def main():
    """Log in to WeChat and run the whole analysis pipeline."""
    # Original author's note: pass hotReload=True so that later runs do not
    # need to scan the QR code again.
    itchat.auto_login()
    flists = getFriends()
    fdf = saveCSV(flists)
    df_sex, df_province, df_signature = anysys(fdf)
    draw_chart(df_sex, "性别")
    draw_chart(df_province, "省份")
    wordList = segmentWords(getSignList(df_signature))
    countWords(wordList)
    drawPlant(wordList)


if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。