对 Python 读写文件去重、正则表达式(re)、set 的使用详解
下面的脚本逐行读取以制表符分隔的文本文件,根据第 3 列的站点域名选用对应的正则表达式提取用户 ID,再借助 set 去重后逐行写入目标文件。代码如下所示:
# -*- coding: utf-8 -*-
"""Extract de-duplicated user ids from a tab-separated log file.

Each input line is split on tabs.  Column 2 (0-based) holds a site domain;
depending on the site, a uid is pulled out of column 3 or column 4 with a
regular expression.  Every uid is written to the target file at most once,
in order of first appearance.
"""
from datetime import datetime
import re

# Default locations used when Main() is called without arguments.
SOURCE_PATH = '/data/u_lx_data/fudan/muying/muying_11yue_all.txt'
TARGET_PATH = '/data/u_lx_data/fudan/muying/python/uid_regular_get.txt'

# Compiled once instead of on every input line.  The leading ".*" is greedy,
# so each pattern captures the LAST occurrence of the key within the field.
_BABYTREE_UID = re.compile(r'.*NL=u%02(u\d+)', re.I)
_YOUZIBUY_UID = re.compile(r'.*myuid=(\d+)', re.I)
_PLAIN_UID = re.compile(r'.*uid=(\d+)', re.I)
_MAMA_FRIEND_UID = re.compile(r'.*friend_uid=(\d+)', re.I)
_CI123_UID = re.compile(r'.*ci123js=([a-zA-Z]+\d+)', re.I)


def _field(fields, index):
    """Return fields[index], or '' when the record has too few columns."""
    return fields[index] if index < len(fields) else ''


def _capture(pattern, text):
    """Return [uid] for the first match of pattern in text, else []."""
    match = pattern.search(text)
    return [match.group(1)] if match else []


def _extract_uids(fields):
    """Map one split record to (progress_message, uids).

    Returns (None, []) when column 2 is not one of the known sites.  A
    record of a known site always yields its progress message, even when no
    uid could be extracted from it.
    """
    site = _field(fields, 2)
    col3 = _field(fields, 3)
    col4 = _field(fields, 4)

    # Babytree
    if site == 'babytree.com':
        return "宝宝树已完成", _capture(_BABYTREE_UID, col3)

    # Youbaobao: only the notify endpoint carries the uid.
    if site == 'youzibuy.com':
        uids = []
        if "yunqi.youzibuy.com/tae_top_notify" in col4:
            uids = _capture(_YOUZIBUY_UID, col4)
        return "柚宝宝已完成", uids

    # Mamabang
    if site == 'mmbang.com':
        return "妈妈帮已完成", _capture(_PLAIN_UID, col3)

    # Mama.cn: the two checks are independent, so a single record may
    # contribute a uid from each of them.
    if site == 'mama.cn':
        uids = []
        if "mapi.mama.cn/feed/users/show" in col4:
            uids += _capture(_MAMA_FRIEND_UID, col4)
        if "mamaquan/mmq_thread" in col4:
            uids += _capture(_PLAIN_UID, col4)
        return "妈妈网已完成", uids

    # ci123 (parenting site)
    if site == 'ci123.com':
        return "育儿网已完成", _capture(_CI123_UID, col3)

    return None, []


def Main(source_path=SOURCE_PATH, target_path=TARGET_PATH):
    """Read source_path, write each distinct uid to target_path.

    Args:
        source_path: tab-separated input file; defaults to SOURCE_PATH.
        target_path: output file, truncated on open, one uid per line;
            defaults to TARGET_PATH.

    Records with fewer columns than a site rule needs are treated as having
    empty columns (so they yield no uid) instead of raising IndexError and
    aborting the whole run.

    Raises:
        IOError/OSError: if either file cannot be opened.
    """
    seen = set()  # uids already written, used for de-duplication

    print("开始。。。。。")
    # The published format string had lost the space between date and time.
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # The target is opened first, as in the original, so it is created or
    # truncated even when the source turns out to be unreadable.
    with open(target_path, 'w') as f_write, open(source_path, 'r') as f_source:
        for raw_line in f_source:
            fields = raw_line.strip().split("\t")
            message, uids = _extract_uids(fields)

            for uid in uids:
                if uid not in seen:
                    seen.add(uid)
                    f_write.write(uid + "\n")

            # NOTE(review): the original nesting of these progress prints
            # was lost with the indentation; they are emitted once per
            # record of a recognised site here. Confirm the intended level.
            if message is not None:
                print(message)

    print("完成。。。。。")
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))


if __name__ == "__main__":
    Main()
以上这篇对python读写文件去重、RE、set的使用详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。