Python 读写文件去重:正则表达式(re)与 set 的使用详解
下面的示例脚本逐行读取以制表符分隔的源文件,根据站点域名用正则表达式(re)提取用户 ID,再借助 set 去重后写入目标文件,代码如下所示:
#-*-coding:utf-8-*-
import re
from datetime import datetime
# Extraction rules keyed by the site domain found in column 2 of each record.
# Each entry is (progress message, extractors); an extractor is
# (column index, URL fragment the column must contain or None, compiled pattern).
# The leading greedy ".*" makes each pattern capture the LAST occurrence of the
# key in the column, which is what the original re.findall(...)[0] returned.
_SITE_RULES = {
    # Babytree
    'babytree.com': ("宝宝树已完成", (
        (3, None, re.compile(r'.*NL=u%02(u\d+)', re.I)),
    )),
    # Youbaobao
    'youzibuy.com': ("柚宝宝已完成", (
        (4, "yunqi.youzibuy.com/tae_top_notify",
         re.compile(r'.*myuid=(\d+)', re.I)),
    )),
    # Mamabang
    'mmbang.com': ("妈妈帮已完成", (
        (3, None, re.compile(r'.*uid=(\d+)', re.I)),
    )),
    # Mama.cn -- two independent URL shapes are checked for every record.
    # NOTE(review): the original indentation was lost; the second check is
    # treated as a sibling of the first, not nested inside it -- confirm.
    'mama.cn': ("妈妈网已完成", (
        (4, "mapi.mama.cn/feed/users/show",
         re.compile(r'.*friend_uid=(\d+)', re.I)),
        (4, "mamaquan/mmq_thread", re.compile(r'.*uid=(\d+)', re.I)),
    )),
    # ci123 (parenting site)
    'ci123.com': ("育儿网已完成", (
        (3, None, re.compile(r'.*ci123js=([a-zA-Z]+\d+)', re.I)),
    )),
}


def _extract_uids(fields, extractors):
    """Yield every user id that the given extractors find in one split record."""
    for index, fragment, pattern in extractors:
        # Short records simply have nothing to extract (the original raised
        # IndexError here and aborted the whole run).
        if index >= len(fields):
            continue
        text = fields[index]
        if fragment is not None and fragment not in text:
            continue
        match = pattern.search(text)
        if match is not None:
            yield match.group(1)


def Main(source_path: str = '/data/u_lx_data/fudan/muying/muying_11yue_all.txt',
         target_path: str = '/data/u_lx_data/fudan/muying/python/uid_regular_get.txt') -> None:
    """Extract unique user ids from a tab-separated log and write one per line.

    Each input line is stripped and split on tabs. Column 2 selects the site
    rule; the rule's patterns are applied to column 3 or 4, and every id not
    seen before is appended to the target file. Start/end timestamps and
    progress messages are printed to stdout.

    Args:
        source_path: Tab-separated input file to read.
        target_path: Output file; it is truncated and rewritten.

    Raises:
        OSError: If either file cannot be opened.
    """
    seen = set()  # ids already written, shared across all sites
    print("开始。。。。。")
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    # The target is opened first, as in the original, so it is truncated even
    # when the source turns out to be unreadable.
    with open(target_path, 'w') as f_write, open(source_path, 'r') as f_source:
        for raw_line in f_source:
            fields = raw_line.strip().split("\t")
            if len(fields) < 3:
                continue  # blank or malformed record: no site column
            rule = _SITE_RULES.get(fields[2])
            if rule is None:
                continue
            done_message, extractors = rule
            for uid in _extract_uids(fields, extractors):
                if uid not in seen:
                    f_write.write(uid + "\n")
                    seen.add(uid)
            # NOTE(review): the original indentation was lost; the message is
            # kept at site-branch level (the shallowest placement that leaves
            # the if/elif chain valid), so it prints once per matching record.
            print(done_message)
    print("完成。。。。。")
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
# Run the extraction only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Main()
以上这篇对python读写文件去重、RE、set的使用详解就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持毛票票。