python查找重复图片并删除(图片去重)
本文实例为大家分享了python查找重复图片并删除的具体代码,供大家参考,具体内容如下
和网络爬虫配套的,也可单独使用。从网上爬下来的图片重复太多,代码支持识别尺寸不同但内容一致的图片,并把重复的图片删除,只保留第一份。
#-*-coding:utf-8-*-
import os
import sys
import types

import cv2
import numpy as np
def _ahash_bits(image_path, side=8):
    """Return the average-hash bits of one image, or None if it is unreadable.

    The image is shrunk to ``side`` x ``side`` pixels, converted to
    grayscale, and every pixel is compared with the mean gray level.  The
    result is a flat boolean NumPy array of ``side * side`` elements where
    True means "at least as bright as the mean".

    ``cv2.imread`` reports an undecodable file (non-image, directory,
    corrupt data) by returning None instead of raising, so that case is
    passed through to the caller as None.
    """
    image = cv2.imread(image_path)
    if image is None:
        return None
    small = cv2.resize(image, (side, side))
    gray = cv2.cvtColor(small, cv2.COLOR_BGR2GRAY)
    # Build the bits as a separate boolean array rather than overwriting the
    # pixel buffer in place.
    return gray.flatten() >= cv2.mean(gray)[0]


def cmpandremove2(path, max_diff=5):
    """Delete near-duplicate images inside the directory *path*.

    Every readable image gets an average hash computed once.  Entries are
    then visited in sorted file-name order; each surviving image is compared
    with every later surviving image, and a later image whose hash differs
    in at most *max_diff* bits is deleted from disk.  The first file (in
    sorted order) of each group of look-alikes is therefore the one kept.

    Args:
        path: Directory to scan (not recursive).  Entries that OpenCV cannot
            decode are ignored and never deleted.
        max_diff: Largest number of differing hash bits (out of 64) still
            treated as "the same picture".  Defaults to 5, the threshold the
            original code hard-coded.

    Returns:
        None.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.remove``.
    """
    names = sorted(os.listdir(path))
    if not names:
        return

    # Pass 1: hash every decodable image exactly once.
    hashes = {}
    for name in names:
        full_path = os.path.join(path, name)
        bits = _ahash_bits(full_path)
        if bits is None:
            continue
        # Single-argument print() produces the same text on Python 2 and 3.
        print("get %s" % full_path)
        hashes[name] = bits

    # Pass 2: greedy sweep in sorted order.  A removed name is only ever a
    # later one, so tracking removals in a set replaces the original's
    # repeated re-sorting of the key list.
    ordered = sorted(hashes)
    removed = set()
    for position, keeper in enumerate(ordered):
        if keeper in removed:
            continue
        print(keeper)
        keeper_bits = hashes[keeper]
        for candidate in ordered[position + 1:]:
            if candidate in removed:
                continue
            # Hamming distance between the two 64-bit fingerprints.
            distance = np.count_nonzero(keeper_bits != hashes[candidate])
            if distance <= max_diff:
                target = os.path.join(path, candidate)
                print("remove %s" % target)
                os.remove(target)
                removed.add(candidate)
def _image_fingerprint(image_path, side=8):
    """Return a flat boolean average-hash array for an image, or None.

    The image is resized to ``side`` x ``side``, converted to grayscale and
    thresholded at its own mean gray level.  None is returned when
    ``cv2.imread`` cannot decode the file (it signals that by returning None).
    """
    image = cv2.imread(image_path)
    if image is None:
        return None
    gray = cv2.cvtColor(cv2.resize(image, (side, side)), cv2.COLOR_BGR2GRAY)
    return gray.flatten() >= cv2.mean(gray)[0]


def cmpandremove(path, max_diff=5):
    """Delete near-duplicate images in *path* and report whether any were found.

    Entries are processed in sorted file-name order.  Each surviving image
    is compared with the images after it; any whose average hash differs in
    at most *max_diff* bits is deleted, so the first file of every group of
    look-alikes is kept.

    Differences from the original implementation:

    * File paths are built with ``os.path.join``, so *path* works with or
      without a trailing separator (plain ``path + name`` produced wrong
      paths for e.g. ``"pics"``).
    * Each image is decoded and hashed once up front instead of being
      re-read for every comparison.
    * Only later entries are compared.  The similarity test is symmetric,
      so an earlier surviving file can never match the current one (the
      current one would already have been deleted).

    Args:
        path: Directory to scan (not recursive).  Entries OpenCV cannot
            decode are skipped and never deleted.
        max_diff: Maximum number of differing bits (out of 64) for two
            images to count as duplicates.  Defaults to 5.

    Returns:
        0 if the directory is empty or no duplicate was found, 1 if at least
        one duplicate was detected.

    Raises:
        OSError: Propagated from ``os.listdir`` / ``os.remove``.
    """
    names = sorted(os.listdir(path))
    if not names:
        return 0

    # Hash every decodable entry once; undecodable ones are simply absent.
    fingerprints = {}
    for name in names:
        bits = _image_fingerprint(os.path.join(path, name))
        if bits is not None:
            fingerprints[name] = bits

    found = 0
    removed = set()
    for position, name in enumerate(names):
        if name in removed:
            continue
        # The original printed every visited path, readable or not.
        print(os.path.join(path, name))
        base_bits = fingerprints.get(name)
        if base_bits is None:
            continue

        duplicates = []
        for other in names[position + 1:]:
            if other in removed or other not in fingerprints:
                continue
            if np.count_nonzero(base_bits != fingerprints[other]) <= max_diff:
                print('thesame')
                duplicates.append(other)
                found = 1

        # Delete after the scan so the log order matches the original:
        # all 'thesame' lines first, then the 'remove' lines.
        for other in duplicates:
            target = os.path.join(path, other)
            print("remove %s" % target)
            os.remove(target)
            removed.add(other)

    return found
# Directory whose images are deduplicated. It is a relative path, so it is
# resolved against the current working directory.
path='pics/'
# Runs immediately at module level (on execution or import); files judged to be
# duplicates are deleted from the directory and the return value is discarded.
cmpandremove(path)
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持毛票票。
声明:本文内容来源于网络,版权归原作者所有,内容由互联网用户自发贡献自行上传,本网站不拥有所有权,未作人工编辑处理,也不承担相关法律责任。如果您发现有涉嫌版权的内容,欢迎发送邮件至:czq8825#qq.com(发邮件时,请将#更换为@)进行举报,并提供相关证据,一经查实,本站将立刻删除涉嫌侵权内容。