python查找指定具有相同内容文件的方法
本文实例讲述了python查找指定具有相同内容文件的方法。分享给大家供大家参考。具体如下:
Python代码用于在指定目录中查找内容相同的文件,可以同时指定多个目录
调用方式:python doublesdetector.py c:\;d:\;e:\ > doubles.txt
# Hello, this script is written in Python - http://www.python.org
# doublesdetector.py 1.0p
import hashlib
import os
import os.path
import sha
import string
import sys
# Usage text printed when the script is started without arguments.
# Each %s placeholder is filled with the script name (sys.argv[0]).
# Backslashes are doubled so the text contains single literal backslashes
# without relying on unrecognised escape sequences.
message = """
doublesdetector.py 1.0p
This script will search for files that are identical
(whatever their name/date/time).
  Syntax : python %s <directories>
      where <directories> is a directory or a list of directories
      separated by a semicolon (;)
Examples : python %s c:\\windows
           python %s c:\\;d:\\;e:\\ > doubles.txt
           python %s c:\\program files > doubles.txt
This script is public domain. Feel free to reuse and tweak it.
The author of this script Sebastien SAUVAGE <sebsauvage at sebsauvage dot net>
http://sebsauvage.net/python/
""" % ((sys.argv[0],) * 4)
def fileSHA(filepath):
    r"""Compute the SHA-1 (Secure Hash Algorithm) digest of a file.

    Input : filepath : full path and name of the file
                       (e.g. 'c:\windows\emm386.exe')
    Output: string : the hexadecimal representation of the SHA-1 of the
                     file contents.
                     Returns '0' if the file could not be read
                     (file not found, no read rights...).
    """
    digest = hashlib.sha1()
    try:
        # The context manager closes the handle even when a read fails.
        with open(filepath, 'rb') as stream:
            # Hash in 64 KiB chunks so large files are never held in memory.
            chunk = stream.read(65536)
            while chunk:
                digest.update(chunk)
                chunk = stream.read(65536)
    except (IOError, OSError, ValueError):
        # Best effort: an unreadable file (or an unusable path) is reported
        # with the sentinel '0' instead of aborting the caller's scan.
        return '0'
    return digest.hexdigest()
def detectDoubles(directories):
    """Find files with identical content below one or more directories.

    Input : directories : a directory, or several directories separated
                          by a semicolon (;). Empty entries are ignored.
    Output: dictionary : maps the value returned by fileSHA() to the list
                         of file paths sharing it; only lists with at
                         least two paths are kept.

    Progress information is written to stderr.
    """
    fileslist = {}
    # Group all files by size (in the fileslist dictionary)
    for directory in directories.split(';'):
        directory = directory.strip()
        if not directory:
            # A blank entry (e.g. a trailing ';') would otherwise resolve to
            # the current working directory through os.path.abspath('').
            continue
        directory = os.path.abspath(directory)
        sys.stderr.write('Scanning directory ' + directory + ' ...')
        # os.walk yields every directory of the tree; callback() records
        # the regular files found in each of them.
        for dirpath, _dirnames, filenames in os.walk(directory):
            callback(fileslist, dirpath, filenames)
        sys.stderr.write('\n')
    sys.stderr.write('Comparing files...')
    # Now compute the SHA of files that have the same size,
    # and group files by SHA (in the filessha dictionary)
    filessha = {}
    for listoffiles in fileslist.values():
        if len(listoffiles) < 2:
            # A file with a unique size cannot have a double.
            continue
        for filepath in listoffiles:
            sys.stderr.write('.')
            digest = fileSHA(filepath)
            filessha.setdefault(digest, []).append(filepath)
    # '0' is the marker fileSHA() returns for unreadable files.
    filessha.pop('0', None)
    # Keep only the digests shared by more than one file. A new dictionary
    # is built because deleting keys while iterating is an error.
    doubles = dict(
        (digest, paths) for digest, paths in filessha.items()
        if len(paths) > 1
    )
    sys.stderr.write('\n')
    return doubles
def callback(fileslist, directory, files):
    """Record the regular files of one directory, grouped by size.

    Input : fileslist : dictionary mapping a file size to the list of file
                        paths having that size; updated in place.
            directory : path of the directory being visited.
            files     : names of the entries found in that directory.
    Output: None. Entries that are not regular files are ignored.

    A progress dot is written to stderr for every directory visited.
    """
    sys.stderr.write('.')
    for fileName in files:
        filepath = os.path.join(directory, fileName)
        if not os.path.isfile(filepath):
            continue
        try:
            filesize = os.path.getsize(filepath)
        except OSError:
            # The file vanished or became unreadable after the isfile()
            # check; skip it rather than abort the whole scan.
            continue
        fileslist.setdefault(filesize, []).append(filepath)
# Script entry: with at least one argument, scan the given directories and
# print the groups of identical files; otherwise print the usage text.
if len(sys.argv) > 1:
    # Arguments are re-joined with a space so that an unquoted path
    # containing blanks (e.g. c:\program files) is handed over intact.
    doubles = detectDoubles(" ".join(sys.argv[1:]))
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('The following files are identical:')
    print('\n'.join(
        ["----\n%s" % '\n'.join(paths) for paths in doubles.values()]
    ))
    print('----')
else:
    print(message)
希望本文所述对大家的Python程序设计有所帮助。