Python中使用md5sum检查目录中相同文件代码分享
"""This module contains code from Think Python by Allen B. Downey
http://thinkpython.com

Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import difflib
import hashlib
import os
def walk(dirname):
    """Finds the names of all files in dirname and its subdirectories.

    dirname: string name of directory

    Returns: list of paths to regular files; each path starts with dirname
    """
    names = []
    for name in os.listdir(dirname):
        path = os.path.join(dirname, name)
        if os.path.isfile(path):
            names.append(path)
        elif os.path.isdir(path):
            names.extend(walk(path))
        # Entries that are neither a file nor a directory (dangling
        # symlinks, sockets, FIFOs, ...) are skipped; recursing into them
        # would make os.listdir raise.
    return names
defcompute_checksum(filename): """ComputestheMD5checksumofthecontentsofafile.
filename:string """ cmd='md5sum'+filename returnpipe(cmd)
def check_diff(name1, name2):
    """Computes the difference between the contents of two files.

    The comparison is done in-process instead of through a 'diff' shell
    command: file names need no shell quoting, and a difference is
    reported through the return value (the diff program exits with
    status 1 when files differ, which a status-asserting runner treats as
    a failure).

    name1, name2: string filenames

    Returns (res, stat): res is '' and stat is None when the files have
    identical contents; otherwise res is a non-empty description of the
    difference (a unified diff for UTF-8 text, a one-line notice for
    anything else) and stat is 1.
    Raises IOError/OSError if either file cannot be read.
    """
    with open(name1, 'rb') as fp:
        data1 = fp.read()
    with open(name2, 'rb') as fp:
        data2 = fp.read()

    if data1 == data2:
        return '', None

    try:
        # keepends=True so that line-ending differences show up too.
        lines1 = data1.decode('utf-8').splitlines(True)
        lines2 = data2.decode('utf-8').splitlines(True)
    except UnicodeDecodeError:
        return 'Binary files %s and %s differ\n' % (name1, name2), 1

    diff = difflib.unified_diff(lines1, lines2, fromfile=name1, tofile=name2)
    return ''.join(diff), 1
def pipe(cmd):
    """Runs a command in a subprocess.

    cmd: string Unix command (passed to the shell as-is, so the caller is
    responsible for quoting any file names it contains)

    Returns (res, stat), the output of the subprocess and the exit status.
    stat is None when the command succeeded and non-None otherwise; a
    failing command is reported through stat rather than by raising, so
    callers can handle commands that legitimately exit non-zero.
    """
    fp = os.popen(cmd)
    try:
        res = fp.read()
    finally:
        # Always close the stream, even if read() fails; close() returns
        # the exit status.
        stat = fp.close()
    return res, stat
def compute_checksums(dirname, suffix):
    """Computes checksums for all files with the given suffix.

    dirname: string name of directory to search
    suffix: string suffix to match

    Returns: map from checksum to list of files with that checksum
    Raises ValueError if no checksum could be read for a matching file.
    """
    d = {}
    for name in walk(dirname):
        if not name.endswith(suffix):
            continue

        res, _ = compute_checksum(name)
        # The checksum is the first whitespace-delimited field. Splitting
        # only once keeps file names that contain spaces from breaking the
        # parse.
        fields = res.split(None, 1)
        if not fields:
            raise ValueError('no checksum output for %s' % name)

        # md5sum prefixes the line with a backslash when the file name
        # needed escaping; drop it so equal contents share one key.
        checksum = fields[0].lstrip('\\')
        d.setdefault(checksum, []).append(name)
    return d
def check_pairs(names):
    """Checks whether any in a list of files differs from the others.

    names: list of string filenames

    Returns: True if all the files have identical contents (also for an
    empty or single-element list), False otherwise.
    """
    names = list(names)
    if not names:
        return True

    # "Has the same contents" is transitive, so comparing every file
    # against the first one gives the same answer as comparing all pairs.
    first = names[0]
    for other in names[1:]:
        if other == first:
            continue
        res, stat = check_diff(first, other)
        # Any diff output means the files differ; a non-None status means
        # the comparison reported a difference or failed, so the files
        # cannot be declared identical either way.
        if res or stat is not None:
            return False
    return True
def print_duplicates(d):
    """Checks for duplicate files.

    Reports any files with the same checksum and checks whether they
    are, in fact, identical.

    d: map from checksum to list of files with that checksum
    """
    for names in d.values():
        if len(names) <= 1:
            continue

        # print(...) with one parenthesized argument behaves the same on
        # Python 2 and Python 3.
        print('The following files have the same checksum:')
        for name in names:
            print(name)

        if check_pairs(names):
            print('And they are identical.')
# When run as a script, report duplicate Python source files found under
# the current directory.
if __name__ == '__main__':
    d = compute_checksums(dirname='.', suffix='.py')
    print_duplicates(d)