Python批量转换文件编码格式
自己写的方法，适用于 Linux：
#!/usr/bin/python
#coding=utf-8
import sys
import os, os.path
import dircache
import commands
def add(x, y):
    """Return the product of *x* and *y*.

    NOTE(review): despite its name this helper multiplies rather than adds.
    It is not called anywhere in the visible script, so the original
    behaviour is kept as-is; confirm the intent before relying on it.
    """
    return x * y
def _command_output(args):
    """Run *args* (an argv list, no shell) and return its stdout+stderr as text."""
    # Imported locally so this block needs no new file-level import.
    import subprocess

    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT)
    raw = proc.communicate()[0]
    if not isinstance(raw, str):
        # Python 3 returns bytes; the text is only inspected and printed,
        # so decode leniently instead of failing on odd bytes.
        raw = raw.decode("utf-8", "replace")
    return raw.rstrip("\n")


def trans(dirname):
    """Recursively re-encode GBK text files under *dirname* to UTF-8, in place.

    Every entry of *dirname* is visited in sorted order. Sub-directories are
    descended into. A regular file is converted when its name contains neither
    ``.htm`` nor ``.xml`` and ``file -b -i`` describes it as ``text/...`` with
    an ``iso-8859`` charset; the script treats that report as "this is GBK".

    The external ``file`` and ``iconv`` programs are invoked with argument
    lists rather than a shell string, so file names containing spaces or
    shell metacharacters are handled correctly.

    Args:
        dirname: Path of the directory to process.

    Raises:
        OSError: If *dirname* cannot be listed, or if ``file``/``iconv``
            cannot be started.
    """
    for name in sorted(os.listdir(dirname)):
        path = os.path.join(dirname, name)
        if os.path.isdir(path):
            trans(path)
            continue
        if '.htm' in name or '.xml' in name:
            continue
        # -b omits the file name from the output, so a path that happens to
        # contain "text/" or "iso-8859" cannot cause a false match.
        mime = _command_output(['file', '-b', '-i', path])
        if 'text/' in mime and 'iso-8859' in mime:
            print("gbk " + mime + " > " + path)
            # -c drops characters that cannot be converted.
            # NOTE(review): input and output are the same file, as in the
            # original command; confirm the local iconv supports in-place
            # output for large files.
            _command_output(['iconv', '-f', 'gbk', '-t', 'utf-8', '-c',
                             '-o', path, path])
# Script entry point: convert everything below the current working directory.
trans(os.getcwd())
py2.6以下版本可用代码
import os
import sys


def convert(filename, in_enc="GBK", out_enc="UTF8"):
    """Re-encode *filename* in place from *in_enc* to *out_enc*.

    Prints ``convert <filename> done`` on success and
    ``convert <filename> error`` when the file cannot be read, decoded,
    encoded or written. The file is only opened for writing after the whole
    content has been converted in memory, so a failed conversion leaves the
    original file untouched.

    Only syntax that is valid on old Python 2 releases is used (no ``with``,
    no ``except ... as``); the same code also runs on Python 3.
    """
    sys.stdout.write("convert " + filename + " ")
    try:
        # Binary mode: the bytes are decoded/encoded explicitly below and
        # line endings must not be translated.
        src = open(filename, 'rb')
        try:
            content = src.read()
        finally:
            src.close()
        new_content = content.decode(in_enc).encode(out_enc)
        dst = open(filename, 'wb')
        try:
            dst.write(new_content)
        finally:
            dst.close()
    except (IOError, OSError, UnicodeError, LookupError):
        # Deliberately best-effort: report and move on to the next file.
        print("error")
    else:
        print("done")


def explore(dir):
    """Call convert() on every file found below directory *dir*."""
    for root, dirs, files in os.walk(dir):
        for name in files:
            convert(os.path.join(root, name))


def main():
    """Convert each file, or each directory tree, named on the command line."""
    for path in sys.argv[1:]:
        if os.path.isfile(path):
            convert(path)
        elif os.path.isdir(path):
            explore(path)


if __name__ == "__main__":
    main()
支持py3.1的版本
import os
import sys
import codecs
#该程序用于将目录下的文件从指定格式转换到指定格式,默认的是GBK转到utf-8
def convert(file, in_enc="GBK", out_enc="UTF-8"):
    """Re-encode *file* in place from *in_enc* to *out_enc*.

    The content is read and converted completely in memory before the file
    is reopened for writing, so a decoding or encoding failure leaves the
    original file untouched. Both handles are closed deterministically.

    Args:
        file: Path of the file to convert.
        in_enc: Encoding the file is currently stored in.
        out_enc: Encoding to rewrite the file in.

    I/O errors and encoding errors are reported on stdout instead of being
    raised, so one bad file does not abort a whole batch run.
    """
    print("convert " + file)
    try:
        # Binary I/O plus explicit decode/encode: no newline translation.
        with open(file, "rb") as src:
            raw = src.read()
        new_content = raw.decode(in_enc).encode(out_enc)
        with open(file, "wb") as dst:
            dst.write(new_content)
    except IOError as err:
        print("I/O error: {0}".format(err))
    except UnicodeError as err:
        # The file is not valid *in_enc*, or cannot be represented in *out_enc*.
        print("encoding error: {0}".format(err))
def explore(dir):
    """Call convert() on every file found below directory *dir*.

    The tree is walked with os.walk(); every file is passed to convert()
    regardless of its name or type.
    """
    for root, dirs, files in os.walk(dir):
        for name in files:
            path = os.path.join(root, name)
            convert(path)
def main():
    """Process every path given on the command line.

    A regular file is converted directly; a directory is converted
    recursively via explore(). Any other path is silently ignored.
    """
    for path in sys.argv[1:]:
        if os.path.isfile(path):
            convert(path)
        elif os.path.isdir(path):
            explore(path)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
以上所述就是本文的全部内容了,希望大家能够喜欢。