Python实现的下载8000首儿歌的代码分享
下载 8000 首儿歌的 Python 代码:
#-*-coding:UTF-8-*-
import logging
import os
import re
import sys
import urllib

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen

from lxml import etree
from pyquery import PyQuery as py
def format(filename):
    """Return *filename* with characters unsafe in file names replaced by "_".

    The characters replaced are the ones Windows forbids in a file name
    (``\\ / : * ? " < > |``) plus the single quote.  Every other character,
    including non-ASCII text, is kept as is.

    NOTE: the name shadows the ``format`` builtin; it is kept because callers
    in this file use it.

    :param filename: proposed file name (not a path; separators are replaced).
    :return: the sanitized file name, same length as the input.
    """
    unsafe_chars = '\\/:*?"<>|\''
    return "".join("_" if ch in unsafe_chars else ch for ch in filename)
def download_mp3(mp3_url, filename, dir):
    """Download *mp3_url* into the file *filename* inside directory *dir*.

    Best effort: a failure is logged and swallowed so that a batch run can go
    on with the next song.  Progress is reported at DEBUG level on the
    "simple" logger (the one the script entry point configures).

    :param mp3_url: URL of the resource to fetch.
    :param filename: name of the file to create inside *dir*.
    :param dir: existing directory that receives the file.
    :return: None.
    """
    log = logging.getLogger("simple")
    target = os.path.join(dir, filename)
    if os.path.exists(target):
        log.debug(target + " is existed.")
        return
    try:
        # Fetch everything before creating the file: a failed download must
        # not leave an empty file behind, because an existing file is treated
        # as "already downloaded" on the next run.
        response = urlopen(mp3_url)
        try:
            data = response.read()
        finally:
            response.close()
        with open(target, 'wb') as out:
            out.write(data)
    except Exception:
        # Drop a partially written file, for the same reason as above.
        if os.path.exists(target):
            try:
                os.remove(target)
            except OSError:
                pass
        log.debug(filename + ' is not downloaded.')
        return
    log.debug(filename + ' is downloaded.')
# Group 1: text of the <h1> title.  Group 2: first double-quoted attribute
# value of the first <a> that follows "downloadboxlist".
_MP3_BOX_RE = re.compile(
    r".*<h1>(.*)</h1>.*downloadboxlist.*?<a.*?\"(.*?)\"",
    re.UNICODE | re.S,
)


def _extract_title_and_link(html):
    """Return ``(title, link)`` parsed from the download-box markup, or None."""
    found = _MP3_BOX_RE.search(html)
    if found is None:
        return None
    return found.group(1).strip(), found.group(2)


def download_all_mp3(start, end, dir, logger):
    """Download the songs whose page ids are in ``range(start, end)``.

    For each id the page ``http://www.youban.com/mp3-d<id>.html`` is loaded,
    the title and MP3 link are read from its ``.mp3downloadbox`` element and
    the file is saved into *dir* as ``<id>_<title>.mp3`` (sanitized with
    ``format``).  A page that is missing, unparsable or raises is logged and
    skipped; the loop always goes on with the next id.

    :param start: first page id (inclusive).
    :param end: last page id (exclusive).
    :param dir: directory that receives the MP3 files.
    :param logger: logger used for DEBUG progress messages.
    :return: None.
    """
    text_type = type(u"")  # ``unicode`` on Python 2, ``str`` on Python 3
    for x in range(start, end):
        url = "http://www.youban.com/mp3-d" + str(x) + ".html"
        try:
            logger.debug(str(x) + ":" + url)
            doc = py(url=url)
            box = doc('.mp3downloadbox')
            # An empty selection is falsy; comparing it with None or '' never
            # matches.  Skip this id only -- later ids may still exist.
            if not box:
                logger.debug(url + " is not existed.")
                continue
            parsed = _extract_title_and_link(text_type(box))
            if parsed is None:
                continue
            title, link = parsed
            if link == '' or title == '':
                logger.debug(url + " is not useful")
                continue
            title2 = format(str(x) + "_" + title + ".mp3")
            logger.debug(str(x) + ":" + link)
            download_mp3(link, title2, dir)
        except Exception as err:
            # Best effort per page; Ctrl-C still stops the run because
            # KeyboardInterrupt is not an Exception subclass.
            logger.debug("%s met exception: %r", url, err)
DEFAULT_DIR_ROOT = "e:\\song"
DEFAULT_RANGE = (1, 8000)


def parse_cli_args(argv):
    """Return ``(start, end, dir_root)`` parsed from the command line *argv*.

    Usage: ``script [start end [dir_root]]``.

    * ``start`` / ``end`` override the default range 1..8000 only when both
      are given and both are non-negative.
    * ``dir_root`` falls back to ``e:\\song`` when missing or empty.

    :param argv: full argument list, program name first (like ``sys.argv``).
    :raises ValueError: if ``start`` or ``end`` is not an integer.
    """
    args = list(argv[1:])
    start, end = DEFAULT_RANGE
    if len(args) >= 2:
        first, last = int(args[0]), int(args[1])
        if first >= 0 and last >= 0:
            start, end = first, last
    dir_root = DEFAULT_DIR_ROOT
    if len(args) >= 3 and args[2] != '':
        dir_root = args[2]
    return start, end, dir_root


def _build_logger(log_path):
    """Return the "simple" logger, echoing to the console and to *log_path*."""
    log = logging.getLogger("simple")
    log.setLevel(logging.DEBUG)
    formatter = logging.Formatter("%(message)s")
    handlers = (
        logging.StreamHandler(),
        # Song titles may be non-ASCII; do not depend on the locale encoding.
        logging.FileHandler(log_path, encoding="utf-8"),
    )
    for handler in handlers:
        handler.setFormatter(formatter)
        log.addHandler(handler)
    return log


if __name__ == "__main__":
    try:
        start, end, dir_root = parse_cli_args(sys.argv)
    except ValueError:
        sys.exit("usage: %s [start end [dir_root]] "
                 "(start and end must be integers)" % sys.argv[0])
    print("Download from %s to %s.\n" % (start, end))
    out_dir = os.path.join(dir_root, "%s-%s" % (start, end))
    if not os.path.isdir(out_dir):
        # makedirs also creates a missing dir_root.
        os.makedirs(out_dir)
    print("Download to " + out_dir + ".\n")
    # Bound at module level, as before, for any code that refers to the
    # global name ``logger``.
    logger = _build_logger(os.path.join(out_dir, "download.log"))
    download_all_mp3(start, end, out_dir, logger)