#!/usr/bin/env python
# coding=utf-8
#------------------------------------------------------
# Name:      nginx log analysis script
# Purpose:   This script is only meant for analysing nginx access logs
# Version:   1.0
# Author:    LEO
# Created:   2013-05-07
# Modified:  2013-05-07
# Copyright: (c) LEO 2013
#------------------------------------------------------
import sys
import time
# This class handles the formatting of everything printed to the console.
class displayFormat(object):
    """Console formatting helpers for the log report.

    Every method that prints writes to standard output. The code is written
    so that it runs unchanged on Python 2 and Python 3 (``print`` is always
    called with a single parenthesised argument).
    """

    # Column layout shared by the header, separator, per-host and total rows:
    # IP, traffic, request count, request share, then seven status counters.
    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    def format_size(self, size):
        """Return ``size`` (a byte count) as a short human-readable string.

        The value is scaled to the largest unit that does not exceed it and
        truncated to a whole number, e.g. ``1536 -> '1K'``. Values below
        1024 (including zero and negatives) are reported in bytes.
        """
        KB = 1024
        MB = 1048576
        GB = 1073741824
        TB = 1099511627776
        # Ordered largest first so the first match is the right unit.
        for factor, suffix in ((TB, 'T'), (GB, 'G'), (MB, 'M'), (KB, 'K')):
            if size >= factor:
                # Floor division keeps the whole-number result on Python 3,
                # where ``/`` would produce a float.
                return str(size // factor) + suffix
        return str(size) + 'B'

    def transverse_line(self):
        """Print a dashed separator row matching the column layout."""
        # Each run of dashes is exactly as wide as its column so the
        # separator lines up with the header and data rows.
        print(self.formatstring % ('-' * 15, '-' * 10, '-' * 12, '-' * 8,
                                   '-' * 10, '-' * 10, '-' * 10, '-' * 10,
                                   '-' * 10, '-' * 10, '-' * 10))

    def head(self):
        """Print the column titles of the report table."""
        print(self.formatstring % ('IP', 'Traffic', 'Times', 'Times%',
                                   '200', '404', '500', '403',
                                   '302', '304', '503'))

    def error_print(self):
        """Print the usage message and terminate with exit status 1.

        Raises:
            SystemExit: always, via ``sys.exit(1)``.
        """
        print('')
        print('Usage: ' + sys.argv[0] + ' NginxLogFilePath [Number]')
        print('')
        sys.exit(1)

    def execut_time(self):
        """Print the processor time reported by the ``time`` module."""
        # time.clock() was removed in Python 3.8; prefer process_time() and
        # only fall back to clock() on interpreters that lack it.
        clock = getattr(time, 'process_time', None) or time.clock
        print('')
        print("Script Execution Time: %.3f second" % clock())
        print('')
# This class builds the dictionary of per-host statistics.
class hostInfo(object):
    """Counters collected for a single client host.

    ``host_info`` lists every counter name: the tracked HTTP status codes
    plus ``'times'`` (number of requests) and ``'size'`` (bytes sent).
    """

    host_info = ['200', '404', '500', '302', '304', '503', '403',
                 'times', 'size']

    def __init__(self, host):
        """Create the counters, all starting at zero.

        ``host`` is accepted for interface compatibility but is not stored;
        ``self.host`` holds the counter dictionary, not the address.
        """
        self.host = dict.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        * ``status_times_size == 'times'``: add 1 to the request count.
        * ``is_size`` true: add ``status_times_size`` (a byte count) to
          ``'size'``.
        * otherwise: treat ``status_times_size`` as a counter name (a status
          code) and add 1 to it.

        Raises:
            KeyError: if a counter name is not one of ``host_info``.
        """
        if status_times_size == 'times':
            self.host['times'] += 1
        elif is_size:
            self.host['size'] += status_times_size
        else:
            self.host[status_times_size] += 1

    def get_value(self, value):
        """Return the current value of the counter named ``value``."""
        return self.host[value]
# This class parses and analyses the log file.
class fileAnalysis(object):
    """Aggregate per-host request statistics from an access log."""

    # Status codes that are counted per host and summed into ``total_<code>``.
    # Driving the totals from this single tuple keeps every code in sync.
    _STATUS_CODES = ('200', '404', '500', '403', '302', '304', '503')

    def __init__(self):
        """Start with an empty report and every grand total at zero."""
        self.report_dict = {}
        self.total_request_times = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_500 = 0
        self.total_403 = 0
        self.total_302 = 0
        self.total_304 = 0
        self.total_503 = 0

    def split_eachline_todict(self, line):
        """Split one log line on whitespace and pick out the fields used.

        Fields 0, 8 and 9 are taken as remote host, status and bytes sent.
        NOTE(review): this presumably targets nginx's default "combined"
        log format with a three-token request line -- confirm against the
        configured ``log_format``.

        Raises:
            IndexError: if the line has fewer than ten whitespace-separated
                fields.
        """
        fields = line.split()
        return {
            'remote_host': fields[0],
            'status': fields[8],
            'bytes_sent': fields[9],
        }

    def generate_log_report(self, logfile):
        """Read ``logfile`` line by line and build the per-host report.

        ``logfile`` is any iterable of text lines. Lines that cannot be
        split into the expected fields are skipped. A non-numeric byte
        count is treated as 0. Returns ``self.report_dict``, which maps each
        remote host to its ``hostInfo`` object.
        """
        for line in logfile:
            try:
                line_dict = self.split_eachline_todict(line)
            except (ValueError, IndexError):
                # Malformed or truncated line: ignore it.
                continue
            host = line_dict['remote_host']
            status = line_dict['status']

            host_info_obj = self.report_dict.get(host)
            if host_info_obj is None:
                host_info_obj = hostInfo(host)
                self.report_dict[host] = host_info_obj

            host_info_obj.increment('times', False)
            # Only the status codes listed in hostInfo.host_info are counted.
            if status in host_info_obj.host_info:
                host_info_obj.increment(status, False)
            try:
                bytes_sent = int(line_dict['bytes_sent'])
            except ValueError:
                bytes_sent = 0
            host_info_obj.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Flatten the report, accumulate grand totals and sort the hosts.

        Each value of ``true_dict`` (an object exposing ``get_value``) is
        replaced *in place* by a plain dict of its counters, and the
        ``total_*`` attributes of this instance are increased accordingly.

        Returns:
            A list of ``(host, counters)`` tuples ordered by request count
            and then traffic, largest first.
        """
        # Iterate over a snapshot of the keys because values are reassigned.
        for host_key in list(true_dict):
            host_value = true_dict[host_key]
            times = host_value.get_value('times')
            size = host_value.get_value('size')
            self.total_request_times += times
            self.total_traffic += size

            summary = {'times': times, 'size': size}
            for code in self._STATUS_CODES:
                count = host_value.get_value(code)
                summary[code] = count
                total_name = 'total_' + code
                setattr(self, total_name, getattr(self, total_name) + count)
            true_dict[host_key] = summary

        return sorted(true_dict.items(),
                      key=lambda item: (item[1]['times'], item[1]['size']),
                      reverse=True)
class Main(object):
    """Command-line driver: parse arguments, analyse the log, print the report."""

    def main(self):
        """Analyse the log file named on the command line and print a report.

        Expected arguments: ``NginxLogFilePath [Number]``. When ``Number``
        is given and non-zero, only that many top hosts are listed. A wrong
        argument count, an unreadable file or a non-integer ``Number``
        prints a message followed by the usage text and exits with status 1
        (through ``displayFormat.error_print``).
        """
        display_format = displayFormat()
        arg_length = len(sys.argv)
        if arg_length not in (2, 3):
            display_format.error_print()
            return  # error_print() exits; kept as a guard.

        try:
            infile = open(sys.argv[1], 'r')
        except IOError as e:
            print('')
            print(e)
            display_format.error_print()
            return

        lines = 0
        # try/finally guarantees the file is closed even if parsing the
        # number or analysing the log raises.
        try:
            if arg_length == 3:
                try:
                    lines = int(sys.argv[2])
                except ValueError:
                    print('')
                    print("Please Enter A Valid Number !!")
                    display_format.error_print()
                    return
            fileAnalysis_obj = fileAnalysis()
            not_true_dict = fileAnalysis_obj.generate_log_report(infile)
        finally:
            infile.close()

        log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[0:lines]

        total_traffic = display_format.format_size(
            fileAnalysis_obj.total_traffic)
        total_request_times = fileAnalysis_obj.total_request_times

        print('')
        print('Total IP: %s   Total Traffic: %s   Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print('')
        display_format.head()
        display_format.transverse_line()

        for host, stats in log_report:
            times = stats['times']
            times_percent = float(times) / float(total_request_times) * 100
            # Fixed-point formatting: truncating str(float) would turn a
            # tiny share such as 1.234e-05 into the misleading '1.234'.
            print(display_format.formatstring % (
                host,
                display_format.format_size(stats['size']),
                times, '%.2f' % times_percent,
                stats['200'], stats['404'],
                stats['500'], stats['403'],
                stats['302'], stats['304'], stats['503']))

        # The totals row only makes sense when every host is listed, which
        # is also the case when the requested limit exceeds the host count.
        if (not lines) or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                total_ip, total_traffic,
                total_request_times, '100%',
                fileAnalysis_obj.total_200,
                fileAnalysis_obj.total_404,
                fileAnalysis_obj.total_500,
                fileAnalysis_obj.total_403,
                fileAnalysis_obj.total_302,
                fileAnalysis_obj.total_304,
                fileAnalysis_obj.total_503))
        display_format.execut_time()
if __name__ == '__main__':
    # Run the report only when executed as a script, not when imported.
    main_obj = Main()
    main_obj.main()