|
1 #!/usr/bin/env python
2 # coding:utf-8
3
4 import sys,time
5
class DisplayFormat(object):
    '''Console formatting helpers for the per-host report table.

    All ``echo_*`` methods write one line to standard output. They call
    ``print(...)`` with a single argument so the same code runs unchanged
    on Python 2 and Python 3.
    '''

    # Row template shared by the header, separator, per-host and totals rows:
    # three left-aligned columns followed by eight right-aligned ones.
    formatstring = '%-18s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    # Number of dashes drawn under each column by echo_line(). No entry may
    # exceed the matching width in ``formatstring`` or the separator row
    # becomes wider than the header and every later column shifts right.
    _rule_widths = (15, 10, 12, 8, 10, 10, 10, 10, 10, 10, 10)

    def format_size(self, size):
        '''Return an integer byte count as a short human-readable string.

        The value is scaled to the largest binary unit it reaches and the
        fractional part is dropped (integer shift), e.g. 1536 -> '1K'.
        Values below 1024, including negative ones, are reported in bytes.

        :param size: number of bytes (int).
        :returns: string such as '512B', '3K', '10M', '2G' or '1T'.
        '''
        KB = 1024           # 1024 B
        MB = 1048576        # 1024 * 1024 B
        GB = 1073741824     # 1024 * 1024 * 1024 B
        TB = 1099511627776  # 1024 * 1024 * 1024 * 1024 B

        if size >= TB:
            return str(size >> 40) + 'T'
        if size >= GB:
            return str(size >> 30) + 'G'
        if size >= MB:
            return str(size >> 20) + 'M'
        if size >= KB:
            return str(size >> 10) + 'K'
        return str(size) + 'B'

    def echo_line(self):
        '''Print the row of dashes that separates header and data rows.'''
        print(self.formatstring % tuple('-' * width for width in self._rule_widths))

    def echo_head(self):
        '''Print the column header row.'''
        print(self.formatstring % ('IP', 'Traffic', 'Time', 'Time%',
                                   200, 404, 403, 503, 500, 302, 304))

    def echo_error(self):
        '''Print the command-line usage message.'''
        # The leading space keeps the script name and 'filepath' apart.
        print('Usage: ' + sys.argv[0] + ' filepath [number]')

    def echo_time(self):
        '''Print the processor time consumed by the script so far.'''
        # time.clock() was removed in Python 3.8; time.process_time() is its
        # replacement. Fall back to clock() on interpreters that lack it.
        timer = getattr(time, 'process_time', None) or getattr(time, 'clock')
        print('The script is running %s second' % timer())
44
45
class HostInfo(object):
    '''Counters collected for a single remote host.

    ``self.host`` is a dict holding one integer per key in ``host_info``;
    every counter starts at zero.
    '''

    # Every counter kept per host: one per tracked HTTP status code, plus
    # 'size' (sum of the byte counts passed to add_1) and 'time' (number of
    # times add_1('time', ...) was called).
    host_info = ['200', '404', '403', '503', '500', '302', '304', 'size', 'time']

    def __init__(self, host):
        '''Create zeroed counters for *host*.

        :param host: identifier of the remote host. It is kept in
            ``self.address``; previously the argument was overwritten by the
            counter dict and lost.
        '''
        self.address = host
        self.host = dict.fromkeys(self.host_info, 0)

    def add_1(self, status_size, is_size):
        '''Update one counter.

        :param status_size: the string 'time' to count one more request, a
            key of ``self.host`` to increment that counter by one, or - when
            *is_size* is true - a number of bytes to add to 'size'.
        :param is_size: true when *status_size* is a byte count.
        :raises KeyError: if *status_size* is neither 'time' nor an existing
            counter name and *is_size* is false.
        '''
        if status_size == 'time':
            self.host['time'] += 1
        elif is_size:
            self.host['size'] = self.host['size'] + status_size
        else:
            self.host[status_size] += 1

    def get_value(self, value):
        '''Return the current value of the counter named *value*.'''
        return self.host[value]
67
68
class AnalysisFile(object):
    '''Aggregate per-host request statistics from access-log lines.

    ``self.empty`` maps each remote host to its ``HostInfo`` counters. The
    ``total_*`` attributes hold the grand totals computed by the most recent
    ``return_sorted_list`` call.
    '''

    # Status codes that are tallied individually.
    _STATUS_CODES = ('200', '404', '403', '503', '500', '302', '304')

    def __init__(self):
        '''Start with no hosts and all totals at zero.'''
        self.empty = {}
        self._reset_totals()

    def _reset_totals(self):
        '''Set every grand total back to zero.'''
        self.total_request_time = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_403 = 0
        self.total_503 = 0
        self.total_500 = 0
        self.total_302 = 0
        self.total_304 = 0

    def split_line_todict(self, line):
        '''Pick whitespace-separated fields 0, 8 and 9 out of one log line.

        :param line: one line of the log file.
        :returns: dict with keys 'remote_host', 'status' and 'bytes_sent',
            all values being the raw strings from the line.
        :raises IndexError: if the line has fewer than ten fields.
        '''
        fields = line.split()
        return {'remote_host': fields[0],
                'status': fields[8],
                'bytes_sent': fields[9]}

    def read_log(self, logs):
        '''Accumulate counters for every line in *logs*.

        Lines that cannot be split into enough fields are skipped. A
        non-numeric byte count is treated as zero.

        :param logs: iterable of log lines (for example an open file).
        :returns: ``self.empty``, the host -> ``HostInfo`` mapping.
        '''
        for line in logs:
            try:
                entry = self.split_line_todict(line)
            except (ValueError, IndexError):
                continue
            host = entry['remote_host']
            status = entry['status']

            counters = self.empty.get(host)
            if counters is None:
                counters = self.empty[host] = HostInfo(host)

            counters.add_1('time', False)

            # Only real status codes are tallied. Testing against the full
            # counter list would let a malformed status field of 'time' or
            # 'size' corrupt the request or traffic counters.
            if status in self._STATUS_CODES:
                counters.add_1(status, False)

            try:
                bytes_sent = int(entry['bytes_sent'])
            except ValueError:
                bytes_sent = 0

            counters.add_1(bytes_sent, True)

        return self.empty

    def return_sorted_list(self, true_dict):
        '''Compute the grand totals and return the hosts ordered by usage.

        The totals are recomputed from scratch on every call and *true_dict*
        is left untouched, so the method may be called repeatedly and
        ``read_log`` may keep adding to the same mapping afterwards.

        :param true_dict: mapping of host -> object exposing
            ``get_value(name)`` for each status code, 'size' and 'time'.
        :returns: list of ``(host, counters_dict)`` pairs sorted by
            ('size', 'time') in descending order.
        '''
        self._reset_totals()
        names = self._STATUS_CODES + ('size', 'time')
        summary = {}

        for host_key in true_dict:
            host_value = true_dict[host_key]
            row = dict((name, host_value.get_value(name)) for name in names)
            summary[host_key] = row

            self.total_request_time += row['time']
            self.total_traffic += row['size']
            self.total_200 += row['200']
            self.total_404 += row['404']
            self.total_403 += row['403']
            self.total_503 += row['503']
            self.total_500 += row['500']
            self.total_302 += row['302']
            self.total_304 += row['304']

        return sorted(summary.items(),
                      key=lambda item: (item[1]['size'], item[1]['time']),
                      reverse=True)
153
154
class Main(object):
    '''Command-line driver: parse arguments, analyse the log, print the report.'''

    def main(self):
        '''Run the report for ``sys.argv``.

        Expected arguments: ``filepath [number]``. *number* limits the
        report to the first *number* hosts of the sorted list; when it is
        omitted or 0 every host is shown. On a wrong argument count, an
        invalid *number* or an unreadable file the error and the usage
        message are printed and the method returns without analysing
        anything.
        '''
        displayformat = DisplayFormat()

        argc = len(sys.argv)
        if argc not in (2, 3):
            displayformat.echo_error()
            return

        log_file = sys.argv[1]
        try:
            lines = int(sys.argv[2]) if argc == 3 else 0
            if lines < 0:
                # A negative limit would silently slice hosts off the end.
                raise ValueError('number must not be negative: %d' % lines)
        except ValueError as e:
            print('')
            print(e)
            displayformat.echo_error()
            return

        fileanalysis = AnalysisFile()

        # Read the log; the with-block closes the file even if reading fails.
        try:
            with open(log_file, 'r') as files:
                news_dict = fileanalysis.read_log(files)
        except IOError as e:
            print('')
            print(e)
            displayformat.echo_error()
            return

        new_list = fileanalysis.return_sorted_list(news_dict)

        # Number of distinct hosts, counted before the list is truncated.
        total_ip = len(new_list)

        if lines:
            new_list = new_list[:lines]

        # Summary line: host count, total traffic, total request count.
        print('')
        total_request_time = fileanalysis.total_request_time
        total_traffic = displayformat.format_size(fileanalysis.total_traffic)
        print('总IP数量: %s 总的访问流量: %s 总的请求次数: %d' % (total_ip,
                                                     total_traffic,
                                                     total_request_time))

        # Table header and separator.
        print('')
        displayformat.echo_head()
        displayformat.echo_line()

        # One row per host, with its share of all requests as a percentage.
        for address, stats in new_list:
            requests = stats['time']
            time_percentage = (float(requests) / float(total_request_time)) * 100
            print(displayformat.formatstring % (
                address,
                displayformat.format_size(stats['size']),
                requests, str(time_percentage)[0:5],
                stats['200'], stats['404'], stats['403'],
                stats['503'], stats['500'], stats['302'], stats['304']))

        # The totals row only makes sense when every host is listed, which
        # is also the case when the requested limit exceeds the host count.
        if not lines or lines >= total_ip:
            displayformat.echo_line()
            print(displayformat.formatstring % (
                total_ip, total_traffic, total_request_time, '100%',
                fileanalysis.total_200, fileanalysis.total_404,
                fileanalysis.total_403, fileanalysis.total_503,
                fileanalysis.total_500, fileanalysis.total_302,
                fileanalysis.total_304))

        # Finally report how long the script ran.
        print('')
        displayformat.echo_time()
236
if __name__ == '__main__':
    # Executed as a script: build the driver and run the report once.
    Main().main()
|
|