Count nginx access-log traffic across multiple dimensions. `__fields` maps the index of each whitespace-separated token in a log line to its field name.
For example, index 0 is the first field, the client IP (`client_ip`); to count any other field, just add an entry here, as in the sketch below.
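For instance, to also count the referer you could extend the class-level `__fields` list like this. This is a minimal sketch; the index 10 / `http_referer` pairing is an assumption based on the proxy log format documented in the script, not something the original code defines:

```python
# Hypothetical extension: assuming "$http_referer" is the 11th
# whitespace-separated token (index 10) in the proxy log format.
__fields = [
    (0, 'client_ip'),
    (2, 'domain'),
    (3, 'access_time'),
    (5, 'http_method'),
    (6, 'uri'),
    (7, 'http_version'),
    (8, 'request_status'),
    (9, 'request_body_size'),
    (10, 'http_referer'),   # new entry -> countable via counter_by_field('http_referer', top)
]
```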
## Straight to the code
```python
# -*- coding:utf-8 -*-
import sys

try:
    import json
except ImportError:
    import simplejson as json


class AnalysisNginxLog(object):
    ''' analysis nginx proxy access log
    proxy log format:
    log_format proxy '$remote_addr - $host [$time_local] "$request" $status'
        ' $body_bytes_sent "$http_referer" '
        '"$http_user_agent" "$http_x_forwarded_for/$upstream_addr" "$request_time/$upstream_response_time/$upstream_status/$userId" "$xm_logid"';
    '''
    # index of the whitespace-separated token -> field name
    __fields = [
        (0, 'client_ip'),
        (2, 'domain'),
        (3, 'access_time'),
        # (4, 'time_zone'),
        (5, 'http_method'),
        (6, 'uri'),
        (7, 'http_version'),
        (8, 'request_status'),
        (9, 'request_body_size')
    ]
    # fields that get extra cleanup in _filter2
    __filter_args = [
        'uri',
        'access_time'
    ]

    def __init__(self, logfile):
        self.logfile = logfile
        self.data = []
        self._load_data()

    def _open(self):
        # yield the log file line by line, stopping at EOF or an empty line
        with open(self.logfile) as fp:
            while True:
                line = fp.readline().strip()
                if not line:
                    break
                yield line

    def get_data(self):
        # re-read the log file and return the freshly parsed records
        self._load_data()
        return self.data

    def _filter1(self, data):
        # strip surrounding '#' and '"' decorations from a raw token
        return data.strip().strip('#').strip('"')

    def _filter2(self, data):
        # drop query strings from the uri and the leading '[' from access_time
        if not isinstance(data, dict):
            raise Exception('Invalid Parameters, must be type of dict!')
        for key, value in data.items():
            if key not in self.__filter_args:
                continue
            if '?' in value:
                data[key] = value.split('?')[0]
            elif '[' in value:
                data[key] = value.strip('[')
        return data

    def _load_data(self):
        # (re)parse every log line into a dict keyed by the names in __fields
        self.data = []
        for line in self._open():
            li = [x.strip() for x in line.split()]
            dict_tmp = {}
            for i, field in self.__fields:
                try:
                    dict_tmp[field] = self._filter1(li[i])
                except IndexError as e:
                    print('Error: {}, at {}'.format(e, line))
                    dict_tmp[field] = None
            self.data.append(self._filter2(dict_tmp))

    def counter_by_field(self, field, top):
        # count occurrences of one field and print the top N as JSON
        fields = [x[1] for x in self.__fields]
        if field not in fields:
            raise Exception('Invalid Parameters!')
        result = {}
        for log in self.data:
            if log[field] in result:
                result[log[field]] += 1
            else:
                result[log[field]] = 1
        result = sorted(result.items(), key=lambda d: d[1], reverse=True)[:top]
        print(json.dumps(result, indent=2))
        return result


def _help():
    print('Usage: %s [log_file] [columns] [topN]' % sys.argv[0])
    sys.exit()


def counter_log(logfile, columns, top=10):
    nginx = AnalysisNginxLog(logfile)
    cols = columns.split(',')
    for col in cols:
        print('{c} [{col}] Top {top} {c}'.format(c='-' * 30, col=col, top=top))
        nginx.counter_by_field(col, int(top))


if __name__ == '__main__':
    if len(sys.argv) <= 2:
        _help()
    counter_log(*sys.argv[1:])
```
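The per-field tally in `counter_by_field` is a hand-rolled dict; the same aggregation can also be sketched with `collections.Counter`, which gives the top-N ordering for free. This is an alternative sketch, not what the script itself uses:

```python
from collections import Counter


def count_field(records, field, top=10):
    """Tally one field across parsed log records and return the top N.

    `records` is assumed to be a list of dicts as produced by
    AnalysisNginxLog, where a field may be None if parsing failed.
    """
    counts = Counter(r[field] for r in records if r.get(field) is not None)
    return counts.most_common(top)


# Usage sketch:
#   nginx = AnalysisNginxLog('pss_00-06.log')
#   print(count_field(nginx.data, 'uri', 3))
```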
## Output
```
> python analysis_nginx_proxy_log.py
Usage: analysis_nginx_proxy_log.py [log_file] [columns] [topN]
```
```
> python analysis_nginx_proxy_log.py pss_00-06.log uri,client_ip 3
------------------------------ [uri] Top 3 ------------------------------
[
  [
    "/api/pss/GetBatchSnDetails",
    2298
  ],
  [
    "/api/xmss/CheckSubHome",
    1456
  ],
  [
    "/api/xmss/GetPhyStock",
    1299
  ]
]
------------------------------ [client_ip] Top 3 ------------------------------
[
  [
    "10.108.38.18",
    2150
  ],
  [
    "10.114.135.253",
    1403
  ],
  [
    "10.104.3.11",
    1253
  ]
]
```