|
# -*- coding:utf-8 -*-
import sys
try:
import json
except ImportError:
import simplejson as json
class AnalysisNginxLog(object):
    ''' Parse an nginx proxy access log and count values per field.

    Expected proxy log format:
        log_format proxy '$remote_addr - $host [$time_local] "$request" $status'
        ' $body_bytes_sent "$http_referer" '
        '"$http_user_agent" "$http_x_forwarded_for/$upstream_addr" "$request_time/$upstream_response_time/$upstream_status/$userId" "$xm_logid"';

    Every non-blank line is split on whitespace and the tokens at the
    positions listed in ``__fields`` are stored, one dict per line, in
    ``self.data``. A field whose position does not exist on a line is
    stored as ``None``.
    '''

    # (token position after splitting the line on whitespace, field name)
    __fields = [
        (0, 'client_ip'),
        (2, 'domain'),
        (3, 'access_time'),
        # (4, 'time_zone'),  -- not collected
        (5, 'http_method'),
        (6, 'uri'),
        (7, 'http_version'),
        (8, 'request_status'),
        (9, 'request_body_size')
    ]

    # Fields that receive the extra clean-up done by _filter2.
    __filter_args = [
        'uri',
        'access_time'
    ]

    def __init__(self, logfile):
        ''' Remember the log path and parse the file immediately. '''
        self.logfile = logfile
        self.data = []
        self._load_data()

    def _open(self):
        ''' Yield every non-blank line of the log file, stripped.

        Blank lines are skipped rather than treated as end of file, so
        records that follow an empty line are still read.
        '''
        with open(self.logfile) as fp:
            for raw in fp:
                line = raw.strip()
                if line:
                    yield line

    def get_data(self):
        ''' Re-read the log file and return the list of parsed records. '''
        self._load_data()
        return self.data

    def _filter1(self, data):
        ''' Strip surrounding whitespace, '#' and '"' from one token. '''
        return data.strip().strip('#').strip('"')

    def _filter2(self, data):
        ''' Clean the fields named in ``__filter_args`` of one record.

        A value containing '?' is cut at the first '?' (drops the query
        string of a uri); otherwise a value containing '[' has its
        leading/trailing '[' removed. ``None`` values (fields missing
        from the line) are left untouched.

        Raises Exception if ``data`` is not a dict.
        Returns the same dict, modified in place.
        '''
        if not isinstance(data, dict):
            raise Exception('Invalid Parameters, must be type of dict!')
        for key in self.__filter_args:
            value = data.get(key)
            if value is None:
                # Absent key or missing field: nothing to clean.
                continue
            if '?' in value:
                data[key] = value.split('?')[0]
            elif '[' in value:
                data[key] = value.strip('[')
        return data

    def _load_data(self):
        ''' Parse the log file and replace ``self.data`` with the result.

        The records are collected in a fresh list, so calling this method
        more than once never duplicates entries.
        '''
        records = []
        for line in self._open():
            tokens = line.split()
            record = {}
            for index, field in self.__fields:
                try:
                    record[field] = self._filter1(tokens[index])
                except IndexError as e:
                    # Line has fewer tokens than expected: report it and
                    # keep the record with this field set to None.
                    print('Error: {}, at {}'.format(
                        e, line
                    ))
                    record[field] = None
            records.append(self._filter2(record))
        self.data = records

    def counter_by_field(self, field, top):
        ''' Count the values of ``field`` and print/return the ``top`` most common.

        field: one of the names listed in ``__fields``.
        top:   number of (value, count) pairs to keep.

        Raises Exception if ``field`` is not a known field name.
        Prints the result as JSON and returns it as a list of
        (value, count) tuples sorted by descending count.
        '''
        if field not in [name for _, name in self.__fields]:
            raise Exception('Invalid Parameters!')
        counts = {}
        for log in self.data:
            value = log[field]
            counts[value] = counts.get(value, 0) + 1
        result = sorted(counts.items(), key=lambda d: d[1], reverse=True)[:top]
        print(json.dumps(result, indent=2))
        return result
def _help():
    ''' Print the command-line usage line and terminate the program. '''
    usage = 'Usage: %s [log_file] [colums] [topN] ' % (sys.argv[0],)
    print(usage)
    sys.exit()
def counter_log(logfile, colums, top=10):
    ''' Print the top-N value counts for each requested column of a log.

    logfile: path of the access log, handed to AnalysisNginxLog.
    colums:  comma separated field names, e.g. 'uri,client_ip'.
             Whitespace around a name and empty entries (such as the one
             produced by a trailing comma) are ignored.
    top:     how many entries to show per column; an int or a numeric
             string.

    Raises ValueError if ``top`` cannot be converted to int.
    '''
    # Convert once, and before parsing, so a bad topN fails immediately.
    top_n = int(top)
    cols = [name.strip() for name in colums.split(',') if name.strip()]
    nginx = AnalysisNginxLog(logfile)
    for col in cols:
        print('{c} [{col}] Top {top} {c}'.format(c='-' * 30, col=col, top=top))
        nginx.counter_by_field(col, top_n)
if __name__ == '__main__':
if len(sys.argv) |
|