|
#!/usr/bin/env python
# coding=utf-8
import datetime
from urllib.parse import urlparse
from user_agents import parse as ua_parse
class NginxLineParser(object):
    """Split one nginx access-log line into named fields.

    The layout expected by :meth:`parse` is::

        server_name local_ip client_ip remote_port [time_local]
        "method request verb" status body_bytes_sent "http_referer"
        ... "http_user_agent" request_time upstream_response_time

    Call :meth:`parse` with a line, then read the individual properties
    or :meth:`logline_to_dict`.
    """

    # File that receives every line parse() could not split.
    ERROR_LOG_PATH = '/tmp/parser_log_error.txt'

    # strptime layout of the bracketed timestamp; %z accepts any UTC
    # offset instead of only "+0800".
    _TIME_FORMAT = '%d/%b/%Y:%H:%M:%S %z'

    # Property names exported by logline_to_dict(), in output order.
    _FIELD_NAMES = (
        'server_name',
        'local_ip',
        'client_ip',
        'remote_port',
        'time_local',
        'method',
        'request',
        'verb',
        'status',
        'body_bytes_sent',
        'http_referer',
        'http_user_agent',
        'request_time',
        'upstream_response_time',
    )

    def parse(self, line):
        """Parse one nginx log line into the parser's fields.

        Fields are first collected in local variables and stored on the
        instance only once the whole line has been split, so a malformed
        line never leaves the parser holding a mix of two records.

        Args:
            line: A single raw log line (a trailing newline is allowed).

        Returns:
            True if the line was parsed and stored. False if it did not
            match the expected layout; the line is then appended to
            ``ERROR_LOG_PATH`` and previously parsed fields are kept.
        """
        try:
            segments = line.strip().split('"')
            prefix = segments[0].strip()
            server_name, local_ip, client_ip, remote_port = (
                prefix.split('[')[0].split())
            time_local = prefix.split('[')[-1].strip(']')
            method, request, verb = segments[1].strip().split()
            status, body_bytes_sent = segments[2].strip().split()
            http_referer = segments[3].strip()
            http_user_agent = segments[-2].strip()
            request_time, upstream_response_time = (
                segments[-1].strip().split())
        except (ValueError, IndexError):
            # ValueError: a segment held the wrong number of tokens.
            # IndexError: the line had too few quoted segments.
            with open(self.ERROR_LOG_PATH, 'a') as error_log:
                # Drop the line's own newline so no blank line is logged.
                error_log.write(line.rstrip('\r\n') + '\n')
            return False

        self._server_name = server_name
        self._local_ip = local_ip
        self._client_ip = client_ip
        self._remote_port = remote_port
        self._time_local = time_local
        self._method = method
        self._request = request
        self._verb = verb
        self._status = status
        self._body_bytes_sent = body_bytes_sent
        self._http_referer = http_referer
        self._http_user_agent = http_user_agent
        self._request_time = request_time
        self._upstream_response_time = upstream_response_time
        return True

    def logline_to_dict(self):
        """Return the parsed fields as a dict keyed by field name.

        Values are the public property values, so ``time_local`` is a
        datetime, ``request`` is the URL path only and
        ``http_user_agent`` is a browser family or None.
        """
        return {name: getattr(self, name) for name in self._FIELD_NAMES}

    @property
    def server_name(self):
        """First token of the line."""
        return self._server_name

    @property
    def local_ip(self):
        """Second token of the line."""
        return self._local_ip

    @property
    def client_ip(self):
        """Third token of the line."""
        return self._client_ip

    @property
    def remote_port(self):
        """Fourth token of the line, kept as a string."""
        return self._remote_port

    @property
    def time_local(self):
        """Bracketed timestamp as a naive datetime in the logged offset.

        Any numeric UTC offset is accepted; the offset is discarded so
        the result stays a naive datetime, exactly as it was when only
        "+0800" was supported.

        Raises:
            ValueError: If the timestamp does not match the format.
        """
        stamp = datetime.datetime.strptime(
            self._time_local, self._TIME_FORMAT)
        return stamp.replace(tzinfo=None)

    @property
    def method(self):
        """First token of the quoted request segment."""
        return self._method

    @property
    def request(self):
        """Path component of the requested URL (query string removed)."""
        return urlparse(self._request).path

    @property
    def verb(self):
        """Third token of the quoted request segment."""
        return self._verb

    @property
    def body_bytes_sent(self):
        """Second token after the request segment, kept as a string."""
        return self._body_bytes_sent

    @property
    def http_referer(self):
        """Content of the quoted segment that follows the status."""
        return self._http_referer

    @property
    def http_user_agent(self):
        """Browser family of a non-bot user agent, or None for a bot."""
        agent = ua_parse(self._http_user_agent)
        if agent.is_bot:
            return None
        return agent.browser.family

    @property
    def user_agent_type(self):
        """Browser family of a bot user agent, or None for a non-bot."""
        agent = ua_parse(self._http_user_agent)
        if agent.is_bot:
            return agent.browser.family
        return None

    @property
    def status(self):
        """First token after the request segment, kept as a string."""
        return self._status

    @property
    def request_time(self):
        """Second-to-last token of the line, kept as a string."""
        return self._request_time

    @property
    def upstream_response_time(self):
        """Last token of the line, kept as a string."""
        return self._upstream_response_time
def main(log_path='test.log'):
    """Program entry point: feed every line of a log file to the parser.

    Each line is passed to ``NginxLineParser.parse``; the parsed fields
    are not used any further here.

    Args:
        log_path: Path of the access log to read. Defaults to
            'test.log', the file this script read when the name was
            hard-coded.
    """
    ng_line_parser = NginxLineParser()
    with open(log_path, 'r') as log_file:
        for line in log_file:
            ng_line_parser.parse(line)
# Run the parser only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
|