|
#!/usr/bin/env python
import sys
import re
# Pattern for one line of an Apache access log in Common Log Format.
# It is compiled with re.VERBOSE, so whitespace and "#" comments inside the
# pattern are ignored.  Every captured field is a *named* group; those names
# become the keys of the dictionary returned by groupdict().  The remote
# logname and remote user fields are matched but deliberately not captured.
log_line_re = re.compile(r'''(?P<remote_host>\S+) #IP ADDRESS
    \s+ #whitespace
    \S+ #remote logname
    \s+ #whitespace
    \S+ #remote user
    \s+ #whitespace
    (?P<time>\[[^\[\]]+\]) #time
    \s+ #whitespace
    (?P<request>"[^"]+") #first line of request
    \s+ #whitespace
    (?P<status>\d+) #status code
    \s+ #whitespace
    (?P<bytes_sent>-|\d+) #bytes sent, or a dash when nothing was sent
    \s* #whitespace
    ''', re.VERBOSE)

# Sample access-log line used to demonstrate the pattern.
line ='192.168.1.37 - - [22/Jul/2010:14:51:56 +0800] "GET /xampp/head.php HTTP/1.1" 200 1362'

m = log_line_re.match(line)
# Maps each named group to the text it matched in the sample line.
groupdict = m.groupdict()
# The parenthesised form prints the same text on Python 2 and Python 3.
print(groupdict)
[root@test ~]# python log1
{'status': '200', 'remote_host': '192.168.1.37', 'request': '"GET /xampp/head.php HTTP/1.1"', 'bytes_sent': '1362', 'time': '[22/Jul/2010:14:51:56 +0800]'}
使用 apachelog 模块（下载地址：http://pypi.python.org/pypi/apachelog/1.0）：
#!/usr/bin/env python
# Parse every line of an Apache access log with the third-party apachelog
# module and print the resulting field dictionary for each line.  Lines that
# the parser rejects are reported on stderr and skipped.
import sys

import apachelog

# Parser built from the module's predefined 'common' log format.
p=apachelog.parser(apachelog.formats['common'])

# The with-statement closes the log file even if the loop is interrupted.
with open('/opt/lampp/logs/access_log') as access_log:
    for line in access_log:
        try:
            data = p.parse(line)
        except apachelog.ApacheLogParserError:
            # Catch only the parser's own error so that Ctrl-C and genuine
            # programming errors are not silently swallowed.
            sys.stderr.write("Unable to parse %s" % line)
        else:
            print(data)
{'%l': '-', '%>s': '200', '%h': '192.168.1.38', '%b': '5573', '%u': '-', '%t': '[22/Jul/2010:16:02:27 +0800]', '%r': 'GET /admin/ListUserlist.php HTTP/1.1'}
|
|