python 正则分析nginx日志

sunage001 · 发表于 2018-8-3 11:51:48

#!/usr/bin/env python
# -*- coding: utf-8 -*-
　　
#Author:xiaoluo
　　
#QQ:942729042
　　
#date:2015:05:12
　　
import re
　　
import sys
　　
# Path of the access log to analyse, taken from the first command-line
# argument. Evaluated at module load, so running without an argument raises
# IndexError here.
log = sys.argv[1]
　　
# Regex fragments, one per field of an access-log line. Each fragment is the
# *inside* of a named group; the enclosing parentheses are supplied when the
# fragments are assembled into the final pattern below.
#
# The fragments are written on single lines on purpose: re.VERBOSE only skips
# ASCII whitespace, so the U+3000 (ideographic space) characters that had
# crept into the multi-line versions were treated as literal characters and
# prevented the pattern from ever matching.
ip = r"?P<ip>[\d.]*"
date = r"?P<date>\d+"
month = r"?P<month>\w+"
year = r"?P<year>\d+"
log_time = r"?P<time>\S+"
# Everything up to the next double quote; with a "[... +0800] " style
# timestamp this also captures the closing bracket.
timezone = r'?P<timezone>[^"]*'
# Double-quoted field, quotes included in the capture.
name = r'?P<name>"[^"]*"'
method = r"?P<method>\S+"
request = r"?P<request>\S+"
protocol = r"?P<protocol>\S+"
status = r"?P<status>\d+"
bodyBytesSent = r"?P<bodyBytesSent>\d+"
# Double-quoted field, quotes included in the capture.
refer = r'?P<refer>"[^"]*"'
# Remainder of the line (".": stops at the newline).
userAgent = r"?P<userAgent>.*"

# Compiled pattern for one log line. Expected layout, fields separated by
# single spaces (escaped as "\ " because re.VERBOSE ignores bare spaces):
#
#   ip [date/month/year:time timezone "name" method request protocol
#   status bodyBytesSent "refer" userAgent
#
# Positional group order (0-based index into match.groups()):
#   0 ip, 1 date, 2 month, 3 year, 4 time, 5 timezone, 6 name, 7 method,
#   8 request, 9 protocol, 10 status, 11 bodyBytesSent, 12 refer, 13 userAgent
p = re.compile(
    r"(%s)\ \[(%s)/(%s)/(%s)\:(%s)\ (%s)\ (%s)\ (%s)"
    r"\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)"
    % (
        ip, date, month, year, log_time, timezone, name, method,
        request, protocol, status, bodyBytesSent, refer, userAgent,
    ),
    re.VERBOSE,
)
　　
def getcode():
    """Count access-log lines per HTTP status code.

    Reads the file named by the module-level ``log`` and matches every line
    against the module-level compiled pattern ``p``. Lines that do not match
    the pattern are skipped.

    Returns:
        dict mapping each status-code string captured by the ``status``
        group to the number of matching lines that carried it.
    """
    codedic = {}
    # The context manager closes the file on every exit path; previously
    # f.close() was placed after ``return`` and could never run.
    with open(log, 'r') as f:
        # Iterate the file lazily rather than readlines(), so a large log is
        # not loaded into memory all at once.
        for logline in f:
            matchs = p.match(logline)
            if matchs is None:
                continue
            # Named group instead of the positional index 10.
            status = matchs.group('status')
            codedic[status] = codedic.get(status, 0) + 1
    return codedic
　　
def getIP():
    """Return the 20 most frequent client IPs in the access log.

    Reads the file named by the module-level ``log`` and matches every line
    against the module-level compiled pattern ``p``. Lines that do not match
    the pattern are skipped.

    Returns:
        list of ``(ip, count)`` tuples sorted by count, highest first,
        holding at most 20 entries.
    """
    IPdic = {}
    # The context manager closes the file on every exit path; previously
    # f.close() was placed after ``return`` and could never run.
    with open(log, 'r') as f:
        for logline in f:
            matchs = p.match(logline)
            if matchs is None:
                continue
            # Named group instead of the positional index 0.
            IP = matchs.group('ip')
            IPdic[IP] = IPdic.get(IP, 0) + 1
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    ranked = sorted(IPdic.items(), key=lambda c: c[1], reverse=True)
    # The caller labels this report as the top 20; the earlier slice
    # [0:21:1] returned 21 entries.
    return ranked[:20]
　　
def getURL():
    """Return the 20 most frequent values of the log's ``name`` field.

    Reads the file named by the module-level ``log`` and matches every line
    against the module-level compiled pattern ``p``. Lines that do not match
    the pattern are skipped. The ``name`` field is the double-quoted field
    following the timestamp; the captured value includes the quotes.

    Returns:
        list of ``(name, count)`` tuples sorted by count, highest first,
        holding at most 20 entries.
    """
    URLdic = {}
    # The file was never closed before; the context manager releases it on
    # every exit path.
    with open(log, 'r') as f:
        for logline in f:
            matchs = p.match(logline)
            if matchs is None:
                continue
            # Named group instead of the positional index 6.
            urlname = matchs.group('name')
            URLdic[urlname] = URLdic.get(urlname, 0) + 1
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    ranked = sorted(URLdic.items(), key=lambda c: c[1], reverse=True)
    # The caller labels this report as the top 20; the earlier slice
    # [0:21:1] returned 21 entries.
    return ranked[:20]
　　
def getpv():
    """Return the 20 busiest minutes (page views per ``HH:MM``) in the log.

    Reads the file named by the module-level ``log`` and matches every line
    against the module-level compiled pattern ``p``. Lines that do not match
    the pattern are skipped. Only the first two ``:``-separated parts of the
    ``time`` field are used as the key, so the date part of the timestamp is
    not taken into account.

    Returns:
        list of ``(minute, count)`` tuples sorted by count, highest first,
        holding at most 20 entries.
    """
    pvdic = {}
    # The file was never closed before; the context manager releases it on
    # every exit path.
    with open(log, 'r') as f:
        for logline in f:
            matchs = p.match(logline)
            if matchs is None:
                continue
            # This is the time-of-day field (positional index 4), not the
            # timezone, despite the name the old local variable had.
            clock = matchs.group('time').split(':')
            minute = clock[0] + ":" + clock[1]
            pvdic[minute] = pvdic.get(minute, 0) + 1
    # items() exists on both Python 2 and 3; iteritems() is Python 2 only.
    ranked = sorted(pvdic.items(), key=lambda c: c[1], reverse=True)
    # Keep 20 entries; the earlier slice [0:21:1] returned 21.
    return ranked[:20]
　　
if __name__ == '__main__':
    # Script entry point: print each report in turn. print(...) with a single
    # argument produces the same output on Python 2 and Python 3, whereas the
    # bare print statement is a SyntaxError on Python 3.

    # Status-code counts.
    print("网站监控状况检查状态码")
    print(getcode())

    # Most frequent client IPs.
    print("网站访问量最高的20个IP地址")
    print(getIP())

    # Most frequent site names.
    print("网站访问最多的20个站点名")
    print(getURL())

    # Busiest minutes (printed without a heading, as before).
    print(getpv())

账号		自动登录	找回密码
密码			立即注册

大疆运维招人啦，

C++ :try 语句块和异常处理

C++的多态

Red Hat RHCE 8 (EX294) Cert Guide

Java/C++ 区别：看完这一篇，就够用！

别再用过时库了！这 13 个顶级 C++ 库才是

c++ size_t 和 int 的区别

[经验分享] python 正则分析nginx日志

浏览过的版块

扫码加入运维网微信交流群