sunage001 发表于 2018-8-3 11:51:48

python 正则分析nginx日志

#!/usr/bin/env python  
#-*- coding:utf-8 -*-
  
#Author:xiaoluo
  
#QQ:942729042
  
#date:2015:05:12
  
import re
  
import sys
  
# Path of the nginx access log to analyse: the first command-line argument.
# sys.argv itself is a list (script name first), which open() cannot accept.
# Falls back to None when no argument is given so that merely importing this
# module does not raise IndexError.
log = sys.argv[1] if len(sys.argv) > 1 else None

# Regex fragments, one named group per log field.  Each fragment is the
# *inside* of a group; the enclosing parentheses are supplied when the full
# pattern is assembled below.
ip = r"?P<ip>[\d.]*"
date = r"?P<date>\d+"
month = r"?P<month>\w+"
year = r"?P<year>\d+"
log_time = r"?P<time>\S+"
# Everything after the time up to the space that precedes the quoted name.
timezone = r"?P<timezone>[^\"]*"
# Double-quoted field; the captured value keeps its surrounding quotes.
name = r"?P<name>\"[^\"]*\""
method = r"?P<method>\S+"
request = r"?P<request>\S+"
protocol = r"?P<protocol>\S+"
status = r"?P<status>\d+"
bodyBytesSent = r"?P<bodyBytesSent>\d+"
# Double-quoted field; the captured value keeps its surrounding quotes.
refer = r"?P<refer>\"[^\"]*\""
# Remainder of the line.
userAgent = r"?P<userAgent>.*"

# Compiled pattern for one log line, laid out as:
#   ip [date/month/year:time timezone "name" method request protocol
#   status bodyBytesSent "refer" userAgent
# re.VERBOSE ignores unescaped whitespace in the pattern, which is why every
# literal separator space is written as "\ ".
p = re.compile(
    r"(%s)\ \[(%s)/(%s)/(%s)\:(%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)\ (%s)"
    % (ip, date, month, year, log_time, timezone, name, method, request,
       protocol, status, bodyBytesSent, refer, userAgent),
    re.VERBOSE,
)
  
def getcode(path=None, pattern=None):
    """Count how many matched log lines carry each HTTP status code.

    Args:
        path: Log file to read.  Defaults to the module-level ``log``.
        pattern: Compiled regex exposing a named group ``status``.
            Defaults to the module-level ``p``.

    Returns:
        A dict mapping each status string to the number of matching lines.
        Lines the pattern does not match are skipped.
    """
    if path is None:
        path = log
    if pattern is None:
        pattern = p

    codedic = {}
    # "with" guarantees the file is closed; a close() placed after the
    # return statement would never execute.
    with open(path, 'r') as f:
        for logline in f:
            matchs = pattern.match(logline)
            if matchs is None:
                continue
            # Key on the status field only, and update the entry in place
            # rather than rebinding the dict itself to a number.
            code = matchs.group('status')
            codedic[code] = codedic.get(code, 0) + 1
    return codedic
  
def getIP(path=None, pattern=None, top_n=20):
    """Rank client IP addresses by number of matched log lines.

    Args:
        path: Log file to read.  Defaults to the module-level ``log``.
        pattern: Compiled regex exposing a named group ``ip``.
            Defaults to the module-level ``p``.
        top_n: Maximum number of entries to return (default 20, the figure
            announced by the script's report heading).  ``None`` returns all.

    Returns:
        A list of ``(ip, count)`` tuples, highest count first.  Lines the
        pattern does not match are skipped.
    """
    if path is None:
        path = log
    if pattern is None:
        pattern = p

    IPdic = {}
    # "with" guarantees the file is closed; a close() placed after the
    # return statement would never execute.
    with open(path, 'r') as f:
        for logline in f:
            matchs = pattern.match(logline)
            if matchs is None:
                continue
            # Key on the ip field only, and update the entry in place
            # rather than rebinding the dict itself to a number.
            IP = matchs.group('ip')
            IPdic[IP] = IPdic.get(IP, 0) + 1

    # Order by the count (second tuple element), not by the key;
    # items() works on both Python 2 and Python 3.
    ranked = sorted(IPdic.items(), key=lambda c: c[1], reverse=True)
    return ranked[:top_n]
  
def getURL(path=None, pattern=None, top_n=20):
    """Rank site names by number of matched log lines.

    The key is whatever the pattern's ``name`` group captures; no quotes or
    other characters are stripped from it.

    Args:
        path: Log file to read.  Defaults to the module-level ``log``.
        pattern: Compiled regex exposing a named group ``name``.
            Defaults to the module-level ``p``.
        top_n: Maximum number of entries to return (default 20, the figure
            announced by the script's report heading).  ``None`` returns all.

    Returns:
        A list of ``(name, count)`` tuples, highest count first.  Lines the
        pattern does not match are skipped.
    """
    if path is None:
        path = log
    if pattern is None:
        pattern = p

    URLdic = {}
    # "with" guarantees the file handle is released when reading is done.
    with open(path, 'r') as f:
        for logline in f:
            matchs = pattern.match(logline)
            if matchs is None:
                continue
            # Key on the name field only, and update the entry in place
            # rather than rebinding the dict itself to a number.
            urlname = matchs.group('name')
            URLdic[urlname] = URLdic.get(urlname, 0) + 1

    # Order by the count (second tuple element), not by the key;
    # items() works on both Python 2 and Python 3.
    ranked = sorted(URLdic.items(), key=lambda c: c[1], reverse=True)
    return ranked[:top_n]
  
def getpv(path=None, pattern=None, top_n=20):
    """Rank minutes of the day by number of matched log lines (page views).

    The pattern's ``time`` group is split on ":" and its first two parts are
    joined back together to form the per-minute key (e.g. "10:00:01" is
    counted under "10:00").

    Args:
        path: Log file to read.  Defaults to the module-level ``log``.
        pattern: Compiled regex exposing a named group ``time``.
            Defaults to the module-level ``p``.
        top_n: Maximum number of entries to return.  Defaults to 20, the
            size the script's other ranked reports announce; ``None``
            returns every minute seen.

    Returns:
        A list of ``(minute, count)`` tuples, highest count first.  Lines the
        pattern does not match are skipped.
    """
    if path is None:
        path = log
    if pattern is None:
        pattern = p

    pvdic = {}
    # "with" guarantees the file handle is released when reading is done.
    with open(path, 'r') as f:
        for logline in f:
            matchs = pattern.match(logline)
            if matchs is None:
                continue
            # Slicing (rather than indexing [0] and [1]) keeps a time value
            # with fewer than two ":"-separated parts from raising.
            parts = matchs.group('time').split(':')
            minute = ':'.join(parts[:2])
            pvdic[minute] = pvdic.get(minute, 0) + 1

    # Order by the count (second tuple element), not by the key;
    # items() works on both Python 2 and Python 3.
    ranked = sorted(pvdic.items(), key=lambda c: c[1], reverse=True)
    return ranked[:top_n]
  
if __name__ == '__main__':
    # Exit with a usage hint instead of a traceback when the log path
    # argument is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s <nginx_access_log>" % sys.argv[0])

    # print(...) with a single argument produces the same output on
    # Python 2 and Python 3, whereas the print statement is a syntax error
    # on Python 3.

    # Heading: status-code summary.
    print("网站监控状况检查状态码")
    print(getcode())

    # Heading: the 20 IP addresses with the most requests.
    print("网站访问量最高的20个IP地址")
    print(getIP())

    # Heading: the 20 most requested site names.
    print("网站访问最多的20个站点名")
    print(getURL())

    # Per-minute page-view ranking (printed without a heading).
    print(getpv())
页: [1]
查看完整版本: python 正则分析nginx日志