孤独雪鹰 发表于 2018-11-19 12:16:40

python 实现nginx/apache 日志格式的统计脚本

  # !/usr/bin/env python
  # -*- coding:utf8 -*-
  import sys
  import re
  import time
  import os
  

  # Month abbreviations as written in nginx/apache access logs. A lookup table
  # is used instead of strptime's %b because %b depends on the active locale.
  _MONTHS = {
      "Jan": 1, "Feb": 2, "Mar": 3, "Apr": 4, "May": 5, "Jun": 6,
      "Jul": 7, "Aug": 8, "Sep": 9, "Oct": 10, "Nov": 11, "Dec": 12,
  }

  # Shape of the user-supplied query time, e.g. "2016-08-03 10:00:00".
  # Field ranges (month 1-12, hour 0-23, ...) are checked by strptime.
  _QUERY_TIME_RE = re.compile(r"^\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2}$")

  # Bracketed request time of an access-log line, e.g. "[03/Aug/2016:10:37:12".
  _LOG_TIME_RE = re.compile(
      r"\[(\d{1,2})/([A-Za-z]{3})/(\d{4}):(\d{2}):(\d{2}):(\d{2})")


  def _parse_query_time(text):
      """Return the epoch timestamp (local time) for a query time string.

      Returns None when the text is not a valid "YYYY-MM-DD HH:MM:SS" value.
      """
      text = text.strip()
      if _QUERY_TIME_RE.match(text) is None:
          return None
      try:
          return time.mktime(time.strptime(text, "%Y-%m-%d %H:%M:%S"))
      except (ValueError, OverflowError):
          # Right shape but impossible value, e.g. month 13 or day 32.
          return None


  def _parse_log_time(line):
      """Return the epoch timestamp of an access-log line, or None.

      The UTC offset that follows the time in the log line is ignored; the
      time is interpreted as local time, the same way the query times are.
      """
      match = _LOG_TIME_RE.search(line)
      if match is None:
          return None
      day, mon, year, hour, minute, second = match.groups()
      month = _MONTHS.get(mon.capitalize())
      if month is None:
          return None
      numeric = "%s/%d/%s:%s:%s:%s" % (day, month, year, hour, minute, second)
      try:
          return time.mktime(time.strptime(numeric, "%d/%m/%Y:%H:%M:%S"))
      except (ValueError, OverflowError):
          return None


  def _matching_line_numbers(log_path, start_stamp, end_stamp):
      """Return the 1-based numbers of log lines timed within the range.

      Both bounds are inclusive. Lines without a parsable timestamp are
      skipped.
      """
      import io  # io.open accepts encoding/errors on both Python 2 and 3

      line_numbers = []
      # errors="replace": access logs may contain arbitrary request bytes,
      # which must not abort the scan with a decoding error.
      with io.open(log_path, "r", encoding="utf-8", errors="replace") as handle:
          for line_no, line in enumerate(handle, 1):
              stamp = _parse_log_time(line)
              if stamp is not None and start_stamp <= stamp <= end_stamp:
                  line_numbers.append(line_no)
      return line_numbers


  def main():
      """Interactively count access-log requests inside a time range.

      Prompts for a start time, an end time (both "YYYY-MM-DD HH:MM:SS") and
      the path of an nginx/apache access log, then prints the line number of
      every request whose timestamp lies in the range, followed by the total.

      Raises:
          SystemExit: with status 1 when a time is malformed or the log file
              does not exist.
      """
      try:
          read_line = raw_input  # Python 2
      except NameError:
          read_line = input  # Python 3

      error = '请输入正确的时间格式,例如 2016-08-03 10:00:00'

      start = read_line("请输入要查询的开始时间段如:2016-08-03 10:00:00")
      start_stamp = _parse_query_time(start)
      if start_stamp is None:
          print(error)
          sys.exit(1)

      end = read_line("请输入要查询的结束时间段如:2016-08-03 10:01:00")
      end_stamp = _parse_query_time(end)
      if end_stamp is None:
          print(error)
          sys.exit(1)
      print("time format is true !")

      log_abpath = read_line(
          "请输入log文件的绝对路径 例如:/var/log/apache2/access.log").strip()
      # isfile rather than exists: a directory cannot be read as a log.
      if not os.path.isfile(log_abpath):
          print("对不起该目录文件不存在!!")
          sys.exit(1)
      print("文件存在!!")

      line_numbers = _matching_line_numbers(log_abpath, start_stamp, end_stamp)
      for line_no in line_numbers:
          print("该时间段内的请求日志出现在," + str(line_no) + "行")

      count = len(line_numbers)
      if count != 0:
          print("该段时间内一共出现了%d次请求" % count)
      else:
          print("对不起,我已经努力帮你找了,可是没有找到该段时间的日志信息")

  # Run the interactive query only when executed as a script, not on import.
  if __name__ == '__main__':
      main()
  




页: [1]
查看完整版本: python 实现nginx/apache 日志格式的统计脚本