设为首页 收藏本站
查看: 616|回复: 0

[经验分享] python分析nginx日志并推送到open-falcon

[复制链接]

尚未签到

发表于 2018-11-13 11:14:07 | 显示全部楼层 |阅读模式
不解释,直接撸代码  

  
#!/usr/bin/python
  
# --*-- coding: utf-8 --*--
  
# NGINX日志分析
  

  
import time
  
import datetime
  
import sys
  
import os
  
import os.path
  
import re
  
import json
  
import socket
  
import requests
  
import subprocess
  

  
class NginxLog(object):
  """初始化数据"""
  def __init__(self, log_file, interface_list, seek_file):
  self.log_file = log_file
  self.interface_list = interface_list
  self.seek_file = seek_file
  

  def jsonFormat(self, python_data):
  """格式化成json格式"""
  json_data = json.dumps(python_data, indent=2)
  return json_data
  

  def hostname(self):
  """host_name: 主机名
  host_ip: 主机ip
  """
  host_name = socket.getfqdn(socket.gethostname( ))
  host_ip = socket.gethostbyname(host_name)
  return host_name
  

  def writeSeek(self, seek):
  """读过的文件游标写入临时文件"""
  with open(self.seek_file,'w') as f:
  f.write(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())) + '\n')
  f.write(str(seek) + "\n")
  

  def LogRead(self):
  """读出新生成的日志
  # 如果第一次运行,或是删除临时文件,从头运行,否则,从上次读取之后运行
  # 0代表从头开始,1代表当前位置,2代表文件最末尾位置
  """
  if os.path.exists(self.seek_file):
  with open(self.seek_file) as f:
  seek_tmp = f.readlines()
  seek_old = int(seek_tmp[1].strip())
  else:
  seek_old = 0
  with open(self.log_file) as f:
  #记录当前最新文件游标
  f.seek(0,2)#最新游标位置
  seek_now = f.tell()
  #读取上次读完之后的日志
  if seek_now >= seek_old:
  f.seek(seek_old,0)#从文件开头为止偏移
  chunk = f.read(seek_now - seek_old)
  #如果seek_now-seek_old小于0说明日志轮训
  else:
  f.seek(0,0)
  chunk = f.read(seek_now)
  # 将这次的游标写入临时文件
  self.writeSeek(seek_now)
  return chunk
  

  def LogStatistics(self):
  """分析NGINX日志的正则表达示,如果日志格式更改,则需要相应作更改
  """
  

  #log_examp = '127.0.0.1 - - [02/Mar/2018:11:01:09 +0800] "HEAD /aaa HTTP/1.1" 404 0 "-" "curl/7.29.0"'
  #输出结果
  #100.64.40.7 - - [05/Mar/2018:09:15:19 +0800] "GET /wpsad.php HTTP/1.0" 200 0 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36 LBBROWSER" "114.255.44.143" 0.369
  result_list = []
  time_ns =  datetime.datetime.now().microsecond
  time_stamp = int(str(time.time())[0:10])
  host_name = self.hostname()
  # url列表循环
  for interface_item in self.interface_list:
  interface_item_dict_count = {}
  interface_item_dict_avg_request_time = {}
  interface_item_dict_2xx = {}
  interface_item_dict_4xx = {}
  interface_item_dict_5xx = {}
  interface_item_dict_count['ns']=interface_item_dict_avg_request_time['ns']=interface_item_dict_2xx['ns']=interface_item_dict_4xx['ns']=interface_item_dict_5xx['ns']=time_ns
  interface_item_dict_count['clock']=interface_item_dict_avg_request_time['clock']=interface_item_dict_2xx['clock']=interface_item_dict_4xx['clock']=interface_item_dict_5xx['clock']=time_stamp
  interface_item_dict_count['host']=interface_item_dict_avg_request_time['host']=interface_item_dict_2xx['host']=interface_item_dict_4xx['host']=interface_item_dict_5xx['host']=host_name
  interface_item_dict_count['key'] = interface_item + '_count'
  interface_item_dict_count['value'] = 0
  interface_item_dict_avg_request_time['key'] = interface_item + '_avg_request_time'
  interface_item_dict_avg_request_time['value'] = 0
  interface_item_dict_2xx['key'] = interface_item + '_2xx'
  interface_item_dict_2xx['value'] = 0
  interface_item_dict_4xx['key'] = interface_item + '_4xx'
  interface_item_dict_4xx['value'] = 0
  interface_item_dict_5xx['key'] = interface_item + '_5xx'
  interface_item_dict_5xx['value'] = 0
  hit_url_count = 0
  

  ##实时输出日志
  for line in self.LogRead().split('\n'):
  #print(line.split()[0])
  if line != None and len(line.split()) != 0:
  #匹配字段
  remote_addr = line.split()[0]
  #切割请求的url
  request_url = line.split()[6]
  status_code = line.split()[8]
  request_time = line.split()[-1]#保留等会用
  # 匹配之后数据结构操作
  if interface_item == request_url:
  hit_url_count += 1
  interface_item_dict_count['value'] += 1
  #响应时间
  interface_item_dict_avg_request_time['value'] += float(request_time)
  if status_code.strip('\"').startswith('2'):
  interface_item_dict_2xx['value'] += 1
  if status_code.strip('\"').startswith('4'):
  interface_item_dict_4xx['value'] += 1
  if status_code.strip('\"').startswith('5'):
  interface_item_dict_5xx['value'] += 1
  # 求平均请求反应时间
  if interface_item_dict_avg_request_time['value'] != 0:
  interface_item_dict_avg_request_time['value'] = interface_item_dict_avg_request_time['value'] / hit_url_count
  # 结果加入列表
  result_list.append(interface_item_dict_count)
  result_list.append(interface_item_dict_avg_request_time)
  result_list.append(interface_item_dict_2xx)
  result_list.append(interface_item_dict_4xx)
  result_list.append(interface_item_dict_5xx)
  return result_list
  #return self.jsonFormat(result_list)
  

  

  
def pushFalcon(return_data):
  """数据推到openfalcon"""
  all_data = []
  all_request_dic = {}
  wpsad_2xx_request_dic = {}
  wpsad_4xx_request_dic = {}
  wpsad_5xx_request_dic = {}
  wpsad_response_time_dic = {}
  for i in return_data:
  if 'wpsad.php_count' in i['key']:
  all_request_dic['value'] = i['value']
  all_request_dic['key'] = 'wpsad.php_count'
  all_request_dic['host_name'] = i['host']
  all_data.append(all_request_dic)
  if 'wpsad.php_2xx' in i['key']:
  wpsad_2xx_request_dic['value'] = i['value']
  wpsad_2xx_request_dic['key'] = 'wpsad.php_2xx_count'
  wpsad_2xx_request_dic['host_name'] = i['host']
  all_data.append(wpsad_2xx_request_dic)
  if 'wpsad.php_4xx' in i['key']:
  wpsad_4xx_request_dic['value'] = i['value']
  wpsad_4xx_request_dic['key'] = 'wpsad.php_4xx_count'
  wpsad_4xx_request_dic['host_name'] = i['host']
  all_data.append(wpsad_4xx_request_dic)
  if 'wpsad.php_5xx' in i['key']:
  wpsad_5xx_request_dic['value'] = i['value']
  wpsad_5xx_request_dic['key'] = 'wpsad.php_5xx_count'
  wpsad_5xx_request_dic['host_name'] = i['host']
  all_data.append(wpsad_5xx_request_dic)
  if 'wpsad.php_avg_request_time' in i['key']:
  wpsad_response_time_dic['key'] = 'wpsad_response_time'
  wpsad_response_time_dic['value'] = i['value']
  #all_data.append(wpsad_response_time_dic)
  # nginx请求状态数据
  ts = int(time.time())
  payload = []
  for i in all_data:
  temp_dic = {
  #"endpoint": i['host_name'],
  "endpoint": "vm172-31-32-13.ksc.com",
  "metric": i['key'],
  "timestamp": ts,
  "step": 60,
  "value": i['value'],
  "counterType": "GAUGE",
  "tags": "url="+i['key']
  }
  payload.append(temp_dic)
  #print(payload)
  

  # 响应时间数据
  response_time_dic = {
  "endpoint": "vm172-31-32-13.ksc.com",
  "metric": wpsad_response_time_dic['key'],
  "timestamp": ts,
  "step": 60,
  "value": wpsad_response_time_dic['value'],
  "counterType": "GAUGE",
  "tags": "",
  }
  payload.append(response_time_dic)
  # nginx并发请求数统计
  estab_data = {
  "endpoint": "vm172-31-32-13.ksc.com",
  "metric": "nginx_estab_num",
  "timestamp": ts,
  "step": 60,
  "value": 0,
  "counterType": "GAUGE",
  "tags": "",
  }
  time_wait = {
  "endpoint": "vm172-31-32-13.ksc.com",
  
                "metric": "nginx_timewait_num",
  
                "timestamp": ts,
  
                "step": 60,
  
                "value": 0,
  
                "counterType": "GAUGE",
  
                "tags": "",
  }
  # TIME_WAIT
  time_wait_cmd = "netstat -ant|grep -i '80'|grep 'TIME_WAIT'|wc -l"
  time_wait_p = subprocess.Popen(time_wait_cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  timewait_out = time_wait_p.stdout.read()
  timewait_err = time_wait_p.stderr.read()
  if not timewait_err:
  time_wait['value'] = int(timewait_out.strip())
  payload.append(time_wait)
  

  # ESTABLISHED
  estab_cmd = "netstat -ant|grep -i '80'|grep 'ESTABLISHED'|wc -l"
  estab_p = subprocess.Popen(estab_cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  estab_out = estab_p.stdout.read()
  estab_err = estab_p.stderr.read()
  if not estab_err:
  estab_data['value'] = int(estab_out.strip())
  payload.append(estab_data)
  # nginx进程占用的内存监控
  mem_dic = {
  "endpoint": "vm172-31-32-13.ksc.com",
  "metric": "nginx_mem",
  "timestamp":ts,
  "step": 60,
  "value": 0,
  "counterType": "GAUGE",
  "tags": "",
  }
  #mem_cmd = "top -b -n1|grep nginx|gawk '{if($6~/m$/) {sum+=$6*1024} else {sum+=$6} }; END {print int(sum/1024)}'"
  #mem_p = subprocess.Popen(time_wait_cmd, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  #mem_out = mem_p.stdout.read()
  #print(mem_out)
  #mem_err = mem_p.stderr.read()
  #if not mem_err:
  #mem_dic['value'] = int(mem_out.strip())
  #print(mem_dic['value'])
  #
  

  nginxpid = subprocess.Popen(["pidof", "nginx"], stdout=subprocess.PIPE)
  nginxpid = nginxpid.stdout.read().split()
  memsum = 0
  for i in nginxpid:
  pidfile = os.path.join("/proc/", str(i), "status")
  with open(pidfile) as f:
  for mem in f:
  if mem.startswith("VmRSS"):
  pidmem = int(mem.split()[1])
  memsum += pidmem
  memsum = int(memsum)//1024
  #print("%d %s" %(memsum,"M"))
  mem_dic['value'] = memsum
  payload.append(mem_dic)
  # 推送到falcon-agent
  #print(payload)
  r = requests.post("http://127.0.0.1:1988/v1/push", data=json.dumps(payload))
  
def main():
  # 需要分析的url列表
  interface_list = ['/wpsad.php']
  

  # 日志文件位置
  log_file = "/data/logs/nginx/ads.access.log"
  # 临时文件位置
  seek_file = "/data/logs/nginx/ads_log_check_seek.tmp"
  nginx_log = NginxLog(log_file, interface_list, seek_file)
  return_data = nginx_log.LogStatistics()
  #print return_json_data
  pushFalcon(return_data)
  

  
if __name__ == '__main__':
  main()



运维网声明 1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com

所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其承担任何法律责任,如涉及侵犯版权等问题,请您及时通知我们,我们将立即处理,联系人Email:kefu@iyunv.com,QQ:1061981298 本贴地址:https://www.yunweiku.com/thread-634519-1-1.html 上篇帖子: 12.1 LNMP架构介绍12.2 MySQL安装12.3/12.4 PHP安装12.5 Nginx-martinLIU的博客 下篇帖子: nginx与lvs在做负载均衡方面的区别
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

扫码加入运维网微信交流群X

扫码加入运维网微信交流群

扫描二维码加入运维网微信交流群,最新一手资源尽在官方微信交流群!快快加入我们吧...

扫描微信二维码查看详情

客服E-mail:kefu@iyunv.com 客服QQ:1061981298


QQ群⑦:运维网交流群⑦ QQ群⑧:运维网交流群⑧ k8s群:运维网kubernetes交流群


提醒:禁止发布任何违反国家法律、法规的言论与图片等内容;本站内容均来自个人观点与网络等信息,非本站认同之观点.


本站大部分资源是网友从网上搜集分享而来,其版权均归原作者及其网站所有,我们尊重他人的合法权益,如有内容侵犯您的合法权益,请及时与我们联系进行核实删除!



合作伙伴: 青云cloud

快速回复 返回顶部 返回列表