buhong 发表于 2018-8-15 08:15:14

Nginx服务整理 日志分析(shell+python)

案例一  
ip - - "GET / HTTP/1.1" 302 0 "-" "PycURL/7.19.7"
  

  
log_format access '$HTTP_X_REAL_IP - $remote_user [$time_local] "$request"'
  
'$status $body_bytes_sent "$http_referer" '
  
'"$http_user_agent" $HTTP_X_Forwarded_For';
  

  
192.168.21.1 - - "GET /2.php HTTP/1.1" 200 133 "-" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1707.0 Safari/537.36" "-"192.168.21.128 200 127.0.0.1:9000 0.119 0.119
  

  
#log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
  
#                  '$status $body_bytes_sent "$http_referer" '
  
#                  '"$http_user_agent" "$http_x_forwarded_for"';
  

  
$http_host: 用户在浏览器中输入的URL(IP或者域名)地址,例如 192.168.21.128
  
$upstream_status: upstream状态    200
  
$upstream_addr: 后端upstream地址及端口127.0.0.1:9000
  
$request_time: 页面访问总时间0.119
  
$upstream_response_time:页面访问中upstream响应时间   0.119
  

  
以下为 awk 按空白分隔标准日志行时,各字段与 nginx 变量的对应关系:
  
$10  $body_bytes_sent
  
$1   $remote_addr
  
$7   $request(其中的 URL 部分)
  
$11  $http_referer
  
$9   $status
  
$6   $http_user_agent(仅在以双引号为分隔符,即 awk -F'"' 时成立)
  

  
1、总访问量
  
2、总带宽
  
3、独立访客量
  
4、访问IP统计
  
5、访问url统计
  
6、来源统计
  
7、404统计
  
8、搜索引擎访问统计(谷歌,百度)
  
9、搜索引擎来源统计(谷歌,百度)
  

  
#!/bin/bash
#
# Daily Nginx access-log report.
#
# Reads one access log written in the standard "combined" layout
#   $remote_addr - $remote_user [$time_local] "$request" $status
#   $body_bytes_sent "$http_referer" "$http_user_agent"
# and mails a summary: total hits, bandwidth, unique visitors, top client
# IPs, top URLs, external referrers, 404 URLs, crawler hits and
# search-engine referrals.
#
# Field positions relied on below:
#   whitespace-separated : $1 client IP, $7 URL, $9 status, $10 bytes sent
#   '"'-separated        : $4 referrer, $6 user agent
#
# Configuration: edit log_path, domain and email below.
# Requires: awk, sort, head, GNU date (for -d "yesterday"), mail.

# Deliberately no `set -e`: main reports failures through its return
# status, which becomes the exit status of the script.
set -u

log_path=/home/www.centos.bz/log/access.log.1
domain="centos.bz"
email="log@centos.bz"

#######################################
# Keep the 20 highest-count lines of "count key" input.
# LC_ALL=C makes the ordering of ties reproducible across locales.
#######################################
rank_top() {
  LC_ALL=C sort -rn | head -20
}

#######################################
# Print the number of requests (lines) in the log.
# Arguments: $1 - log file
#######################################
total_visits() {
  awk 'END { print NR }' "$1"
}

#######################################
# Print the sum of $body_bytes_sent, in MiB.
# Arguments: $1 - log file
#######################################
total_bandwidth_mb() {
  awk -v total=0 '{ total += $10 } END { print total / 1024 / 1024 }' "$1"
}

#######################################
# Print the number of distinct client IPs.
# Counts first sightings instead of calling the gawk-only asort().
# Arguments: $1 - log file
#######################################
unique_visitors() {
  awk '!seen[$1]++ { n++ } END { print n + 0 }' "$1"
}

#######################################
# Print "hits ip" for the 20 busiest client IPs.
# Arguments: $1 - log file
#######################################
top_ips() {
  awk '{ hits[$1]++ } END { for (k in hits) print hits[k], k }' "$1" | rank_top
}

#######################################
# Print "hits url" for the 20 most requested URLs.
# Arguments: $1 - log file
#######################################
top_urls() {
  awk '{ hits[$7]++ } END { for (k in hits) print hits[k], k }' "$1" | rank_top
}

#######################################
# Print "hits referrer" for the 20 most common referrers that do not
# belong to our own site. A referrer is internal when its host equals the
# domain or is a sub-domain of it (http or https). The domain is compared
# as a plain string, so its dots are not regex wildcards. Requests without
# a referrer ("-") are still listed, as direct traffic.
# Arguments: $1 - log file
#            $2 - own domain, e.g. centos.bz
#######################################
external_referers() {
  awk -F'"' -v domain="$2" '
    BEGIN { domain = tolower(domain) }
    {
      host = ""
      if ($4 ~ /^https?:\/\//) {
        host = tolower($4)
        sub(/^https?:\/\//, "", host)
        sub(/\/.*/, "", host)   # drop the path
        sub(/:.*/, "", host)    # drop the port
      }
      tail = substr(host, length(host) - length(domain))
      internal = (host == domain) || (tail == "." domain)
      if (!internal) hits[$4]++
    }
    END { for (k in hits) print hits[k], k }
  ' "$1" | rank_top
}

#######################################
# Print "hits url" for the 20 URLs that most often returned 404.
# Arguments: $1 - log file
#######################################
not_found_urls() {
  awk '$9 == 404 { hits[$7]++ } END { for (k in hits) print hits[k], k }' "$1" \
    | rank_top
}

#######################################
# Print "name hits" for each crawler (Baidu, Google) that visited.
# Crawlers with no hits are omitted; output order is fixed.
# Arguments: $1 - log file
#######################################
spider_hits() {
  awk -F'"' '
    $6 ~ /Baiduspider/ { baidu++ }
    $6 ~ /Googlebot/   { google++ }
    END {
      if (baidu)  print "baiduspider", baidu
      if (google) print "googlebot", google
    }
  ' "$1"
}

#######################################
# Print "name hits" for visits referred by Baidu or Google search.
# Both http and https referrers are recognised.
# Arguments: $1 - log file
#######################################
search_referrals() {
  awk -F'"' '
    $4 ~ /https?:\/\/www\.baidu\.com/  { baidu++ }
    $4 ~ /https?:\/\/www\.google\.com/ { google++ }
    END {
      if (baidu)  print "baidu_search", baidu
      if (google) print "google_search", google
    }
  ' "$1"
}

#######################################
# Write the complete plain-text report to stdout.
# Arguments: $1 - log file
#            $2 - own domain
#######################################
build_report() {
  local log=$1 site=$2
  local maketime
  maketime=$(date '+%Y-%m-%d %H:%M')

  printf '概况\n报告生成时间:%s\n总访问量:%s\n总带宽:%sM\n独立访客:%s\n\n' \
    "$maketime" "$(total_visits "$log")" "$(total_bandwidth_mb "$log")" \
    "$(unique_visitors "$log")"
  printf '访问IP统计\n%s\n\n' "$(top_ips "$log")"
  printf '访问url统计\n%s\n\n' "$(top_urls "$log")"
  printf '来源页面统计\n%s\n\n' "$(external_referers "$log" "$site")"
  printf '404统计\n%s\n\n' "$(not_found_urls "$log")"
  printf '蜘蛛统计\n%s\n\n' "$(spider_hits "$log")"
  printf '搜索引擎来源统计\n%s\n' "$(search_referrals "$log")"
}

#######################################
# Build the report for log_path and mail it to email.
# Globals: log_path, domain, email (read)
# Returns: 0 when the mail command accepted the report, non-zero otherwise
#######################################
main() {
  if [[ ! -r "$log_path" ]]; then
    printf 'error: cannot read log file: %s\n' "$log_path" >&2
    return 1
  fi

  # The rotated log (access.log.1) holds yesterday's traffic.
  local logdate
  logdate=$(date -d "yesterday" +%Y-%m-%d) || return

  if ! command -v mail >/dev/null 2>&1; then
    # Do not lose the report just because no mailer is installed.
    printf 'warning: mail not found; printing report to stdout\n' >&2
    build_report "$log_path" "$domain"
    return 1
  fi

  build_report "$log_path" "$domain" \
    | mail -s "$domain $logdate log statistics" "$email"
}

main
页: [1]
查看完整版本: Nginx服务整理 日志分析(shell+python)