#!/bin/bash
#
# Init script for the smokeping daemon.
#
# Usage: smokeping (start|stop|restart|status|strace_debug|help) ...
#   Several actions may be given; they are processed left to right and the
#   daemon state is re-read from the PID file before each one.
#   With no argument (or an unknown one) the usage screen is printed.
#
# Exit status: 0 on success, otherwise the code of the last failing action:
#   1  start failed                2  start requested while already running
#   3  stop failed                 4  stop requested while not running
#   5  restart failed              6  strace_debug failed (daemon was running)
#   7  strace_debug failed (daemon was not running)

PIDFILE=/usr/local/smokeping/var/smokeping.pid
SMOKEPING=/usr/local/smokeping/bin/smokeping
# NOTE(review): fixed, predictable path in /tmp that is removed and rewritten
# by strace_debug; kept because the user-facing messages advertise it.
STRACE_LOG=/tmp/strace_smokeping
ERROR=0
RUNNING=0
PID=

# Refresh RUNNING (0/1) and PID from the PID file.
# A PID file whose process cannot be signalled is treated as stale and removed.
refresh_state() {
  RUNNING=0
  PID=
  [[ -f "$PIDFILE" ]] || return 0

  PID=$(cat -- "$PIDFILE")
  PID=${PID//[[:space:]]/}

  # Only hand a positive integer to kill: "0" or a negative number would
  # address a whole process group instead of a single process.
  if [[ "$PID" =~ ^[1-9][0-9]*$ ]] && kill -0 "$PID" 2>/dev/null; then
    RUNNING=1
  else
    rm -f -- "$PIDFILE"
  fi
}

# Print the usage screen on stdout.
usage() {
  echo "usage: $0 (start|stop|restart|status|strace_debug|help)"
  cat <<EOF
start        - start smokeping
stop         - stop smokeping
restart      - restart smokeping if running or start if not running
status       - show status if smokeping is running or not
strace_debug - (re)start smokeping under strace, trace in $STRACE_LOG
help         - this screen
EOF
}

# Start smokeping unless it is already running.
# Arguments: $1 - action name used as the message prefix
# Globals:   RUNNING, PID (read), ERROR (written on failure)
do_start() {
  local label=$1
  if (( RUNNING )); then
    echo "$0 $label: smokeping is running with PID $PID"
    ERROR=2
  elif "$SMOKEPING" >/dev/null; then
    echo "$0 $label: smokeping started"
  else
    echo "$0 $label: smokeping could not be started"
    ERROR=1
  fi
}

# No argument at all means "help".
if [[ $# -eq 0 ]]; then
  set -- help
fi

for ARG in "$@"; do
  refresh_state

  case "$ARG" in
    start)
      do_start "$ARG"
      ;;
    stop)
      if (( ! RUNNING )); then
        echo "$0 $ARG: smokeping not running"
        ERROR=4
      elif kill "$PID"; then
        echo "$0 $ARG: smokeping ($PID) stopped"
        # -f: the daemon may already have removed its own PID file.
        rm -f -- "$PIDFILE"
      else
        echo "$0 $ARG: smokeping could not be stopped"
        ERROR=3
      fi
      ;;
    restart)
      if (( RUNNING )); then
        if "$SMOKEPING" --restart >/dev/null; then
          echo "$0 $ARG: smokeping restarted"
        else
          echo "$0 $ARG: smokeping could not be started"
          ERROR=5
        fi
      else
        # Not running: behave exactly like "start" (messages carry the
        # "start" prefix) and let a failure show up in the exit status.
        do_start start
      fi
      ;;
    strace_debug)
      rm -f -- "$STRACE_LOG"
      if (( RUNNING )); then
        if strace -o "$STRACE_LOG" "$SMOKEPING" --restart >/dev/null; then
          echo "$0 $ARG: smokeping restarted with strace debug in $STRACE_LOG"
        else
          echo "$0 $ARG: smokeping strace debug could not be started"
          ERROR=6
        fi
      else
        if strace -o "$STRACE_LOG" "$SMOKEPING" >/dev/null; then
          echo "$0 $ARG: smokeping started with strace debug in $STRACE_LOG"
        else
          echo "$0 $ARG: smokeping strace debug could not be started"
          ERROR=7
        fi
      fi
      ;;
    status)
      if (( RUNNING )); then
        echo "$0 $ARG: smokeping is running with PID ($PID)"
      else
        echo "$0 $ARG: smokeping is not running"
      fi
      ;;
    *)
      # "help" and any unrecognised action end up here.
      usage
      ;;
  esac
done

exit "$ERROR"
[iyunv@smokeping ~]# chmod +x /etc/init.d/smokeping
[iyunv@smokeping ~]# ll /etc/init.d/smokeping
-rwxr-xr-x 1 root root 2109 Jun 16 16:52 /etc/init.d/smokeping
(2)检查smokeping配置文件:
[iyunv@smokeping ~]# /usr/local/smokeping/bin/smokeping --check
Configuration file '/usr/local/smokeping/etc/config' syntax OK.
(3)启动smokeping:(无法启动)
[iyunv@smokeping ~]# service smokeping start
ERROR: slave 'boomer' is not defined in the '*** Slaves ***' section!
/etc/init.d/smokeping start: smokeping could not be started
出现错误 → 将在后面解决…
(4)配置apache /conf/httpd.conf
[iyunv@smokeping ~]# vim /etc/httpd/conf/httpd.conf
Alias /smokeping/ "/usr/local/smokeping/htdocs/"
Alias /cache/ "/usr/local/smokeping/cache/"
<Directory "/usr/local/smokeping">
Options FollowSymLinks ExecCGI
AllowOverride None
AddHandler cgi-script cgi
Order allow,deny
Allow from all
</Directory>
[iyunv@smokeping ~]# service httpd start
Starting httpd: httpd: apr_sockaddr_info_get() failed for smokeping.www.iyunv.com
httpd: Could not reliably determine the server's fully qualified domain name, using 127.0.0.1 for ServerName
[ OK ]
[iyunv@smokeping ~]#
(5)测试。修改完后重启apache,在浏览器输入http://192.168.101.81/smokeping/smokeping.cgi
3.5 错误排查与测试:
出现错误 → 将在后面解决…
[iyunv@smokeping ~]# vim /usr/local/smokeping/etc/config
+ FPing
binary = /usr/local/sbin/fping
sourceaddress = 0.0.0.0
*** Slaves ***
secrets=/usr/local/smokeping/etc/smokeping_secrets.dist
+192.168.101.82
display_name=192.168.101.82
location=junjie
color=0000ff
++override
Probes.FPing.binary = /usr/local/sbin/fping
Probes.FPing.sourceaddress = 192.168.101.82
[iyunv@smokeping ~]# service smokeping restart
ERROR: /usr/local/smokeping/etc/config, line 111: File '/usr/local/smokeping/etc/smokeping_secrets.dist' is world-readable or writable, refusing it
/etc/init.d/smokeping start: smokeping could not be started
[iyunv@smokeping src]# ll /usr/local/smokeping/etc/smokeping_secrets.dist
-rw-r--r-- 1 apache apache 59 Aug 21 2007 /usr/local/smokeping/etc/smokeping_secrets.dist
[iyunv@smokeping src]# chmod 400 /usr/local/smokeping/etc/smokeping_secrets.dist
[iyunv@smokeping src]# ll /usr/local/smokeping/etc/smokeping_secrets.dist
-r-------- 1 apache apache 59 Aug 21 2007 /usr/local/smokeping/etc/smokeping_secrets.dist
[iyunv@smokeping src]#
[iyunv@smokeping ~]# service smokeping restart
WARNING: Hostname 'james.address' does currently not resolve to an IPv6 or IPv4 address
ERROR: slave 'boomer' is not defined in the '*** Slaves ***' section!
/etc/init.d/smokeping start: smokeping could not be started
*** Slaves ***
secrets=/usr/local/smokeping/etc/smokeping_secrets.dist
+boomer
display_name=boomer
location=junjie
color=00ffff
+192.168.101.82
display_name=192.168.101.82
location=junjie
color=0000ff
++override
Probes.FPing.binary = /usr/local/sbin/fping
Probes.FPing.sourceaddress = 192.168.101.82
+slave2
display_name=another
color=00ff00
[iyunv@smokeping ~]# service smokeping restart
/etc/init.d/smokeping start: smokeping started
[iyunv@smokeping ~]#
[iyunv@smokeping ~]# service httpd restart
Stopping httpd: [ OK ]
Starting httpd: httpd: apr_sockaddr_info_get() failed for smokeping.www.iyunv.com
httpd: Could not reliably determine the server's fully qualified domain name, using 127.0.0.1 for ServerName
[ OK ]
出现错误 → 成功解决
四. Smokeping 的其它配置
4.1 监控节点的增加
[iyunv@smokeping ~]# vim /usr/local/smokeping/etc/config
+ xjzhujunjie
menu = xjzhujunjie
title = xjzhujunjie-server
################web server###############
++ junjie-web
menu = junjie-web
title = xjzhujunjie 网站 192.168.101.82
host = 192.168.101.82
[iyunv@smokeping ~]# service smokeping stop
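/etc/init.d/smokeping: line 14: gt: command not found
/etc/init.d/smokeping: line 14: /dev/null: Permission denied
/etc/init.d/smokeping stop: smokeping not running
(注:上面两行报错很可能是因为 init 脚本第14行的重定向 2>/dev/null 在复制粘贴时被 HTML 转义成了 2&gt;/dev/null,导致运行状态检测失败:脚本误判 smokeping 未运行并删除了 PID 文件,stop 实际上并没有停止进程。请把脚本中的 &gt; 改回 > 后再执行 stop/start。)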
[iyunv@smokeping ~]# service smokeping start
WARNING: Hostname 'james.address' does currently not resolve to an IPv6 or IPv4 address
/etc/init.d/smokeping start: smokeping started
4.2 报警设置
smokeping 的 alert 设置有点复杂,但是却很好用,设置很灵活,考虑得很周全。它可以使用邮件进行报警,也可以直接调用外部程序进行 IM 报警。在我们的监控中主要采用邮件报警。报警参数设置如下;哪个节点需要报警,就在该节点的配置中增加 alerts = manyloss 即可。
[iyunv@smokeping ~]# vim /usr/local/smokeping/etc/config
*** Alerts ***
to = xjzhujunjie@www.yunvn.com
from = xjzhujunjie@www.yunvn.com
+someloss
type = loss
pattern = >0%,*30*,>0%,*30*,>0% # in percent
comment = loss 1 packages in 30 continuous 3 times.
+manyloss
type = loss
pattern = >15%,*30*,>15%,*30*,>15% # in percent
comment = loss 5 packages in 30 continuous 3 times.
+rttbad
type = rtt
pattern = ==S,>50,>50 # in milliseconds
comment = For more than two consecutive 50-millisecond delay.
[iyunv@smokeping ~]# service smokeping stop
/etc/init.d/smokeping: line 14: gt: command not found
/etc/init.d/smokeping: line 14: /dev/null: Permission denied
/etc/init.d/smokeping stop: smokeping not running
[iyunv@smokeping ~]# service smokeping start
WARNING: Hostname 'james.address' does currently not resolve to an IPv6 or IPv4 address
/etc/init.d/smokeping start: smokeping started
(1)to 表示接受所有报警的邮箱,如果需要在特定的节点报警发送到特定的邮箱则在该节点上增加alertee = xjzhujunjie@www.yunvn.com 即可。
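(2)manyloss 的 pattern 为 >15%,*30*,>15%,*30*,>15%:其中 *30* 表示“最多30个任意采样值”,因此含义是丢包率超过15%的采样先后出现3次(相邻两次之间最多间隔30个采样)就发送报警。
(3)someloss 与 manyloss 结构相同,只是阈值为 >0%,即出现丢包的采样先后出现3次(相邻两次之间最多间隔30个采样)就发送报警;rttbad 表示连续两次采样的延迟都超过50ms就发送报警(pattern 开头的 ==S 在官方示例中用于匹配 smokeping 刚启动时的状态,使用前请对照 smokeping_config 文档确认)。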
4.3 画图设置
Smokeping默认设置中是每5分钟画一次图,每5分钟发送20个ping包。网络工程师认为5分钟发送20个ping包太少,建议改为5分钟100个。画图的颜色等也要进行相应的更改: 在Database中,step =300 pings =20 改为 step = 300 pings =100
从图上可以看到,slave主机会自己去检查监测点的情况(loss and rtt),并将数值提交给master主机(通过smokeping.cgi)。值得注意的是,slave并不需要config文件,每次slave提交完数据以后,会询问master它自己的配置文件是否有修改,如果有修改的话slave会进行更新。
其它关于 smokeping 的使用,待续……