|
我的博客已迁移到xdoujiang.com,请去那边和我交流。
晚上某个时段收到某台服务器load很高的告警,但再登录服务器查看的时候,load可能已经下降了,
所以使用shell脚本来解决这个问题:监控频率为每30秒1次,当系统负载超过一定数值(本脚本中为CPU核数)时,就把占用CPU的进程记录到文本里。
cat load.sh
#!/bin/bash
#--------------------------------------------------
#Author:jimmygong
#Email:jimmygong@taomee.com
#FileName:load.sh
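#Function:Sample the load average every 30 seconds and log CPU-consuming processes whenever the 1-minute load exceeds the CPU count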
#Version:1.0
#Created:2015-06-02
#--------------------------------------------------
# Seconds to wait between two load samples.
sleeptime=30
# Directory that receives the load.<timestamp> snapshot files.
logpath="/opt/scripts"
# -p creates missing parents and does not fail if the directory appears
# between the test and the mkdir.
[[ -d "$logpath" ]] || mkdir -p "$logpath"
#######################################
# Print a green "[ Ok ]" status line on stdout.
# Arguments: the message words; they are joined into one string via "$*".
# Outputs:   ESC[32m, " [ Ok ] <message> ", ESC[0m and a trailing newline.
#######################################
echosucc ()
{
  local succstatus="[ Ok ]"
  # The message is passed as an argument, never as part of the format
  # string, so '%' or backslashes in it are printed literally.
  printf '\033[32m %s %s \033[0m\n' "$succstatus" "$*"
}
# Print the one-line invocation synopsis (script name plus the accepted
# commands) on stdout.
usage() {
  printf '%s\n' "Usage: $0 {start|stop}"
}
# Exactly one command-line argument is required; anything else prints the
# usage line and ends the script with status 1.
(( $# == 1 )) || {
  usage
  exit 1
}
#######################################
# Announce start-up, then loop forever: every $sleeptime seconds compare
# the 1-minute load average with the number of CPUs and, when the load is
# higher, append a snapshot of all processes using CPU to
# $logpath/load.<YYYYmmddHHMM>.
# Globals:   sleeptime (read), logpath (read)
# Arguments: none
# Outputs:   status line on stdout via echosucc; snapshot files in $logpath
# Returns:   never returns on its own (infinite loop)
#######################################
function start ()
{
  local load cpunum result
  echosucc "Starting load monitor"
  # Counted once, outside the loop; the anchor keeps other lines that merely
  # contain the word "processor" from being counted.
  # NOTE(review): CPUs hot-plugged after start-up are not picked up.
  cpunum=$(grep -c '^processor' /proc/cpuinfo)
  while true
  do
    # First field of /proc/loadavg is the 1-minute load average.
    read -r load _ < /proc/loadavg
    # Floating-point comparison done in awk, so the check does not silently
    # stop working on hosts where bc is not installed.
    if awk -v load="$load" -v limit="$cpunum" 'BEGIN { exit !(load > limit) }'
    then
      # Processes with non-zero %CPU, busiest first.
      result=$(ps -eo pcpu,pmem,user,args | awk '$1 > 0' | sort -nr)
      if [[ -n "$result" ]]
      then
        # Two samples can fall into the same minute (30 s interval, minute
        # resolution file name): append under a timestamped header instead
        # of overwriting the earlier sample.
        {
          printf '# %s load=%s cpus=%s\n' \
            "$(date +'%Y-%m-%d %H:%M:%S')" "$load" "$cpunum"
          printf '%s\n' "$result"
        } >> "$logpath/load.$(date +'%Y%m%d%H%M')"
      fi
    fi
    sleep "$sleeptime"
  done
}
#######################################
# Send SIGTERM to every running load monitor, i.e. every process other than
# this one whose command line contains this script's file name immediately
# followed by the argument "start".
# Globals:   none
# Arguments: none
# Outputs:   status line on stdout via echosucc
#######################################
function stop ()
{
  local script_name=${0##*/}
  local pid
  # Matching on "<script> start" (exact words, not a regex on $0) keeps
  # unrelated processes that merely mention the script -- an editor, tail,
  # grep -- from being killed. The current process is excluded by comparing
  # against $$ directly, so no scratch pidfile is needed.
  while read -r pid
  do
    # The process may already be gone; that is not an error here.
    kill -s TERM "$pid" > /dev/null 2>&1
  done < <(ps -e -o pid= -o args= | awk -v self="$$" -v name="$script_name" '
    $1 != self {
      for (i = 2; i < NF; i++) {
        n = split($i, parts, "/")
        if (parts[n] == name && $(i + 1) == "start") {
          print $1
          break
        }
      }
    }')
  echosucc 'Stopping load monitor'
}
# Dispatch on the single command-line argument.
case "$1" in
  start)
    # The monitor loop runs in the background; the script itself then ends.
    start &
    ;;
  stop)
    stop
    ;;
  *)
    # Unknown command: report it as a failure, like a wrong argument count.
    usage
    exit 1
    ;;
esac
|
|
|