nagios,weblogic,8080,oracle,1521
在oracle服务所在主机上安装nagios的客户端,也就是nrpe和nagios-plugin:1.要添加nagios用户,并且把nagios添加到和Oracle用户相同的用户组中;
2.vi .bash_profile,把oracle用户的环境变量配置段复制过来,追加到该文件末尾;source .bash_profile使生效。
3./usr/local/nagios/libexec/check_oracle --tns servename测试是否返回ok
4.vi /usr/local/nagios/etc/nrpe.cfg 添加内容:
# Oracle checks exposed through NRPE. The name in brackets is the command
# name the Nagios server passes via "check_nrpe -c <name>".
command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle --tns sid
command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle --db sid
command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle --login sid
5. vi /usr/local/nagios/etc/objects/commands.cfg添加如下内容:
# 'check_nrpe' command definition
# Asks the NRPE daemon on the target host to run the command named in $ARG1$.
define command {
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}

# 'check_oracle_tns' command definition
# Runs check_oracle locally in --tns mode against the SID/host given in $ARG1$.
define command {
    command_name    check_oracle_tns
    command_line    $USER1$/check_oracle --tns $ARG1$
}

# 'check_oracle_db' command definition
# Runs check_oracle locally in --db mode for the SID given in $ARG1$.
define command {
    command_name    check_oracle_db
    command_line    $USER1$/check_oracle --db $ARG1$
}
# 'check_oracle_login' command definition
# Runs check_oracle locally in --login mode for the SID given in $ARG1$.
define command{
command_name check_oracle_login
command_line $USER1$/check_oracle --login $ARG1$
}
6.在# vi /usr/local/nagios/etc/services/192.168.1.XXX.cfg中,
# TNS listener check, executed on the database host through NRPE.
define service {
    use                     local-service       ; service template
    host_name               DB_XX.XXX
    service_description     check_oracle_tns
    check_command           check_nrpe!check_oracle_tns
    notifications_enabled   1
}

# Database process check, executed on the database host through NRPE.
define service {
    use                     local-service       ; service template
    host_name               DB_XX.XXX
    service_description     check_oracle_db
    check_command           check_nrpe!check_oracle_db
    notifications_enabled   1
}

# Dummy-login check, executed on the database host through NRPE.
define service {
    use                     local-service       ; service template
    host_name               DB_XX.XXX
    service_description     check_oracle_login
    check_command           check_nrpe!check_oracle_login
    notifications_enabled   1
}
7.如果在nagios的web管理页面中出现错误提示:Status Information:Cannot determine ORACLE_HOME for sid servername
请,在oracle所在主机上,用oracle用户启动nrpe:
$ ps aux|grep nrpe
oracle 24481 0.0 0.0 39960 1064 ? Ss 08:48 0:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
....
http://blog.iyunv.com/qingchn/article/details/7838145
最近在调整线上监控准备把Oracle加入到监控中去,然后看了下Nagios的监控oracle的插件check_oracle发现可以监控的项目还不少,因为有监控主机,所以需要远程监控oracle。check_oracle的插件帮助如下:
view plaincopy
http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf
[*]check_oracle --tns <Oracle Sid or Hostname/IP address>
[*]check_oracle --db <ORACLE_SID>
[*]check_oracle --login <ORACLE_SID>
[*]check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
[*]check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
[*]check_oracle --oranames <Hostname>
[*]check_oracle --help
[*]check_oracle --version
由于调整只需要监控oracle进程,所以整个监控比较简单。
1,加入oracle监控的主机commands
view plaincopy
http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf
# The name in brackets must match what the service passes to check_nrpe (check_nrpe!check_oracle).
command[check_oracle]=/usr/local/nagios/libexec/check_oracle --db lcartdg
2,重启nrpe
3,加入监控主机的service。由于之前已经添加了监控host,暂时不需要添加
view plaincopy
http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf
# Oracle process check for both database hosts, run remotely through NRPE.
define service{
    use                     generic-service
    host_name               luckcart_db01,luckcart_dbbak01
    service_description     Check_oracle
    check_command           check_nrpe!check_oracle
    max_check_attempts      3
    normal_check_interval   10
    retry_check_interval    5
    check_period            24x7
    notification_interval   3
    notification_period     24x7
    notification_options    w,u,c,r
    contact_groups          admins
}
4,重启nagios
view plaincopy
http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf
service nagios restart
5,监控web页面,稍等就看到了oracle监控项目。
http://blog.sina.com.cn/s/blog_5426e0180100df5z.html
环境:Oracle 10g
CentOS 4.6 i386
Nagios 3.06
一、在 Oracle 所在服务器上安装 NRPE
#useradd nagios
# wget http://nchc.dl.sourceforge.net/sourceforge/nagios/nrpe-2.12.tar.gz
# tar xvfz nrpe-2.12.tar.gz
# cd nrpe-2.12
# ./configure --prefix=/usr/local/nagios
# make all
# make install-plugin
# make install-daemon
# make install-daemon-config
# make install-xinetd
注意点:
1.由于 nagios 脚本需要读取 oracle 相关文件。所以运行 nagios 的用户需要定义为 oracle 服务用户。并且修改 /etc/xinetd.d/nrpe 中的配置。
# xinetd entry for the NRPE daemon. It runs as the oracle user so that the
# check scripts can read Oracle's files.
service nrpe
{
        flags           = REUSE
        socket_type     = stream
        port            = 5666
        wait            = no
        user            = oracle
        group           = nagios
        server          = /usr/local/nagios/bin/nrpe
        server_args     = -c /usr/local/nagios/etc/nrpe.cfg --inetd
        log_on_failure  += USERID
        disable         = no
        only_from       = 127.0.0.1 10.0.0.99
}
2.将nagios服务器上libexec目录中的check_oracle和utils.sh拷贝到oracle服务器的libexec目录中,并修改 check_oracle 脚本。将 $ORACLE_HOME 以及 $PATH 手动加入。
ORACLE_HOME=/home/oracle/OraHome_1
PATH=$PATH:$ORACLE_HOME/bin
二、 配置 nrpe 服务
修改 /usr/local/nagios/etc/nrpe.cfg 文件。加入以下内容:
#Check Oracle
# The bracketed names are the ones the Nagios server requests via check_nrpe!<name>.
# --tns, --db and --login take only the SID, so no credentials are passed to them.
command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle --tns sid
command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle --db sid
command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle --login sid
# --cache: <SID> <USER> <PASS> <CRITICAL> <WARNING>
command[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle --cache sid user password 80 90
# --tablespace: <SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle --tablespace sid user password USERS 90 80
具体参数写法请参考 check_oracle --help。
添加nrpe端口号:
vi /etc/services
添加这个
nrpe 5666/tcp # NRPE
配置完成后,重启 xinetd 服务。
# service xinetd restart
测试nrpe:
./check_nrpe -H 127.0.0.1
NRPE v2.12
说明nrpe安装成功。
三、配置 Nagios 服务端
1.安装 nrpe 脚本支持。—参考官方文档。
2.在nagios服务器端添加 nrpe 命令配置。修改 nagios/etc/objects/commands.cfg 文件:
# Asks the NRPE daemon on the target host to run the command named in $ARG1$.
define command{
    command_name    check_nrpe
    command_line    $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3.在nagios服务器端添加oracle主机配置文件,在 nagios/etc/objects 添加oracle 主机配置文件:oracle.cfg 。
# The Oracle server being monitored.
define host{
    use         linux-server
    host_name   oracle
    alias       Oracle 10g
    address     10.0.0.109
}

# Each service below runs one NRPE command on the host "oracle".
define service{
    use                     generic-service
    host_name               oracle
    service_description     TNS Check
    check_command           check_nrpe!check_oracle_tns
}

define service{
    use                     generic-service
    host_name               oracle
    service_description     DB Check
    check_command           check_nrpe!check_oracle_db
}

define service{
    use                     generic-service
    host_name               oracle
    service_description     Login Check
    check_command           check_nrpe!check_oracle_login
}

define service{
    use                     generic-service
    host_name               oracle
    service_description     Cache Check
    check_command           check_nrpe!check_oracle_cache
}

define service{
    use                     generic-service
    host_name               oracle
    service_description     Tablespace Check
    check_command           check_nrpe!check_oracle_tablespace
}
如图:
插不进来图片,失败。。。
报错了“CHECK_NRPE: Error - Could not complete SSL handshake.
”,原来nrpe还需要ssl的支持,用yum安装openssl即可。
安装完openssl全都绿了,ok!
#########################################
http://oxiaobai.blog.iyunv.com/3369332/747037
最近根据公司的需要,开始捣腾Nagios,作为一个开源的监视工具来说,实在是颇为好用的。
那o小白是从官网上下的Nagios3.3.1版本(basic 和 plugin),那自己安装了一下,本来是想把安装和配置的所有过程都写下来的,但是网上已经有了不少这样的教程,而且官方的安装文档也写得十分详尽,那o小白就不再重复一次了,但是作为一名dba,至少要把check_oracle插件的使用给写出来。
那下面的操作是建立在Nagios顺利安装,Nagios用户有Oracle Client,并且环境变量正确的情况下,换句话说就是sqlplus和tnsnames能顺利执行。(注意,环境变量设错可能会出现sqlplus: error while loading shared libraries: libsqlplus.so: cannot open shared object file: No such file or directory)
首先,check_oracle -h可以知道这个插件的使用方法:
Usage:
check_oracle --tns <Oracle Sid or Hostname/IP address>
check_oracle --db <ORACLE_SID>
check_oracle --login <ORACLE_SID>
check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
check_oracle --oranames <Hostname>
check_oracle --help
check_oracle --version
根据不同的参数提供的功能比较全的,那o小白这里就拿两个比较复杂的做例子:
--cache:查看library和buffer的命中率,根据CRITICAL和WARNING的阈值进行报警。
--tablespace:查看表空间的使用率,根据CRITICAL和WARNING的阈值进行报警。
首先编辑$NAGIOS_HOME/etc/objects/commands.cfg文件,添加两个条目:
# 'check_tablespace_oracle' command definition
# The password position is filled by the $USER4$ resource macro instead of a service argument.
define command {
    command_name    check_tablespace_oracle
    command_line    $USER1$/check_oracle --tablespace $ARG1$ $ARG2$ $USER4$ $ARG3$ $ARG4$ $ARG5$
}

# 'check_rate_oracle' command definition
# All five check_oracle --cache arguments come from the service definition.
define command {
    command_name    check_rate_oracle
    command_line    $USER1$/check_oracle --cache $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$
}
为libexec下的check_oracle命令定义,用以在之后的应用的配置文件中设定。仔细看可以看到这两个配置的方式有所不同,在第一个tablespace的配置中,原来的密码位置用了一个宏来替代,为什么要用这个宏呢?由于在Nagios的网络浏览中可以看到策略的具体形式,如果用密码原文的话会有安全问题,所以可以在$NAGIOS_HOME/etc/resource.cfg文件中定义用户的宏,可以通过这个方法避免敏感信息的泄露,也可以通过设置宏来设定一些系统的路径,$USER1$就是这个作用。在命令被最后解析的时候,所有的宏都会被替换,最多可以设置32个宏,resource.cfg中的内容如下:
$USER1$=/usr/local/nagios/libexec
# Store some usernames and passwords (hidden from the CGIs)
$USER4$=oracle
然后是应用的配置文件,这里o小白的是$NAGIOS_HOME/etc/objects/localhost.cfg,添加一个服务组定义,和两个服务:
服务组:#define service group
# Groups the Oracle checks together.
define servicegroup {
    servicegroup_name   oracle-service
    alias               Oracle Service
}
服务:#define a service to test check_oracle
# Tablespace check for SYSTEM. Arguments are split on "!" and map to $ARG1$..$ARG4$.
define service {
    use                     local-service       ; service template
    servicegroups           oracle-service
    host_name               localhost
    service_description     Oracle_System_Space
    check_command           check_tablespace_oracle!ora11g!cy!SYSTEM!90!80
}

# Cache hit-ratio check. Arguments are split on "!" and map to $ARG1$..$ARG5$.
define service {
    use                     local-service       ; service template
    servicegroups           oracle-service
    host_name               localhost
    service_description     Oracle_Buff_Rate
    check_command           check_rate_oracle!ora11g!cy!oracle!80!90
}
可以看到,服务定义中根据之前的commands.cfg中的命令定义来具体传递参数,方法是用!,之前由于tablespace已经设置了宏,那这里就不需要再输入密码了,当然直接输入密码也是可行的。
然后就可以重启Nagios服务,service nagios restart
打开网络浏览器,输入nagios的网址后(通常是ip/nagios),就可以看到结果了:
http://ylw6006.blog.iyunv.com/470441/787496
为了尽量避免这种问题,想到去写一个监控脚本配合nagios监控,当数据库连接异常的时候,可以第一时间收到报警短信;java程序连接数据库使用连接池,所以不一定会及时暴露出问题!
写一个脚本放任务计划中运行,定期去连接下数据库,查询下系统时间和数据库的状态,spool输出到临时文件上
[*]# crontab -l
*/5 * * * * /usr/local/nagios/libexec/connect_oracle.sh
[*]# cat /usr/local/nagios/libexec/connect_oracle.sh
#!/bin/sh
# functions: connect oracle server test
# author: lw.yang
# modify_date: 2012-02-22
#
# Probe meant to be run from cron: logs in with sqlplus and spools the current
# date and the database open mode into LOG_FILE, which check_oracle.sh reads.

LOG_FILE=/tmp/check_oracle.log

# Drop the previous result first, so the companion check never evaluates
# output left over from an earlier, successful run.
rm -f "$LOG_FILE"

export ORACLE_HOME=/u01/app/oracle/product/10.2.0/db_1/

# The heredoc is unquoted, so \$ keeps the shell from expanding v$database
# and $LOG_FILE is substituted before sqlplus sees the script.
"$ORACLE_HOME/bin/sqlplus" username/password@ip:1521/services_name <<EOF
set echo off
set feedback off
spool $LOG_FILE
alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
select sysdate from dual;
select name,open_mode from v\$database;
spool off
set echo on
set feedback on
EOF
再写一个脚本来根据临时文件来判断数据库是否正常,该脚本供nagios插件check_nrpe调用,之所以分两个脚本,中间使用临时文件,主要是出于权限问题的考虑,nagios本身带了一个check_oracle的插件,感觉不太适用,还需要在nagios服务器端安装oracle客户端,配置tnsnames.ora文件,设置oracle相关的环境变量等等…
[*]# cat /usr/local/nagios/libexec/check_oracle.sh
#!/bin/sh
# functions: use monitor oracle server status with nagios nrpe plugin
# author: lw.yang
# modify_date: 2012-02-22
#
# Evaluates the spool file written by connect_oracle.sh and reports the
# result in Nagios plugin form (one status line plus exit code).
#   exit 0 - exactly one line of the file mentions READ WRITE
#   exit 2 - file missing, or no such line

STATE_OK=0
STATE_CRITICAL=2
LOG_FILE=/tmp/check_oracle.log

if [ -f "$LOG_FILE" ]; then
    # Number of lines containing "READ WRITE" (case-insensitive).
    COUNT=$(grep -ci 'READ WRITE' "$LOG_FILE")
    if [ "$COUNT" -eq 1 ]; then
        echo "connect oracle server normal..."
        exit $STATE_OK
    else
        echo "database not open"
        exit $STATE_CRITICAL
    fi
else
    # No spool file means the probe produced no output at all.
    echo "can't connect to oracle server..."
    exit $STATE_CRITICAL
fi
监控效果!
# cat /tmp/check_oracle.log
SQL> alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
SQL> select sysdate from dual;
SYSDATE
-------------------
2012-02-23:10:10:03
SQL> select name,open_mode from v$database;
NAME OPEN_MODE
--------- ----------
EPROWB2B READ WRITE
SQL> spool off
http://blog.sina.com.cn/s/blog_66e484080100hp0b.html
不安装oracle客户端,使用sqlplus连接oracle
在oracle官网下载instant client
下载地址 http://www.oracle.com/technology/software/tech/oci/instantclient/htdocs/linuxsoft.html
版本 Version 11.1.0.7.0
Instant Client Package – Basic: All files required to run OCI, OCCI, and JDBC-OCI applications
instantclient-basic-linux32-11.1.0.7.zip
Instant Client Package – SQL*Plus: Additional libraries and executable for running SQL*Plus with Instant Client
instantclient-sqlplus-linux32-11.1.0.7.zip
解压,将目录中所有以lib开头的文件copy到/usr/lib下,将sqlplus拷贝到/usr/sbin下,并执行 # ldconfig 更新动态链接库缓存(ldconfig -p 只是打印当前缓存的内容,并不会重建缓存)。
测试,# sqlplus system/1234567890@192.168.1.4:1521/orcl
正常将显示
SQL*Plus: Release 10.2.0.4.0 – Production on Mon Aug 17 16:31:08 2009
Copyright (c) 1982, 2007, Oracle. All Rights Reserved.
监控脚本
check_oracle_instant监控脚本,用perl写的,在exchange.nagios.org站点可以找到。
#!/usr/bin/perl $host = $ARGV; $port = $ARGV; $sid = $ARGV; $user = $ARGV; $pass = $ARGV; sub trim($); my @result; my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2); my @param_array = ( , , , , , , ); # is possible define own selects my @results; sub trim($) { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string; } sub array_rows { my ($array_rows) = @_; my $rows = @$array_rows; return $rows; } sub logon { # open (SQL,"sqlplus -s system/mismatch@\\(DESCRIPTION=\\(ADDRESS=\\(PROTOCOL=TCP\\)\\(Host=$host\\)\\(Port=$port\\)\\)\\(CONNECT_DATA=\\(SID=$sid\\)\\)\\) ) { if ($res =~ /^(ORA-\d{5})/) { return $1; } } if (logon() eq "ORA-01017") { for (my $i=0; $i<< pagesize 0 set numformat 999.999 $param_array[$i] EOF> ) { # print trim($res)."\n"; if ( $res =~/^\s*\S+/ ) { push(@results,trim($res)); } } } for ($i=0;$i<@results;$i++) { print $i." hodnota je ".$result[$i]." a ma byt ".$param_array[$i]; eval "unless (".$results[$i].$param_array[$i].$param_array[$i].") { print\"".$param_array[$i]." ".$sid." KO \\n\"; exit ".$ERRORS{"WARNING"}.";}"; } print "status and health of $sid ORACLE is OK\n"; exit $ERRORS{"OK"}; } else { print "Unable to connect to $sid ORACLE !!! "; exit $ERRORS{"CRITICAL"}; }
将这个脚本copy到/usr/local/nagios/libexec下,这是你的nagios安装目录,另外说一句,下载的脚本是windows格式的,linux服务器用的话需要用editplus或者dos2unix命令转换文件格式。
配置nagios
定义命令文件
vi /usr/local/nagios/etc/objects/commands.cfg,加入
### CHECK ORACLE ###
# check_oracle_instant takes the host address followed by four service-supplied arguments.
define command{
    command_name    check_oracle_instant
    command_line    $USER1$/check_oracle_instant $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$
}
定义服务文件
# Arguments after the command name map to $ARG1$..$ARG4$ of check_oracle_instant.
define service {
    # NOTE(review): a template is needed for the directives not listed here;
    # generic-service is the template used elsewhere in these notes - adjust to your setup.
    use                     generic-service
    host_name               hostname
    service_description     ORACLE: check_login_health
    check_command           check_oracle_instant!1521!orcl!system!1234567890
}
重启nagios服务即可。
http://www.cppblog.com/tbwshc/archive/2012/07/28/185443.html
三:使用nagios+fetion,定时去监控会话和进程数
1:创建监控脚本,该脚本放任务计划中运行,每2分钟自动执行
[*]# cat session_oracle.sh
#!/bin/sh
# Probe meant to be run from cron: spools the most recent session_count and
# process_count rows of the session_monitor table into LOG_FILE, which
# check_oracle_session.sh reads.

LOG_FILE=/tmp/session_oracle.log

# Drop the previous result first so an old reading is never re-evaluated.
rm -f "$LOG_FILE"

export ORACLE_HOME=/u01/app/oracle/product/11.2.0/db1

# The heredoc is unquoted: $LOG_FILE is expanded by the shell before sqlplus
# reads the script. The closing EOF must stand alone on its own line.
"$ORACLE_HOME/bin/sqlplus" hr/hr@192.168.1.240:1521/orcl <<EOF
set echo off
set feedback off
spool $LOG_FILE
alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
select session_count from (select * from session_monitor order by time desc ) where rownum=1;
select process_count from (select * from session_monitor order by time desc ) where rownum=1;
spool off
set echo on
set feedback on
EOF
2:创建第二脚本,用来处理前面监控脚本的日志输出,将结果返回给监控服务器
[*]# cat /tmp/session_oracle.log
SQL> alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
SQL> select session_count from (select * from session_monitor order by time desc ) whererownum=1;
[*]SESSION_COUNT
-------------
138
SQL> select process_count from (select * from session_monitor order by time desc ) whererownum=1;
[*]PROCESS_COUNT
-------------
153
SQL> spool off
[*]
[*]# cat check_oracle_session.sh
#!/bin/sh
# Evaluates the spool file written by session_oracle.sh and reports the
# session/process counts in Nagios plugin form.
#   exit 0 - both counts are at most 500
#   exit 2 - a count exceeds 500, the file is missing, or a count is unreadable

STATE_OK=0
STATE_CRITICAL=2
LOG_FILE=/tmp/session_oracle.log

if [ -f "$LOG_FILE" ]; then
    # Column header, separator line, value: the value is the 2nd line after
    # the header, so take the last line of "header + 2" and strip blanks.
    SESSION=$(grep -A 2 'SESSION_COUNT' "$LOG_FILE" | tail -1 | sed 's/[ ][ ]*//g')
    PROCESS=$(grep -A 2 'PROCESS_COUNT' "$LOG_FILE" | tail -1 | sed 's/[ ][ ]*//g')
else
    echo "something wrong,please check monitor script"
    exit $STATE_CRITICAL
fi

# An empty or non-numeric value would make the numeric tests below fail and
# fall through to the OK branch, so reject it explicitly.
for value in "$SESSION" "$PROCESS"; do
    case "$value" in
        ''|*[!0-9]*)
            echo "something wrong,please check monitor script"
            exit $STATE_CRITICAL
            ;;
    esac
done

if [ "$SESSION" -gt 500 ] || [ "$PROCESS" -gt 500 ]; then
    echo "Current session is $SESSION,process is $PROCESS "
    exit $STATE_CRITICAL
else
    echo "Current session is $SESSION,process is $PROCESS "
    exit $STATE_OK
fi
四:实际效果
# /usr/local/nagios/libexec/check_nrpe -H 192.168.1.240 -c check_oracle_session
Current session is 138,process is 153
http://skymax.blog.iyunv.com/365901/103331/
修改Nagios的check_oracle脚本来监控Oracle的临时表空间
1.前言
Nagios的Nagios Plugins中有很多程序或脚本提供给我们,用于监控相应的服务、资源等等。在Nagios Plugins中有一个用于实现对Oracle数据库进行监控的脚本,叫做check_oracle,位于Nagios安装路径下的libexec目录中。
check_oracle脚本可以监控Oracle数据库的cache、tns、tablespace等信息,但是通过“--tablespace”选项监控表空间时,我们发现这个脚本不能监控临时表空间。仔细查看该脚本,发现其中的确没有对临时表空间进行处理,现对该脚本做修改,使之能够监控Oracle的临时表空间。本文以Oracle10g作为实验数据库。
2.分析源码和问题的解决方法
查看check_oracle源码的tablespace部分,发现其对于表空间信息的获取是通过sql语句完成的。sql语句如下:
-- Per-tablespace usage as quoted from check_oracle: free MB, total MB and
-- used percentage (truncated to one decimal).
-- ${5} is the shell positional parameter holding the tablespace name; the
-- shell substitutes it before the statement reaches sqlplus.
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from
(select tablespace_name,sum(bytes)/1024/1024 total
from dba_data_files group by tablespace_name) A
LEFT OUTER JOIN
(select tablespace_name,sum(bytes)/1024/1024 free
from dba_free_space group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name
WHERE a.tablespace_name='${5}';
其中${5}是表空间的名字。
由于临时文件的的信息不在dba_data_files表中,所以通过上述脚本显然不能获得临时表空间的任何信息。
那么如何获得临时表空间的空间使用情况呢,具体sql语句如下:
-- Usage of the temporary tablespace TEMP: total comes from dba_temp_files,
-- "free" from sum(bytes_cached) in v$temp_extent_pool.
-- Written for direct use in sqlplus; inside the shell script's backtick
-- heredoc the view name has to be escaped as v\\$temp_extent_pool.
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from
(select tablespace_name,sum(bytes)/1024/1024 total
from dba_temp_files group by tablespace_name) A
LEFT OUTER JOIN
(select tablespace_name,sum(bytes_cached)/1024/1024 free
from v$temp_extent_pool group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name
WHERE a.tablespace_name='TEMP';
这条sql语句可以获得临时表空间“TEMP”的空间使用情况。
解决的方法找到了,下面我们只需将脚本做小小修改就可以达到要求了。
3.check_oracle脚本的修改
在check_oracle脚本中的“case”语句中增加一个“--tablespaceTEMP”分支,用于完成监控Oracle临时表空间的功能。修改后的脚本如下(修改、添加的部分已标出):
#! /bin/ksh
#
# latigid010@yahoo.com
# 01/06/2000
#
#This Nagios plugin was created to check Oracle status
#
# Script name without its directory; used as the prefix of every usage line.
PROGNAME=`basename $0`
# Directory part of $0: the sed expression removes the last "/name" component.
PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`
# Keeps only the digits and dots of the revision keyword string.
REVISION=`echo '$Revision: 1749 $' | sed -e 's/[^0-9.]//g'`
# Shared plugin helpers; presumably the source of the STATE_* codes,
# print_revision and support used below - they are not defined in this script.
. $PROGPATH/utils.sh
print_usage() {
    # Prints the synopsis: a "Usage:" header followed by one line per mode,
    # each prefixed with the script name.
    cat <<USAGE
Usage:
$PROGNAME --tns <Oracle Sid or Hostname/IP address>
$PROGNAME --db <ORACLE_SID>
$PROGNAME --login <ORACLE_SID>
$PROGNAME --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
$PROGNAME --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
$PROGNAME --tablespaceTEMP <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
$PROGNAME --oranames <Hostname>
$PROGNAME --help
$PROGNAME --version
USAGE
}
print_help() {
    # Full help screen: revision line, synopsis, per-mode description,
    # troubleshooting hints, then the support footer.
    print_revision $PROGNAME $REVISION
    echo ""
    print_usage
    # Quoted delimiter: the text below is emitted literally, nothing is expanded.
    cat <<'HELP'

Check Oracle status

--tns SID/IP Address
 Check remote TNS server
--db SID
 Check local database (search /bin/ps for PMON process) and check
 filesystem for sgadefORACLE_SID.dbf
--login SID
 Attempt a dummy login and alert if not ORA-01017: invalid username/password
--cache
 Check local database for library and buffer cache hit ratios
 --->Requires Oracle user/password and SID specified.
 --->Requires select on v_$sysstat and v_$librarycache
--tablespace
 Check local database for tablespace capacity in ORACLE_SID
 --->Requires Oracle user/password specified.
 --->Requires select on dba_data_files and dba_free_space
--tablespaceTEMP
 Check local temporary database for tablespace capacity in ORACLE_SID
 --->Requires Oracle user/password specified.
 --->Requires select on dba_temp_files and v_$temp_extent_pool
--oranames Hostname
 Check remote Oracle Names server
--help
 Print this help screen
--version
 Print version and license information

If the plugin doesn't work, check that the ORACLE_HOME environment
variable is set, that ORACLE_HOME/bin is in your PATH, and the
tnsnames.ora file is locatable and is properly configured.

When checking local database status your ORACLE_SID is case sensitive.

If you want to use a default Oracle home, add in your oratab file:
*:/opt/app/oracle/product/7.3.4:N

HELP
    support
}
# Map the numeric shorthands to their long options: 1 means --tns, 2 means
# --db. Any other first argument is used as given.
if [ "$1" = "1" ]; then
    cmd='--tns'
elif [ "$1" = "2" ]; then
    cmd='--db'
else
    cmd="$1"
fi
# Informational modes: print and leave with STATE_OK, before any ORACLE_HOME lookup.
case "$cmd" in
    --help|-h)
        print_help
        exit $STATE_OK
        ;;
    --version|-V)
        print_revision $PROGNAME $REVISION
        exit $STATE_OK
        ;;
esac
# Hunt down a reasonable ORACLE_HOME
# Only runs when ORACLE_HOME is not already set in the environment.
if [ -z "$ORACLE_HOME" ] ; then
# Adjust to taste
# Candidate oratab locations, tried in order; missing files are skipped.
for oratab in /var/opt/oracle/oratab /etc/oratab
do
[ ! -f $oratab ] && continue
# Each oratab line is split on ":" into SID, ORACLE_HOME and the rest.
# The first line whose SID equals $2 or is "*" supplies the home; the
# "exit" only ends the command substitution, not the script.
ORACLE_HOME=`IFS=:
while read SID ORACLE_HOME junk;
do
if [ "$SID" = "$2" -o "$SID" = "*" ] ; then
echo $ORACLE_HOME;
exit;
fi;
done < $oratab`
# Stop at the first oratab that yielded a home.
[ -n "$ORACLE_HOME" ] && break
done
fi
# Last resort
# Fall back to an "oracle" directory next to the plugin, if one exists.
[ -z "$ORACLE_HOME" -a -d $PROGPATH/oracle ] && ORACLE_HOME=$PROGPATH/oracle
# Every mode except --db requires ORACLE_HOME to be an existing directory.
if [ "$cmd" != "--db" ]; then
if [ -z "$ORACLE_HOME" -o ! -d "$ORACLE_HOME" ] ; then
echo "Cannot determine ORACLE_HOME for sid $2"
exit $STATE_UNKNOWN
fi
fi
# Make the Oracle binaries and libraries reachable for the checks below.
PATH=$PATH:$ORACLE_HOME/bin
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME/lib
export ORACLE_HOME PATH LD_LIBRARY_PATH
case "$cmd" in
--tns)
    # TNS listener check: run tnsping against $2 and look for "OK" in its output.
    tnschk=` tnsping $2`
    # $tnschk is deliberately unquoted so the whole output collapses onto one
    # line; grep -c then yields 1 when "OK" occurs anywhere in it.
    tnschk2=` echo $tnschk | grep -c OK`
    if [ ${tnschk2} -eq 1 ] ; then
        # Keep only the text inside the last pair of parentheses (the reply time).
        tnschk3=` echo $tnschk | sed -e 's/.*(//' -e 's/).*//'`
        echo "OK - reply time ${tnschk3} from $2"
        exit $STATE_OK
    else
        echo "No TNS Listener on $2"
        exit $STATE_CRITICAL
    fi
    ;;
--oranames)
# Oracle Names check: parse the output of "namesctl status $2" with awk.
# A "Server has been running for:" line yields an OK message built from
# field 6 onwards; any line containing "error" yields CRITICAL with that
# line as the message. The STATE_* values are spliced into the awk program
# by closing and reopening the single quotes.
# There is no explicit "exit" in this branch: awk's exit status is the
# status of the last command the script runs.
namesctl status $2 | awk '
/Server has been running for:/ {
msg = "OK: Up"
for (i = 6; i <= NF; i++) {
msg = msg " " $i
}
status = '$STATE_OK'
}
/error/ {
msg = "CRITICAL: " $0
status = '$STATE_CRITICAL'
}
END {
print msg
exit status
}'
;;
--db)
# Local database check: count processes whose ps line ends in
# "ora_pmon_<SID>" (the grep itself is filtered out). One or more means
# the database is reported as running.
pmonchk=`ps -ef | grep -v grep | grep -c "ora_pmon_${2}$"`
if [ ${pmonchk} -ge 1 ] ; then
echo "${2} OK - ${pmonchk} PMON process(es) running"
exit $STATE_OK
# Disabled variant that also looked for an sga* file under $ORACLE_HOME/dbs
# and reported its timestamp:
#if [ -f $ORACLE_HOME/dbs/sga*${2}* ] ; then
#if [ ${pmonchk} -eq 1 ] ; then
#utime=`ls -la $ORACLE_HOME/dbs/sga*$2* | cut -c 43-55`
#echo "${2} OK - running since ${utime}"
#exit $STATE_OK
#fi
else
echo "${2} Database is DOWN"
exit $STATE_CRITICAL
fi
;;
--login)
    # Dummy login check: connect with made-up credentials. ORA-01017
    # (invalid username/password) is the expected answer and counts as OK;
    # anything else is reported as CRITICAL.
    loginchk=`sqlplus dummy/user@$2 < /dev/null`
    # Unquoted on purpose: the output collapses onto one line, so grep -c
    # yields 1 when ORA-01017 occurs anywhere in it.
    loginchk2=` echo $loginchk | grep -c ORA-01017`
    if [ ${loginchk2} -eq 1 ] ; then
        echo "OK - dummy login connected"
        exit $STATE_OK
    else
        # Report the first ORA- error line from the sqlplus output.
        loginchk3=` echo "$loginchk" | grep "ORA-" | head -1`
        echo "CRITICAL - $loginchk3"
        exit $STATE_CRITICAL
    fi
    ;;
--cache)
    # Cache hit-ratio check.
    # Args: $2 SID, $3 user, $4 password, $5 critical level, $6 warning level.
    # A ratio at or below a level triggers it, so critical must not be
    # greater than warning.
    if [ ${5} -gt ${6} ] ; then
        echo "UNKNOWN - Warning level is less then Crit"
        exit $STATE_UNKNOWN
    fi
    # Buffer cache hit ratio. Inside backticks "\\$" reaches the heredoc as
    # "\$", which in turn keeps the shell from expanding v$sysstat.
    result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select (1-(pr.value/(dbg.value+cg.value)))*100
from v\\$sysstat pr, v\\$sysstat dbg, v\\$sysstat cg
where pr.name='physical reads'
and dbg.name='db block gets'
and cg.name='consistent gets';
EOF`
    if [ -n "`echo $result | grep ORA-`" ] ; then
        error=` echo "$result" | grep "ORA-" | head -1`
        echo "CRITICAL - $error"
        exit $STATE_CRITICAL
    fi
    # Take the value only from a line made up of digits, dots and blanks
    # (same pattern as the tablespace checks); int() gives the comparable
    # integer, the unrounded form is kept for the message.
    buf_hr=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
    buf_hrx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $1}'`
    # Library cache hit ratio.
    result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select sum(lc.pins)/(sum(lc.pins)+sum(lc.reloads))*100
from v\\$librarycache lc;
EOF`
    if [ -n "`echo $result | grep ORA-`" ] ; then
        error=` echo "$result" | grep "ORA-" | head -1`
        echo "CRITICAL - $error"
        exit $STATE_CRITICAL
    fi
    lib_hr=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
    lib_hrx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $1}'`
    # Either ratio at or below a level triggers that level; critical is tested first.
    if [ $buf_hr -le ${5} -o $lib_hr -le ${5} ] ; then
        echo "${2} CRITICAL - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
        exit $STATE_CRITICAL
    fi
    if [ $buf_hr -le ${6} -o $lib_hr -le ${6} ] ; then
        echo "${2} WARNING- Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
        exit $STATE_WARNING
    fi
    echo "${2} OK - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
    exit $STATE_OK
    ;;
--tablespace)
# Tablespace usage check.
# Args: $2 SID, $3 user, $4 password, $5 tablespace, $6 critical %, $7 warning %.
# Usage at or above a level triggers it, so critical must not be below warning.
if [ ${6} -lt ${7} ] ; then
echo "UNKNOWN - Warning level is more then Crit"
exit $STATE_UNKNOWN
fi
# The query returns free MB, total MB and used percent for tablespace $5
# (total from dba_data_files, free from dba_free_space).
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from
(select tablespace_name,sum(bytes)/1024/1024 total
from dba_data_files group by tablespace_name) A
LEFT OUTER JOIN
(select tablespace_name,sum(bytes)/1024/1024 free
from dba_free_space group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name
WHERE a.tablespace_name='${5}';
EOF`
# Any ORA- error in the output is reported as CRITICAL with its first line.
if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi
# Only a line made up of digits, dots and blanks is parsed; the three
# columns are taken as integers, plus the unrounded percentage for display.
ts_free=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
ts_total=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($2)}'`
ts_pct=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($3)}'`
ts_pctx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $3}'`
# All three values being zero is treated as "no row returned".
if [ "$ts_free" -eq 0 -a "$ts_total" -eq 0 -a "$ts_pct" -eq 0 ] ; then
echo "No data returned by Oracle - tablespace $5 not found?"
exit $STATE_UNKNOWN
fi
# Critical is tested before warning; the text after "|" is performance data.
if [ "$ts_pct" -ge ${6} ] ; then
echo "${2} : ${5} CRITICAL - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_CRITICAL
fi
if [ "$ts_pct" -ge ${7} ] ; then
echo "${2} : ${5} WARNING- $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_WARNING
fi
echo "${2} : ${5} OK - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_OK
;;
--tablespaceTEMP)
# edit by sky,TEMP tablespace
# Temporary tablespace usage check; same flow and arguments as --tablespace
# ($2 SID, $3 user, $4 password, $5 tablespace, $6 critical %, $7 warning %),
# but the query reads dba_temp_files and v$temp_extent_pool instead.
if [ ${6} -lt ${7} ] ; then
echo "UNKNOWN - Warning level is more then Crit"
exit $STATE_UNKNOWN
fi
# Total comes from dba_temp_files; "free" is sum(bytes_cached) from
# v$temp_extent_pool. "\\$" keeps the shell from expanding the view name.
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from
(select tablespace_name,sum(bytes)/1024/1024 total
from dba_temp_files group by tablespace_name) A
LEFT OUTER JOIN
(select tablespace_name,sum(bytes_cached)/1024/1024 free
from v\\$temp_extent_pool group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name
WHERE a.tablespace_name='${5}';
EOF`
# Any ORA- error in the output is reported as CRITICAL with its first line.
if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi
# Only a line made up of digits, dots and blanks is parsed.
ts_free=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
ts_total=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($2)}'`
ts_pct=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($3)}'`
ts_pctx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $3}'`
# All three values being zero is treated as "no row returned".
if [ "$ts_free" -eq 0 -a "$ts_total" -eq 0 -a "$ts_pct" -eq 0 ] ; then
echo "No data returned by Oracle - tablespace $5 not found?"
exit $STATE_UNKNOWN
fi
# Critical is tested before warning; the text after "|" is performance data.
if [ "$ts_pct" -ge ${6} ] ; then
echo "${2} : ${5} CRITICAL - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_CRITICAL
fi
if [ "$ts_pct" -ge ${7} ] ; then
echo "${2} : ${5} WARNING- $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_WARNING
fi
echo "${2} : ${5} OK - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_OK
;;
*)
# Unrecognised mode: show the synopsis and report UNKNOWN.
print_usage
exit $STATE_UNKNOWN
esac
4.测试、结语
脚本修改完了,下面测试一下。
$ ./check_oracle --tablespaceTEMP skydb sky sky TEMP 90 80
skydb : TEMP OK - 15.00% used [ 17 / 20 MB available ]|TEMP=15.00%;80;90;0;100
如果你没有成功,那可能是没有给用户分配dba_temp_files和v_$temp_extent_pool对象的select权限。
通过--help选项可以详细了解check_oracle的使用方法。
$ ./check_oracle --help
check_oracle v1749 (nagios-plugins 1.4.11)
The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute
copies of the plugins under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
Usage:
check_oracle --tns <Oracle Sid or Hostname/IP address>
check_oracle --db <ORACLE_SID>
check_oracle --login <ORACLE_SID>
check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
check_oracle --tablespaceTEMP <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
check_oracle --oranames <Hostname>
check_oracle --help
check_oracle --version
Check Oracle status
--tns SID/IP Address
Check remote TNS server
--db SID
Check local database (search /bin/ps for PMON process) and check
filesystem for sgadefORACLE_SID.dbf
--login SID
Attempt a dummy login and alert if not ORA-01017: invalid username/password
--cache
Check local database for library and buffer cache hit ratios
--->Requires Oracle user/password and SID specified.
--->Requires select on v_$sysstat and v_$librarycache
--tablespace
Check local database for tablespace capacity in ORACLE_SID
--->Requires Oracle user/password specified.
--->Requires select on dba_data_files and dba_free_space
--tablespaceTEMP
Check local temporary database for tablespace capacity in ORACLE_SID
--->Requires Oracle user/password specified.
--->Requires select on dba_temp_files and v$temp_extent_pool
--oranames Hostname
Check remote Oracle Names server
--help
Print this help screen
--version
Print version and license information
If the plugin doesn't work, check that the ORACLE_HOME environment
variable is set, that ORACLE_HOME/bin is in your PATH, and the
tnsnames.ora file is locatable and is properly configured.
When checking local database status your ORACLE_SID is case sensitive.
If you want to use a default Oracle home, add in your oratab file:
*:/opt/app/oracle/product/7.3.4:N
Send email to nagios-users@lists.sourceforge.net if you have questions
regarding use of this software. To submit patches or suggest improvements,
send email to nagiosplug-devel@lists.sourceforge.net.
Please include version information with all correspondence (when possible,
use output from the --version option of the plugin itself).
本文出自 “sky” 博客,请务必保留此出处http://skymax.blog.iyunv.com/365901/103331
###################################################
http://bbs.iyunv.com/viewthread.php?tid=938714&extra=page%3D1&page=
最近发现Nagios自带的监控WEB 80端口的脚本不太好用,服务已经无法提供了,还是没报警,所以就自己写了一个!
复制内容到剪贴板
代码:
#!/bin/bash
# author: honway.liu
# date: 2012-07-11
# version: 0.0.1
# desc: check web server status
#
# Nagios plugin: fetches the URL given as the first argument with curl and
# reports OK only when the HTTP status code is exactly 200.
#
# Exit codes (Nagios plugin convention):
#   0 - OK        the URL answered with HTTP 200
#   2 - CRITICAL  any other status, or curl failed / timed out (status "000")
#   3 - UNKNOWN   the plugin was called without a URL

Usage() {
    echo "##################"
    echo "$0 URL"
    echo "##################"
}

# A missing argument is a configuration error; it must not be reported as OK,
# so exit with UNKNOWN instead of falling through with status 0.
if [ $# -lt 1 ]; then
    Usage
    exit 3
fi

# -m 10 caps the whole transfer at 10 seconds; -w prints only the status code.
# The URL is quoted so that characters such as '&' or '?' are passed verbatim.
url_status=$(curl -o /dev/null -s -m 10 --connect-timeout 10 -w '%{http_code}' "$1")

if [ "$url_status" = "200" ]; then
    echo "OK - $1"
    exit 0
else
    echo "Critical - $1"
    exit 2
fi
欢迎大家拍砖
##########################
#####################################
http://forum.icst.org.tw/phpbb/viewtopic.php?f=16&t=14382
最後用 check_tcp -H localhost -p 1521 代替
http://bbs.chinaunix.net/thread-2035313-1-1.html
回复 #1 ppiqq 的帖子
“check_command check_tcp!192.168.0.247!3306”
这句改成:
check_command check_tcp!3306
http://storysky.blog.iyunv.com/628458/737309
,只是通过检查pid 和 port两个参数来判断ttserver是否活着
ttserver 的启动命令如下
ttserver -host 192.168.1.9 -port 11209 -thnum 8 -dmn -pid /usr/local/ttserver/session/ttserver.pid -log /usr/local/ttserver/logs/ttserver_session.log
以下是脚本内容
check_tt.sh
#!/bin/bash
# author storysky in 2011.07.20
# check ttserver status
#
# Nagios plugin: compares the PID recorded in the ttserver pid file with the
# PID reported by "tcrmgr inform", and verifies that ttserver was started on
# port 11209. Exits 0 (OK) when both match, 2 (CRITICAL) otherwise.

# PID written by ttserver at start-up (empty if the file is missing).
pid1=$(cat /usr/local/ttserver/session/ttserver.pid 2>/dev/null)

# Port taken from the running process' command line; field 15 of "ps aux"
# corresponds to the value after "-port" in the start command shown above.
port=$(ps aux | awk '/ttserver/&&!/awk/{print $15}')

# PID as reported by the server itself over the network.
pid2=$(/usr/local/ttserver/bin/tcrmgr inform -port "$port" -st 192.168.1.9 | awk '/pid/{print $2}')

# Operands are quoted and stderr is silenced so that an empty or non-numeric
# value simply fails the comparison instead of printing a shell syntax error.
if [ "$pid1" -eq "$pid2" ] 2>/dev/null && [ "$port" -eq 11209 ] 2>/dev/null; then
    echo "OK TTserver is running"
    exit 0
else
    echo "Critical TTserver is error"
    exit 2
fi
这样就可以利用check_tcp 来得到ttserver的响应时间、uptime等信息,具体的命令格式如下:
check_tcp -H 192.168.1.9 -p 11209 -t 5 -E -s 'stats\r\nquit\r\n' -e 'uptime' -M crit
加入到nrpe.cfg 里面
command[check_ttserver]=/usr/local/nagios/libexec/check_tcp -H 192.168.1.9 -p 11209 -t 5 -E -s 'stats\r\nquit\r\n' -e 'uptime' -M crit
OK,试试吧
http://liuyu.blog.iyunv.com/183345/64064/
2、check 的使用,在安装后nagios plugins 后会产生N多check开头的文件。
就这是这些脚本的使用
那么对于apache如果只是监控端口80 并不能说明apache就正常,比如动态或者虚拟主机。其中一个网页down了但也不会报警。
于是就应该对check_tcp!80 进行修改
修改commands.cfg,添加:
define command{
    ; Runs the check_http plugin against the host address.
    ; $ARG1$ is passed to -u, $ARG2$ to -w and $ARG3$ to -c.
    command_name    check_http
    command_line    $USER1$/check_http -H $HOSTADDRESS$ -u $ARG1$ -w $ARG2$ -c $ARG3$
}
修改services.cfg
define service{
    ; HTTP check for host aabbcc using the check_http command, which expects
    ; three arguments: $ARG1$ = URI (-u), $ARG2$ = warning response time in
    ; seconds (-w), $ARG3$ = critical response time in seconds (-c).
    ; Note: only ';' starts an inline comment in Nagios object files; a '#'
    ; after a value would become part of the value.
    host_name               aabbcc
    service_description     check-http
    check_command           check_http!/!3!5    ; URI "/", warn at 3s, critical at 5s (adjust as needed)
    max_check_attempts      5
    normal_check_interval   3
    retry_check_interval    2
    check_period            24x7
    notification_interval   10
    notification_period     24x7
    notification_options    w,u,c,r
    contact_groups          sagroup
}
这样就OK 了。如果你要改一些选项的,在nagios/libexec 目录 ./check_http --help
根据参数改就好了。
################################################################
http://blog.iyunv.com/wangxiaosen/article/details/5935804
nagios 命令解释
check_ssh
界面拼装参数格式如下共3个元素:
命令!端口!连接超时时间
check_ssh!22!10
check_http
界面拼装参数格式如下共4个元素:
命令!告警时延!严重告警时延!连接超时时间
check_http!0.0020!0.0050!10
check_imap
check_ftp
check_nntp
check_pop
check_udp
check_tcp
界面拼装参数格式如下共5个元素:
命令!端口!告警时延!严重告警时延!连接超时时间
check_tcp!23!0.0020!0.0050!10
备注:
check_imap=check_tcp!143
check_ftp=check_tcp!21
check_nntp=check_tcp!119
check_pop=check_tcp!110
check_udp=check_tcp
check_telnet=check_tcp!23
check_smtp
界面拼装参数格式如下共4个元素:
命令!告警时延!严重告警时延!连接超时时间
check_smtp!0.0020!0.0050!10
check_ping
界面拼装参数格式如下共5个元素:
命令!告警时延,丢包率!严重告警时延,丢包率!检测数据包个数!连接超时时间
check_ping!3000.0,80%!5000.0,100%!5!10
二、命令检测详细描述
check_ssh
Usage:check_ssh [-46] [-t <timeout>] [-r <remote version>] [-p <port>] <host>
参数:
-h, --help
帮助
-V, --version
列出版本信息
-H, --hostname=ADDRESS
主机名称,IP地址,或者UNIX套接字(必须有绝对路径)
-p, --port=INTEGER
端口号(默认:22)
-4, --use-ipv4
使用IPV4协议连接
-6, --use-ipv6
使用IPV6协议连接
-t, --timeout=INTEGER
连接超时秒数(默认:10秒)
-r, --remote-version=STRING
不匹配服务器版时警告字符串,如对方的版本为OpenSSH_3.9p1
-v, --verbose
列出详细的命令调试行
举例
./check_ssh -H 192.168.2.220 -p 22 -t 10 -r OpenSSH_3.0pl
SSH WARNING - OpenSSH_3.8.1p1 Debian-8.sarge.6 (protocol 2.0) version mismatch, expected 'OpenSSH_3.0pl'
./check_ssh -H 192.168.2.220 -p 22 -t 10
SSH OK - OpenSSH_3.8.1p1 Debian-8.sarge.6 (protocol 2.0)
check_ssh -H $HOSTADDRESS$ -p 22 -t 10
界面拼装参数格式如下共3个元素命令!端口!连接超时时间
check_ssh!22!10
check_http
Usage: check_http -H <vhost> | -I <IP-address> [-u <uri>] [-p <port>]
[-w <warn time>] [-c <critical time>] [-t <timeout>] [-L]
[-a auth] [-f <ok | warn | critical | follow>] [-e <expect>]
[-s string] [-l] [-r <regex> | -R <case-insensitive regex>] [-P string]
[-m <min_pg_size>:<max_pg_size>] [-4|-6] [-N] [-M <age>] [-A string] [-k string]
-h, --help
帮助
-V, --version
列出版本信息
-H, --hostname=ADDRESS
虚拟主机名加端口(如:example.com:5000)
-I, --IP-address=ADDRESS
IP地址或名称(如果无需DNS的查找,使用十进制的地址)
-p, --port=INTEGER
端口数(默认: 80)
-4, --use-ipv4
使用IPV4连接
-6, --use-ipv6
使用IPV6连接
-e, --expect=STRING
把服务器反馈的第一行(状态)转换成指定的字符串(默认是:HTTP/1. 如果指定跳跃了所有其它逻辑状态行)
-s, --string=STRING
指定内容
-u, --url=PATH
获取或发送的URL(默认:/)
-P, --post= STRING
URL进行POST的HTTP数据
-N, --no-body
不等待文档正文:获取报头后停止读取。
(注意,这是一个HTTP的获取和发送,而不是报头)
-M, --max-age=SECONDS
如果文档超过生存期则警告。数据是如下形式的:分钟数是"10m",小时数是"10h",天数是 "10d"
-T, --content-type=STRING
在传输的时候指定容器类型媒体类型
-l, --linespan
允许正则表达式跨越新行(必须在前面使用 –R 或-r)
-r, --regex, ,--ereg=STRING
用正则表达式字符串搜索页
-R, --eregi=STRING
用正则表达式字符串搜索页,允许模糊查找
--invert-regex
如果找到返回CRITICAL,找不到返回OK
-a, --authorization=AUTH_PAIR
用户名:在站点最基本的密码认证
-A, --useragent=STRING
转换成字符串放在HTTP报头里发送,像"用户代理"
-k, --header=STRING
任何其它的标签被放在HTTP报头里发送。可以被附加的报头使用多次。
-L, --link=URL
在HTML 链接里隐藏发送包
-f, --onredirect
怎样解决重定向页
-m, --pagesize=INTEGER<:INTEGER>
最小最大页大小要求(BYTES)
-w, --warning=DOUBLE
告警状态的返回时间(秒)
-c,--critical=DOUBLE
严重状态的返回时间(秒)
-t, --timeout=INTEGER
指定超时前的时间(默认10秒)
-v , --verbose
列出详细的命令调试行
举例
./check_http -H 192.168.2.220 -p 80
HTTP OK HTTP/1.1 200 OK - 5553 bytes in 0.057 seconds |time=0.057428s;;;0.000000 size=5553B;;;0
./check_http -H 192.168.2.220 -p 80 -w 0.0020 -c 0.0060
HTTP WARNING: HTTP/1.1 200 OK - 0.003 second response time |time=0.003068s;0.002000;0.006000;0.000000 size=5553B;;;0
./check_http -H 192.168.2.220 -p 80 -w 0.0030 -c 0.0040
HTTP OK HTTP/1.1 200 OK - 5553 bytes in 0.003 seconds |time=0.002673s;0.003000;0.004000;0.000000 size=5553B;;;0
./check_http -H 192.168.2.220 -p 80 -w 0.0009 -c 0.0040 -t 10
HTTP WARNING: HTTP/1.1 200 OK - 0.002 second response time |time=0.002102s;0.000900;0.004000;0.000000 size=5553B;;;0
界面拼装参数格式如下共4个元素命令!告警时延!严重告警时延!连接超时时间
check_http!0.0020!0.0050!10
check_clamd
check_imap
check_ftp
check_nntp
check_pop
check_udp
check_tcp
Usage:check_tcp -H host -p port [-w <warning time>] [-c <critical time>] [-s <send string>]
[-e <expect string>] [-q <quit string>][-m <maximum bytes>] [-d <delay>]
[-t <timeout seconds>] [-r <refuse state>] [-M <mismatch state>] [-v] [-4|-6] [-j]
[-D <days to cert expiry>] [-S <use SSL>] [-E]
-h, --help
帮助
-V, --version
列出版本信息
-H, --hostname=ADDRESS
主机名,IP地址,或者UNIX套接字 (必须是绝对路径)
-p, --port=INTEGER
端口数 (默认: 无)
-4, --use-ipv4
使用IPV4连接
-6, --use-ipv6
使用IPV6连接
-E, --escape
可以在发送或退出字符串中使用 \n、\r、\t 或 \ 转义字符(必须放在 -s 或 -q 选项之前)
默认情况下发送字符串末尾不追加任何内容,退出字符串末尾追加 \r\n
-s, --send=STRING
发送服务器的字符串
-e, --expect = STRING
转换为服务器返回的字符串
-q, --quit= STRING
关闭的连接时发送给服务器的字符串
-r, --refuse=OK|warn|crit
允许 TCP 拒绝的状态:ok,warn,crit (默认:crit)
-M, --mismatch= OK|warn|crit
允许预期的字符串,当发现不匹配状态:ok,warn,crit (默认:warn)
-j, --jail
隐藏TCP套接字的输出
-m, --maxbytes=INTEGER
当接收数据包大于指定的大小时,关闭连接。
-d, --delay
支持在发送数据流和轮询反馈间等待的延迟
-w, --warning=DOUBLE
告警状态的返回时间(秒)
-c,--critical=DOUBLE
严重状态的返回时间(秒)
-t, --timeout=INTEGER
指定超时前的时间(默认10秒)
-v , --verbose
列出详细的命令调试行
举例
./check_tcp -H 192.168.2.220 -p 22 -w 0.0023 -c 0.0067 -t 10
TCP OK - 0.002 second response time on port 22|time=0.002289s;0.002300;0.006700;0.000000;10.000000
./check_tcp -H 192.168.2.220 -p 22 -w 0.0003 -c 0.0006 -t 10
TCP WARNING - 0.000 second response time on port 22|time=0.000318s;0.000300;0.000600;0.000000;10.000000
界面拼装参数格式如下共5个元素:
命令!端口!告警时延!严重告警时延!连接超时时间
check_tcp!23!0.0020!0.0050!10
备注:
check_imap=check_tcp!143
check_ftp=check_tcp!21
check_nntp=check_tcp!119
check_pop=check_tcp!110
check_udp=check_tcp
check_telnet=check_tcp!23
http://www.iyunv.com.com/Linux/2011-09/44192.htm
监控客户端81,82,22,
# vim /usr/local/nagios/etc/objects/hosts.cfg
define host {
    ; Host object for svr1 (10.1.1.10).
    ; A host takes a single check_command, used to decide whether the host is
    ; up. The NRPE-based checks (check_users, check_total_procs, check_load)
    ; are service checks and belong in "define service" objects, not here.
    host_name               svr1.labexam.com
    alias                   svr1
    address                 10.1.1.10
    contact_groups          sagroup
    check_command           check-host-alive
    max_check_attempts      4
    notification_interval   5
    notification_period     24x7
    notification_options    d,u,r
}
# vim svr1_services.cfg
define service {
    ; Service "ALIVE" on svr1.labexam.com, checked with check-host-alive.
    host_name               svr1.labexam.com
    service_description     ALIVE
    check_command           check-host-alive

    ; Scheduling
    check_period            24x7
    normal_check_interval   2
    retry_check_interval    1
    max_check_attempts      2

    ; Notifications
    contact_groups          sagroup
    notification_period     24x7
    notification_interval   5
    notification_options    w,u,c,r
}
define service {
    ; Service "SSH" on svr1.labexam.com: check_tcp with argument 22.
    host_name               svr1.labexam.com
    service_description     SSH
    check_command           check_tcp!22

    ; Scheduling
    check_period            24x7
    normal_check_interval   1
    retry_check_interval    2
    max_check_attempts      2

    ; Notifications
    contact_groups          sagroup
    notification_period     24x7
    notification_interval   5
    notification_options    w,u,c,r
}
define service {
    ; Service "HTTP_81" on svr1.labexam.com: check_tcp with argument 81.
    ; The directive name and its value must be separated by whitespace.
    host_name               svr1.labexam.com
    service_description     HTTP_81
    check_period            24x7
    max_check_attempts      2
    normal_check_interval   1
    retry_check_interval    1
    contact_groups          sagroup
    notification_interval   5
    notification_period     24x7
    notification_options    w,u,c,r
    check_command           check_tcp!81
}
define service {
    ; Service "HTTP_82" on svr1.labexam.com: check_tcp with argument 82.
    ; The directive name and its value must be separated by whitespace.
    host_name               svr1.labexam.com
    service_description     HTTP_82
    check_period            24x7
    max_check_attempts      2
    normal_check_interval   1
    retry_check_interval    1
    contact_groups          sagroup
    notification_interval   5
    notification_period     24x7
    notification_options    w,u,c,r
    check_command           check_tcp!82
}
define service {
    ; Service "FASTCGI" on svr1.labexam.com: check_tcp with argument 9000.
    host_name               svr1.labexam.com
    service_description     FASTCGI
    check_command           check_tcp!9000

    ; Scheduling
    check_period            24x7
    normal_check_interval   1
    retry_check_interval    1
    max_check_attempts      2

    ; Notifications
    contact_groups          sagroup
    notification_period     24x7
    notification_interval   5
    notification_options    w,u,c,r
}
define service {
    ; Service "DISK" on svr1.labexam.com: runs the "nrpe" command with
    ; argument check_df.
    host_name               svr1.labexam.com
    service_description     DISK
    check_command           nrpe!check_df

    ; Scheduling
    check_period            24x7
    normal_check_interval   2
    retry_check_interval    2
    max_check_attempts      3

    ; Notifications
    contact_groups          sagroup
    notification_period     24x7
    notification_interval   2
    notification_options    w,u,c,r
}
define service {
    ; Service "LOAD" on svr1.labexam.com: runs the "nrpe" command with
    ; argument check_load.
    host_name               svr1.labexam.com
    service_description     LOAD
    check_command           nrpe!check_load

    ; Scheduling
    check_period            24x7
    normal_check_interval   2
    retry_check_interval    2
    max_check_attempts      3

    ; Notifications
    contact_groups          sagroup
    notification_period     24x7
    notification_interval   2
    notification_options    w,u,c,r
}
define service {
    ; Service "LOGOIN" on svr1.labexam.com: runs the "nrpe" command with
    ; argument check_users.
    ; The directive name and its value must be separated by whitespace.
    host_name               svr1.labexam.com
    service_description     LOGOIN
    check_period            24x7
    max_check_attempts      2
    normal_check_interval   1
    retry_check_interval    1
    contact_groups          sagroup
    notification_interval   5
    notification_period     24x7
    notification_options    w,u,c,r
    check_command           nrpe!check_users
}
# vim /usr/local/nagios/etc/nagios.cfg
增加一条:
cfg_file=/usr/local/nagios/etc/objects/svr1_services.cfg
# /etc/init.d/nagios reload
Running configuration check...done.
Reloading nagios configuration...done
页:
[1]