nagios,weblogic,8080,oracle,1521

lenga 发表于 2015-9-8 08:27:02

　　在oracle服务所在主机上安装nagios的客户端，也就是nrpe和nagios-plugin：
　　1.要添加nagios用户，并且把nagios添加到和Oracle用户相同的用户组中；
　　2.vi .bash_profile，把oracle用户的环境变量配置段复制过来，追加到该文件末尾；source .bash_profile使生效。
　　3./usr/local/nagios/libexec/check_oracle --tns servename测试是否返回ok
　　4.vi /usr/local/nagios/etc/nrpe.cfg 添加内容：
　　command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle --tns sid
　　command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle --db sid
　　command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle --login sid
　　 5. vi /usr/local/nagios/etc/objects/commands.cfg添加如下内容：
　　# 'check_nrpe'command definition
　　define command{
   command_name check_nrpe
   command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
   }
　　
　　
# 'check_oracle_tns' command definition
define command{
   command_name check_oracle_tns
   command_line $USER1$/check_oracle --tns $ARG1$
   }
　　
　　# 'check_oracle_db' command definition
define command{
   command_name check_oracle_db
   command_line $USER1$/check_oracle --db $ARG1$
   }
　　
　　
# 'check_oracle_login' command definition
define command{
   command_name check_oracle_login
   command_line $USER1$/check_oracle --login $ARG1$
   }
　　
　　 6.在# vi /usr/local/nagios/etc/services/192.168.1.XXX.cfg中，
　　
　　define service{
   use                         local-service       ; Name of service template to use
   host_name                   DB_XX.XXX
   service_description          check_oracle_tns
   check_command                check_nrpe!check_oracle_tns
   notifications_enabled       1
   }
　　
　　
　　
　　define service{
   use                         local-service       ; Name of service template to use
   host_name                   DB_XX.XXX
   service_description          check_oracle_db
   check_command                check_nrpe!check_oracle_db
   notifications_enabled       1
   }
　　
　　
define service{
   use                         local-service       ; Name of service template to use
   host_name                   DB_XX.XXX
   service_description          check_oracle_login
   check_command                check_nrpe!check_oracle_login
   notifications_enabled       1
   }
　　 7.如果在nagios的web管理页面中出现错误提示：Status Information:Cannot determine ORACLE_HOME for sid servername
　　请，在oracle所在主机上，用oracle用户启动nrpe：
　　
　　$ ps aux|grep nrpe
oracle   24481  0.0  0.0  39960  1064 ?        Ss   08:48   0:00 /usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
　　
　　....
　　http://blog.iyunv.com/qingchn/article/details/7838145
　　最近在调整线上监控准备把Oracle加入到监控中去，然后看了下Nagios的监控oracle的插件check_oracle发现可以监控的项目还不少，因为有监控主机，所以需要远程监控oracle。check_oracle的插件帮助如下：
　　

view plaincopy

http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf

[*]check_oracle --tns <Oracle Sid or Hostname/IP address>
[*]check_oracle --db <ORACLE_SID>
[*]check_oracle --login <ORACLE_SID>
[*]check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
[*]check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
[*]check_oracle --oranames <Hostname>
[*]check_oracle --help
[*]check_oracle --version
　　由于调整只需要监控oracle进程，所以整个监控比较简单。
　　
　　1，在被监控的oracle主机的nrpe.cfg中加入监控命令（command）
　　

view plaincopy

http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf

[*]command[check_oracle]=/usr/local/nagios/libexec/check_oracle --db lcartdg
　　
2，重启nrpe
　　
　　3，加入监控主机的service。由于之前已经添加了监控host，暂时不需要添加
　　

view plaincopy

http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf

[*]define service{
[*]    use                   generic-service
[*]    host_name             luckcart_db01,luckcart_dbbak01
[*]    service_description Check_oracle
[*]    check_command       check_nrpe!check_oracle
[*]    max_check_attempts    3
[*]    normal_check_interval 10
[*]    retry_check_interval 5
[*]    check_period          24x7
[*]    notification_interval 3
[*]    notification_period 24x7
[*]    notification_options w,u,c,r
[*]    contact_groups       admins
[*]    }
　　
4，重启nagios
　　
　　

view plaincopy

http://static.blog.iyunv.com/scripts/ZeroClipboard/ZeroClipboard.swf

[*]service nagios restart
　　
5，监控web页面，稍等就看到了oracle监控项目。
　　
　　http://blog.sina.com.cn/s/blog_5426e0180100df5z.html
　　
　　环境：Oracle 10g
CentOS 4.6 i386
Nagios 3.06
　　一、在 Oracle 所在服务器上安装 NRPE
　　#useradd nagios
　　# wget http://nchc.dl.sourceforge.net/sourceforge/nagios/nrpe-2.12.tar.gz
# tar xvfz nrpe-2.12.tar.gz
# cd nrpe-2.12
# ./configure --prefix=/usr/local/nagios
# make all
# make install-plugin
# make install-daemon
# make install-daemon-config
# make install-xinetd
注意点：
1.由于 nagios 脚本需要读取 oracle 相关文件，所以运行 nrpe 的用户需要设置为 oracle 服务用户，并相应修改 /etc/xinetd.d/nrpe 中的配置。
　　service nrpe
{
flags = REUSE
socket_type = stream
port = 5666
wait = no
user = oracle
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
log_on_failure += USERID
disable = no
only_from = 127.0.0.1 10.0.0.99
}
　　2.将nagios服务器上libexec目录中的check_oracle和utils.sh拷贝到oracle服务器的libexec目录中，并修改 check_oracle 脚本。将 $ORACLE_HOME 以及 $PATH 手动加入。
　　ORACLE_HOME=/home/oracle/OraHome_1
PATH=$PATH:$ORACLE_HOME/bin
　　二、配置 nrpe 服务
　　修改 /usr/local/nagios/etc/nrpe.cfg 文件。加入以下内容：
　　#Check Oracle
　　command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle --tns sid user password
　　command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle --db sid user password
　　command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle --login sid user password
　　command[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle --cache sid user password 80 90
　　command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle --tablespace sid user password USERS 90 80
　　具体参数写法请参考 check_oracle --help。
　　添加nrpe端口号：
　　vi /etc/services
添加这个
nrpe          5666/tcp    # NRPE
　　配置完成后，重启 xinetd 服务。
　　# service xinetd restart
　　测试nrpe：
　　./check_nrpe -H 127.0.0.1
　　NRPE v2.12
　　说明nrpe安装成功。
　　三、配置 Nagios 服务端
　　1.安装 nrpe 脚本支持。—参考官方文档。
2.在nagios服务器端添加 nrpe 命令配置。修改 nagios/etc/objects/commands.cfg 文件：
　　define command {
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
　　3.在nagios服务器端添加oracle主机配置文件，在 nagios/etc/objects 添加oracle 主机配置文件：oracle.cfg 。
　　define host {
use linux-server
host_name oracle
alias Oracle 10g
address 10.0.0.109
}
　　define service {
use generic-service
host_name oracle
service_description TNS Check
check_command check_nrpe!check_oracle_tns
}
define service {
use generic-service
host_name oracle
service_description DB Check
check_command check_nrpe!check_oracle_db
}
define service {
use generic-service
host_name oracle
service_description Login Check
check_command check_nrpe!check_oracle_login
}
define service {
use generic-service
host_name oracle
service_description Cache Check
check_command check_nrpe!check_oracle_cache
}
define service {
use generic-service
host_name oracle
service_description Tablespace Check
check_command check_nrpe!check_oracle_tablespace
}
　　如图：
　　插不进来图片，失败。。。
　　报错了“CHECK_NRPE: Error - Could not complete SSL handshake.
　　”，原来nrpe还需要ssl的支持，用yum安装openssl即可。
　　安装完openssl全都绿了，ok！
　　
　　
　　#########################################
　　http://oxiaobai.blog.iyunv.com/3369332/747037
　　最近根据公司的需要，开始捣腾Nagios，作为一个开源的监视工具来说，实在是颇为好用的。
　　那o小白是从官网上下的Nagios3.3.1版本（basic 和 plugin），那自己安装了一下，本来是想把安装和配置的所有过程都写下来的，但是网上已经有了不少这样的教程，而且官方的安装文档也写得十分详尽，那o小白就不再重复一次了，但是作为一名dba，至少要把check_oracle插件的使用给写出来。
　　那下面的操作是建立在Nagios顺利安装，Nagios用户有Oracle Client，并且环境变量正确的情况下，换句话说就是sqlplus和tnsnames能顺利执行。（注意，环境变量设错可能会出现sqlplus: error while loading shared libraries: libsqlplus.so: cannot open shared object file: No such file or directory）
　　首先，check_oracle -h可以知道这个插件的使用方法：
　　Usage:
check_oracle --tns <Oracle Sid or Hostname/IP address>
check_oracle --db <ORACLE_SID>
check_oracle --login <ORACLE_SID>
check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
check_oracle --oranames <Hostname>
check_oracle --help
check_oracle --version
　　根据不同的参数提供的功能比较全的，那o小白这里就拿两个比较复杂的做例子：
　　--cache：查看library和buffer的命中率，根据CRITICAL和WARNING的阈值进行报警。
　　--tablespace：查看表空间的使用率，根据CRITICAL和WARNING的阈值进行报警。
　　首先编辑$NAGIOS_HOME/etc/objects/commands.cfg文件，添加两个条目：
　　# 'check_tablespace_oracle' command definition
define command{
   command_name check_tablespace_oracle
   command_line $USER1$/check_oracle --tablespace $ARG1$ $ARG2$ $USER4$ $ARG3$ $ARG4$ $ARG5$
   }
# 'check_rate_oracle' command definition
define command{
   command_name check_rate_oracle
   command_line $USER1$/check_oracle --cache $ARG1$ $ARG2$ $ARG3$ $ARG4$ $ARG5$
   }
为libexec下的check_oracle命令定义，用以在之后的应用的配置文件中设定。仔细看可以看到这两个配置的方式有所不同，在第一个tablespace的配置中，原来的密码位置用了一个宏来替代，为什么要用这个宏呢？由于在Nagios的网络浏览中可以看到策略的具体形式，如果用密码原文的话会有安全问题，所以可以在$NAGIOS_HOME/etc/resource.cfg文件中定义用户的宏，可以通过这个方法避免敏感信息的泄露，也可以通过设置宏来设定一些系统的路径，$USER1$就是这个作用。在命令被最后解析的时候，所有的宏都会被替换，最多可以设置32个宏，resource.cfg中的内容如下：
　　$USER1$=/usr/local/nagios/libexec
# Store some usernames and passwords (hidden from the CGIs)
$USER4$=oracle

　　然后是应用的配置文件，这里o小白的是$NAGIOS_HOME/etc/objects/localhost.cfg,添加一个服务组定义，和两个服务：
　　服务组：#define service group
define servicegroup{
   servicegroup_name oracle-service
   alias Oracle Service
   }
　　
服务：#define a service to test check_oracle
define service{
   use                         local-service       ; Name of service template to use
   servicegroups                oracle-service
   host_name                   localhost
   service_description          Oracle_System_Space
   check_command                check_tablespace_oracle!ora11g!cy!SYSTEM!90!80
   }
define service{
   use                         local-service       ; Name of service template to use
   servicegroups                oracle-service
   host_name                   localhost
   service_description          Oracle_Buff_Rate
   check_command                check_rate_oracle!ora11g!cy!oracle!80!90
   }
　　可以看到，服务定义中根据之前的commands.cfg中的命令定义来具体传递参数，方法是用！，之前由于tablespace已经设置了宏，那这里就不需要在输入密码了，当然直接输入密码也是可行的。
　　然后就可以重启Nagios服务，service nagios restart
　　打开网络浏览器，输入nagios的网址后（通常是ip/nagios），就可以看到结果了：
　　
　　http://ylw6006.blog.iyunv.com/470441/787496
　　为了尽量避免这种问题，想到去写一个监控脚本配合nagios监控，当数据库连接异常的时候，可以第一时间收到报警短信；java程序连接数据库使用连接池，所以不一定会及时暴露出问题！
　　写一个脚本放任务计划中运行，定期去连接下数据库，查询下系统时间和数据库的状态，spool输出到临时文件上

[*]# crontab -l
[*]*/5 * * * * /usr/local/nagios/libexec/connect_oracle.sh
[*]# cat /usr/local/nagios/libexec/connect_oracle.sh
#!/bin/sh
#functions: connect oracle server test
#author:lw.yang
#modify_date: 2012-02-22
[*]rm -rf /tmp/check_oracle.log
export ORACLE_HOME=/u01/app/oracle/product/10.2.0/db_1/
$ORACLE_HOME/bin/sqlplus username/password@ip:1521/services_name <<EOF
set echo off
set feedback off
spool /tmp/check_oracle.log
alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
select sysdate from dual;
select name,open_mode from v\$database;
spool off
set echo on
set feedback on
EOF
　　再写一个脚本来根据临时文件来判断数据库是否正常，该脚本供nagios插件check_nrpe调用，之所以分两个脚本，中间使用临时文件，主要是出于权限问题的考虑,nagios本身带了一个check_oracle的插件，感觉不太适用，还需要在nagios服务器端安装oracle客户端，配置tnsnames.ora文件，设置oracle相关的环境变量等等…

[*]# cat /usr/local/nagios/libexec/check_oracle.sh
[*]#!/bin/sh
[*]#functions: use monitor oracle server status with nagios nrpe plugin
[*]#author:lw.yang
[*]#modify_date: 2012-02-22
[*]
[*]STATE_OK=0
[*]STATE_CRITICAL=2
[*]
[*]if [ -f /tmp/check_oracle.log ];then
[*]    COUNT=$(grep -i 'READ WRITE' /tmp/check_oracle.log|wc -l)
[*]    if [ $COUNT -eq 1 ];then
[*]    echo "connect oracle server normal..."
[*]    exit $STATE_OK
[*]    else
[*]    echo "database not open"
[*]    exit $STATE_CRITICAL
[*]    fi
[*]    else
[*]    echo "can't connect to oracle server..."
[*]    exit $STATE_CRITICAL
[*]fi
　　监控效果！
　　

　　# cat /tmp/check_oracle.log
SQL> alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
SQL> select sysdate from dual;
　　SYSDATE
-------------------
2012-02-23:10:10:03
SQL> select name,open_mode from v$database;
　　NAME    OPEN_MODE
--------- ----------
EPROWB2B  READ WRITE
SQL> spool off

　　
　　http://blog.sina.com.cn/s/blog_66e484080100hp0b.html
　　不安装oracle客户端，使用sqlplus连接oracle
在oracle官网下载instant client
下载地址 http://www.oracle.com/technology/software/tech/oci/instantclient/htdocs/linuxsoft.html
　　版本 Version 11.1.0.7.0
Instant Client Package – Basic: All files required to run OCI, OCCI, and JDBC-OCI applications
instantclient-basic-linux32-11.1.0.7.zip
　　Instant Client Package – SQL*Plus: Additional libraries and executable for running SQL*Plus with Instant Client
instantclient-sqlplus-linux32-11.1.0.7.zip
　　解压，将目录中所有以lib开头的文件copy到/usr/lib下，将sqlplus拷贝到/usr/sbin下，并执行 # ldconfig 更新动态链接库缓存（ldconfig -p 只会列出缓存内容，不会更新）。
　　测试，# sqlplus system/1234567890@192.168.1.4:1521/orcl
正常将显示
SQL*Plus: Release 10.2.0.4.0 – Production on Mon Aug 17 16:31:08 2009
Copyright (c) 1982, 2007, Oracle. All Rights Reserved.
　　监控脚本
check_oracle_instant监控脚本，用perl写的，在exchange.nagios.org站点可以找到。

#!/usr/bin/perl $host = $ARGV; $port = $ARGV; $sid = $ARGV; $user = $ARGV; $pass = $ARGV; sub trim($); my @result; my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2); my @param_array = ( , , , , , , ); # is possible define own selects my @results; sub trim($) { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string; } sub array_rows { my ($array_rows) = @_; my $rows = @$array_rows; return $rows; } sub logon { # open (SQL,"sqlplus -s system/mismatch@\$DESCRIPTION=\\(ADDRESS=\\(PROTOCOL=TCP\$\$Host=$host\$\$Port=$port\$\\)\$CONNECT_DATA=\\(SID=$sid\$\\)\\) ) { if ($res =~ /^(ORA-\d{5})/) { return $1; } } if (logon() eq "ORA-01017") { for (my $i=0; $i<< pagesize 0 set numformat 999.999 $param_array[$i] EOF> ) { # print trim($res)."\n"; if ( $res =~/^\s*\S+/ ) { push(@results,trim($res)); } } } for ($i=0;$i<@results;$i++) { print $i." hodnota je ".$result[$i]." a ma byt ".$param_array[$i]; eval "unless (".$results[$i].$param_array[$i].$param_array[$i].") { print\"".$param_array[$i]." ".$sid." KO \\n\"; exit ".$ERRORS{"WARNING"}.";}"; } print "status and health of $sid ORACLE is OK\n"; exit $ERRORS{"OK"}; } else { print "Unable to connect to $sid ORACLE !!! "; exit $ERRORS{"CRITICAL"}; }
　　将这个脚本copy到/usr/local/nagios/libexec下，这是你的nagios安装目录，另外说一句，下载的脚本是windows格式的，linux服务器用的话需要用editplus或者dos2unix命令转换文件格式。
　　配置nagios
定义命令文件
vi /usr/local/nagios/etc/objects/commands.cfg，加入
### CHECK ORACLE ###
define command {
command_name check_oracle_instant
command_line $USER1$/check_oracle_instant $HOSTADDRESS$ $ARG1$ $ARG2$ $ARG3$ $ARG4$
}
　　定义服务文件
define service {
host_name hostname
service_description ORACLE: check_login_health
check_command check_oracle_instant!1521!orcl!system!1234567890
}
　　重启nagios服务即可。
　　
　　http://www.cppblog.com/tbwshc/archive/2012/07/28/185443.html
　　三：使用nagios+fetion，定时去监控会话和进程数
1：创建监控脚本，该脚本放任务计划中运行，每2分钟自动执行

[*]# cat session_oracle.sh
[*]#!/bin/sh
[*]rm -rf /tmp/session_oracle.log
[*]export ORACLE_HOME=/u01/app/oracle/product/11.2.0/db1
[*]/u01/app/oracle/product/11.2.0/db1/bin/sqlplus hr/hr@192.168.1.240:1521/orcl <<EOF
[*]set echo off
[*]set feedback off
[*]spool /tmp/session_oracle.log
[*]alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
[*]select session_count from (select * from session_monitor order by time desc ) where rownum=1;
[*]select process_count from (select * from session_monitor order by time desc ) where rownum=1;
[*]spool off
[*]set echo on
[*]set feedback on
[*]EOF
　　2：创建第二脚本，用来处理前面监控脚本的日志输出，将结果返回给监控服务器

[*]# cat /tmp/session_oracle.log
SQL> alter session set nls_date_format='YYYY-MM-DD:HH24:MI:SS';
SQL> select session_count from (select * from session_monitor order by time desc ) where rownum=1;
[*]SESSION_COUNT
-------------
      138
SQL> select process_count from (select * from session_monitor order by time desc ) where rownum=1;
[*]PROCESS_COUNT
-------------
      153
SQL> spool off
[*]
[*]# cat check_oracle_session.sh
[*]#!/bin/sh
[*]STATE_OK=0
[*]STATE_CRITICAL=2
[*]
[*]if [ -f /tmp/session_oracle.log ];then
[*] SESSION=$(grep -A 2 'SESSION_COUNT' /tmp/session_oracle.log |tail -1|sed 's/[ ][ ]*//g')
[*] PROCESS=$(grep -A 2 'PROCESS_COUNT' /tmp/session_oracle.log |tail -1|sed 's/[ ][ ]*//g')
[*] else
[*] echo "something wrong,please check monitor script"
[*] exit $STATE_CRITICAL
[*]fi
[*]
[*]if [ $SESSION -gt 500 ] || [ $PROCESS -gt 500 ];then
[*] echo "Current session is $SESSION,process is $PROCESS "
[*] exit $STATE_CRITICAL
[*] else
[*]    echo "Current session is $SESSION,process is $PROCESS "
[*]    exit $STATE_OK
[*]fi
　　四：实际效果
# /usr/local/nagios/libexec/check_nrpe -H 192.168.1.240 -c check_oracle_session
Current session is 138,process is 153
　　
　　http://skymax.blog.iyunv.com/365901/103331/

修改Nagios的check_oracle脚本来监控Oracle的临时表空间

1.前言

Nagios的Nagios Plugins中有很多程序或脚本提供给我们，用于监控相应的服务、资源等等。在Nagios Plugins中有一个用于实现对Oracle数据库进行监控的脚本，叫做check_oracle，位于Nagios安装路径下的libexec目录中。

check_oracle脚本可以监控Oracle数据库的cache、tns、tablespace等信息，但是通过“--tablespace”选项监控表空间时，我们发现这个脚本不能监控临时表空间。仔细查看该脚本，发现其中的确没有对临时表空间进行处理，现对该脚本做修改，使之能够监控Oracle的临时表空间。本文以Oracle10g作为实验数据库。

2.分析源码和问题的解决方法

查看check_oracle源码的tablespace部分，发现其对于表空间信息的获取是通过sql语句完成的。sql语句如下：

select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc

from

(select tablespace_name,sum(bytes)/1024/1024 total

   from dba_data_files group by tablespace_name) A

LEFT OUTER JOIN

(select tablespace_name,sum(bytes)/1024/1024 free

   from dba_free_space group by tablespace_name) B

ON a.tablespace_name=b.tablespace_name

WHERE a.tablespace_name='${5}';

其中${5}是表空间的名字。

由于临时文件的的信息不在dba_data_files表中，所以通过上述脚本显然不能获得临时表空间的任何信息。

那么如何获得临时表空间的空间使用情况呢，具体sql语句如下：

select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc

from

(select tablespace_name,sum(bytes)/1024/1024 total

   from dba_temp_files group by tablespace_name) A

LEFT OUTER JOIN

(select tablespace_name,sum(bytes_cached)/1024/1024 free

   from v\\$temp_extent_pool group by tablespace_name) B

ON a.tablespace_name=b.tablespace_name

WHERE a.tablespace_name='TEMP';

这条sql语句可以获得临时表空间“TEMP”的空间使用情况。

解决的方法找到了，下面我们只需将脚本做小小修改就可以达到要求了。

3.check_oracle脚本的修改

在check_oracle脚本中的“case”语句中增加一个“--tablespaceTEMP”分支，用于完成监控Oracle临时表空间的功能。修改后的脚本如下（修改、添加的部分已标出）：

#! /bin/ksh

#

# latigid010@yahoo.com

# 01/06/2000

#

#This Nagios plugin was created to check Oracle status

#

# Script name without its directory; used as the command name in usage output.
PROGNAME=`basename $0`

# Directory the script was invoked from: sed strips the final path component of $0.
PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`

# Revision number: everything except digits and dots is removed from the keyword string.
REVISION=`echo '$Revision: 1749 $' | sed -e 's/[^0-9.]//g'`

# Source the helper file that sits next to this script. Later code uses the
# STATE_* variables, print_revision and support without defining them here,
# so they are presumably provided by utils.sh -- confirm against that file.
. $PROGPATH/utils.sh

# Write the synopsis of every supported invocation to stdout, one per line,
# each prefixed with the script name held in $PROGNAME.
print_usage() {
cat << EOF
Usage:
$PROGNAME --tns <Oracle Sid or Hostname/IP address>
$PROGNAME --db <ORACLE_SID>
$PROGNAME --login <ORACLE_SID>
$PROGNAME --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>
$PROGNAME --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
$PROGNAME --tablespaceTEMP <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>
$PROGNAME --oranames <Hostname>
$PROGNAME --help
$PROGNAME --version
EOF
}

# Print the full help screen to stdout: the revision banner, the usage
# synopsis, a description of every check mode, and troubleshooting hints
# about ORACLE_HOME, PATH, tnsnames.ora and oratab.
print_help() {

# print_revision is not defined in this script; presumably from utils.sh.
print_revision $PROGNAME $REVISION

echo ""

print_usage

echo ""

echo "Check Oracle status"

echo ""

echo "--tns SID/IP Address"

echo " Check remote TNS server"

echo "--db SID"

echo " Check local database (search /bin/ps for PMON process) and check"

echo " filesystem for sgadefORACLE_SID.dbf"

echo "--login SID"

echo " Attempt a dummy login and alert if not ORA-01017: invalid username/password"

echo "--cache"

echo " Check local database for library and buffer cache hit ratios"

echo "    --->Requires Oracle user/password and SID specified."

echo "       --->Requires select on v_\$sysstat and v_\$librarycache"

echo "--tablespace"

echo " Check local database for tablespace capacity in ORACLE_SID"

echo "    --->Requires Oracle user/password specified."

echo "       --->Requires select on dba_data_files and dba_free_space"

echo "--tablespaceTEMP"

echo " Check local temporary database for tablespace capacity in ORACLE_SID"

echo "    --->Requires Oracle user/password specified."

echo "             --->Requires select on dba_temp_files and v_\$temp_extent_pool"

echo "--oranames Hostname"

echo " Check remote Oracle Names server"

echo "--help"

echo " Print this help screen"

echo "--version"

echo " Print version and license information"

echo ""

echo "If the plugin doesn't work, check that the ORACLE_HOME environment"

echo "variable is set, that ORACLE_HOME/bin is in your PATH, and the"

echo "tnsnames.ora file is locatable and is properly configured."

echo ""

echo "When checking local database status your ORACLE_SID is case sensitive."

echo ""

echo "If you want to use a default Oracle home, add in your oratab file:"

echo "*:/opt/app/oracle/product/7.3.4:N"

echo ""

# support is not defined in this script either; presumably from utils.sh.
support

}

# Normalise the first argument into $cmd: the numeric shortcuts 1 and 2
# select the --tns and --db checks, anything else is taken verbatim.
if [ "x$1" = "x1" ] ; then
    cmd='--tns'
elif [ "x$1" = "x2" ] ; then
    cmd='--db'
else
    cmd="$1"
fi

# Information options: handled here and exit immediately, before any
# ORACLE_HOME lookup takes place.
case "$cmd" in
--help|-h)
    print_help
    exit $STATE_OK
    ;;
--version|-V)
    print_revision $PROGNAME $REVISION
    exit $STATE_OK
    ;;
esac

# Hunt down a reasonable ORACLE_HOME
#
# Only runs when ORACLE_HOME is not already set in the environment. Each
# candidate oratab file is read as colon-separated "SID:ORACLE_HOME:rest"
# records; the first record whose SID equals $2 (the SID given on the command
# line) or is the wildcard "*" supplies ORACLE_HOME.

if [ -z "$ORACLE_HOME" ] ; then

# Adjust to taste

for oratab in /var/opt/oracle/oratab /etc/oratab

do

# Skip candidate files that do not exist.
[ ! -f $oratab ] && continue

# The loop below runs inside a command substitution, so its "exit" only ends
# that subshell and its echo output becomes the value of ORACLE_HOME.
ORACLE_HOME=`IFS=:

while read SID ORACLE_HOME junk;

do

   if [ "$SID" = "$2" -o "$SID" = "*" ] ; then

   echo $ORACLE_HOME;

   exit;

   fi;

done < $oratab`

# Stop at the first oratab file that yielded a value.
[ -n "$ORACLE_HOME" ] && break

done

fi

# Last resort
# Fall back to an "oracle" directory next to this script, if one exists.

[ -z "$ORACLE_HOME" -a -d $PROGPATH/oracle ] && ORACLE_HOME=$PROGPATH/oracle

# Every check except --db needs a usable ORACLE_HOME; --db proceeds without one.
if [ "$cmd" != "--db" ]; then

if [ -z "$ORACLE_HOME" -o ! -d "$ORACLE_HOME" ] ; then

echo "Cannot determine ORACLE_HOME for sid $2"

exit $STATE_UNKNOWN

fi

fi

# Make the Oracle binaries and libraries under ORACLE_HOME reachable for the
# tools invoked by the checks that follow.
PATH=$PATH:$ORACLE_HOME/bin

LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME/lib

export ORACLE_HOME PATH LD_LIBRARY_PATH

# ---------------------------------------------------------------------------
# Main dispatch: run the check selected by $cmd and exit with a Nagios state.
#
# Positional parameters (see print_usage):
#   $2          SID, or host name for --tns / --oranames
#   --cache     $3 USER  $4 PASS  $5 CRITICAL  $6 WARNING
#   tablespace  $3 USER  $4 PASS  $5 TABLESPACE  $6 CRITICAL  $7 WARNING
#
# The STATE_* variables are not defined in this script; they are expected to
# come from the utils.sh file sourced near the top.
#
# The SQL here-documents must not contain blank lines: with default settings
# SQL*Plus treats a blank line as the end of the statement being entered.
# ---------------------------------------------------------------------------

# check_tablespace_usage SID USER PASS TABLESPACE CRITICAL WARNING \
#                        SIZE_VIEW FREE_COLUMN FREE_VIEW
#
# Shared body of --tablespace and --tablespaceTEMP. Total size is
# sum(bytes) from SIZE_VIEW and free space is sum(FREE_COLUMN) from FREE_VIEW,
# both in MB and restricted to TABLESPACE. Prints one status line with
# performance data and exits with the matching Nagios state; never returns.
check_tablespace_usage() {
    ts_sid=$1
    ts_user=$2
    ts_pass=$3
    ts_name=$4
    ts_crit=$5
    ts_warn=$6
    ts_size_view=$7
    ts_free_col=$8
    ts_free_view=$9

    # Thresholds are "percent used", so critical must not be below warning.
    if [ "$ts_crit" -lt "$ts_warn" ] ; then
        echo "UNKNOWN - Warning level is more then Crit"
        exit $STATE_UNKNOWN
    fi

    result=`sqlplus -s ${ts_user}/${ts_pass}@${ts_sid} << EOF
set pagesize 0
set numf '9999999.99'
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from
(select tablespace_name,sum(bytes)/1024/1024 total
   from ${ts_size_view} group by tablespace_name) A
LEFT OUTER JOIN
(select tablespace_name,sum(${ts_free_col})/1024/1024 free
   from ${ts_free_view} group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name
WHERE a.tablespace_name='${ts_name}';
EOF`

    # Any ORA- error in the output is reported as CRITICAL (first one only).
    if [ -n "`echo $result | grep ORA-`" ] ; then
        error=` echo "$result" | grep "ORA-" | head -1`
        echo "CRITICAL - $error"
        exit $STATE_CRITICAL
    fi

    # The data row is the only line made up solely of digits, dots and blanks.
    ts_free=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
    ts_total=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($2)}'`
    ts_pct=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($3)}'`
    ts_pctx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $3}'`

    if [ "$ts_free" -eq 0 -a "$ts_total" -eq 0 -a "$ts_pct" -eq 0 ] ; then
        echo "No data returned by Oracle - tablespace $ts_name not found?"
        exit $STATE_UNKNOWN
    fi

    if [ "$ts_pct" -ge ${ts_crit} ] ; then
        echo "${ts_sid} : ${ts_name} CRITICAL - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${ts_name}=$ts_pctx%;${ts_warn};${ts_crit};0;100"
        exit $STATE_CRITICAL
    fi

    if [ "$ts_pct" -ge ${ts_warn} ] ; then
        echo "${ts_sid} : ${ts_name} WARNING- $ts_pctx% used [ $ts_free / $ts_total MB available ]|${ts_name}=$ts_pctx%;${ts_warn};${ts_crit};0;100"
        exit $STATE_WARNING
    fi

    echo "${ts_sid} : ${ts_name} OK - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${ts_name}=$ts_pctx%;${ts_warn};${ts_crit};0;100"
    exit $STATE_OK
}

case "$cmd" in

--tns)
    # OK when the tnsping output contains "OK"; the reply time is the text
    # inside the last pair of parentheses.
    tnschk=` tnsping $2`
    tnschk2=` echo $tnschk | grep -c OK`
    if [ ${tnschk2} -eq 1 ] ; then
        tnschk3=` echo $tnschk | sed -e 's/.*(//' -e 's/).*//'`
        echo "OK - reply time ${tnschk3} from $2"
        exit $STATE_OK
    else
        echo "No TNS Listener on $2"
        exit $STATE_CRITICAL
    fi
    ;;

--oranames)
    # awk turns the namesctl status report into a message plus exit status;
    # being the last command of the branch, its status is the script's.
    namesctl status $2 | awk '
/Server has been running for:/ {
    msg = "OK: Up"
    for (i = 6; i <= NF; i++) {
        msg = msg " " $i
    }
    status = '$STATE_OK'
}
/error/ {
    msg = "CRITICAL: " $0
    status = '$STATE_CRITICAL'
}
END {
    print msg
    exit status
}'
    ;;

--db)
    # Count processes whose command line ends in ora_pmon_<SID>.
    pmonchk=`ps -ef | grep -v grep | grep -c "ora_pmon_${2}$"`
    if [ ${pmonchk} -ge 1 ] ; then
        echo "${2} OK - ${pmonchk} PMON process(es) running"
        exit $STATE_OK
        # The original also carried a commented-out check of the
        # $ORACLE_HOME/dbs/sga*<SID>* files; it stays disabled.
    else
        echo "${2} Database is DOWN"
        exit $STATE_CRITICAL
    fi
    ;;

--login)
    # A dummy login rejected with ORA-01017 (invalid username/password)
    # shows that the instance is accepting connections.
    loginchk=`sqlplus dummy/user@$2 < /dev/null`
    loginchk2=` echo $loginchk | grep -c ORA-01017`
    if [ ${loginchk2} -eq 1 ] ; then
        echo "OK - dummy login connected"
        exit $STATE_OK
    else
        loginchk3=` echo "$loginchk" | grep "ORA-" | head -1`
        echo "CRITICAL - $loginchk3"
        exit $STATE_CRITICAL
    fi
    ;;

--cache)
    # Thresholds are hit ratios, so critical ($5) must not exceed warning ($6).
    if [ ${5} -gt ${6} ] ; then
        echo "UNKNOWN - Warning level is less then Crit"
        exit $STATE_UNKNOWN
    fi

    # Buffer cache hit ratio.
    result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select (1-(pr.value/(dbg.value+cg.value)))*100
from v\\$sysstat pr, v\\$sysstat dbg, v\\$sysstat cg
where pr.name='physical reads'
and dbg.name='db block gets'
and cg.name='consistent gets';
EOF`

    if [ -n "`echo $result | grep ORA-`" ] ; then
        error=` echo "$result" | grep "ORA-" | head -1`
        echo "CRITICAL - $error"
        exit $STATE_CRITICAL
    fi

    # Pick the purely numeric output line; int() form for comparisons,
    # raw form for display.
    buf_hr=`echo "$result" | awk '/^[0-9\. \t]+$/ {print int($1)}'`
    buf_hrx=`echo "$result" | awk '/^[0-9\. \t]+$/ {print $1}'`

    # Library cache hit ratio.
    result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select sum(lc.pins)/(sum(lc.pins)+sum(lc.reloads))*100
from v\\$librarycache lc;
EOF`

    if [ -n "`echo $result | grep ORA-`" ] ; then
        error=` echo "$result" | grep "ORA-" | head -1`
        echo "CRITICAL - $error"
        exit $STATE_CRITICAL
    fi

    lib_hr=`echo "$result" | awk '/^[0-9\. \t]+$/ {print int($1)}'`
    lib_hrx=`echo "$result" | awk '/^[0-9\. \t]+$/ {print $1}'`

    if [ $buf_hr -le ${5} -o $lib_hr -le ${5} ] ; then
        echo "${2} CRITICAL - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
        exit $STATE_CRITICAL
    fi

    if [ $buf_hr -le ${6} -o $lib_hr -le ${6} ] ; then
        echo "${2} WARNING- Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
        exit $STATE_WARNING
    fi

    echo "${2} OK - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
    exit $STATE_OK
    ;;

--tablespace)
    check_tablespace_usage "$2" "$3" "$4" "$5" "$6" "$7" \
        dba_data_files bytes dba_free_space
    ;;

--tablespaceTEMP)
    # edit by sky,TEMP tablespace
    # Temporary tablespaces are read from dba_temp_files and
    # v$temp_extent_pool instead of dba_data_files and dba_free_space.
    check_tablespace_usage "$2" "$3" "$4" "$5" "$6" "$7" \
        dba_temp_files bytes_cached 'v$temp_extent_pool'
    ;;

*)
    print_usage
    exit $STATE_UNKNOWN
    ;;

esac

4.测试、结语

脚本修改完了，下面测试一下。

$ ./check_oracle --tablespaceTEMP skydb sky sky TEMP 90 80

skydb : TEMP OK - 15.00% used [ 17 / 20 MB available ]|TEMP=15.00%;80;90;0;100

如果你没有成功，那可能是没有给用户分配dba_temp_files和v_$temp_extent_pool对象的select权限。

通过 --help 选项可以详细了解check_oracle的使用方法。

$ ./check_oracle --help

check_oracle v1749 (nagios-plugins 1.4.11)

The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute

copies of the plugins under the terms of the GNU General Public License.

For more information about these matters, see the file named COPYING.

Usage:

check_oracle --tns <Oracle Sid or Hostname/IP address>

check_oracle --db <ORACLE_SID>

check_oracle --login <ORACLE_SID>

check_oracle --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>

check_oracle --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>

check_oracle --tablespaceTEMP <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>

check_oracle --oranames <Hostname>

check_oracle --help

check_oracle --version

Check Oracle status

--tns SID/IP Address

Check remote TNS server

--db SID

Check local database (search /bin/ps for PMON process) and check

filesystem for sgadefORACLE_SID.dbf

--login SID

Attempt a dummy login and alert if not ORA-01017: invalid username/password

--cache

Check local database for library and buffer cache hit ratios

   --->Requires Oracle user/password and SID specified.

            --->Requires select on v_$sysstat and v_$librarycache

--tablespace

Check local database for tablespace capacity in ORACLE_SID

   --->Requires Oracle user/password specified.

            --->Requires select on dba_data_files and dba_free_space

--tablespaceTEMP

Check local temporary database for tablespace capacity in ORACLE_SID

   --->Requires Oracle user/password specified.

            --->Requires select on dba_temp_files and v$temp_extent_pool

--oranames Hostname

Check remote Oracle Names server

--help

Print this help screen

--version

Print version and license information

If the plugin doesn't work, check that the ORACLE_HOME environment

variable is set, that ORACLE_HOME/bin is in your PATH, and the

tnsnames.ora file is locatable and is properly configured.

When checking local database status your ORACLE_SID is case sensitive.

If you want to use a default Oracle home, add in your oratab file:

*:/opt/app/oracle/product/7.3.4:N

Send email to nagios-users@lists.sourceforge.net if you have questions

regarding use of this software. To submit patches or suggest improvements,

send email to nagiosplug-devel@lists.sourceforge.net.

Please include version information with all correspondence (when possible,

use output from the --version option of the plugin itself).

　　本文出自 “sky” 博客，请务必保留此出处http://skymax.blog.iyunv.com/365901/103331
　　###################################################
　　http://bbs.iyunv.com/viewthread.php?tid=938714&extra=page%3D1&page=
　　最近发现Nagios自带的监控WEB 80端口的脚本不太好用，服务已经无法提供了，还是没报警，所以就自己写了一个！

复制内容到剪贴板
代码:

#!/bin/bash
# author: honway.liu
# date: 2012-07-11
# version: 0.0.1
# desc: Nagios check for a web URL. Fetches the URL given as the first
#       argument with curl and reports OK (exit 0) when the HTTP status code
#       is 200, CRITICAL (exit 2) otherwise. A missing URL argument is
#       reported as UNKNOWN (exit 3).

# Print a short usage banner.
Usage() {
    echo "##################"
    echo "$0 URL"
    echo "##################"
}

if [ $# -lt 1 ]; then
    Usage
    # Exit 3 (UNKNOWN): falling through with exit 0 would make Nagios show a
    # misconfigured check as OK.
    exit 3
fi

# -m and --connect-timeout bound the request to 10 seconds; -w prints only the
# HTTP status code (curl prints 000 when no response was received).
url_status=$(curl -o /dev/null -s -m 10 --connect-timeout 10 -w '%{http_code}' "$1")

# Compare as a string so an empty or non-numeric result cannot make the test
# itself fail with an error.
if [ "$url_status" = "200" ]; then
    echo "OK - $1"
    exit 0
else
    echo "Critical - $1"
    exit 2
fi
欢迎大家拍砖
　　##########################
　　
　　
#####################################
　　 http://forum.icst.org.tw/phpbb/viewtopic.php?f=16&t=14382
　　最後用 check_tcp -H localhost -p 1521 代替

　　http://bbs.chinaunix.net/thread-2035313-1-1.html
　　

回复 #1 ppiqq 的帖子
　　

“check_command                check_tcp!192.168.0.247!3306”
这句改成：
check_command                check_tcp!3306

　　
　　 http://storysky.blog.iyunv.com/628458/737309
　　
　　该脚本只是通过检查pid 和 port两个参数来判断ttserver是否活着
ttserver 的启动命令如下
ttserver -host 192.168.1.9 -port 11209 -thnum 8 -dmn -pid /usr/local/ttserver/session/ttserver.pid -log /usr/local/ttserver/logs/ttserver_session.log
　　
　　以下是脚本内容
　　
　　check_tt.sh
　　

#!/bin/bash
# author storysky in 2011.07.20
# check ttserver status
#
# Nagios check for a ttserver instance. Reports OK (exit 0) when the pid
# recorded in the pid file equals the pid reported by "tcrmgr inform" and the
# running ttserver was started on port 11209; reports CRITICAL (exit 2)
# otherwise.

# pid written by ttserver at startup (-pid option); empty if the file is missing.
pid1=$(cat /usr/local/ttserver/session/ttserver.pid 2>/dev/null)

# Port taken from the ttserver command line in the process list. Field 15 of
# "ps aux" is the value following "-port" for the start command
# "ttserver -host <ip> -port <port> ..."; a different argument order breaks this.
port=$(ps aux | awk '/ttserver/&&!/awk/{print $15}')

# pid as reported by the server itself.
pid2=$(/usr/local/ttserver/bin/tcrmgr inform -port "$port" -st 192.168.1.9 2>/dev/null | awk '/pid/{print $2}')

# String comparisons with quoted operands: an empty pid or port yields a clean
# CRITICAL instead of a "[: unary operator expected" error.
if [ -n "$pid1" ] && [ "$pid1" = "$pid2" ] && [ "$port" = "11209" ]; then
    echo "OK TTserver is running"
    exit 0
else
    echo "Critical TTserver is error"
    exit 2
fi

　　这样就可以利用check_tcp 来得到ttserver的响应时间、uptime等信息，具体的命令格式如下：
check_tcp -H 192.168.1.9 -p 11209 -t 5 -E -s 'stats\r\nquit\r\n' -e 'uptime' -M crit
　　加入到nrpe.cfg 里面
command[check_ttserver]=/usr/local/nagios/libexec/check_tcp -H 192.168.1.9 -p 11209 -t 5 -E -s 'stats\r\nquit\r\n' -e 'uptime' -M crit
OK,试试吧

　　
　　http://liuyu.blog.iyunv.com/183345/64064/
　　
　　2、check 的使用，在安装后nagios plugins 后会产生N多check开头的文件。
就这是这些脚本的使用
那么对于apache如果只是监控端口80 并不能说明apache就正常，比如动态或者虚拟主机。其中一个网页down了但也不会报警。
于是就应该对check_tcp!80 进行修改
修改commands.cfg，添加：
# 'check_http' command definition.
# Runs the check_http plugin against the host address. The service supplies
# three arguments: $ARG1$ is passed as -u (URL path), $ARG2$ as -w (warning
# response time) and $ARG3$ as -c (critical response time).
define command{
   command_name check_http
   command_line $USER1$/check_http -H $HOSTADDRESS$ -u $ARG1$ -w $ARG2$ -c $ARG3$
   }
修改services.cfg
# HTTP service check for host aabbcc using the check_http command.
# check_http is defined with three arguments (-u $ARG1$ -w $ARG2$ -c $ARG3$),
# so all three are passed here: URL path, warning seconds, critical seconds.
# The thresholds 3 and 5 are example values; adjust them to the site.
# Inline comments must start with ';' -- text after '#' on a directive line
# is not stripped and would become part of the last argument.
define service{
   host_name             aabbcc
   service_description check-http
   check_command       check_http!/!3!5 ; path, warning 3s, critical 5s
   max_check_attempts    5
   normal_check_interval 3
   retry_check_interval 2
   check_period          24x7
   notification_interval 10
   notification_period 24x7
   notification_options w,u,c,r
   contact_groups       sagroup
   }
这样就OK 了。如果你要改一些选项的，在nagios/libexec 目录 ./check_http --help
根据参数改就好了。
　　################################################################
　　http://blog.iyunv.com/wangxiaosen/article/details/5935804
　　
　　
　　nagios 命令解释
　　

check_ssh
            界面拼装参数格式如下共3个元素:
                     命令!端口!连接超时时间
                     check_ssh!22!10
check_http
            界面拼装参数格式如下共4个元素:
            命令!告警时延!严重告警时延!连接超时时间
            check_http!0.0020!0.0050!10
check_imap
check_ftp
check_nntp
check_pop
check_udp
check_tcp
            界面拼装参数格式如下共5个元素:
                     命令!端口!告警时延!严重告警时延!连接超时时间
                     check_tcp!23!0.0020!0.0050!10
                     备注:
                     check_imap=check_tcp!143
                     check_ftp=check_tcp!21
                     check_nntp=check_tcp!119
                     check_pop=check_tcp!110
                     check_udp=check_tcp
                     check_telnet=check_tcp!23
check_smtp
            界面拼装参数格式如下共4个元素:
                     命令!告警时延!严重告警时延!连接超时时间
                     check_smtp!0.0020!0.0050!10
check_ping
            界面拼装参数格式如下共5个元素:
                     命令!告警时延,丢包率!严重告警时延,丢包率!检测数据包个数!连接超时时间
                     check_ping!3000.0,80%!5000.0,100%!5!10　　

二、命令检测详细描述
check_ssh
      Usage:check_ssh [-46] [-t <timeout>] [-r <remote version>] [-p <port>] <host>
            参数:
            -h, --help
                     帮助
            -V, --version
                     列出版本信息
            -H, --hostname=ADDRESS
                     主机名称,IP地址,或者UNIX套接字(必须有绝对路径)
            -p, --port=INTEGER
                     端口号(默认:22)
            -4, --use-ipv4
                     使用IPV4协议连接
            -6, --use-ipv6
                     使用IPV6协议连接
            -t, --timeout=INTEGER
                     连接超时秒数(默认:10秒)
            -r, --remote-version=STRING
                     不匹配服务器版时警告字符串,如对方的版本为OpenSSH_3.9p1
            -v, --verbose
                     列出详细的命令调试行
            举例
            ./check_ssh -H 192.168.2.220 -p 22 -t 10 -r OpenSSH_3.0pl
            SSH WARNING - OpenSSH_3.8.1p1 Debian-8.sarge.6 (protocol 2.0) version mismatch, expected 'OpenSSH_3.0pl'
            ./check_ssh -H 192.168.2.220 -p 22 -t 10
            SSH OK - OpenSSH_3.8.1p1 Debian-8.sarge.6 (protocol 2.0)
            check_ssh -H $HOSTADDRESS$ -p 22 -t 10
            界面拼装参数格式如下共3个元素命令!端口!连接超时时间
            check_ssh!22!10

check_http
            Usage: check_http -H <vhost> | -I <IP-address> [-u <uri>] [-p <port>]
                        [-w <warn time>] [-c <critical time>] [-t <timeout>] [-L]
                              [-a auth] [-f <ok | warn | critcal | follow>] [-e <expect>]
                              [-s string] [-l] [-r <regex> | -R <case-insensitive regex>] [-P string]
                              [-m <min_pg_size>:<max_pg_size>] [-4|-6] [-N] [-M <age>] [-A string] [-k string]
            -h, --help
                     帮助
            -V, --version
                     列出版本信息
            -H, --hostname=ADDRESS
                     虚拟主机名加端口(如:example.com:5000)
            -I, --IP-address=ADDRESS
                     IP地址或名称(如果无需DNS的查找,使用十进制的地址)
            -p, --port=INTEGER
                     端口数(默认: 80)
            -4, --use-ipv4
                     使用IPV4连接
            -6, --use-ipv6
                     使用IPV6连接
            -e, --expect=STRING
                     期望在服务器响应的第一行(状态行)中出现的字符串(默认是:HTTP/1.)。如果指定该选项,则跳过其它所有状态行判断逻辑
            -s, --string=STRING
                     指定内容
            -u, --url=PATH
                     获取或发送的URL(默认:/)
            -P, --post= STRING
                     URL进行POST的HTTP数据
            -N, --no-body
                     不等待文档正文:获取报头后停止读取。
                     (注意,这是一个HTTP的获取和发送,而不是报头)
            -M, --max-age=SECONDS
                     如果文档超过生存期则警告。数据是如下形式的:分数是"10m",小时数是"10h",天数是 "10d"
            -T, --content-type=STRING
                     在传输的时候指定容器类型媒体类型
            -l, --linespan
                     允许正则表达式跨越新行(必须在前面使用 –R 或-r)
            -r, --regex, --ereg=STRING
                     用正则表达式字符串搜索页
            -R, --eregi=STRING
                     用正则表达式字符串搜索页,允许模糊查找
            --invert-regex
                     如果找到返回CRITICAL,找不到返回OK
            -a, --authorization=AUTH_PAIR
                     用户名:在站点最基本的密码认证
            -A, --useragent=STRING
                     转换成字符串放在HTTP报头里发送,像"用户代理"
            -k, --header=STRING
                     任何其它的标签被放在HTTP报头里发送。可以被附加的报头使用多次。
            -L, --link=URL
                     在HTML 链接里隐藏发送包
            -f, --onredirect
                     怎样解决重定向页
            -m, --pagesize=INTEGER<:INTEGER>
                     最小最大页大小要求(BYTES)
            -w, --warning=DOUBLE
                     告警状态的返回时间(秒)
            -c,--critical=DOUBLE
                     严重状态的返回时间(秒)
            -t, --timeout=INTEGER
                     指定超时前的时间(默认10秒)
            -v , --verbose
                     列出详细的命令调试行
            举例
            ./check_http -H 192.168.2.220 -p 80
            HTTP OK HTTP/1.1 200 OK - 5553 bytes in 0.057 seconds |time=0.057428s;;;0.000000 size=5553B;;;0
            ./check_http -H 192.168.2.220 -p 80 -w 0.0020 -c 0.0060
            HTTP WARNING: HTTP/1.1 200 OK - 0.003 second response time |time=0.003068s;0.002000;0.006000;0.000000 size=5553B;;;0
            ./check_http -H 192.168.2.220 -p 80 -w 0.0030 -c 0.0040
            HTTP OK HTTP/1.1 200 OK - 5553 bytes in 0.003 seconds |time=0.002673s;0.003000;0.004000;0.000000 size=5553B;;;0
            ./check_http -H 192.168.2.220 -p 80 -w 0.0009 -c 0.0040 -t 10
            HTTP WARNING: HTTP/1.1 200 OK - 0.002 second response time |time=0.002102s;0.000900;0.004000;0.000000 size=5553B;;;0
            界面拼装参数格式如下共4个元素命令!告警时延!严重告警时延!连接超时时间
            check_http!0.0020!0.0050!10　　
　　check_clamd
check_imap
check_ftp
check_nntp
check_pop
check_udp
check_tcp
            Usage:check_tcp -H host -p port [-w <warning time>] [-c <critical time>] [-s <send string>]
                              [-e <expect string>] [-q <quit string>][-m <maximum bytes>] [-d <delay>]
                              [-t <timeout seconds>] [-r <refuse state>] [-M <mismatch state>] [-v] [-4|-6] [-j]
                              [-D <days to cert expiry>] [-S <use SSL>] [-E]
            -h, --help
                     帮助
            -V, --version
                     列出版本信息
            -H, --hostname=ADDRESS
                     主机名,IP地址,或则UNIX套接字 (必须是绝对路径)
            -p, --port=INTEGER
                     端口数 (默认: 无)
            -4, --use-ipv4
                     使用IPV4连接
            -6, --use-ipv6
                     使用IPV6连接
            -E, --escape
                     可以在发送(send)或退出(quit)字符串中使用 \n, \r, \t 或 \ 转义字符
                     默认情况下发送字符串末尾不附加任何内容,退出字符串末尾附加 \r\n
            -s, --send=STRING
                     发送服务器的字符串
            -e, --expect = STRING
                     转换为服务器返回的字符串
            -q, --quit= STRING
                     关闭的连接时发送给服务器的字符串
            -r, --refuse=OK|warn|crit
                     允许 TCP 拒绝的状态:ok,warn,crit (默认:warn)
            -M, --mismatch= OK|warn|crit
                     预期字符串不匹配时返回的状态:ok,warn,crit (默认:warn)
            -j, --jail
                     隐藏TCP套接字的输出
            -m, --maxbytes=INTEGER
                     当接收数据包大于指定的大小时,关闭连接。
            -d, --delay
                     支持在发送数据流和轮询反馈间等待的延迟
            -w, --warning=DOUBLE
                     告警状态的返回时间(秒)
            -c,--critical=DOUBLE
                     严重状态的返回时间(秒)
            -t, --timeout=INTEGER
                     指定超时前的时间(默认10秒)
            -v , --verbose
                     列出详细的命令调试行
            举例
            ./check_tcp -H 192.168.2.220 -p 22 -w 0.0023 -c 0.0067 -t 10
            TCP OK - 0.002 second response time on port 22|time=0.002289s;0.002300;0.006700;0.000000;10.000000
            ./check_tcp -H 192.168.2.220 -p 22 -w 0.0003 -c 0.0006 -t 10
            TCP WARNING - 0.000 second response time on port 22|time=0.000318s;0.000300;0.000600;0.000000;10.000000
            界面拼装参数格式如下共5个元素:
                     命令!端口!告警时延!严重告警时延!连接超时时间
                     check_tcp!23!0.0020!0.0050!10
                     备注:
                     check_imap=check_tcp!143
                     check_ftp=check_tcp!21
                     check_nntp=check_tcp!119
                     check_pop=check_tcp!110
                     check_udp=check_tcp
                     check_telnet=check_tcp!23
　　
　　
　　http://www.iyunv.com.com/Linux/2011-09/44192.htm
　　
　　
　　监控客户端81,82,22,
# vim /usr/local/nagios/etc/objects/hosts.cfg
# Host definition for svr1 (10.1.1.10); reachability is tested with
# check-host-alive and problems are notified to contact group sagroup.
define host {
host_name svr1.labexam.com
alias svr1
address 10.1.1.10
contact_groups sagroup
check_command check-host-alive
# A host object takes a single check_command. The NRPE checks that were listed
# here are service checks and belong in "define service" blocks instead:
#   check_command nrpe!check_users
#   check_command nrpe!check_total_procs
#   check_command nrpe!check_load
max_check_attempts 4
notification_interval 5
notification_period 24x7
notification_options d,u,r
}

# vim svr1_services.cfg
# Service "ALIVE" on svr1.labexam.com: runs the check-host-alive command
# every 2 interval units, retrying once (max_check_attempts 2) after 1 unit
# before notifying contact group sagroup.
define service {
   host_name svr1.labexam.com
   service_description ALIVE
   check_period 24x7
   max_check_attempts 2
   normal_check_interval 2
   retry_check_interval 1
   contact_groups sagroup
   notification_interval 5
   notification_period 24x7
   notification_options w,u,c,r
   check_command check-host-alive
}

# Service "SSH" on svr1.labexam.com: runs check_tcp with 22 as its first
# argument (presumably the port -- confirm against the check_tcp command
# definition in use).
# NOTE(review): retry_check_interval (2) is longer than normal_check_interval
# (1) here, unlike the other services in this file -- confirm this is intended.
define service {
   host_name svr1.labexam.com
   service_description SSH
   check_period 24x7
   max_check_attempts 2
   normal_check_interval 1
   retry_check_interval 2
   contact_groups sagroup
   notification_interval 5
   notification_period 24x7
   notification_options w,u,c,r
   check_command check_tcp!22
}

# Service "HTTP_81" on svr1.labexam.com: runs check_tcp with 81 as its first
# argument (presumably the port -- confirm against the check_tcp command
# definition in use).
define service {
   host_name svr1.labexam.com
   service_description HTTP_81
   check_period 24x7
   max_check_attempts 2
   normal_check_interval 1
   retry_check_interval 1
   contact_groups sagroup
   notification_interval 5
   notification_period 24x7
   notification_options w,u,c,r
   check_command check_tcp!81
}

# Service "HTTP_82" on svr1.labexam.com: runs check_tcp with 82 as its first
# argument (presumably the port -- confirm against the check_tcp command
# definition in use).
define service {
   host_name svr1.labexam.com
   service_description HTTP_82
   check_period 24x7
   max_check_attempts 2
   normal_check_interval 1
   retry_check_interval 1
   contact_groups sagroup
   notification_interval 5
   notification_period 24x7
   notification_options w,u,c,r
   check_command check_tcp!82
}
# Service "FASTCGI" on svr1.labexam.com: runs check_tcp with 9000 as its
# first argument (presumably the port -- confirm against the check_tcp
# command definition in use).
define service {
   host_name svr1.labexam.com
   service_description FASTCGI
   check_period 24x7
   max_check_attempts 2
   normal_check_interval 1
   retry_check_interval 1
   contact_groups sagroup
   notification_interval 5
   notification_period 24x7
   notification_options w,u,c,r
   check_command check_tcp!9000
}
# Service "DISK" on svr1.labexam.com: runs the "nrpe" command with check_df
# as its argument (presumably the name of a command configured in the
# client's nrpe.cfg -- verify on the monitored host).
define service {
   host_name svr1.labexam.com
   service_description DISK
   check_period 24x7
   max_check_attempts 3
   normal_check_interval 2
   retry_check_interval 2
   contact_groups sagroup
   notification_interval 2
   notification_period 24x7
   notification_options w,u,c,r
   check_command nrpe!check_df
}

# Service "LOAD" on svr1.labexam.com: runs the "nrpe" command with check_load
# as its argument (presumably the name of a command configured in the
# client's nrpe.cfg -- verify on the monitored host).
define service {
   host_name svr1.labexam.com
   service_description LOAD
   check_period 24x7
   max_check_attempts 3
   normal_check_interval 2
   retry_check_interval 2
   contact_groups sagroup
   notification_interval 2
   notification_period 24x7
   notification_options w,u,c,r
   check_command nrpe!check_load
}

# Service "LOGOIN" on svr1.labexam.com: runs the "nrpe" command with
# check_users as its argument (presumably the name of a command configured in
# the client's nrpe.cfg -- verify on the monitored host).
# NOTE(review): "LOGOIN" looks like a typo for "LOGIN"; the name is kept as-is
# because the service description identifies the service.
define service {
   host_name svr1.labexam.com
   service_description LOGOIN
   check_period 24x7
   max_check_attempts 2
   normal_check_interval 1
   retry_check_interval 1
   contact_groups sagroup
   notification_interval 5
   notification_period 24x7
   notification_options w,u,c,r
   check_command nrpe!check_users
}

# vim /usr/local/nagios/etc/nagios.cfg
增加一条：
cfg_file=/usr/local/nagios/etc/objects/svr1_services.cfg

# /etc/init.d/nagios reload
Running configuration check...done.
Reloading nagios configuration...done

页: [1]

运维网's Archiver

nagios,weblogic,8080,oracle,1521