|
需要用的组件
监控服务器:RHEL5(192.168.0.20)+nagios-3.0.5+nagios-plugins-1.4.11+ nrpe_2.8.1
被监控端RHEL5 (192.168.0.21)+ nagios-plugins-1.4.11+nrpe_2.8.1
1.NRPE监控插件
NRPE由两部分组成:
check_nrpe插件,运行在监控主机上。
NRPE daemon,运行在远程的linux主机上(通常就是被监控机)
整个的监控过程:
当Nagios需要监控某个远程linux主机时:
1).nagios运行check_nrpe插件,在nagios配置文件定义检查内容.
2).check_nrpe插件会通过SSL连接到远程的NRPE daemon.
3).NRPE daemon会运行相应的nagios插件来执行检查.
4).NRPE daemon将检查的结果返回给check_nrpe插件,插件将其递交给nagios处理.
注意:NRPE daemon需要nagios插件安装在远程被监控linux主机上,否则,daemon不能做任何的监控. 另外因为它们间的通信是加密的SSL,所以在编译安装时都要加上选项, ./configure --enable-ssl --with-ssl-lib=/lib/,否则也会出错.
file:///C:/Documents%20and%20Settings/Administrator/%E6%A1%8C%E9%9D%A2/%E5%9B%BE%E5%83%8F000.png
2.配置监控端
1.安装nagios
rpm -qa |grep gd
rpm -ql gd-devel-2.0.28-5.4E.el4_6.1
cd nagios-3.0.5
./configure --prefix=/usr/local/nagios --with-command-group=nagcmd --with-gd-lib=/usr/lib --with-gd-inc=/usr/include
make all
make install
make install-init
make install-config
make install-commandmode
make install-webconf #可以自动配置httpd.conf
| 2.安装nagios-plugins
cd nagios-plugins-1.4.11
./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-redhat-pthread-workaround
make
make install
| 3.安装NRPE
cd nagios-nrpe_2.8.1
./configure #默认自动添加了openssl
#因为传送过程要加密,如果后面make报错,加如下参数
rpm -qa| grep ssl
openssl-devel-0.9.7a-43.17.el4_6.1
rpm -ql openssl-devel-0.9.7a-43.17.el4_6.1| more
./configure --enable-ssl --with-ssl-lib=/lib/ (当然前提要有openssl)
make all
make install-plugin
| 4.commands.cfg定义外部构件nrpe
vi /usr/local/nagios/etc/objects/commands.cfg
#添加
#check nrpe
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
| 5.配置要监控的linux主机
vi /usr/local/nagios/etc/nagios.cfg
#中间添加
cfg_file=/usr/local/nagios/etc/objects/mylinux.cfg
| 6.新建mylinux.cfg 设置监控内容
vi /usr/local/nagios/etc/objects/mylinux.cfg
define host{
use linux-server
host_name mylinux
alias mylinux
address 192.168.0.21(客户端IP,即被监控端的IP)
}
define service{
use generic-service
host_name mylinux
service_description check-swap
check_command check_nrpe!check_swap
}
define service{
use generic-service
host_name mylinux
service_description check-load
check_command check_nrpe!check_load
}
define service{
use generic-service
host_name mylinux
service_description check-disk
check_command check_nrpe!check_hda1
}
define service{
use generic-service
host_name mylinux
service_description check-users
check_command check_nrpe!check_users
}
define service{
use generic-service
host_name mylinux
service_description total_procs
check_command check_nrpe!check_total_procs
}
| 7.其它设置
chkconfig --add nagios #配置机器启动时自动启动Nagios
chkconfig nagios on
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查Nagios配置文件
vi /etc/selinux/config #关闭SELinux
SELINUX=disabled
service iptables stop #关闭防火墙(iptables),或打开80,5666端口
service nagios start
| 3.配置被监控端
1.安装nagios-plugin
useradd nagios
passwd nagios
tar -zxvf nagios-plugins-1.4.12.tar.gz
cd nagios-plugins-1.4.12
./configure --with-nagios-user=nagios --with-nagios-group=nagios --enable-redhat-pthread-workaround
make
make install
| 2.改变主目录权限
chown -R nagios.nagios /usr/local/nagios
[root@client nagios]# ll
drwxr-xr-x 2 nagios nagios 4096 Jun 1 00:07 libexec
drwxr-xr-x 3 nagios nagios 4096 Jun 1 00:07 share
| 3.安装客户端的nrpe
tar -zxvf nagios-nrpe_2.8.1.orig.tar.gz
cd nagios-nrpe_2.8.1
./configure (会自动加载SSL)
#如果后面make报错,加如下参数
./configure --enable-ssl --with-ssl-lib=/usr/lib/ (当然前提要有openssl)
make all
make install-plugin
make install-daemon
make install-daemon-config
| 4.配置nrpe信息
vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=192.168.0.20,127.0.0.1,192.168.0.99
| 5.启动nrpe
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
#或
vi /etc/rc.d/rc.local
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d
| 6.验证nrpe
netstat -an | grep 5666
tcp 0 0 0.0.0.0:5666 0.0.0.0:* LISTEN
/usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
NRPE v2.8.1
#服务端测试
/usr/local/nagios/libexec/check_nrpe -H 192.168.0.21
NRPE v2.8.1
#常见错误
1. /usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
CHECK_NRPE: Error - Could not complete SSL handshake.
配置allowed_hosts=192.168.0.20,127.0.0.1,192.168.0.99,然后kill进程再重启就OK了
2. /usr/local/nagios/libexec/check_nrpe -H 127.0.0.1
Connection refused by host
Nrpe进程没有启动
| 7.配置监控对象
被监控端做监控然后传给监控服务器汇总,设置监控详细参数主要是设置被监控端的nrpe.cfg文件
可以看到里面监控对象
vi /usr/local/nagios/etc/nrpe.cfg
command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10
command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20
command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20 -c 10 -p /dev/hda1
command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z
command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 150 -c 200
command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10%
| 如果nagios服务器想要监控客户机swap分区,但客户机nrpe.cfg文件没有swap监控对象,这时监控服务器就会报错(NRPE: Command 'check_swap' not defined).
到此,就可以监控linux客户机的Disk,swap,users,memory了.
1、 解包、配置:tar zxvf httpd-2.2.0.tar.gz ; cd httpd-2.2.0 ; ./configure --prefix=/usr/local/apache 。
2、 编译安装: make ; make install 。
安装完成后,执行命令 /usr/local/apache/bin/apachectl -t 检查一下apache是否正确安装。
#setting for nagios
ScriptAlias /nagios/cgi-bin /usr/local/nagios/sbin
<Directory "/usr/local/nagios/sbin">
AuthType Basic
Options ExecCGI
AllowOverride None
Order allow,deny
Allow from all
AuthName "Nagios Access"
AuthUserFile /usr/local/nagios/etc/htpasswd
Require valid-user
</Directory>
Alias /nagios /usr/local/nagios/share
<Directory "/usr/local/nagios/share">
AuthType Basic
Options None
AllowOverride None
Order allow,deny
Allow from all
AuthName "nagios Access"
AuthUserFile /usr/local/nagios/etc/htpasswd
Require valid-user
</Directory>
更改目录/usr/local/apache的权限 为nagios用户
执行 /usr/local/apache/bin/apachectl -t 检查语法
然后 /usr/local/apache/bin/apachectl start & 把apache启动到后台
注意有的版本自带了自己的apache服务,不要使用 /etc/init.d/httpd start 进行启动,否则会冲突
四。进入到/usr/local/nagios/etc/下,目录结构如下
然后进入到 ./nagios下对nagios 配置文件进行修改
cfg_file=/usr/local/nagios//etc/objects/commands.cfg //会在 object 的目录下生成相应的配置文档
cfg_file=/usr/local/nagios//etc/objects/contacts.cfg //
cfg_file=/usr/local/nagios//etc/objects/timeperiods.cfg //
cfg_file=/usr/local/nagios//etc/objects/templates.cfg //
cfg_file=/usr/local/nagios//etc/objects/windows.cfg //取消该行的注释,开启监测windows服务器
cfg_file=/usr/local/nagios//etc/objects/switch.cfg //取消的注释行,开启对路由和交换机的监测
command_check_interval=10s //检查外部命令的时间间隔(后缀s表示秒),默认为-1,即尽可能频繁地检查
enable_notifications=1 //是否发送通知的过滤器
在cgi.cfg中进行修改
authorized_for_system_information=nagiosadmin,zhangbo
authorized_for_configuration_information=nagiosadmin,zhangbo
authorized_for_system_commands=zhangbo //多个用户之间用逗号隔开
authorized_for_all_services=nagiosadmin,zhangbo
authorized_for_all_hosts=nagiosadmin,zhangbo
authorized_for_all_service_commands=nagiosadmin,zhangbo
authorized_for_all_host_commands=nagiosadmin,zhangbo
下面进入到 ./objects 的目录下,该目录的结构如下:
在 contacts.cfg 中进行修改:这里主要是联系人的相关设置
define contactgroup{
contactgroup_name admins //联系组的相应组名,后面要用到
alias Nagios Administrators //别名
members zhangbo //成员,多用户用“,”隔开
}
define contact{
contact_name zhangbo //联系人
alias Nagios zhangbo (Admin) //别名
email zhangbo@邮箱 //联系人的邮箱,通知的邮箱
}
在timeperiods.cfg中没有修改: //这里主要是一些关于监测时间,报警时间,间断时间的一些设置,保持默认即可
在localhost.cfg 下的修改:
define hostgroup{
hostgroup_name hostadmin //监视的主机组的名称
alias hostadmin //别名
members zhangbo //成员名 ,多用户用“,”隔开
}
define host{
use linux-server //使用的类定义,根据templates.cfg的定义
host_name zhangbo //主机名
alias zhangbo //别名
parents MainSwitch
address 192.168.1.251 //对应的ip地址
icon_image server.gif
statusmap_image server.gd2
2d_coords 500,200
3d_coords 500,200,100
}
在 templates.cfg 中进行修改://可以根据自己的需要进行相应类的定义
define contact{
name generic-contact //定义的类名
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r,f,s ; send notifications for all service states, flapping events, and scheduled downtime events
host_notification_options d,u,r,f,s ; send notifications for all host states, flapping events, and scheduled downtime events
service_notification_commands notify-service-by-email
host_notification_commands notify-host-by-email
register 0
}
检测:
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
出现下面情况表示成功
Checking misc settings...
Total Warnings: 0
Total Errors: 0
Things look okay - No serious problems were detected during the pre-flight check
对于监测点来说,我这里都是一些windows的机器,因为系统的差异,对windows的监控需要在各监控点上安装第三方的软件--NSClient++
NSClient++ 客户端在Windows上的安装步骤
1,到官方网站上下载最新的稳定的 NSClient++ 软件包或安装程序
2. 将其安装在C盘下
3.在 cmd 的模式下,进入到安装目录,执行:
NSClient++ /uninstall
NSClient++ SysTray uninstall
NSClient++ /install
NSClient++ SysTray install
4.在安装目录下对配置文件进行修改:
将 [modules]部分的所有模块前面的注释都去掉,除了CheckWMI.dll 和 RemoteConfiguration.dll这两个
在[Settings]部分设置'password'选项来设置密码,作用是在nagios连接过来时要求提供密码.这一步是可选的,我这里方便起见跳过它,不要密码.
将[Settings]部分'allowed_hosts'选项的注释去掉,并且加上运行nagios的监控主机的IP.我改为如下这样allowed_hosts=127.0.0.1/32,192.168.0.111 以逗号相隔.这个地方是支持子网的,如果写成192.168.0.0/24则表示该子网内的所有机器都可以访问.如果这个地方是空白则表示所有的主机都可以连接上来.注意是[Settings]部分的,因为[NSClient]部分也有这个选项.
必须保证[NSClient]的'port'选项并没有被注释,并且它的值是'12489',这是NSClient的默认监听端口
5.在CMD中执行nsclient++ /start启动服务,注意所在目录是C:\NSClient++
password=secret-password
下面是web的nagios的监控图形界面
对服务器监控测试正常
|
|