downmovies 发表于 2018-10-26 10:16:27

nagios 监控 mongodb 解决nagios界面显示null 问题

  网上有很多关于nagios 监控mongodb 的介绍,但是无一例外,手工执行 python 能获取客户端的值,nagios 监控界面上面显示null, 针对该问题,本人用shell 脚本重新封装了部分监控选项,让nagios 能正常监控mongodb 服务器
  如果想了解nagios 监控mongodb的部署过程,请点击:
  http://www.iyunv.com/database/201410/341855.html
  https://github.com/mzupan/nagios-plugin-mongodb/blob/master/README.md
  实现原理:
  利用shell 脚本 获取 check_mongodb.py 的 值,然后传给nagios 实现nagios 警告
系统环境变量:
  centos5.8 64bit
  python2.4.3
  pymongo1.9
安装pymongo
  tar -xvzf pymongo-1.9.tar.gz
  cd pymongo-1.9
  python setup.py install
检查是否安装pymongo
  # python
  Python 2.4.3 (#1, Feb 22 2012, 16:05:45)
   on linux2
  Type "help", "copyright", "credits" or "license" for more information.
  >>> import pymongo
  >>> pymongo.version
  '1.9'
  >>> import sys
  >>> sys.exit()
  mv nagios-plugin-mongodb-bycsc.zip /usr/local/nagios/libexec/
  unzip nagios-plugin-mongodb-bycsc.zip
  chown -R nagios:nagios /usr/local/nagios/libexec/nagios-plugin-mongodb
  chmod -R 755 /usr/local/nagios/libexec/nagios-plugin-mongodb
  执行check_mongodb.py 看是否能正常运行,如下显示说明正常运行
  # ./check_mongodb.py -h
  。。。省略
  -c COLLECTION, --collection=COLLECTION
  Specify the collection to check
  -T SAMPLE_TIME, --time=SAMPLE_TIME
  Time used to sample number of pages faults
nagios服务配置:
1.nagios 服务器root 账户 定时任务配置:
  具体参数请参考:/usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh脚本
  */10 * * * * /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh 10.0.8.17 ALL 30000
  */10 * * * * /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh 10.0.8.18 ALL 30000
  */10 * * * * /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh 10.0.8.19 ALL 30000
  利用定时任务,把以上服务器的状态检查结果重定向到/tmp文件夹下
2.nagios 服务器端 配置文件配置
commands 配置文件:
  vi /usr/local/nagios/etc/objects/commands.cfg 添加:
  define command {
  command_name    check_mongodb
  command_line    /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh '$HOSTADDRESS$' '$ARG1$' '$ARG2$'
  }
  命令解说:
  /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh    IP地址 监控选项 端口
  check_mongodb.py监控选项可以有如下:
  usage:check_mongodb.py
  check_mongodb.py:error: option -A: invalid choice: 'memordfd' (choose from 'connect','connections', 'replication_lag', 'replication_lag_percent', 'replset_state','memory', 'memory_mapped', 'lock', 'flushing', 'last_flush_time','index_miss_ratio', 'databases', 'collections', 'database_size','database_indexes', 'collection_indexes', 'collection_size', 'queues', 'oplog','journal_commits_in_wl', 'write_data_files', 'journaled', 'opcounters','current_lock', 'replica_primary', 'page_faults', 'asserts','queries_per_second', 'page_faults', 'chunks_balance', 'connect_primary','collection_state', 'row_count', 'replset_quorum')
  目前 check_mongodb.sh 只配置了 'connect'、'connections'、'replset_state'、'memory'、'replication_lag' 这几个选项
  可以参考 README.md 中的例子进行配置!
测试配置:
  在nagios 账户下运行命令检查mongodb服务器:(nagios账户下面执行)
  su - nagios
  # su - nagios
  $ /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh 10.0.8.19 memory 30000
  OK - Memory Usage: 0.04GB resident, 0.78GB virtual, 0.08GB mapped, 0.16GB mappedWithJournal
  如显示以上结果,则通过
编写nagios 服务器端 mongodb 服务器的配置文件:
  # cat /usr/local/nagios/etc/objects/server-8-17.cfg
  define host{
  use         linux-server
  host_name               server-8-17
  alias                   server-8-17
  address               10.0.8.17
  }
  define service{
  use             generic-service
  host_name       server-8-17
  service_description   SSH
  check_command   check_ssh
  }
  ......省略其他配置文件
  #检测mongodb服务的连接时间
  define service{
  use             generic-service
  host_name       server-8-17
  service_description   check mongodb connect 30000
  check_command            check_mongodb!connect!30000
  }
  #检查mongodb的连接数
  define service{
  use             generic-service
  host_name       server-8-17
  service_description   check mongodb connections 30000
  check_command            check_mongodb!connections!30000
  }
  #检查mongodb内存使用率
  define service{
  use             generic-service
  host_name       server-8-17
  service_description   check mongodb memory 30000
  check_command         check_mongodb!memory!30000
  }
  #mongo复制的状态
  define service{
  use             generic-service
  host_name       server-8-17
  service_description   check mongodb replset state 30000
  check_command         check_mongodb!replset_state!30000
  }
  #检查mongodb复制完成的百分比率确保primary和standby的time是一致的
  define service{
  use             generic-service
  host_name       server-8-17
  service_description   check mongodb replication lag 30000
  check_command         check_mongodb!replication_lag!30000
  }
  备注:
  检查阈值 请在 /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh 里面配置 -W -C 参数
  请点击下载 重新封装过的nagios-plugin-mongodb: http://down.51cto.com/data/2061502
  下载后 放在/usr/local/nagios/libexec/目录,
  解压:unzip nagios-plugin-mongodb-bycsc.zip
  赋权:chown -R nagios:nagios nagios-plugin-mongodb
  ##################################################################
  more /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh
  #!/bin/sh
  # crontab by user root
  # */5 * * * * /usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh 192.168.0.1 ALL 27017 > /dev/null 2>&1
  # run process nrpe by user nagios
  #command=/usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh connect
  #command=/usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh connections
  #command=/usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh memory
  #command=/usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.sh replset_state
  #VERSION="mycheck_mongodb.sh v1.0a, by csc, 2015-06-17."
  ######################################
  # Run one check_mongodb.py action and cache its stdout in a file under /tmp,
  # where RUN_BY_NAGIOS later reads it.
  # Globals:
  #   CHECK_MONGODB_PY (read, optional) - plugin to run; defaults to the
  #     check_mongodb.py installed under /usr/local/nagios/libexec.
  #   tmpfile, plugin, warn, crit (written) - plain globals, because the
  #     script runs under /bin/sh where `local` is not guaranteed.
  # Arguments:
  #   $1 - MongoDB host, $2 - check action, $3 - MongoDB port
  # Outputs:
  #   Writes the plugin's stdout to /tmp/check_mongodb_<host>_<action>_<port>.tmp;
  #   prints a usage example to stdout for an unsupported action.
  # Returns:
  #   The plugin's exit status, or 0 after printing the usage example.
  RUN_BY_ROOT()
  {
    plugin=${CHECK_MONGODB_PY:-/usr/local/nagios/libexec/nagios-plugin-mongodb/check_mongodb.py}
    tmpfile="/tmp/check_mongodb_${1}_${2}_${3}.tmp"

    # Per-action warning (-W) and critical (-C) thresholds.
    case "$2" in
      connect)         warn=2;  crit=4  ;;
      connections)     warn=70; crit=80 ;;
      memory)          warn=20; crit=28 ;;
      replset_state)   warn=0;  crit=0  ;;
      replication_lag) warn=15; crit=30 ;;
      *)
        # Unsupported action: show an example invocation and touch no file.
        echo "./check_mongodb.sh 192.168.0.1 connect 27017"
        return 0
        ;;
    esac

    # Every option is a separate, quoted word so the host and the -A flag
    # can never run together into a single argument.
    "$plugin" -H "$1" -A "$2" -P "$3" -W "$warn" -C "$crit" >"$tmpfile"
  }
  ######################################
  # Report a cached check result to Nagios: print the cache file written by
  # RUN_BY_ROOT and exit with the status code matching the keyword it contains.
  # Globals:
  #   tmpfile (written)
  # Arguments:
  #   $1 - MongoDB host, $2 - check action, $3 - MongoDB port
  # Outputs:
  #   The cached plugin output, or a one-line explanation, on stdout.
  # Exits (never returns):
  #   0 if the cache contains OK, 1 if it contains WARNING or the cache file
  #   is missing, 2 if it contains CRITICAL, 3 (UNKNOWN) if it contains none.
  RUN_BY_NAGIOS()
  {
    tmpfile="/tmp/check_mongodb_${1}_${2}_${3}.tmp"

    if [ ! -f "$tmpfile" ]; then
      echo "$tmpfile is not exist!"
      exit 1
    fi

    # Keywords are tried in the order OK, WARNING, CRITICAL; first match wins.
    if grep -q OK "$tmpfile"; then
      cat "$tmpfile"
      exit 0
    fi
    if grep -q WARNING "$tmpfile"; then
      cat "$tmpfile"
      exit 1
    fi
    if grep -q CRITICAL "$tmpfile"; then
      cat "$tmpfile"
      exit 2
    fi

    # An empty or unrecognised cache must not exit silently with status 0:
    # always print a line and use the UNKNOWN status.
    echo "UNKNOWN - no OK/WARNING/CRITICAL status found in $tmpfile"
    exit 3
  }
  ######################################
  # Entry point. Arguments: $1 - MongoDB host, $2 - check action, $3 - port.
  # As root (the cron job) every supported check is run and cached, and $2 is
  # ignored. As any other user the cached result for action $2 is reported.
  # Arguments are quoted so an empty value cannot shift the later ones into
  # the wrong position.
  USER_NAME=$(/usr/bin/whoami)
  if [ "$USER_NAME" = "root" ]; then
    for check in connect connections memory replset_state replication_lag; do
      RUN_BY_ROOT "$1" "$check" "$3"
    done
  else
    RUN_BY_NAGIOS "$1" "$2" "$3"
  fi

页: [1]
查看完整版本: nagios 监控 mongodb 解决nagios界面显示null 问题