season22 发表于 2019-1-26 10:50:07

prometheus+node_exporter监控系统搭建

  prometheus+node_exporter监控系统搭建:
  注:
可结合icinga2一起用,Disk IOs、Disk Throughput是icinga2没有的,traffic监控也可用这个
  参考网址:http://blog.51cto.com/youerning/2050543
  下载网址:
https://prometheus.io/download
  prometheus exporter工具网址
  https://prometheus.io/docs/instrumenting/exporters/
  服务器端:

tar zxfv prometheus-2.4.0.linux-amd64.tar.gz

mv prometheus-2.4.0.linux-amd64 /space/prometheus

/space/prometheus/prometheus --config.file=/space/prometheus/prometheus.yml --storage.tsdb.path=/space/prometheus/data
  另开窗口

netstat -nplt | grep 9090
  Redhat 6设置prometheus启动脚本(只能start,没有stop和restart):

vi /etc/init.d/prometheus(注意修改路径和ip)
  #!/bin/bash
#
# Comments to support chkconfig
# chkconfig: 2345 98 02
# description: prometheus service script
#
# SysV init script for the Prometheus server.
# Usage: /etc/init.d/prometheus {start|stop|restart|status}
# Adjust the /space paths and the listen address below to match the host.

# Source function library (expected to provide action, killproc and status,
# which the functions below call but this script does not define).
. /etc/init.d/functions

# Default variables
prog_name="prometheus"
# Configuration file handed to the server via --config.file.
config_file="/space/${prog_name}/${prog_name}.yml"
# Full path of the server binary.
prog_path="/space/${prog_name}/${prog_name}"
# Directory handed to the server via --storage.tsdb.path.
data_path="/space/${prog_name}/data"
# PID file written by start() and read by stop().
pidfile="/var/run/${prog_name}.pid"
# File that receives the server's stdout and stderr.
prog_logs="/var/log/${prog_name}.log"
# Command-line options passed to the binary; change the listen address per host.
options="--web.listen-address=10.0.0.2:9090 --config.file=${config_file} --web.enable-lifecycle --storage.tsdb.path=${data_path}"
# Human-readable name used in the start message.
DESC="Prometheus Server"

# Check if requirements are met: give up when the binary is not executable.
[ -x "${prog_path}" ] || exit 1
# Status of the most recent action.
RETVAL=0
  start(){
  # Launch ${prog_path} in the background (nohup ... &) through the "action"
  # helper, then record its PID and create the subsys lock file.
  # Globals read:    DESC, prog_path, options, prog_logs, pidfile, prog_name
  # Globals written: RETVAL (status reported by "action"), PID
  # Returns:         the value stored in RETVAL
  action $"Starting $DESC..." su -s /bin/sh -c "nohup $prog_path $options >> $prog_logs 2>&1 &" 2> /dev/null
  RETVAL=$?

  # Look the process up by name; the PID file is only written when pidof
  # actually reported something.
  PID=$(pidof ${prog_path})
  if [ -n "${PID}" ]; then
    echo ${PID} > ${pidfile}
  fi
  echo

  # The lock file is created only when "action" reported success.
  if [ $RETVAL -eq 0 ]; then
    touch /var/lock/subsys/$prog_name
  fi
  return $RETVAL
}
  stop(){
  # Stop the service by handing the PID file to killproc, and drop the
  # subsys lock file when killproc reports success.
  # Globals read:    prog_name, pidfile
  # Globals written: RETVAL (status of killproc)
  # Returns:         the value stored in RETVAL
  printf '%s' $"Shutting down $prog_name: "
  killproc -p ${pidfile}
  RETVAL=$?
  printf '\n'

  if [ $RETVAL -eq 0 ]; then
    rm -f /var/lock/subsys/$prog_name
  fi
  return $RETVAL
}
  restart() {
  # Stop the service and start it again. The result of stop is not checked;
  # the function returns whatever start returns.
  stop; start
}
  # Dispatch on the requested action (first script argument). Every branch
  # leaves its result in RETVAL, which becomes the script's exit status.
  case "$1" in
    start)
      start
      RETVAL=$?
      ;;
    stop)
      stop
      RETVAL=$?
      ;;
    restart)
      restart
      RETVAL=$?
      ;;
    status)
      status $prog_path
      RETVAL=$?
      ;;
    *)
      echo $"Usage: $0 {start|stop|restart|status}"
      RETVAL=1
      ;;
  esac
  # Without an explicit exit the script finishes with the status of its last
  # command (0 after a plain assignment), so "status" on a stopped service and
  # an invalid action would both look successful to the caller.
  exit $RETVAL
  :wq

chmod +x /etc/init.d/prometheus

/etc/init.d/prometheus start

chkconfig --level 35 prometheus on
  浏览器访问http://ip:9090
  被监控端:

tar zxfv node_exporter-0.16.0.linux-amd64.tar.gz

cd node_exporter-0.16.0.linux-amd64

./node_exporter
  另开窗口

curl 127.0.0.1:9100/metrics
  Ubuntu 14.04设置node_exporter启动脚本(只能start,没有stop和restart):

ln -s /root/node_exporter-0.16.0.linux-amd64/node_exporter /usr/bin/

vi /etc/init/node_exporter.conf

# Run node_exporter
  start on startup
  script
/usr/bin/node_exporter
end script
:wq

service node_exporter start

netstat -nplt | grep 9100
  设置Ubuntu 14.04开机自启动node_exporter

vi /etc/rc.local
  service node_exporter start
  exit 0
:wq
  Ubuntu 16.04设置node_exporter启动脚本(只能start,没有stop和restart):

vi /etc/systemd/system/node-exporter.service
  
[Unit]
Description=Prometheus Node Exporter
After=network.target

[Service]
ExecStart=/home/node_exporter/node_exporter
User=nobody

[Install]
WantedBy=multi-user.target
  :wq

systemctl daemon-reload

systemctl enable node-exporter

systemctl start node-exporter

netstat -ntpl | grep 9100
  Redhat/CentOS 7启动脚本同Ubuntu 16.04
  Redhat/CentOS 6启动脚本(可执行start\stop\restart):
  将下载的node_exporter tar包解压到/space/下并改名为node_exporter

wget https://forensics.cert.org/centos/cert/6/x86_64//daemonize-1.7.3-7.el6.x86_64.rpm

rpm -i daemonize-1.7.3-7.el6.x86_64.rpm

useradd prometheus -s /sbin/nologin

mkdir /var/log/prometheus

mkdir /var/run/prometheus

chown prometheus:prometheus /var/log/prometheus -R

chown prometheus:prometheus /var/run/prometheus -R

vi /etc/init.d/node_exporter
  #!/bin/bash
#
# Comments to support chkconfig
# chkconfig: 2345 98 02
# description: node_exporter service script
#
# SysV init script for the Prometheus node_exporter.
# Usage: /etc/init.d/node_exporter {start|stop|restart|status}
# Adjust the /space path and the listen address below to match the host.

# Source function library (expected to provide action, killproc and status,
# which the functions below call but this script does not define).
. /etc/init.d/functions

# Default variables
prog_name="node_exporter"
# Full path of the exporter binary (tarball unpacked to /space/node_exporter).
prog_path="/space/${prog_name}/${prog_name}"
# PID file written by start() and read by stop().
pidfile="/var/run/${prog_name}.pid"
# File that receives the exporter's stdout and stderr.
prog_logs="/var/log/${prog_name}.log"
# Extra command-line flags; /etc/sysconfig/node_exporter may override ARGS.
ARGS=""
[ -r "/etc/sysconfig/${prog_name}" ] && . "/etc/sysconfig/${prog_name}"
# Command-line options passed to the binary; change the listen address per host.
options="--web.listen-address=10.29.60.62:9100 ${ARGS}"
# Human-readable name used in the start message.
DESC="Prometheus Node Exporter"

# Check if requirements are met: give up when the binary is not executable.
[ -x "${prog_path}" ] || exit 1
# Status of the most recent action.
RETVAL=0
  start(){
  # Launch ${prog_path} in the background (nohup ... &) through the "action"
  # helper, then record its PID and create the subsys lock file.
  # Globals read:    DESC, prog_path, options, prog_logs, pidfile, prog_name
  # Globals written: RETVAL (status reported by "action"), PID
  # Returns:         the value stored in RETVAL
  action $"Starting $DESC..." su -s /bin/sh -c "nohup $prog_path $options >> $prog_logs 2>&1 &" 2> /dev/null
  RETVAL=$?

  # Look the process up by name; the PID file is only written when pidof
  # actually reported something.
  PID=$(pidof ${prog_path})
  if [ -n "${PID}" ]; then
    echo ${PID} > ${pidfile}
  fi
  echo

  # The lock file is created only when "action" reported success.
  if [ $RETVAL -eq 0 ]; then
    touch /var/lock/subsys/$prog_name
  fi
  return $RETVAL
}
  stop(){
  # Stop the service by handing the PID file to killproc, and drop the
  # subsys lock file when killproc reports success.
  # Globals read:    prog_name, pidfile
  # Globals written: RETVAL (status of killproc)
  # Returns:         the value stored in RETVAL
  printf '%s' $"Shutting down $prog_name: "
  killproc -p ${pidfile}
  RETVAL=$?
  printf '\n'

  if [ $RETVAL -eq 0 ]; then
    rm -f /var/lock/subsys/$prog_name
  fi
  return $RETVAL
}
  restart() {
  # Stop the service and start it again. The result of stop is not checked;
  # the function returns whatever start returns.
  stop; start
}
  # Dispatch on the requested action (first script argument). Every branch
  # leaves its result in RETVAL, which becomes the script's exit status.
  case "$1" in
    start)
      start
      RETVAL=$?
      ;;
    stop)
      stop
      RETVAL=$?
      ;;
    restart)
      restart
      RETVAL=$?
      ;;
    status)
      status $prog_path
      RETVAL=$?
      ;;
    *)
      echo $"Usage: $0 {start|stop|restart|status}"
      RETVAL=1
      ;;
  esac
  # Without an explicit exit the script finishes with the status of its last
  # command (0 after a plain assignment), so "status" on a stopped service and
  # an invalid action would both look successful to the caller.
  exit $RETVAL
  :wq

chmod +x /etc/init.d/node_exporter

vi /etc/sysconfig/node_exporter
  ARGS=""
:wq

/etc/init.d/node_exporter start

ps -ef | grep node_exporter

chkconfig --level 35 node_exporter on
  服务器端:

vi /space/prometheus/prometheus.yml(写服务器名的话需在/etc/hosts加个本地解析)

- targets: ['10.0.0.3:9100']
- targets: ['shhua01:9100']
  :wq

netstat -ntpl | grep 9090

kill pid号

/space/prometheus/prometheus --config.file=/space/prometheus/prometheus.yml --storage.tsdb.path="/space/prometheus/data" --storage.tsdb.retention=365d
  访问http://ip:9090,点击Status——Targets,可看到已监控的服务器列表
  集成到Grafana,选择prometheus,http://ip:9090即可,Grafana官网有prometheus对应的模板可导入使用
  显示总CPU core数:
  count(node_cpu_seconds_total{instance=~"$node", mode="system"}) or count(node_cpu{instance=~"$node", mode="system"})
  显示总内存大小:
  sum(node_memory_MemTotal_bytes{instance=~"$hostname:9100"})
  显示空间总大小:
  sum(node_filesystem_size_bytes{instance=~'$hostname:9100',mountpoint=~'/home'})
  显示CPU使用率:
  100 - (avg by (instance)(irate(node_cpu_seconds_total{mode="idle",instance=~'$node'}[5m])) * 100)
  显示空间使用率:
  100.0 - 100 * (node_filesystem_avail_bytes{instance=~'$node',mountpoint=~'/home'} / node_filesystem_size_bytes{instance=~'$node',mountpoint=~'/home'})
  显示内存使用率:
  100 - ((sum(node_memory_Cached_bytes{instance=~"$node"})+sum(node_memory_Buffers_bytes{instance=~"$node"})+sum(node_memory_MemFree_bytes{instance=~"$node"}))/sum(node_memory_MemTotal_bytes{instance=~"$node"})*100)
  swap使用率:
  100 - (sum(node_memory_SwapFree_bytes{instance=~"$node"})/sum(node_memory_SwapTotal_bytes{instance=~"$node"})*100)



页: [1]
查看完整版本: prometheus+node_exporter监控系统搭建