色破飞机 发表于 2019-1-8 13:44:30

zabbix系列zabbix3.4监控zookeeper3.4.10

  监控zookeeper来自网上,大家一搜就可搜到了,只是zabbix版本和zookeeper有点出入,自行修改一下就可以了。

zookeeper监控要点

系统监控   这个监控linux系统以及修改linux服务器参数即可
内存使用量    ZooKeeper应当完全运行在内存中,不能使用到SWAP。Java Heap大小不能超过可用内存。
Swap使用量    使用Swap会降低ZooKeeper的性能,设置vm.swappiness = 0
网络带宽占用   如果发现ZooKeeper性能降低关注下网络带宽占用情况和丢包情况,通常情况下ZooKeeper是20%写入80%读入
磁盘使用量    ZooKeeper数据目录使用情况需要注意
磁盘I/O      ZooKeeper的磁盘写入是异步的,所以不会存在很大的I/O请求,如果ZooKeeper和其他I/O密集型服务公用应该关注下磁盘I/O情况
ZooKeeper监控
zk_avg/min/max_latency    响应一个客户端请求的时间,建议这个时间大于10个Tick就报警 平均延迟/最小延迟/最大延迟
zk_outstanding_requests      排队请求的数量,当ZooKeeper超过了它的处理能力时,这个值会增大,建议设置报警阈值为10 堆积请求数
zk_packets_received      接收到客户端请求的包数量 收包数
zk_packets_sent      发送给客户端的包数量,主要是响应和通知 发包数
zk_max_file_descriptor_count   最大允许打开的文件数,由ulimit控制 最大文件描述符数量
zk_open_file_descriptor_count    打开文件数量,当这个值大于允许值的85%时报警 打开的文件描述符数量
Mode                运行的角色,如果没有加入集群就是standalone,加入集群则是follower或者leader
zk_followers          leader角色才会有这个输出,集合中follower的个数。正常的值应该是集合成员的数量减1 follower数量
zk_pending_syncs       leader角色才会有这个输出,pending syncs的数量 准备同步数
zk_znode_count         znodes的数量 znode数量
zk_watch_count         watches的数量 watch数量
Java Heap Size         ZooKeeper Java进程的堆内存大小
安装依赖包

yum install -y nc
yum install -y zabbix-sender
  如果找不到zabbix-sender安装包,可以尝试搜索一下:
yum list all|grep zabbix-sender

监控原理描述

# echo ruok|nc 127.0.0.1 2181
imok
# echo mntr|nc 127.0.0.1 2181
zk_version      3.4.10-39d3a4f269333c922ed3db283be479f9deacaa0f, built on 03/23/2017 10:13 GMT
zk_avg_latency  0
zk_max_latency  13254
zk_min_latency  0
zk_packets_received   3440389
zk_packets_sent 3502224
zk_num_alive_connections      2
zk_outstanding_requests 0
zk_server_state follower
zk_znode_count  203
zk_watch_count  100
zk_ephemerals_count   83
zk_approximate_data_size      46378
zk_open_file_descriptor_count   33
zk_max_file_descriptor_count    4096
# echo srvr|nc 127.0.0.1 2181
Zookeeper version: 3.4.10-39d3a4f269333c922ed3db283be479f9deacaa0f, built on 03/23/2017 10:13 GMT
Latency min/avg/max: 0/0/13254
Received: 3440393
Sent: 3502228
Connections: 2
Outstanding: 0
Zxid: 0x2000b2629
Mode: follower
Node count: 203
zookeeper监控脚本
  /etc/zabbix/script/zookeeper/check_zookeeper.py

#!/usr/bin/python
""" Check Zookeeper Cluster
zookeeper version should be newer than 3.4.x
# echo mntr|nc 127.0.0.1 2181
zk_version3.4.6-1569965, built on 02/20/2014 09:09 GMT
zk_avg_latency0
zk_max_latency4
zk_min_latency0
zk_packets_received 84467
zk_packets_sent 84466
zk_num_alive_connections    3
zk_outstanding_requests 0
zk_server_state follower
zk_znode_count17159
zk_watch_count2
zk_ephemerals_count 1
zk_approximate_data_size    6666471
zk_open_file_descriptor_count   29
zk_max_file_descriptor_count    102400
# echo ruok|nc 127.0.0.1 2181
imok
"""
import sys
import socket
import re
import subprocess
from StringIO import StringIO
import os
# Path of the zabbix_sender executable used to push metric values.
zabbix_sender = '/usr/bin/zabbix_sender'
# Agent configuration file handed to zabbix_sender through its "-c" option.
zabbix_conf = '/etc/zabbix/zabbix_agentd.conf'
# Values greater than 0 run zabbix_sender for real; otherwise the command
# that would have been run is only printed (simulation mode).
send_to_zabbix = 1
############# get zookeeper server status
class ZooKeeperServer(object):
    """Collect statistics from one ZooKeeper server and push them to Zabbix.

    Statistics are obtained with the ``mntr`` and ``ruok`` four-letter-word
    commands sent over a plain TCP connection to the client port.  The code
    is written to run unchanged on Python 2.6+ and Python 3.
    """

    def __init__(self, host='localhost', port='2181', timeout=1):
        """Remember the server address and the socket timeout (seconds).

        ``port`` may be given as a string or an integer.
        """
        self._address = (host, int(port))
        self._timeout = timeout
        # Last statistics map built by get_stats(); read by send_to_zabbix().
        self._result = {}

    def _create_socket(self):
        """Return a new, unconnected socket."""
        return socket.socket()

    def _send_cmd(self, cmd):
        """Send a four-letter-word command and return the reply as text.

        The reply is read until the peer closes the connection, so answers
        longer than a single 2048-byte read are not truncated.  Connection
        errors propagate to the caller; a timeout is only raised when no
        data at all was received.
        """
        if not isinstance(cmd, bytes):
            cmd = cmd.encode('ascii')
        conn = self._create_socket()
        chunks = []
        try:
            conn.settimeout(self._timeout)
            conn.connect(self._address)
            conn.sendall(cmd)
            while True:
                try:
                    chunk = conn.recv(2048)
                except socket.timeout:
                    if not chunks:
                        raise
                    break  # keep whatever already arrived
                if not chunk:
                    break
                chunks.append(chunk)
        finally:
            # Always release the socket, even when connect/recv fails.
            conn.close()
        data = b''.join(chunks)
        if not isinstance(data, str):
            # Python 3: sockets deliver bytes, the parsers work on text.
            data = data.decode('utf-8', 'replace')
        return data

    def get_stats(self):
        """Query the server and return the statistics as a dict.

        The dict merges the parsed ``mntr`` output with the ``ruok`` answer
        (stored under ``zk_server_ruok``).  An empty reply to either command
        simply contributes no keys.  The result is also kept in
        ``self._result``.
        """
        stats = {}
        data_mntr = self._send_cmd('mntr')
        data_ruok = self._send_cmd('ruok')
        if data_mntr:
            stats.update(self._parse(data_mntr))
        if data_ruok:
            stats.update(self._parse_ruok(data_ruok))
        # These three metrics are only exposed by a leader; when none of
        # them is present they are reported as 0.
        leader_only = {'zk_followers': 0,
                       'zk_synced_followers': 0,
                       'zk_pending_syncs': 0}
        if not any(name in stats for name in leader_only):
            stats.update(leader_only)
        self._result = stats
        return self._result

    def _parse(self, data):
        """Parse the output of the ``mntr`` command into a dict.

        Lines that are not a tab-separated key/value pair are ignored.
        """
        result = {}
        for line in data.splitlines():
            try:
                key, value = self._parse_line(line)
            except ValueError:
                continue  # ignore broken lines
            result[key] = value
        return result

    def _parse_ruok(self, data):
        """Parse the output of the ``ruok`` command.

        Returns ``{'zk_server_ruok': <first line>}`` or an empty dict when
        the reply is empty.
        """
        result = {}
        lines = data.splitlines()
        ruok = lines[0].strip() if lines else ''
        if ruok:
            result['zk_server_ruok'] = ruok
        return result

    def _parse_line(self, line):
        """Split one ``key<TAB>value`` line into a ``(key, value)`` tuple.

        The value is converted to ``int`` when possible and otherwise left
        as a string.

        Raises:
            ValueError: if the line does not contain exactly one tab, or
                the key is empty.
        """
        try:
            key, value = [field.strip() for field in line.split('\t')]
        except ValueError:
            raise ValueError('Found invalid line: %s' % line)
        if not key:
            raise ValueError('The key is mandatory and should not be empty')
        try:
            value = int(value)
        except (TypeError, ValueError):
            pass  # non-numeric values (version, server state) stay strings
        return key, value

    def get_pid(self):
        """Return the pid of the first java/zookeeper process found by ps.

        The pid is returned as a string; an empty string means no matching
        process was found.
        """
        pidarg = '''ps -ef|grep java|grep zookeeper|grep -v grep|awk '{print $2}' '''
        proc = subprocess.Popen(pidarg, shell=True, stdout=subprocess.PIPE)
        # communicate() drains the pipe and reaps the child process.
        out, _ = proc.communicate()
        if not isinstance(out, str):
            out = out.decode('utf-8', 'replace')
        lines = out.splitlines()
        return lines[0].strip() if lines else ''

    def send_to_zabbix(self, metric):
        """Send ``self._result[metric]`` as item ``zookeeper.status[metric]``.

        When the module-level ``send_to_zabbix`` switch is greater than 0
        the value is pushed with the zabbix_sender binary and its output is
        discarded; otherwise the command is only printed.
        """
        key = "zookeeper.status[" + metric + "]"
        value = str(self._result[metric])
        command = [zabbix_sender, "-c", zabbix_conf, "-k", key, "-o", value]
        if send_to_zabbix > 0:
            try:
                # A private devnull handle keeps this method independent of
                # any file object opened by the caller.
                with open(os.devnull, 'w') as devnull:
                    subprocess.call(command, stdout=devnull, stderr=devnull,
                                    shell=False)
            except OSError as detail:
                print("Something went wrong while exectuting zabbix_sender :  %s" % detail)
        else:
            print("Simulation: the following command would be execucted :\n%s \n" % " ".join(command))
def usage():
    """Print the command-line help and terminate with exit status 1."""
    # sys.argv[0] is the script name; printing the whole argv list here
    # would show a Python list instead of the command to type.
    print("\nUsage :  %s  alive|all" % sys.argv[0])
    print("Modes : \n\talive : Return pid of running zookeeper\n\tall : Send zookeeper stats as well")
    sys.exit(1)
# Command-line entry point: exactly one argument, "alive" or "all".
accepted_modes = ['alive', 'all']
if len(sys.argv) == 2 and sys.argv[1] in accepted_modes:
    mode = sys.argv[1]
else:
    usage()  # prints the help text and exits with status 1
zk = ZooKeeperServer()
pid = zk.get_pid()
if pid != "" and mode == 'all':
    # Collect every statistic and push each one to Zabbix, then report the
    # pid on stdout as the value of the agent item itself.
    zk.get_stats()
    # Module-level null device handle available to send_to_zabbix() for
    # discarding zabbix_sender output.
    FNULL = open(os.devnull, 'w')
    try:
        for key in zk._result:
            zk.send_to_zabbix(key)
    finally:
        FNULL.close()
    print(pid)
elif pid != "" and mode == "alive":
    print(pid)
else:
    # No ZooKeeper process was found.
    print(0)

添加zabbix-agent配置文件
  /etc/zabbix/zabbix_agentd.d/check_zookeeper.conf

UserParameter=zookeeper.status[*],/usr/bin/python /etc/zabbix/script/zookeeper/check_zookeeper.py $1
zabbix-web添加zookeeper模板
  zbx_export_templates.xml



3.4
2018-02-02T06:22:07Z


Templates




Template ZooKeeper
Template ZooKeeper



Templates




ZooKeeper Status




zookeeper pid
0


zookeeper.status
30
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper approximate data size
2


zookeeper.status
0
90d
365d
0
3

B


0
0

0



0






0


ZooKeeper Status









zookeeper average latency
2


zookeeper.status
0
90d
365d
0
3

tick


0
0

0



0






0


ZooKeeper Status









zookeeper ephemerals count
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper leader's followers
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper max file descriptor count
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper max latency
2


zookeeper.status
0
90d
365d
0
3

tick


0
0

0



0






0


ZooKeeper Status









zookeeper min latency
2


zookeeper.status
0
90d
365d
0
3

tick


0
0

0



0






0


ZooKeeper Status









zookeeper alive connections
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper opened file descriptor count
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper outstanding requests
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper packages received
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper packages sent
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper leader's pending syncs
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper response checking
2


zookeeper.status
0
90d
0
0
1




0
0

0



0






0


ZooKeeper Status









zookeeper state role
2


zookeeper.status
0
90d
0
0
1




0
0

0



0






0


ZooKeeper Status









zookeeper leader's synced followers
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper version
2


zookeeper.status
0
90d
0
0
1




0
0

0



0






0


ZooKeeper Status









zookeeper watches count
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status









zookeeper znodes count
2


zookeeper.status
0
90d
365d
0
3




0
0

0



0






0


ZooKeeper Status


















{Template ZooKeeper:zookeeper.status[zk_outstanding_requests].last()}>10
0

big outstanding requests number
0


0
4

0
0




{Template ZooKeeper:zookeeper.status[zk_pending_syncs].last()}>10
0

big pending syncs
0


0
4

0
0




{Template ZooKeeper:zookeeper.status[zk_avg_latency].last()}>10
0

large average latency
0


0
4

0
0




{Template ZooKeeper:zookeeper.status[zk_open_file_descriptor_count].last()} > {Template ZooKeeper:zookeeper.status[zk_max_file_descriptor_count].last()}*0.85
0

large file descriptor used
0


0
4

0
0




{Template ZooKeeper:zookeeper.status[zk_server_ruok].str(imok)}<>1
0

zookeeper is abnormal
0


0
4

0
0




{Template ZooKeeper:zookeeper.status[alive].last()}=0
0

zookeeper is not running
0


0
4

0
0




{Template ZooKeeper:zookeeper.status[zk_server_state].abschange()}>0
0

zookeeper state role has been changed
0


0
2

0
0






ZooKeeper Alive Connections
900
200
0.0000
100.0000
1
1
1
1
0
0.0000
0.0000
0
0
0
0


0
0
00DDDD
0
2
0

Template ZooKeeper
zookeeper.status





ZooKeeper Data Size
900
200
0.0000
100.0000
1
1
1
1
0
0.0000
0.0000
0
0
0
0


0
0
00C800
0
2
0

Template ZooKeeper
zookeeper.status





ZooKeeper Latency
900
200
0.0000
100.0000
1
1
0
1
0
0.0000
0.0000
0
0
0
0


0
2
00C800
0
2
0

Template ZooKeeper
zookeeper.status



1
2
C80000
0
2
0

Template ZooKeeper
zookeeper.status



2
2
0000C8
0
2
0

Template ZooKeeper
zookeeper.status





ZooKeeper Packages Received/Sent
900
200
0.0000
100.0000
1
1
1
1
0
0.0000
0.0000
0
0
0
0


0
0
FF3333
0
2
0

Template ZooKeeper
zookeeper.status



1
0
00C800
0
2
0

Template ZooKeeper
zookeeper.status





ZooKeeper Watches Count
900
200
0.0000
100.0000
1
1
1
1
0
0.0000
0.0000
0
0
0
0


0
0
660066
0
2
0

Template ZooKeeper
zookeeper.status





ZooKeeper Znodes Count
900
200
0.0000
100.0000
1
1
0
1
0
0.0000
0.0000
0
0
0
0


0
1
FFCCFF
0
2
0

Template ZooKeeper
zookeeper.status







  上面导出的xml有个地方不太明白:触发器表达式中的不等号 <> 在导出的xml里变成了 &lt;&gt;,官网并没有说明这种用法。

重启zabbix-agent

service zabbix-agent restart
  由于这个是使用zabbix-sender发送,在服务器端使用zabbix-get获取不了数据,不知道网友是否可以使用zabbix-get获取数据呢?
http://i2.运维网.com/images/blog/201802/02/b519d8e937b965343dc210cd69372b71.png
http://i2.运维网.com/images/blog/201802/02/d922624625a20ab645a9b2264970737b.png



页: [1]
查看完整版本: zabbix系列zabbix3.4监控zookeeper3.4.10