#!/usr/bin/python26
# -*- coding: utf-8 -*-
# SaltStack minion-side status monitoring program
"""
@author:mujibin
@modify time: 2015-11-05
"""
import os
import sys
import time
import datetime
import re
import paramiko
import socket
# Run from the directory that contains this script. os.chdir() needs a single
# path string; the original passed the whole sys.path list, which raises
# TypeError at import time.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# Make the modules under the salt API directory importable.
sys.path.append("/data1/salt/mysqlapi/salt/")
import urllib
import urllib2
import traceback
import json
import logging
import types
import commands
import MySQLdb
from multiprocessing import Pool
#from salt_repair import *
# Python 2 only: sys is reloaded first so that sys.setdefaultencoding can be
# called, then the default string encoding is switched to UTF-8.
# These two statements must stay in this order.
reload(sys)
sys.setdefaultencoding('utf8')
# Take one clock snapshot so c_date and c_time always describe the same moment.
_now = time.localtime()
c_date = time.strftime("%Y%m%d", _now)
# Single insert timestamp shared by every record written during this run.
# Important: the front end relies on it to identify the latest information.
c_time = time.strftime("%Y-%m-%d %H:%M:%S", _now)

# MySQL endpoints (host names are masked in this copy of the source).
H3306 = '**com.cn'
H3308 = '***com.cn'
P3306 = 3306
P3308 = 3308
dp_admin = 'dp_admin'
HOST_PORT = '3306'
# NOTE(review): credentials are hard-coded here; consider loading them from a
# protected configuration file instead.
HOST_USER = 'mysqlha'
HOST_PASSED = '*******'
db = 'test'
def getIp(domain):
    """Resolve *domain* and return its first IP address as a string.

    The original returned the raw ``socket.getaddrinfo`` result (a list of
    5-tuples), although the value is used as "the IP address" of the
    salt-master. It also asked for the ``'http'`` service, which requires a
    services database on the machine; no port is needed just to resolve a
    host name.

    Args:
        domain: host name (or literal IP address) to resolve.

    Returns:
        The address part of the first entry returned by the resolver.

    Raises:
        socket.gaierror: if the name cannot be resolved.
    """
    addr_info = socket.getaddrinfo(domain, None)
    # Each entry is (family, socktype, proto, canonname, sockaddr);
    # sockaddr[0] is the address string for both IPv4 and IPv6.
    return addr_info[0][4][0]
# Host name of the salt-master (masked in this copy of the source).
MASTERDNS= "********.com.cn"
MASTERIP = getIp(MASTERDNS) ### get the salt-master's IP address (resolved at import time)
def sql_select(sql, port=3306, domain='*****', db='salt'):
    """Run a query and return every row as a list.

    Best-effort: any connection or query error is logged at debug level and
    an empty list is returned, as in the original. Unlike the original, the
    connection is always closed and the ``db`` argument is no longer
    overwritten by the connection object.

    Args:
        sql: SQL statement to execute.
        port: MySQL port; coerced with ``int()``.
        domain: MySQL host name.
        db: name of the database to use.

    Returns:
        A list of row tuples, empty when nothing matched or on failure.
    """
    port = int(port)
    rows = []
    conn = None
    try:
        conn = MySQLdb.connect(host=domain, user=HOST_USER, port=port,
                               passwd=HOST_PASSED, db=db, connect_timeout=3,
                               charset="utf8")
        cursor = conn.cursor()
        cursor.execute(sql)
        rows = list(cursor.fetchall())
    except Exception as e:
        logging.debug("sql_select failed: %s", e)
    finally:
        # Release the connection on both the success and the failure path.
        if conn is not None:
            conn.close()
    return rows
def sql_insert(sql, port=3306, domain='****', db='salt'):
    """Execute a write statement and commit it.

    Best-effort: errors are logged at debug level and the transaction is
    rolled back. The original reused ``db`` for the connection, so when
    ``connect`` itself failed the handler called ``rollback()`` on the
    database *name* string and raised AttributeError; the connection is now
    tracked separately and only touched once it exists.

    Args:
        sql: SQL statement to execute.
        port: MySQL port; coerced with ``int()`` as sql_select does.
        domain: MySQL host name.
        db: name of the database to use.

    Returns:
        None.
    """
    conn = None
    try:
        conn = MySQLdb.connect(host=domain, user=HOST_USER, port=int(port),
                               passwd=HOST_PASSED, db=db, connect_timeout=3,
                               charset="utf8")
        cursor = conn.cursor()
        cursor.execute(sql)
        conn.commit()
    except Exception as e:
        logging.debug("sql_insert failed: %s", e)
        if conn is not None:
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()
def ssh_connect_bak(host):
    """Open an SSH session to *host* as root using root's RSA private key.

    Connects to port 26387 with a 2-second timeout. Host keys that are not
    already known are handled by paramiko's AutoAddPolicy.

    Args:
        host: target host name or address; surrounding whitespace is stripped.

    Returns:
        The connected ``paramiko.SSHClient``; the caller should close it.
    """
    key_path = os.path.expanduser('/root/.ssh/id_rsa')
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    private_key = paramiko.RSAKey.from_private_key_file(key_path)
    ssh.connect(host.strip(), 26387, username='root', timeout=2,
                pkey=private_key)
    return ssh
def ssh_connect(host):
    """Open an SSH session to *host* as root (SSH connection helper).

    Connects to port 26387 with a 2-second timeout and no explicit key, so
    paramiko's default authentication lookup applies. Host keys that are not
    already known are handled by paramiko's AutoAddPolicy.

    Args:
        host: target host name or address; surrounding whitespace is stripped.

    Returns:
        The connected ``paramiko.SSHClient``; the caller should close it.
    """
    ssh = paramiko.SSHClient()
    ssh.load_system_host_keys()
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    target = host.strip()
    ssh.connect(target, 26387, username='root', timeout=2)
    return ssh
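'''Testing showed that running commands on the master over ssh easily hangs
once the number of servers grows: the call never returns, the whole program
gets stuck and cannot continue, and setting a timeout has no effect. The root
cause was not found. After switching to the company's channel (gateway)
machine for the connections the problem did not occur again, so the run_cmd
function below is used instead.'''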
def run_cmd(ips, cmd, method, output, ignore_error, timeout):
    """Run *cmd* on *ips* through the company's HTTP command channel.

    The arguments are form-encoded and sent to the channel API with
    ``urllib2.urlopen``. In the original, a failed request left ``response``
    unbound, so the final ``return response.read()`` raised
    UnboundLocalError; failures now return None, which the callers in this
    file already test for.

    Args:
        ips: target host(s), sent as the ``ip`` field.
        cmd: command line to run, sent as the ``cmd`` field.
        method: sent as the ``method`` field (callers here use "sync").
        output: sent as the ``output`` field (callers here use "text").
        ignore_error: sent as the ``ignore_error`` field.
        timeout: sent as the ``timeout`` field. It is passed to the API
            only and is not applied to the HTTP request itself.

    Returns:
        The response body, or None if the request or the read failed.
    """
    url = 'https://*****.php'
    argument = {
        'user': 'litao',
        'method': method,
        'output': output,
        'ignore_error': ignore_error,
        'key': '*******',
        'timeout': timeout,
        'ip': ips,
        'cmd': cmd,
    }
    try:
        response = urllib2.urlopen(url, urllib.urlencode(argument))
        try:
            return response.read()
        finally:
            response.close()
    except Exception as e:
        logging.debug("Call the api function error!")
        logging.debug(e)
        return None
def ssh_cmd(host, cmd):
    """Run *cmd* on *host* over SSH and return what it wrote to stdout.

    Best-effort: any failure is logged at debug level and None is returned,
    as in the original. The SSH client is now always closed; the original
    never closed it, leaking one connection per call.

    Args:
        host: target host, passed to ssh_connect.
        cmd: command line to execute remotely.

    Returns:
        The command's stdout contents, or None on failure.
    """
    client = None
    try:
        client = ssh_connect(host)
        _stdin, stdout, _stderr = client.exec_command(cmd)
        return stdout.read()
    except Exception as e:
        logging.debug("The host:%s and cmd:%s execute exception. (%s)",
                      host, cmd, e)
        return None
    finally:
        if client is not None:
            client.close()
# Addresses that must never be monitored.
iplist = ['10.75.16.197']


def get_all_id():
    """Return the distinct ``ip_in`` values of the node table to monitor.

    Selects the nodes with ``depid=1`` that are not attached to the
    'dbstorage' module, then drops duplicates and everything in ``iplist``.

    Fixes over the original:
      * the query contained the fused token ``mnamein``; it is now
        ``mname in``.
      * rows from the database are 1-tuples, so comparing whole rows with
        ``iplist`` never matched and tuples were handed on as hosts. The
        address is now taken out of each row, so the exclusion works and
        callers receive plain address strings.

    Returns:
        A list of address strings in query order, empty if the query returned
        nothing or failed.
    """
    sql = (
        "select distinct ip_in from node "
        "where depid=1 "
        "and ip_in not in "
        "(select node.ip_in from node, node2module "
        "where node.id=node2module.nid "
        "and node2module.mname in ('dbstorage') )"
    )
    excluded = set(iplist)
    seen = set()
    ids = []
    for row in sql_select(sql, port=3306, domain='***.com.cn', db='**'):
        ip = row[0]
        # Skip empty values, excluded addresses and repeats.
        if not ip or ip in excluded or ip in seen:
            continue
        seen.add(ip)
        ids.append(ip)
    return ids
def salt_minion_mon(host, p_status, s_status, re_info):
    """Record the salt-minion check result for *host* in the salt_mon table.

    One row is inserted with value "BAD" when ``s_status`` is 1 and "OK"
    otherwise, stamped with the run-wide ``c_time``.

    The original built the statement by plain string formatting, so any
    quote or backslash in ``re_info`` (raw command output) produced invalid
    SQL and the record was silently lost. The free-text values are now
    escaped before being embedded.

    Args:
        host: address of the checked minion.
        p_status: ping status; accepted for interface compatibility but not
            stored.
        s_status: salt check status, 1 meaning failure.
        re_info: detail text appended to the info column.

    Returns:
        None. Failures are logged at debug level and otherwise ignored.
    """
    try:
        salt_mon_value = "BAD" if s_status == 1 else "OK"
        salt_mon_info = "%s#NULL#NULL#NULL#NULL#%s " % (host, re_info)
        insert_sql = (
            "insert into salt_mon (ip_in, salt_mon_value, salt_mon_info, ctime) "
            "values('%s','%s','%s','%s')"
        ) % (
            MySQLdb.escape_string(str(host)),
            salt_mon_value,
            MySQLdb.escape_string(str(salt_mon_info)),
            c_time,
        )
        sql_insert(insert_sql)
    except Exception as e:
        logging.debug("salt_minion_mon failed! %s", e)
def ping_mon_by_host(host):
    """Ping *host* once and report whether it answered.

    Runs ``ping -c 1 -w 2`` through the shell with stdout discarded.

    The original error handler formatted the host with ``%d`` (which raises
    again for a string) and fell through returning None, so the caller
    crashed on ``ping_ret["status"]``. A failure result is now returned
    instead.

    Args:
        host: host name or address to ping.

    Returns:
        A dict ``{"status": 0 or 1, "msg": text}``; status 0 means the ping
        command exited successfully.
    """
    try:
        ping_cmd = "ping -c 1 -w 2 %s > /dev/null" % host
        if os.system(ping_cmd) == 0:
            return {"status": 0, "msg": "The host %s ping ok" % host}
        return {"status": 1, "msg": "The host %s ping failed" % host}
    except Exception as e:
        logging.debug("ping_mon_by_host error: %s", e)
        # Wrap host in a tuple so this message can never fail to format.
        msg = "The host %s: ping_mon_by_host failed!" % (host,)
        return {"status": 1, "msg": msg}
def _minion_error_detail(raw):
    """Return the error text from a failed salt call, for storing as detail."""
    if not raw:
        return raw
    # NOTE(review): assumes the output starts with a "<host>:" prefix that is
    # not worth storing -- confirm against real channel output. Text without
    # a colon is kept whole.
    detail = raw.split(':', 1)[-1].strip()
    return detail or raw


def check_salt_minion(host):
    """Check the salt-minion on *host* by running ``uptime`` through salt.

    The command is sent with run_cmd. If the output does not contain 'load',
    the minion is restarted once, and after 3 seconds the command is tried
    again.

    Fixes over the original:
      * ``ret1.split(':').strip()`` always raised (lists have no ``strip``),
        so the detail was never trimmed; the text after the first colon is
        now used.
      * on an unexpected exception the function returned None and crashed
        the caller; a failure result is now returned.

    Args:
        host: address of the minion to check.

    Returns:
        A dict ``{'status': 0 or 1, 'message': text}``; status 0 with
        message 'ok' when the minion answered, otherwise status 1 with the
        error detail.
    """
    try:
        cmd = "salt '%s' -t 7 cmd.run 'uptime'" % host
        ret = run_cmd(host, cmd, method="sync", output="text",
                      ignore_error="true", timeout=5)
        if ret and 'load' in ret:
            return {'status': 0, 'message': 'ok'}

        # First attempt failed: restart the minion once and try again.
        restart_salt_minion(host)
        time.sleep(3)
        ret1 = run_cmd(host, cmd, method="sync", output="text",
                       ignore_error="true", timeout=5)
        if ret1 and 'load' in ret1:
            return {'status': 0, 'message': 'ok'}
        return {'status': 1, 'message': _minion_error_detail(ret1)}
    except Exception as e:
        logging.debug("check_salt_minion(%s) error: %s", host, e)
        return {'status': 1, 'message': 'check_salt_minion failed: %s' % (e,)}
def restart_salt_minion(host):
    """Restart the salt-minion service on *host* through run_cmd.

    The restart counts as successful when the command output contains both
    "Starting salt-minion daemon" and "OK".

    Args:
        host: address of the minion to restart.

    Returns:
        A dict ``{'status': 0 or 1, 'message': text}``, or None if an
        exception occurred.
    """
    try:
        restart_cmd = '/etc/init.d/salt-minion restart'
        output = run_cmd(host, restart_cmd, method="sync", output="text",
                         ignore_error="true", timeout=5)
        restarted = (output is not None
                     and "Starting salt-minion daemon" in output
                     and "OK" in output)
        if restarted:
            return {
                'status': 0,
                'message': 'The host %s salt-minion restart successed.\n' % host,
            }
        return {
            'status': 1,
            'message': "The host %s salt-minion restart failed.\n" % host,
        }
    except Exception:
        # Best-effort: a failed restart attempt is ignored.
        pass
def get_salt_minion_status(host):
    """Check one host and record its salt-minion status.

    The host is pinged first. Only when the ping succeeds is the minion
    checked (check_salt_minion restarts it once if needed) and the result
    written with salt_minion_mon. Hosts that do not answer the ping are not
    recorded, as in the original.

    This function runs inside pool workers, where an uncaught exception
    aborts the whole ``pool.map``. The original subscripted the helper
    results without checking them, so a helper returning None raised
    TypeError; missing results are now skipped.

    Args:
        host: address of the host to check.

    Returns:
        None.
    """
    ping_ret = ping_mon_by_host(host)
    if not ping_ret or ping_ret["status"] != 0:
        # Unreachable host: nothing is recorded.
        return

    salt_ret = check_salt_minion(host)
    if not salt_ret:
        logging.warning("No salt check result for host %s; skipped", host)
        return

    salt_minion_mon(host, ping_ret["status"],
                    salt_ret["status"], salt_ret["message"])
def main():
    """Check every host from get_all_id in a 10-process pool and print totals.

    The original measured elapsed time by formatting local time to a string,
    parsing it back and reading ``timedelta.seconds``. That is sensitive to
    local-clock (DST) changes and drops whole days. Whole-second epoch
    timestamps are subtracted instead. The pool is now also closed and
    joined if ``map`` raises.
    """
    started = int(time.time())
    all_id = get_all_id()
    pool = Pool(10)  # process pool
    try:
        pool.map(get_salt_minion_status, all_id)
    finally:
        pool.close()
        pool.join()
    time_dvalue = int(time.time()) - started
    # Report the total run time and the number of hosts checked.
    print('总执行时间:%s sec' % (time_dvalue))
    print('统计的机器数 %s' % (len(all_id)))


if __name__ == "__main__":
    main()
# Page: [1]