MHA+Lvs+Keepalived实现MySQL的高可用及读负载均衡_3(MHA) 实现MySQL 高可用的 MHA 安装及配置
MHA项目: http://code.google.com/p/mysql-master-ha/wiki/TableOfContents?tm=6
Perl的相关模块下载: http://search.cpan.org/
一、安装masterha-node
客户端——4台MySQL服务器、服务端——2台Lvs服务器均需安装masterha-node结点
1 安装相关的perl模块
1.1 DBI
[iyunv@MySQL-15.11 xxm 18:26:04]#tar xf DBI-1.633.tar.gz
[iyunv@MySQL-15.11 xxm 18:26:19]#cd DBI-1.633
[iyunv@MySQL-15.11 DBI-1.633 18:26:22]#perl Makefile.PL
[iyunv@MySQL-15.11 DBI-1.633 18:27:07]#make
[iyunv@MySQL-15.11 DBI-1.633 18:27:42]#make install
[iyunv@MySQL-15.11 xxm 18:32:22]#perl -e "use DBI" ->无返回值,说明正确安装
1.2 DBD-mysql
[iyunv@MySQL-15.11 xxm 18:34:53]#tar xf DBD-mysql-4.029.tar.gz
[iyunv@MySQL-15.11 DBD-mysql-4.029 18:35:10]#perl Makefile.PL
[iyunv@MySQL-15.11 DBD-mysql-4.029 18:35:40]#make
[iyunv@MySQL-15.11 DBD-mysql-4.029 18:35:56]#make install
[iyunv@MySQL-15.11 DBD-mysql-4.029 18:36:15]#perl -e "use DBD::mysql"
2 安装 masterha-node
[iyunv@MySQL-15.11 xxm 19:15:04]#tar xf mha4mysql-node-0.56.tar.gz
[iyunv@MySQL-15.11 xxm 19:15:34]#cd mha4mysql-node-0.56
[iyunv@MySQL-15.11 mha4mysql-node-0.56 19:15:37]#perl Makefile.PL
[iyunv@MySQL-15.11 mha4mysql-node-0.56 19:16:04]#make
[iyunv@MySQL-15.11 mha4mysql-node-0.56 19:17:18]#make install
二、安装masterha-manager
仅在服务端——2 台 Lvs 服务器上安装
1 安装相关的perl模块
1.1 Config::Tiny
[iyunv@Lvs-15.23 xxm 19:48:57]#gunzip Config-Tiny-2.20.tgz
[iyunv@Lvs-15.23 xxm 19:49:01]#tar xf Config-Tiny-2.20.tar
[iyunv@Lvs-15.23 xxm 19:49:05]#cd Config-Tiny-2.20
[iyunv@Lvs-15.23 Config-Tiny-2.20 19:49:48]#perl Makefile.PL
1.2 Log::Dispatch
这个模块单独编译安装的话,所需要的依赖包太多,还是使用CPAN 方式简易些。使用 CPAN 方式,需网络正常。下列方式,一路 yes 即可。
[iyunv@Lvs-15.23 xxm 20:09:36]# perl -MCPAN -e "install Log::Dispatch"
1.3 Parallel::ForkManager
[iyunv@Lvs-15.23 xxm 20:58:33]#perl -MCPAN -e "install Parallel::ForkManager"
2 安装 masterha-manager
[iyunv@Lvs-15.23 xxm 21:07:56]#tar xf mha4mysql-manager-0.56.tar.gz
[iyunv@Lvs-15.23 xxm 21:08:15]#cd mha4mysql-manager-0.56
[iyunv@Lvs-15.23 mha4mysql-manager-0.56 21:08:17]#perl Makefile.PL
[iyunv@Lvs-15.23 mha4mysql-manager-0.56 21:08:21]#make
[iyunv@Lvs-15.23 mha4mysql-manager-0.56 21:08:40]#make install
三、配置及测试
1 masterha-manager的配置文件
1.1 在给 MHA 创建配置文件
cat << EOF >> /etc/app1.cnf
[server default]
#mysql user and password
user=mha_manager
password=123456
port=36677
#replication user and password
repl_user=repl
repl_password=123456
#ssh user and port
ssh_user=root
ssh_port=2777
#binlog directory
master_binlog_dir=/opt/data/log
#working directory on the manager
manager_workdir=/var/log/masterha/app1
#manager log file
manager_log=/var/log/masterha/app1/app1.log
#working directory on MySQL servers
remote_workdir=/var/log/masterha/app1
#If MySQL command line utilities are installed under a non-standard directory, use this option to set the directory.
client_bindir=/usr/local/mysql/bin
#If MySQL libraries are installed under a non-standard directory, use this option to set the directory.
client_libdir=/usr/local/mysql/lib
#To check by connecting/disconnecting every time, because it's more strict and it can detect TCP connection level failure more quickly.
ping_type=CONNECT
#To fully control the order of priority (i.e. host2->host3->host4..)
latest_priority=0
# IP address failover solution
master_ip_failover_script=/usr/local/scripts/master_ip_failover
#Send a report (i.e. e-mail) when failover has completed or ended with errors
report_script=/usr/local/scripts/send_report
[server-Master1]
hostname=192.168.15.11
candidate_master=1
[server-Master2]
hostname=192.168.15.12
candidate_master=1
[server-Slave1]
hostname=192.168.15.13
[server-Slave2]
hostname=192.168.15.14
EOF
1.2 master_ip_failover脚本
cat /usr/local/scripts/master_ip_failover
#!/bin/bash
#--------------第一部分 : 变量及函数定义 -----------------#
###Begin Variables define###
ssh_port=2777
cmd=/sbin/ifconfig
vip=192.168.15.31
device=eth0:0
netmk=255.255.255.0
start_vip="${cmd} ${device} ${vip} netmask ${netmk} up"
stop_vip="${cmd} ${device} ${vip} netmask ${netmk} down"
###End Variables define###
###Begin Status Funciont###
status()
{
exit 0
}
###End Status Funciont###
###Begin Stop Or Stopssh Funciont###
stop()
{
exit 0
}
###End Stop Or Stopssh Funciont###
###Begin Start Funciont###
start()
{
/usr/bin/ssh -p ${ssh_port} ${ssh_user}@${orig_master_host} ""${stop_vip}""
/usr/bin/ssh -p ${ssh_port} ${ssh_user}@${new_master_host} ""${start_vip}""
/usr/bin/ssh -p ${ssh_port} ${ssh_user}@${new_master_host} "/etc/init.d/lvsrs stop"
exit 0
}
###End Start Funciont###
#--------------第一部分 : 变量及函数定义 -----------------#
#--------------第二部分 : 命令行参数 -----------------#
###Begin Get The Command-Line Parameters###
###eval set -- "`getopt -a -q -o n -l command::,ssh_user:,orig_master_host:,orig_master_ip:,orig_master_port:,new_master_host:,new_master_ip:,new_master_port:,new_master_user:,new_master_password: -- "$@"`"
eval set -- "`getopt -a -q -o n -l command::,ssh_user:,orig_master_host:,orig_master_ip:,new_master_host:,new_master_ip: -- "$@"`"
if [ $? != 0 ] ; then echo "Terminating..." >&2 ;exit 1;fi
while true
do
case "$1" in
--command)
command="${2}";
shift;;
--ssh_user)
ssh_user="${2}";
shift;;
--orig_master_host)
orig_master_host="${2}";
shift;;
--orig_master_ip)
orig_master_ip="${2}";
shift;;
--new_master_host)
new_master_host="${2}";
shift;;
--new_master_ip)
new_master_ip="${2}";
shift;;
--)
shift;
break;;
esac
shift
done
###End Get The Command-Line Parameters###
#--------------第二部分 : 命令行参数 -----------------#
#--------------第三部分 : 函数调用 -----------------#
if [ "${command}" == "status" ];
then
status;
fi
if [ "${command}" == "stop" ] || [ "${command}" == "stopssh" ] ;
then
stop;
fi
if [ "${command}" == "start" ];
then
start;
fi
#--------------第三部分 : 函数调用 -----------------#
1.3 report脚本
cat /usr/local/scripts/send_report
#!/bin/bash
#--------------第一部分 : 变量及函数定义 -----------------#
send_report()
{
echo -e "Orig_master is ${dead_master_host}.\n New_master is ${new_master_host}.\n New_slave_hosts is ${new_slave_hosts}. \n ${body}" | mail -s ${subject} 2537657486@qq.com
}
#--------------第一部分 : 变量及函数定义 -----------------#
#--------------第二部分 : 命令行参数 -----------------#
###Begin Get The Command-Line Parameters###
eval set -- "`getopt -a -q -o n -l dead_master_host:,body:,new_slave_hosts:,subject:,new_master_host: -- "$@"`"
if [ $? != 0 ] ; then echo "Terminating..." >&2 ;exit 1;fi
while true
do
case "$1" in
--dead_master_host)
dead_master_host="${2}";
shift;;
--body)
body="${2}";
shift;;
--new_slave_hosts)
new_slave_hosts="${2}";
shift;;
--subject)
subject="${2}";
shift;;
--new_master_host)
new_master_host="${2}";
shift;;
--)
shift;
break;;
esac
shift
done
###End Get The Command-Line Parameters###
#--------------第二部分 : 命令行参数 -----------------#
send_report;
2 ssh无密码配置
2.1 masterha-manager到各 node
Lvs-15.23到各 node :
[iyunv@Lvs-15.23 ~ 10:44:34]#ssh-keygen -t rsa
[iyunv@Lvs-15.23 ~ 10:51:29]# ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.11\" "="">root@MySQL-15.11"
[iyunv@Lvs-15.23 ~ 10:52:02]# ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.12\" "="">root@MySQL-15.12"
[iyunv@Lvs-15.23 ~ 10:53:02]# ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.13\" "="">root@MySQL-15.13"
[iyunv@Lvs-15.23 ~ 10:59:04]# ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.14\" "="">root@MySQL-15.14"
Lvs-15.24到各 node :
[iyunv@Lvs-15.24 ~ 11:00:18]#ssh-keygen -t rsa
[iyunv@Lvs-15.24 ~ 11:00:22]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.11\" "="">root@MySQL-15.11"
[iyunv@Lvs-15.24 ~ 11:00:38]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.12\" "="">root@MySQL-15.12"
[iyunv@Lvs-15.24 ~ 11:01:17]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.13\" "="">root@MySQL-15.13"
[iyunv@Lvs-15.24 ~ 11:01:36]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.14\" "="">root@MySQL-15.14"
2.2 各 node 之间
MySQL-15.11到 MySQL-15.12 、 MySQL-15.13 、 MySQL-15.14 :
[iyunv@MySQL-15.11 ~ 11:03:03]#ssh-keygen -t rsa
[iyunv@MySQL-15.11 ~ 11:03:16]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.12\" "="">root@MySQL-15.12"
[iyunv@MySQL-15.11 ~ 11:03:42]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.13\" "="">root@MySQL-15.13"
[iyunv@MySQL-15.11 ~ 11:03:56]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.14\" "="">root@MySQL-15.14"
MySQL-15.12到 MySQL-15.11 、 MySQL-15.13 、 MySQL-15.14 :
[iyunv@MySQL-15.12 ~ 11:04:37]#ssh-keygen -t rsa
[iyunv@MySQL-15.12 ~ 11:04:41]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.11\" "="">root@MySQL-15.11"
[iyunv@MySQL-15.12 ~ 11:05:04]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.13\" "="">root@MySQL-15.13"
[iyunv@MySQL-15.12 ~ 11:05:23]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.14\" "="">root@MySQL-15.14"
MySQL-15.13到 MySQL-15.11 、 MySQL-15.12 、 MySQL-15.14 :
[iyunv@MySQL-15.13 ~ 10:23:55]#ssh-keygen -t rsa
[iyunv@MySQL-15.13 ~ 11:06:25]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.11\" "="">root@MySQL-15.11"
[iyunv@MySQL-15.13 ~ 11:06:47]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.12\" "="">root@MySQL-15.12"
[iyunv@MySQL-15.13 ~ 11:07:04]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.14\" "="">root@MySQL-15.14"
MySQL-15.14到 MySQL-15.11 、 MySQL-15.12 、 MySQL-15.13 :
[iyunv@MySQL-15.14 .ssh 11:02:09]#ssh-keygen -t rsa
[iyunv@MySQL-15.14 .ssh 11:07:58]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.11\" "="">root@MySQL-15.11"
[iyunv@MySQL-15.14 .ssh 11:08:33]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.12\" "="">root@MySQL-15.12"
[iyunv@MySQL-15.14 .ssh 11:08:52]#ssh-copy-id -i /root/.ssh/id_rsa.pub "-p 2777 <a href="mailto:root@MySQL-15.13\" "="">root@MySQL-15.13"
注:因为此架构masterha-manager 、 masterha-node 没有在同一个服务器上,所以上述 ssh 无密码配置可满足需求,若 masterha-manager 在某个 masterha-node 结点上,则该 node 结点的 ssh 需配置自己到自己的无密码登陆。
3 masterha-manager的命令及测试
3.1 ssh检查
Lvs-15.23、 Lvs-15.24 上分别执行看看
[iyunv@Lvs-15.23 ~ 10:59:18]#masterha_check_ssh --conf=/etc/app1.cnf
[iyunv@Lvs-15.24 ~ 11:20:46]#masterha_check_ssh --conf=/etc/app1.cnf
3.2 repl检查
Lvs-15.23、 Lvs-15.24 上分别执行看看
[iyunv@Lvs-15.23 ~ 11:38:06]#masterha_check_repl --conf=/etc/app1.cnf
[iyunv@Lvs-15.24 scripts 11:40:09]#masterha_check_repl --conf=/etc/app1.cnf
|||
结果返回“MySQL Replication Health is OK. ”则成功,若是“ MySQL Replication Health is NOT OK! ”,根据 [error] 的报错信息,进行相关修改即可。
四、应用及管理
1 masterha-node的 purge_relay_logs 的设置
每个从MySQL 的此任务时间要岔开 ,purge_relay_logs是需要连接 MySQL 的,所以连接 MySQL 的参数是必须要定义的。另外, purge_relay_logs 指定的 --host 必须是运行该命令的 server ,即不能在 host2 上指定 --host=host1 。
MySQL-15.11:
[iyunv@MySQL-15.11 ~ 12:05:04]#vi /etc/cron.d/purge_relay_logs
[iyunv@MySQL-15.11 ~ 12:05:27]#cat /etc/cron.d/purge_relay_logs
# purge relay logs at 5am
0 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.11 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
[iyunv@MySQL-15.11 ~ 12:05:36]#/etc/init.d/crond restart
MySQL-15.12:
[iyunv@MySQL-15.12 ~ 12:08:00]#vi /etc/cron.d/purge_relay_logs
[iyunv@MySQL-15.12 ~ 12:08:06]#cat /etc/cron.d/purge_relay_logs
# purge relay logs at 5am
10 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.12 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
[iyunv@MySQL-15.12 ~ 12:08:12]#/etc/init.d/crond restart
MySQL-15.13:
[iyunv@MySQL-15.13 ~ 12:08:47]#vi /etc/cron.d/purge_relay_logs
[iyunv@MySQL-15.13 ~ 12:09:06]#cat /etc/cron.d/purge_relay_logs
# purge relay logs at 5am
20 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.13 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
[iyunv@MySQL-15.13 ~ 12:09:10]#/etc/init.d/crond restart
MySQL-15.14:
[iyunv@MySQL-15.14 ~ 12:09:53]#cat /etc/cron.d/purge_relay_logs
# purge relay logs at 5am
30 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.14 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
[iyunv@MySQL-15.14 ~ 12:09:56]#/etc/init.d/crond restart
2 masterha_manager进程后台运行
2.1 masterha_manager
进程只能运行一个,所以只在Lvs-15.23 或 Lvs-15.24 上运行:
nohup masterha_manager --conf=/etc/app1.cnf < /dev/null > /var/log/masterha/app1/app1.log 2>&1 &
2.2 masterha_manager是否正常运行
[iyunv@Lvs-15.23 ~ 12:11:31]#masterha_check_status --conf=/etc/app1.cnf
app1 (pid:23361) is running(0:PING_OK), master:192.168.15.11
因为当前在Lvs-15.23 上运行,那么在 Lvs-15.24 上 check 则是 NOT_RUNNING
[iyunv@Lvs-15.24 ~ 12:13:42]#masterha_check_status --conf=/etc/app1.cnf
app1 is stopped(2:NOT_RUNNING).
3 MySQL主的自动切换测试
3.1 切换前
在Lvs-15.23 上开启 masterha_manager ,检测到当前 MySQL-15.11 为主,并运行正常
[iyunv@Lvs-15.23 xxm 17:43:33]#masterha_manager --conf=/etc/app1.cnf
Mon Jan 19 17:44:40 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 17:44:40 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 17:44:40 2015 - [info] Reading server configuration from /etc/app1.cnf..
[iyunv@Lvs-15.23 ~ 17:44:52]#masterha_check_status --conf=/etc/app1.cnf
app1 (pid:13945) is running(0:PING_OK), master:192.168.15.11
MySQL-15.11的状态:
[iyunv@MySQL-15.11 ~ 17:48:01]#mysqlxxm -e "show global status like 'rpl_semi_sync%';"
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 3 |
| Rpl_semi_sync_master_net_avg_wait_time | 1521 |
| Rpl_semi_sync_master_net_wait_time | 19778 |
| Rpl_semi_sync_master_net_waits | 13 |
| Rpl_semi_sync_master_no_times | 2 |
| Rpl_semi_sync_master_no_tx | 5 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 603 |
| Rpl_semi_sync_master_tx_wait_time | 1811 |
| Rpl_semi_sync_master_tx_waits | 3 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 3 |
| Rpl_semi_sync_slave_status | OFF |
+--------------------------------------------+-------+
MySQL-15.12的状态:
[iyunv@MySQL-15.12 ~ 17:18:39]#mysqlxxm -e "show global status like 'rpl_semi_sync%';"
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
| Rpl_semi_sync_slave_status | ON |
+--------------------------------------------+-------+
3.2 切换后
第一步:在MySQL-15.11 上停掉 MySQL 服务
[iyunv@MySQL-15.11 ~ 17:48:16]#/etc/init.d/mysqld stop
第二步:看到Lvs-15.23 的输出
[iyunv@Lvs-15.23 xxm 17:43:33]#masterha_manager --conf=/etc/app1.cnf
Mon Jan 19 17:44:40 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 17:44:40 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 17:44:40 2015 - [info] Reading server configuration from /etc/app1.cnf..
Creating /var/log/masterha/app1 if not exists.. ok.
Checking output directory is accessible or not..
ok.
Binlog found at /opt/data/log, up to log.000003
Mon Jan 19 17:53:35 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 17:53:35 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 17:53:35 2015 - [info] Reading server configuration from /etc/app1.cnf..
[iyunv@Lvs-15.23 ~ 17:45:00]#masterha_check_status --conf=/etc/app1.cnf
app1 is stopped(2:NOT_RUNNING).
|||
masterha_manager默认完成一次 failover 后,会 exit 。
第三步:查看MySQL-15.12 的状态
1 此时 MySQL-15.12 为主,有 2 台 slave ( MySQL-15.13 和 MySQL-15.14 )
[iyunv@MySQL-15.12 ~ 17:49:00]#mysqlxxm -e "show global status like 'rpl_semi_sync%';"
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 2 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
| Rpl_semi_sync_slave_status | OFF |
+--------------------------------------------+-------+
2 且根据定义的 master_ip_failover , MySQL-15.12 成功获得 vip
[iyunv@MySQL-15.12 ~ 17:55:49]#ip addr
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast qlen 1000
link/ether 00:50:56:b5:19:a8 brd ff:ff:ff:ff:ff:ff
inet 192.168.15.12/24 brd 192.168.15.255 scope global eth0
inet 192.168.15.95/24 brd 192.168.15.255 scope global secondary eth0:0
第四步:再开启masterha_manager 并检测运行状态
[iyunv@Lvs-15.23 xxm 17:53:41]#masterha_manager --conf=/etc/app1.cnf
Mon Jan 19 17:58:20 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 17:58:20 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 17:58:20 2015 - [info] Reading server configuration from /etc/app1.cnf..
|||
此时,会即刻退出,查看日志,得知报错原因如下:
[iyunv@Lvs-15.23 app1 17:59:35]#tail app1.log
Mon Jan 19 17:58:22 2015 - [info] Replication filtering check ok.
Mon Jan 19 17:58:22 2015 - [info] GTID (with auto-pos) is not supported
Mon Jan 19 17:58:22 2015 - [info] Starting SSH connection tests..
Mon Jan 19 17:58:24 2015 - [info] All SSH connection tests passed successfully.
Mon Jan 19 17:58:24 2015 - [info] Checking MHA Node version..
Mon Jan 19 17:58:24 2015 - [info] Version check ok.
Mon Jan 19 17:58:24 2015 - [error][/usr/lib/perl5/site_perl/5.8.8/MHA/ServerManager.pm, ln492] Server 192.168.15.11(192.168.15.11:36677) is dead, but must be alive! Check server settings.
Mon Jan 19 17:58:24 2015 - [error][/usr/lib/perl5/site_perl/5.8.8/MHA/MasterMonitor.pm, ln424] Error happened on checking configurations. at /usr/lib/perl5/site_perl/5.8.8/MHA/MasterMonitor.pm line 399
Mon Jan 19 17:58:24 2015 - [error][/usr/lib/perl5/site_perl/5.8.8/MHA/MasterMonitor.pm, ln523] Error happened on monitoring servers.
Mon Jan 19 17:58:24 2015 - [info] Got exit code 1 (Not master dead).
|||
因为MySQL-15.11 已经 dead ,但是 MHA 的配置文件 app1.cnf 里并没有将该服务器的相关配置删除,所以才会有报错。若 masterha_manager 放在后台运行,那么可加上 --remove_dead_master_conf参数,当其中一个server down ,那么 MHA 会将相应的配置从配置文件中删除。
第五步:主master 修复好后, change master to MySQL-15.12 ( new master )
MySQL-15.11 change master toMySQL-15.12的过程略,在 Lvs-15.23 上开启 masterha_manager ,并添加参数 -remove_dead_master_conf
[iyunv@Lvs-15.23 xxm 17:58:24]#masterha_manager --conf=/etc/app1.cnf -remove_dead_master_conf
Mon Jan 19 18:27:48 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 18:27:48 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 18:27:48 2015 - [info] Reading server configuration from /etc/app1.cnf..
此时检查masterha_manager 运行状态,可看到当前 master 为 MySQL-15.12
[iyunv@Lvs-15.23 scripts 18:27:42]#masterha_check_status --conf=/etc/app1.cnf
app1 (pid:14755) is running(0:PING_OK), master:192.168.15.12
MySQL-15.12的状态:
[iyunv@MySQL-15.12 ~ 18:23:12]#mysqlxxm -e "show global status like 'rpl_semi_sync%';"
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 3 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
| Rpl_semi_sync_slave_status | OFF |
+--------------------------------------------+-------+
[iyunv@MySQL-15.12 ~ 19:05:32]#ip addr|grep 192.168.15.95
inet 192.168.15.95/24 brd 192.168.15.255 scope global secondary eth0:0
MySQL-15.11的状态:
[iyunv@MySQL-15.11 ~ 18:24:38]#mysqlxxm -e "show global status like 'rpl_semi_sync%';"
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 0 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
| Rpl_semi_sync_slave_status | ON |
+--------------------------------------------+-------+
[iyunv@MySQL-15.11 ~ 19:06:23]#ip addr|grep 192.168.15.95
此时停掉MySQL-15.12 的 MySQL 服务,可能会发现 MySQL-15.11 没有及时转为 new master ,查看错误日志如下:
[error][/usr/lib/perl5/site_perl/5.8.8/MHA/MasterFailover.pm, ln309] Last failover was done at 2015/01/19 17:00:41. Current time is too early to do failover again. If you want to do failover, manually remove /var/log/masterha/app1/app1.failover.complete and run this script again.
所以可以将目标文件删除,重新执行master 并测试。如果为了测试方便,可以添加参数 --ignore_last_failover ,忽略上次 failover 的时间,直接进行 failover ,实际应用中请慎重。
[iyunv@Lvs-15.23 app1 19:52:54]#ll
总计 64
-rw-r--r-- 1 root root 0 01-19 17:53 app1.failover.complete
-rw-r--r-- 1 root root 53638 01-19 19:07 app1.log
-rw-r--r-- 1 root root 126 01-19 17:53 saved_master_binlog_from_192.168.15.11_36677_20150119175335.binlog
[iyunv@Lvs-15.23 app1 19:52:55]#rm -f app1.failover.complete
重新开始测试:
先开启MySQL-15.12 上的 MySQL 服务
[iyunv@MySQL-15.12 ~ 19:52:12]#/etc/init.d/mysqld start
Starting MySQL.. [确定 ]
|||
然后开启Lvs-15.23 上 masterha_manager ,并检查状态
[iyunv@Lvs-15.23 app1 19:53:06]#masterha_manager --conf=/etc/app1.cnf -remove_dead_master_conf
Mon Jan 19 19:56:33 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 19:56:33 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 19:56:33 2015 - [info] Reading server configuration from /etc/app1.cnf..
[iyunv@Lvs-15.23 ~ 19:56:26]#masterha_check_status --conf=/etc/app1.cnf
app1 (pid:16839) is running(0:PING_OK), master:192.168.15.12
|||
停掉MySQL-15.12 上的 MySQL 服务
[iyunv@MySQL-15.12 ~ 19:52:41]#/etc/init.d/mysqld stop
Shutting down MySQL... [确定 ]
|||
Lvs-15.23上 masterha_manager 完成自动切换
[iyunv@Lvs-15.23 app1 19:53:06]#masterha_manager --conf=/etc/app1.cnf -remove_dead_master_conf
Mon Jan 19 19:56:33 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 19:56:33 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 19:56:33 2015 - [info] Reading server configuration from /etc/app1.cnf..
Creating /var/log/masterha/app1 if not exists.. ok.
Checking output directory is accessible or not..
ok.
Binlog found at /opt/data/log, up to log.000007
Mon Jan 19 19:57:46 2015 - [warning] Global configuration file /etc/masterha_default.cnf not found. Skipping.
Mon Jan 19 19:57:46 2015 - [info] Reading application default configuration from /etc/app1.cnf..
Mon Jan 19 19:57:46 2015 - [info] Reading server configuration from /etc/app1.cnf..
|||
检查MySQL-15.11 、 MySQL-15.13 的状态
MySQL-15.11已由 slave 变为 master
[iyunv@MySQL-15.11 ~ 19:53:44]#mysqlxxm -e "show slave status\G"
[iyunv@MySQL-15.11 ~ 19:58:42]#mysqlxxm -e "show global status like 'rpl_semi_sync%';"
+--------------------------------------------+-------+
| Variable_name | Value |
+--------------------------------------------+-------+
| Rpl_semi_sync_master_clients | 2 |
| Rpl_semi_sync_master_net_avg_wait_time | 0 |
| Rpl_semi_sync_master_net_wait_time | 0 |
| Rpl_semi_sync_master_net_waits | 0 |
| Rpl_semi_sync_master_no_times | 0 |
| Rpl_semi_sync_master_no_tx | 0 |
| Rpl_semi_sync_master_status | ON |
| Rpl_semi_sync_master_timefunc_failures | 0 |
| Rpl_semi_sync_master_tx_avg_wait_time | 0 |
| Rpl_semi_sync_master_tx_wait_time | 0 |
| Rpl_semi_sync_master_tx_waits | 0 |
| Rpl_semi_sync_master_wait_pos_backtraverse | 0 |
| Rpl_semi_sync_master_wait_sessions | 0 |
| Rpl_semi_sync_master_yes_tx | 0 |
| Rpl_semi_sync_slave_status | OFF |
+--------------------------------------------+-------+
MySQL-15.13的 master 已由 MySQL-15.12 变为 MySQL-15.11 :
[iyunv@MySQL-15.13 ~ 19:51:54]#mysqlxxm -e "show slave status\G"
*************************** 1. row ***************************
Slave_IO_State: Waiting for master to send event
Master_Host: 192.168.15.11
Master_User: repl
Master_Port: 36677
......
|||
再看MySQL-15.11 已被 master_ip_failover 作用,接收写的 vip
[iyunv@MySQL-15.11 ~ 19:58:44]#ip addr|grep 192.168.15.95
inet 192.168.15.95/24 brd 192.168.15.255 scope global secondary eth0:0
|||
同时,Lvs-15.23 上 app1.cnf 里的 [server-Master2] 模块已被删除,此时再启动 masterha_manager 就不会再报错有结点 not alive 的错误了。
[iyunv@Lvs-15.23 ~ 20:02:31]#tail -15 /etc/app1.cnf
repl_user=repl
report_script=/usr/local/scripts/send_report
ssh_port=2777
ssh_user=root
user=mha_manager
[server-Master1]
candidate_master=1
hostname=192.168.15.11
[server-Slave1]
hostname=192.168.15.13
[server-Slave2]
hostname=192.168.15.14
至此,masterha_manager 的自动 failover 测试告一段落,具体其他的(手动 failover 等)更多更详细的功能请参考官方说明。
注:通过测试可知,slave 、 master 之间可互换,为了完全数据一致,所以 MySQL 的服务器不论主从, my.cnf 除个别参数( eg : server-id )不一样,其他操作均一致,例如创建 repl 账号、同时安装半同步复制主从插件等。
运维网声明
1、欢迎大家加入本站运维交流群:群②:261659950 群⑤:202807635 群⑦870801961 群⑧679858003
2、本站所有主题由该帖子作者发表,该帖子作者与运维网 享有帖子相关版权
3、所有作品的著作权均归原作者享有,请您和我们一样尊重他人的著作权等合法权益。如果您对作品感到满意,请购买正版
4、禁止制作、复制、发布和传播具有反动、淫秽、色情、暴力、凶杀等内容的信息,一经发现立即删除。若您因此触犯法律,一切后果自负,我们对此不承担任何责任
5、所有资源均系网友上传或者通过网络收集,我们仅提供一个展示、介绍、观摩学习的平台,我们不对其内容的准确性、可靠性、正当性、安全性、合法性等负责,亦不承担任何法律责任
6、所有作品仅供您个人学习、研究或欣赏,不得用于商业或者其他用途,否则,一切后果均由您自己承担,我们对此不承担任何法律责任
7、如涉及侵犯版权等问题,请您及时通知我们,我们将立即采取措施予以解决
8、联系人Email:admin@iyunv.com 网址:www.yunweiku.com