butter7372 发表于 2015-11-20 08:10:01

MHA+Lvs+Keepalived实现MySQL的高可用及读负载均衡_3(MHA)

MHA+Lvs+Keepalived实现MySQL的高可用及读负载均衡_3(MHA)实现MySQL高可用的MHA安装及配置
  MHA项目:http://code.google.com/p/mysql-master-ha/wiki/TableOfContents?tm=6
  Perl的相关模块下载:http://search.cpan.org/
  
  一、安装masterha-node
  客户端——4台MySQL服务器、服务端——2台Lvs服务器均需安装masterha-node结点
  
  1 安装相关的perl模块
  1.1 DBI
  #tar xf DBI-1.633.tar.gz
  #cd DBI-1.633
  #perl Makefile.PL
  #make
  #make install
  #perl -e "use DBI" ->无返回值,说明正确安装
  
  1.2 DBD-mysql
  #tar xf DBD-mysql-4.029.tar.gz
  #perl Makefile.PL
  #make
  #make install
  #perl -e "use DBD::mysql"
  
  2 安装masterha-node
  #tar xf mha4mysql-node-0.56.tar.gz
  #cd mha4mysql-node-0.56
  #perl Makefile.PL
  #make
  #make install
  
  二、安装masterha-manager
  仅在服务端——2台Lvs服务器上安装
  
  1 安装相关的perl模块
  1.1 Config::Tiny
  #gunzip Config-Tiny-2.20.tgz
  #tar xf Config-Tiny-2.20.tar
  #cd Config-Tiny-2.20
  #perl Makefile.PL
  
  1.2 Log::Dispatch
  这个模块单独编译安装的话,所需要的依赖包太多,还是使用CPAN方式简易些。使用CPAN方式,需网络正常。下列方式,一路yes即可。
  # perl -MCPAN -e "install Log::Dispatch"
  
  1.3 Parallel::ForkManager
  #perl -MCPAN -e "install Parallel::ForkManager"
  
  2 安装masterha-manager
  #tar xf mha4mysql-manager-0.56.tar.gz
  #cd mha4mysql-manager-0.56
  #perl Makefile.PL
  #make
  #make install
  
  三、配置及测试
  1 masterha-manager的配置文件
  1.1 在给MHA创建配置文件
  cat << EOF >> /etc/app1.cnf
  
  #mysql user and password
  user=mha_manager
  password=123456
  port=36677
  
  #replication user and password
  repl_user=repl
  repl_password=123456
  
  #ssh user and port
  ssh_user=root
  ssh_port=2777
  
  #binlog directory
  master_binlog_dir=/opt/data/log
  
  #working directory on the manager
  manager_workdir=/var/log/masterha/app1
  
  #manager log file
  manager_log=/var/log/masterha/app1/app1.log
  
  #working directory on MySQL servers
  remote_workdir=/var/log/masterha/app1
  
  #If MySQL command line utilities are installed under a non-standard directory, use this option to set the directory.
  client_bindir=/usr/local/mysql/bin
  
  #If MySQL libraries are installed under a non-standard directory, use this option to set the directory.
  client_libdir=/usr/local/mysql/lib
  
  #To check by connecting/disconnecting every time, because it's more strict and it can detect TCP connection level failure more quickly.
  ping_type=CONNECT
  
  #To fully control the order of priority (i.e. host2->host3->host4..)
  latest_priority=0
  
  # IP address failover solution
  master_ip_failover_script=/usr/local/scripts/master_ip_failover
  
  #Send a report (i.e. e-mail) when failover has completed or ended with errors
  report_script=/usr/local/scripts/send_report
  
  
  hostname=192.168.15.11
  candidate_master=1
  
  
  hostname=192.168.15.12
  candidate_master=1
  
  
  hostname=192.168.15.13
  
  
  hostname=192.168.15.14
  EOF
  
  1.2 master_ip_failover脚本
  cat/usr/local/scripts/master_ip_failover
  #!/bin/bash
  #--------------第一部分:变量及函数定义-----------------#
  ###Begin Variables define###
  ssh_port=2777
  cmd=/sbin/ifconfig
  vip=192.168.15.31
  device=eth0:0
  netmk=255.255.255.0
  start_vip=&quot;${cmd} ${device} ${vip} netmask ${netmk} up&quot;
  stop_vip=&quot;${cmd} ${device} ${vip} netmask ${netmk} down&quot;
  ###End Variables define###
  
  ###Begin Status Funciont###
  status()
  {
  exit 0
  }
  ###End Status Funciont###
  
  ###Begin Stop Or Stopssh Funciont###
  stop()
  {
  exit 0
  }
  ###End Stop Or Stopssh Funciont###
  
  ###Begin Start Funciont###
  start()
  {
  /usr/bin/ssh -p ${ssh_port} ${ssh_user}@${orig_master_host} &quot;&quot;${stop_vip}&quot;&quot;
  /usr/bin/ssh -p ${ssh_port} ${ssh_user}@${new_master_host} &quot;&quot;${start_vip}&quot;&quot;
  /usr/bin/ssh -p ${ssh_port} ${ssh_user}@${new_master_host} &quot;/etc/init.d/lvsrs stop&quot;
  exit 0
  }
  ###End Start Funciont###
  #--------------第一部分:变量及函数定义-----------------#
  
  #--------------第二部分:命令行参数-----------------#
  ###Begin Get The Command-Line Parameters###
  ###eval set -- &quot;`getopt -a -q -o n -l command::,ssh_user:,orig_master_host:,orig_master_ip:,orig_master_port:,new_master_host:,new_master_ip:,new_master_port:,new_master_user:,new_master_password: -- &quot;$@&quot;`&quot;
  eval set -- &quot;`getopt -a -q -o n -l command::,ssh_user:,orig_master_host:,orig_master_ip:,new_master_host:,new_master_ip: -- &quot;$@&quot;`&quot;
  
  if [ $? != 0 ] ; then echo &quot;Terminating...&quot; >&2 ;exit 1;fi
  
  while true
  do
  case &quot;$1&quot; in
  --command)
  command=&quot;${2}&quot;;
  shift;;
  --ssh_user)
  ssh_user=&quot;${2}&quot;;
  shift;;
  --orig_master_host)
  orig_master_host=&quot;${2}&quot;;
  shift;;
  --orig_master_ip)
  orig_master_ip=&quot;${2}&quot;;
  shift;;
  --new_master_host)
  new_master_host=&quot;${2}&quot;;
  shift;;
  --new_master_ip)
  new_master_ip=&quot;${2}&quot;;
  shift;;
  --)
  shift;
  break;;
  esac
  shift
  done
  ###End Get The Command-Line Parameters###
  #--------------第二部分:命令行参数-----------------#
  
  #--------------第三部分:函数调用-----------------#
  if [ &quot;${command}&quot; == &quot;status&quot; ];
  then
  status;
  fi
  
  
  if [ &quot;${command}&quot; == &quot;stop&quot; ] || [ &quot;${command}&quot; == &quot;stopssh&quot; ] ;
  then
  stop;
  fi
  
  if [ &quot;${command}&quot; == &quot;start&quot; ];
  then   
  start;
  fi
  #--------------第三部分:函数调用-----------------#
  
  1.3 report脚本
  cat/usr/local/scripts/send_report
  #!/bin/bash
  #--------------第一部分:变量及函数定义-----------------#
  send_report()
  {
  echo -e &quot;Orig_master is ${dead_master_host}.\n New_master is ${new_master_host}.\n New_slave_hosts is ${new_slave_hosts}. \n ${body}&quot; | mail -s ${subject} 2537657486@qq.com
  }
  #--------------第一部分:变量及函数定义-----------------#
  
  #--------------第二部分:命令行参数-----------------#
  ###Begin Get The Command-Line Parameters###
  eval set -- &quot;`getopt -a -q -o n -l dead_master_host:,body:,new_slave_hosts:,subject:,new_master_host: -- &quot;$@&quot;`&quot;
  
  if [ $? != 0 ] ; then echo &quot;Terminating...&quot; >&2 ;exit 1;fi
  
  while true
  do
  case &quot;$1&quot; in
  --dead_master_host)
  dead_master_host=&quot;${2}&quot;;
  shift;;
  --body)
  body=&quot;${2}&quot;;
  shift;;
  --new_slave_hosts)
  new_slave_hosts=&quot;${2}&quot;;
  shift;;
  --subject)
  subject=&quot;${2}&quot;;
  shift;;
  --new_master_host)
  new_master_host=&quot;${2}&quot;;
  shift;;
  --)
  shift;
  break;;
  esac
  shift
  done
  ###End Get The Command-Line Parameters###
  #--------------第二部分:命令行参数-----------------#
  
  send_report;
  
  2 ssh无密码配置
  2.1 masterha-manager到各node
  Lvs-15.23到各node:
  #ssh-keygen -t rsa
  # ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.11\&quot; &quot;=&quot;&quot;>root@MySQL-15.11&quot;
  # ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.12\&quot; &quot;=&quot;&quot;>root@MySQL-15.12&quot;
  # ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.13\&quot; &quot;=&quot;&quot;>root@MySQL-15.13&quot;
  # ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.14\&quot; &quot;=&quot;&quot;>root@MySQL-15.14&quot;
  
  Lvs-15.24到各node:
  #ssh-keygen -t rsa
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.11\&quot; &quot;=&quot;&quot;>root@MySQL-15.11&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.12\&quot; &quot;=&quot;&quot;>root@MySQL-15.12&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.13\&quot; &quot;=&quot;&quot;>root@MySQL-15.13&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.14\&quot; &quot;=&quot;&quot;>root@MySQL-15.14&quot;
  
  
  2.2 各node之间
  MySQL-15.11到MySQL-15.12、MySQL-15.13、MySQL-15.14:
  #ssh-keygen -t rsa
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.12\&quot; &quot;=&quot;&quot;>root@MySQL-15.12&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.13\&quot; &quot;=&quot;&quot;>root@MySQL-15.13&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.14\&quot; &quot;=&quot;&quot;>root@MySQL-15.14&quot;
  
  MySQL-15.12到MySQL-15.11、MySQL-15.13、MySQL-15.14:
  #ssh-keygen -t rsa
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.11\&quot; &quot;=&quot;&quot;>root@MySQL-15.11&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.13\&quot; &quot;=&quot;&quot;>root@MySQL-15.13&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.14\&quot; &quot;=&quot;&quot;>root@MySQL-15.14&quot;
  
  MySQL-15.13到MySQL-15.11、MySQL-15.12、MySQL-15.14:
  #ssh-keygen -t rsa
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.11\&quot; &quot;=&quot;&quot;>root@MySQL-15.11&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.12\&quot; &quot;=&quot;&quot;>root@MySQL-15.12&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.14\&quot; &quot;=&quot;&quot;>root@MySQL-15.14&quot;
  
  MySQL-15.14到MySQL-15.11、MySQL-15.12、MySQL-15.13:
  #ssh-keygen -t rsa
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.11\&quot; &quot;=&quot;&quot;>root@MySQL-15.11&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.12\&quot; &quot;=&quot;&quot;>root@MySQL-15.12&quot;
  #ssh-copy-id -i /root/.ssh/id_rsa.pub &quot;-p 2777 <a href=&quot;mailto:root@MySQL-15.13\&quot; &quot;=&quot;&quot;>root@MySQL-15.13&quot;
  
  注:因为此架构masterha-manager、masterha-node没有在同一个服务器上,所以上述ssh无密码配置可满足需求,若masterha-manager在某个masterha-node结点上,则该node结点的ssh需配置自己到自己的无密码登陆。
  
  3 masterha-manager的命令及测试
  3.1 ssh检查
  Lvs-15.23、Lvs-15.24上分别执行看看
  #masterha_check_ssh --conf=/etc/app1.cnf
  #masterha_check_ssh --conf=/etc/app1.cnf
  
  3.2 repl检查
  Lvs-15.23、Lvs-15.24上分别执行看看
  #masterha_check_repl --conf=/etc/app1.cnf
  #masterha_check_repl --conf=/etc/app1.cnf
  |||
  结果返回“MySQL Replication Health is OK.”则成功,若是“MySQL Replication Health is NOT OK!”,根据的报错信息,进行相关修改即可。
  
  四、应用及管理
  1 masterha-node的purge_relay_logs的设置
  每个从MySQL的此任务时间要岔开,purge_relay_logs是需要连接MySQL的,所以连接MySQL的参数是必须要定义的。另外,purge_relay_logs指定的--host必须是运行该命令的server,即不能在host2上指定--host=host1。
  
  MySQL-15.11:
  #vi /etc/cron.d/purge_relay_logs
  #cat /etc/cron.d/purge_relay_logs
  # purge relay logs at 5am   
  0 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.11 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
  #/etc/init.d/crond restart
  
  MySQL-15.12:
  #vi /etc/cron.d/purge_relay_logs
  #cat /etc/cron.d/purge_relay_logs
  # purge relay logs at 5am   
  10 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.12 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
  #/etc/init.d/crond restart
  
  MySQL-15.13:
  #vi /etc/cron.d/purge_relay_logs
  #cat /etc/cron.d/purge_relay_logs
  # purge relay logs at 5am   
  20 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.13 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
  #/etc/init.d/crond restart
  
  MySQL-15.14:
  #cat /etc/cron.d/purge_relay_logs
  # purge relay logs at 5am   
  30 5 * * * root /usr/bin/purge_relay_logs --user=mha_manager --host=192.168.15.14 --password='123456' --port=36677 --workdir=/opt/data/tmp --disable_relay_log_purge >> /var/log/masterha/purge_relay_logs.log 2>&1
  #/etc/init.d/crond restart
  
  2 masterha_manager进程后台运行
  2.1 masterha_manager
  进程只能运行一个,所以只在Lvs-15.23或Lvs-15.24上运行:
  nohup masterha_manager --conf=/etc/app1.cnf < /dev/null > /var/log/masterha/app1/app1.log 2>&1 &
  2.2 masterha_manager是否正常运行
  #masterha_check_status --conf=/etc/app1.cnf
  app1 (pid:23361) is running(0:PING_OK), master:192.168.15.11
  
  因为当前在Lvs-15.23上运行,那么在Lvs-15.24上check则是NOT_RUNNING
  #masterha_check_status --conf=/etc/app1.cnf
  app1 is stopped(2:NOT_RUNNING).
  
  3 MySQL主的自动切换测试
  3.1 切换前
  在Lvs-15.23上开启masterha_manager,检测到当前MySQL-15.11为主,并运行正常
  #masterha_manager --conf=/etc/app1.cnf
  Mon Jan 19 17:44:40 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 17:44:40 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 17:44:40 2015 - Reading server configuration from /etc/app1.cnf..
  
  #masterha_check_status --conf=/etc/app1.cnf
  app1 (pid:13945) is running(0:PING_OK), master:192.168.15.11
  
  MySQL-15.11的状态:
  #mysqlxxm -e &quot;show global status like 'rpl_semi_sync%';&quot;
  &#43;--------------------------------------------&#43;-------&#43;
  | Variable_name                              | Value |
  &#43;--------------------------------------------&#43;-------&#43;
  | Rpl_semi_sync_master_clients               | 3   |
  | Rpl_semi_sync_master_net_avg_wait_time   | 1521|
  | Rpl_semi_sync_master_net_wait_time         | 19778 |
  | Rpl_semi_sync_master_net_waits             | 13    |
  | Rpl_semi_sync_master_no_times            | 2   |
  | Rpl_semi_sync_master_no_tx               | 5   |
  | Rpl_semi_sync_master_status                | ON    |
  | Rpl_semi_sync_master_timefunc_failures   | 0   |
  | Rpl_semi_sync_master_tx_avg_wait_time      | 603   |
  | Rpl_semi_sync_master_tx_wait_time          | 1811|
  | Rpl_semi_sync_master_tx_waits            | 3   |
  | Rpl_semi_sync_master_wait_pos_backtraverse | 0   |
  | Rpl_semi_sync_master_wait_sessions         | 0   |
  | Rpl_semi_sync_master_yes_tx                | 3   |
  | Rpl_semi_sync_slave_status               | OFF   |
  &#43;--------------------------------------------&#43;-------&#43;
  
  MySQL-15.12的状态:
  #mysqlxxm -e &quot;show global status like 'rpl_semi_sync%';&quot;
  &#43;--------------------------------------------&#43;-------&#43;
  | Variable_name                              | Value |
  &#43;--------------------------------------------&#43;-------&#43;
  | Rpl_semi_sync_master_clients               | 0   |
  | Rpl_semi_sync_master_net_avg_wait_time   | 0   |
  | Rpl_semi_sync_master_net_wait_time         | 0   |
  | Rpl_semi_sync_master_net_waits             | 0   |
  | Rpl_semi_sync_master_no_times            | 0   |
  | Rpl_semi_sync_master_no_tx               | 0   |
  | Rpl_semi_sync_master_status                | ON    |
  | Rpl_semi_sync_master_timefunc_failures   | 0   |
  | Rpl_semi_sync_master_tx_avg_wait_time      | 0   |
  | Rpl_semi_sync_master_tx_wait_time          | 0   |
  | Rpl_semi_sync_master_tx_waits            | 0   |
  | Rpl_semi_sync_master_wait_pos_backtraverse | 0   |
  | Rpl_semi_sync_master_wait_sessions         | 0   |
  | Rpl_semi_sync_master_yes_tx                | 0   |
  | Rpl_semi_sync_slave_status               | ON    |
  &#43;--------------------------------------------&#43;-------&#43;
  
  3.2 切换后
  第一步:在MySQL-15.11上停掉MySQL服务
  #/etc/init.d/mysqld stop
  
  第二步:看到Lvs-15.23的输出
  #masterha_manager --conf=/etc/app1.cnf
  Mon Jan 19 17:44:40 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 17:44:40 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 17:44:40 2015 - Reading server configuration from /etc/app1.cnf..
  Creating /var/log/masterha/app1 if not exists..    ok.
  Checking output directory is accessible or not..
  ok.
  Binlog found at /opt/data/log, up to log.000003
  Mon Jan 19 17:53:35 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 17:53:35 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 17:53:35 2015 - Reading server configuration from /etc/app1.cnf..
  
  #masterha_check_status --conf=/etc/app1.cnf
  app1 is stopped(2:NOT_RUNNING).
  |||
  masterha_manager默认完成一次failover后,会exit。
  
  第三步:查看MySQL-15.12的状态
  1 此时MySQL-15.12为主,有2台slave(MySQL-15.13和MySQL-15.14)
  #mysqlxxm -e &quot;show global status like 'rpl_semi_sync%';&quot;
  &#43;--------------------------------------------&#43;-------&#43;
  | Variable_name                              | Value |
  &#43;--------------------------------------------&#43;-------&#43;
  | Rpl_semi_sync_master_clients               | 2   |
  | Rpl_semi_sync_master_net_avg_wait_time   | 0   |
  | Rpl_semi_sync_master_net_wait_time         | 0   |
  | Rpl_semi_sync_master_net_waits             | 0   |
  | Rpl_semi_sync_master_no_times            | 0   |
  | Rpl_semi_sync_master_no_tx               | 0   |
  | Rpl_semi_sync_master_status                | ON    |
  | Rpl_semi_sync_master_timefunc_failures   | 0   |
  | Rpl_semi_sync_master_tx_avg_wait_time      | 0   |
  | Rpl_semi_sync_master_tx_wait_time          | 0   |
  | Rpl_semi_sync_master_tx_waits            | 0   |
  | Rpl_semi_sync_master_wait_pos_backtraverse | 0   |
  | Rpl_semi_sync_master_wait_sessions         | 0   |
  | Rpl_semi_sync_master_yes_tx                | 0   |
  | Rpl_semi_sync_slave_status               | OFF   |
  &#43;--------------------------------------------&#43;-------&#43;
  2 且根据定义的master_ip_failover,MySQL-15.12成功获得vip
  #ip addr
  1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue
  link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
  inet 127.0.0.1/8 scope host lo
  2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast qlen 1000
  link/ether 00:50:56:b5:19:a8 brd ff:ff:ff:ff:ff:ff
  inet 192.168.15.12/24 brd 192.168.15.255 scope global eth0
  inet 192.168.15.95/24 brd 192.168.15.255 scope global secondary eth0:0
  
  第四步:再开启masterha_manager并检测运行状态
  #masterha_manager --conf=/etc/app1.cnf
  Mon Jan 19 17:58:20 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 17:58:20 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 17:58:20 2015 - Reading server configuration from /etc/app1.cnf..
  |||
  此时,会即刻退出,查看日志,得知报错原因如下:
  #tail app1.log
  Mon Jan 19 17:58:22 2015 - Replication filtering check ok.
  Mon Jan 19 17:58:22 2015 - GTID (with auto-pos) is not supported
  Mon Jan 19 17:58:22 2015 - Starting SSH connection tests..
  Mon Jan 19 17:58:24 2015 - All SSH connection tests passed successfully.
  Mon Jan 19 17:58:24 2015 - Checking MHA Node version..
  Mon Jan 19 17:58:24 2015 - Version check ok.
  Mon Jan 19 17:58:24 2015 - Server 192.168.15.11(192.168.15.11:36677) is dead, but must be alive! Check server settings.
  Mon Jan 19 17:58:24 2015 - Error happened on checking configurations.at /usr/lib/perl5/site_perl/5.8.8/MHA/MasterMonitor.pm line 399
  Mon Jan 19 17:58:24 2015 - Error happened on monitoring servers.
  Mon Jan 19 17:58:24 2015 - Got exit code 1 (Not master dead).
  |||
  因为MySQL-15.11已经dead,但是MHA的配置文件app1.cnf里并没有将该服务器的相关配置删除,所以才会有报错。若masterha_manager放在后台运行,那么可加上--remove_dead_master_conf参数,当其中一个server down,那么MHA会将相应的配置从配置文件中删除。
  
  第五步:主master修复好后,change master to MySQL-15.12 (new master)
  MySQL-15.11 change master toMySQL-15.12的过程略,在Lvs-15.23上开启masterha_manager,并添加参数-remove_dead_master_conf
  #masterha_manager --conf=/etc/app1.cnf -remove_dead_master_conf
  Mon Jan 19 18:27:48 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 18:27:48 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 18:27:48 2015 - Reading server configuration from /etc/app1.cnf..
  
  此时检查masterha_manager运行状态,可看到当前master为MySQL-15.12
  #masterha_check_status --conf=/etc/app1.cnf
  app1 (pid:14755) is running(0:PING_OK), master:192.168.15.12
  
  MySQL-15.12的状态:
  #mysqlxxm -e &quot;show global status like 'rpl_semi_sync%';&quot;
  &#43;--------------------------------------------&#43;-------&#43;
  | Variable_name                              | Value |
  &#43;--------------------------------------------&#43;-------&#43;
  | Rpl_semi_sync_master_clients               | 3   |
  | Rpl_semi_sync_master_net_avg_wait_time   | 0   |
  | Rpl_semi_sync_master_net_wait_time         | 0   |
  | Rpl_semi_sync_master_net_waits             | 0   |
  | Rpl_semi_sync_master_no_times            | 0   |
  | Rpl_semi_sync_master_no_tx               | 0   |
  | Rpl_semi_sync_master_status                | ON    |
  | Rpl_semi_sync_master_timefunc_failures   | 0   |
  | Rpl_semi_sync_master_tx_avg_wait_time      | 0   |
  | Rpl_semi_sync_master_tx_wait_time          | 0   |
  | Rpl_semi_sync_master_tx_waits            | 0   |
  | Rpl_semi_sync_master_wait_pos_backtraverse | 0   |
  | Rpl_semi_sync_master_wait_sessions         | 0   |
  | Rpl_semi_sync_master_yes_tx                | 0   |
  | Rpl_semi_sync_slave_status               | OFF   |
  &#43;--------------------------------------------&#43;-------&#43;
  
  #ip addr|grep 192.168.15.95
  inet 192.168.15.95/24 brd 192.168.15.255 scope global secondary eth0:0
  
  MySQL-15.11的状态:
  #mysqlxxm -e &quot;show global status like 'rpl_semi_sync%';&quot;
  &#43;--------------------------------------------&#43;-------&#43;
  | Variable_name                              | Value |
  &#43;--------------------------------------------&#43;-------&#43;
  | Rpl_semi_sync_master_clients               | 0   |
  | Rpl_semi_sync_master_net_avg_wait_time   | 0   |
  | Rpl_semi_sync_master_net_wait_time         | 0   |
  | Rpl_semi_sync_master_net_waits             | 0   |
  | Rpl_semi_sync_master_no_times            | 0   |
  | Rpl_semi_sync_master_no_tx               | 0   |
  | Rpl_semi_sync_master_status                | ON    |
  | Rpl_semi_sync_master_timefunc_failures   | 0   |
  | Rpl_semi_sync_master_tx_avg_wait_time      | 0   |
  | Rpl_semi_sync_master_tx_wait_time          | 0   |
  | Rpl_semi_sync_master_tx_waits            | 0   |
  | Rpl_semi_sync_master_wait_pos_backtraverse | 0   |
  | Rpl_semi_sync_master_wait_sessions         | 0   |
  | Rpl_semi_sync_master_yes_tx                | 0   |
  | Rpl_semi_sync_slave_status               | ON    |
  &#43;--------------------------------------------&#43;-------&#43;
  #ip addr|grep 192.168.15.95
  
  此时停掉MySQL-15.12的MySQL 服务,可能会发现MySQL-15.11没有及时转为new master,查看错误日志如下:
   Last failover was done at 2015/01/19 17:00:41. Current time is too early to do failover again. If you want to do failover, manually remove /var/log/masterha/app1/app1.failover.complete and run this script again.
  
  所以可以将目标文件删除,重新执行master并测试。如果为了测试方便,可以添加参数--ignore_last_failover,忽略上次failover的时间,直接进行failover,实际应用中请慎重。
  
  #ll
  总计 64
  -rw-r--r-- 1 root root   0 01-19 17:53 app1.failover.complete
  -rw-r--r-- 1 root root 53638 01-19 19:07 app1.log
  -rw-r--r-- 1 root root   126 01-19 17:53 saved_master_binlog_from_192.168.15.11_36677_20150119175335.binlog
  #rm -f app1.failover.complete
  
  重新开始测试:
  先开启MySQL-15.12上的MySQL服务
  #/etc/init.d/mysqld start
  Starting MySQL..                                           [确定]
  |||
  然后开启Lvs-15.23上masterha_manager,并检查状态
  #masterha_manager --conf=/etc/app1.cnf -remove_dead_master_conf
  Mon Jan 19 19:56:33 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 19:56:33 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 19:56:33 2015 - Reading server configuration from /etc/app1.cnf..
  #masterha_check_status --conf=/etc/app1.cnf
  app1 (pid:16839) is running(0:PING_OK), master:192.168.15.12
  |||
  停掉MySQL-15.12上的MySQL服务
  #/etc/init.d/mysqld stop
  Shutting down MySQL...                                     [确定]
  |||
  Lvs-15.23上masterha_manager完成自动切换
  #masterha_manager --conf=/etc/app1.cnf -remove_dead_master_conf
  Mon Jan 19 19:56:33 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 19:56:33 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 19:56:33 2015 - Reading server configuration from /etc/app1.cnf..
  Creating /var/log/masterha/app1 if not exists..    ok.
  Checking output directory is accessible or not..
  ok.
  Binlog found at /opt/data/log, up to log.000007
  Mon Jan 19 19:57:46 2015 - Global configuration file /etc/masterha_default.cnf not found. Skipping.
  Mon Jan 19 19:57:46 2015 - Reading application default configuration from /etc/app1.cnf..
  Mon Jan 19 19:57:46 2015 - Reading server configuration from /etc/app1.cnf..
  |||
  检查MySQL-15.11、MySQL-15.13的状态
  MySQL-15.11已由slave变为master
  #mysqlxxm -e &quot;show slave status\G&quot;
  #mysqlxxm -e &quot;show global status like 'rpl_semi_sync%';&quot;
  &#43;--------------------------------------------&#43;-------&#43;
  | Variable_name                              | Value |
  &#43;--------------------------------------------&#43;-------&#43;
  | Rpl_semi_sync_master_clients               | 2   |
  | Rpl_semi_sync_master_net_avg_wait_time   | 0   |
  | Rpl_semi_sync_master_net_wait_time         | 0   |
  | Rpl_semi_sync_master_net_waits             | 0   |
  | Rpl_semi_sync_master_no_times            | 0   |
  | Rpl_semi_sync_master_no_tx               | 0   |
  | Rpl_semi_sync_master_status                | ON    |
  | Rpl_semi_sync_master_timefunc_failures   | 0   |
  | Rpl_semi_sync_master_tx_avg_wait_time      | 0   |
  | Rpl_semi_sync_master_tx_wait_time          | 0   |
  | Rpl_semi_sync_master_tx_waits            | 0   |
  | Rpl_semi_sync_master_wait_pos_backtraverse | 0   |
  | Rpl_semi_sync_master_wait_sessions         | 0   |
  | Rpl_semi_sync_master_yes_tx                | 0   |
  | Rpl_semi_sync_slave_status               | OFF   |
  &#43;--------------------------------------------&#43;-------&#43;
  
  MySQL-15.13的master已由MySQL-15.12变为MySQL-15.11:
  #mysqlxxm -e &quot;show slave status\G&quot;
  *************************** 1. row ***************************
  Slave_IO_State: Waiting for master to send event
  Master_Host: 192.168.15.11
  Master_User: repl
  Master_Port: 36677
  ......
  
  |||
  再看MySQL-15.11已被master_ip_failover作用,接收写的vip
  #ip addr|grep 192.168.15.95
  inet 192.168.15.95/24 brd 192.168.15.255 scope global secondary eth0:0
  
  |||
  同时,Lvs-15.23上app1.cnf里的模块已被删除,此时再启动masterha_manager就不会再报错有结点not alive的错误了。
  #tail -15 /etc/app1.cnf
  repl_user=repl
  report_script=/usr/local/scripts/send_report
  ssh_port=2777
  ssh_user=root
  user=mha_manager
  
  
  candidate_master=1
  hostname=192.168.15.11
  
  
  hostname=192.168.15.13
  
  
  hostname=192.168.15.14
  
  
  至此,masterha_manager的自动failover测试告一段落,具体其他的(手动failover等)更多更详细的功能请参考官方说明。
  注:通过测试可知,slave、master之间可互换,为了完全数据一致,所以MySQL的服务器不论主从,my.cnf除个别参数(eg:server-id)不一样,其他操作均一致,例如创建repl账号、同时安装半同步复制主从插件等。
页: [1]
查看完整版本: MHA+Lvs+Keepalived实现MySQL的高可用及读负载均衡_3(MHA)