
[Experience Sharing] Installing HA (High Availability): Configuring ClusterIP and Tomcat

  #*********************************************************************************************
  # Purpose:
  #    Configure Tomcat high availability, used for the "Mobile+" product HA architecture (VIP + Tomcat)
  # Author:
  #    Shane.Wan (shanyiwan@msn.com)
  # Dependencies:
  #    puppetlabs-release-el-6.noarch.rpm
  #    resource-agents-3.9.5-3.1.x86_64.rpm
  #    http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/network:ha-clustering:Stable.repo
  # Changelog:
  #    15:55 2014/4/10  Changed the HOSTNAME= value in /etc/sysconfig/network; otherwise it reverts to the OS-install default on the next reboot
  #*********************************************************************************************
  I. Basic Environment Configuration
  

  VIP:192.168.148.27
  node1:192.168.148.31
  node2:192.168.148.32
  

  1. Basic configuration
  1) hosts file: make sure hostnames resolve on both nodes
  Note the HOSTNAME setting in /etc/sysconfig/network
  Node1:
  # sed -i 's@\(HOSTNAME=\).*@\1node1.cluster.com@g'  /etc/sysconfig/network
  # hostname node1.cluster.com
  Node2:
  # sed -i 's@\(HOSTNAME=\).*@\1node2.cluster.com@g' /etc/sysconfig/network
  # hostname node2.cluster.com
  

  [root@node1 ~]# cat /etc/hosts
  127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
  ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
  192.168.148.31  node1   node1.cluster.com
  192.168.148.32  node2   node2.cluster.com
  

  [root@node1 ~]# uname -n
  node1.cluster.com
  

  [root@node2 ~]# cat /etc/hosts
  127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
  ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
  # node info
  192.168.148.31  node1   node1.cluster.com
  192.168.148.32  node2   node2.cluster.com
  [root@node2 ~]# uname -n
  node2.cluster.com
  

  2) Time synchronization between the nodes
  [root@node1 ~]# yum install ntp
  [root@node1 ~]# ntpdate <time-server>
  

  [root@node2 ~]# yum install ntp
  [root@node2 ~]# ntpdate <time-server>
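
  Note: a single ntpdate run only corrects the clock once. To keep the two nodes in sync afterwards, you can either run ntpd or re-run ntpdate from cron; a minimal sketch (repeat on node2, and replace <time-server> with your NTP server as above):
  [root@node1 ~]# chkconfig ntpd on && service ntpd start
  # or re-sync every 30 minutes via cron
  [root@node1 ~]# echo '*/30 * * * * root /usr/sbin/ntpdate <time-server> >/dev/null 2>&1' >> /etc/crontab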
  

  3) Mutual SSH trust between the nodes
  Generate a key pair and copy the public key so that key-based logins are allowed
  [root@node1 ~]# ssh-keygen  -t rsa -f ~/.ssh/id_rsa  -P ''
  [root@node1 ~]# ssh-copy-id -i .ssh/id_rsa.pub root@node2
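
  The two commands above only establish trust from node1 to node2. If you also want to run commands on node1 from node2, repeat the same steps in the opposite direction (optional, but convenient):
  [root@node2 ~]# ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
  [root@node2 ~]# ssh-copy-id -i .ssh/id_rsa.pub root@node1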
  

  4) Disable the firewall (test environment only) and SELinux
  Note: in production, open the corosync communication port between the two servers (by default mcastport: 5405, set in /etc/corosync/corosync.conf)
  [root@node1 ~]# service iptables stop
  [root@node1 ~]# vi /etc/selinux/config
  Disable SELinux:
  SELINUX=disabled
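
  If you keep iptables running in production, allow the corosync traffic on both nodes (UDP; corosync uses mcastport and mcastport-1, i.e. 5405 and 5404 with the configuration used below). A sketch; tighten it to the peer node's source address as needed:
  [root@node1 ~]# iptables -I INPUT -p udp -m udp --dport 5404:5405 -j ACCEPT
  [root@node1 ~]# service iptables save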
  

  2. Install pacemaker and corosync on both nodes
  (1) Configure the repo
  [root@node1 src]# rpm -ivh puppetlabs-release-el-6.noarch.rpm
  Preparing...                ########################################### [100%]
  

  [root@node2 src]# rpm -ivh puppetlabs-release-el-6.noarch.rpm
  Preparing...                ########################################### [100%]
  

  (2) Install pacemaker & corosync
  [root@node1 ~]# yum install pacemaker corosync
  

  [root@node2 ~]# yum install pacemaker corosync
  

  

  3. Software configuration
  1) Corosync configuration
  [root@node1 ~]# cd /etc/corosync/
  [root@node1 corosync]# ll
  total 16
  -rw-r--r--. 1 root root  445 Nov 22 18:37 corosync.conf.example
  -rw-r--r--. 1 root root 1084 Nov 22 18:37 corosync.conf.example.udpu
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 service.d
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 uidgid.d
  

  (1) Configure corosync.conf
  [root@node1 corosync]# cp corosync.conf.example corosync.conf
  # Edit the configuration file (changed lines are marked with a trailing #)
  [root@node1 corosync]# cat corosync.conf
  # Please read the corosync.conf.5 manual page
  compatibility: whitetank
  

  totem {
  version: 2
  secauth: on#
  threads: 0
  interface {
  ringnumber: 0
  bindnetaddr: 192.168.148.0#
  mcastaddr: 226.94.1.1
  mcastport: 5405
  ttl: 1
  }
  }
  

  logging {
  fileline: off
  to_stderr: no
  to_logfile: yes
  to_syslog: yes
  logfile: /var/log/cluster/corosync.log
  debug: off
  timestamp: on
  logger_subsys {
  subsys: AMF
  debug: off
  }
  }
  

  amf {
  mode: disabled
  }
  # enable pacemaker
  service {
  ver: 0
  name: pacemaker
  }
  aisexec {
  user: root
  group: root
  }
  

  (2) Generate the authentication key (this can take a while, depending on machine performance and available entropy)
  [root@node1 corosync]# corosync-keygen
  Corosync Cluster Engine Authentication key generator.
  Gathering 1024 bits for key from /dev/random.
  Press keys on your keyboard to generate entropy.
  Press keys on your keyboard to generate entropy (bits = 152).
  Press keys on your keyboard to generate entropy (bits = 216).
  Press keys on your keyboard to generate entropy (bits = 288).
  Press keys on your keyboard to generate entropy (bits = 352).
  Press keys on your keyboard to generate entropy (bits = 416).
  Press keys on your keyboard to generate entropy (bits = 480).
  Press keys on your keyboard to generate entropy (bits = 544).
  Press keys on your keyboard to generate entropy (bits = 616).
  Press keys on your keyboard to generate entropy (bits = 680).
  Press keys on your keyboard to generate entropy (bits = 752).
  Press keys on your keyboard to generate entropy (bits = 816).
  Press keys on your keyboard to generate entropy (bits = 880).
  Press keys on your keyboard to generate entropy (bits = 952).
  Press keys on your keyboard to generate entropy (bits = 1016).
  Writing corosync key to /etc/corosync/authkey.
  [root@node1 corosync]# ll
  total 24
  -r--------. 1 root root  128 Mar  6 12:24 authkey
  -rw-r--r--. 1 root root  560 Mar  6 11:41 corosync.conf
  -rw-r--r--. 1 root root  445 Nov 22 18:37 corosync.conf.example
  -rw-r--r--. 1 root root 1084 Nov 22 18:37 corosync.conf.example.udpu
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 service.d
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 uidgid.d
  

  (3) Copy authkey and corosync.conf to node2
  [root@node1 corosync]# scp -p authkey corosync.conf node2:/etc/corosync/
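
  Since secauth is enabled, the nodes cannot form a cluster unless the key is identical and root-only readable on both of them; a quick check on node2 does not hurt (scp -p preserves the 0400 mode and timestamps):
  [root@node1 corosync]# ssh node2 ls -l /etc/corosync/authkey /etc/corosync/corosync.conf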
  

  4. Verify the Corosync installation
  1) Start Corosync
  [root@node1 corosync]# service corosync start
  Starting Corosync Cluster Engine (corosync):               [  OK  ]
  [root@node1 corosync]# corosync-cfgtool -s
  Printing ring status.
  Local node ID 563390656
  RING ID 0
  id      = 192.168.148.31
  status  = ring 0 active with no faults
  

  [root@node1 corosync]# ssh node2 service corosync start
  Starting Corosync Cluster Engine (corosync): [  OK  ]
  [root@node1 corosync]# ssh node2 corosync-cfgtool -s
  Printing ring status.
  Local node ID 898934976
  RING ID 0
  id      = 192.168.148.32
  status  = ring 0 active with no faults
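
  Optionally, make the stack start automatically at boot on both nodes (with ver: 0 in the service block, corosync also starts pacemaker). Some administrators prefer to leave this off and start a recovered node manually after investigating the failure:
  [root@node1 corosync]# chkconfig corosync on
  [root@node1 corosync]# ssh node2 chkconfig corosync on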
  

  

  2) Check the startup logs
  # Check whether the corosync engine started properly
  [root@node1 corosync]# grep -e "Corosync Cluster Engine" -e "configuration file" /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [MAIN  ] Corosync Cluster Engine ('1.4.1'): started and ready to provide service.
  Mar 06 16:28:10 corosync [MAIN  ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
  

  # Check whether the initial membership notifications were sent properly
  [root@node1 corosync]# grep TOTEM /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [TOTEM ] Initializing transport (UDP/IP Multicast).
  Mar 06 16:28:10 corosync [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
  Mar 06 16:28:10 corosync [TOTEM ] The network interface [192.168.148.31] is now up.
  Mar 06 16:28:11 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
  Mar 06 16:29:51 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
  ... (the same membership message repeats every second or two; the duplicate entries up to the last one are omitted here)
  Mar 06 16:31:16 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
  

  # Check whether pacemaker started properly
  [root@node1 corosync]# grep pcmk_startup /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [pcmk  ] info: pcmk_startup: CRM: Initialized
  Mar 06 16:28:11 corosync [pcmk  ] Logging: Initialized pcmk_startup
  Mar 06 16:28:11 corosync [pcmk  ] info: pcmk_startup: Maximum core file size is: 18446744073709551615
  Mar 06 16:28:11 corosync [pcmk  ] info: pcmk_startup: Service: 9
  Mar 06 16:28:11 corosync [pcmk  ] info: pcmk_startup: Local hostname: node1.cluster.com
  

  # Check whether any errors occurred during startup (the plugin-related messages below are expected with the legacy corosync 1.x Pacemaker plugin used here and do not prevent the cluster from working, as the following steps show)
  [root@node1 corosync]# grep ERROR: /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [pcmk  ] ERROR: process_ais_conf: You have configured a cluster using the Pacemaker plugin for Corosync. The plugin is not supported in this environment and will be removed very soon.
  Mar 06 16:28:10 corosync [pcmk  ] ERROR: process_ais_conf:  Please see Chapter 8 of 'Clusters from Scratch' (http://www.clusterlabs.org/doc) for details on using Pacemaker with CMAN
  

  3) Check the cluster status
  [root@node1 corosync]# crm_mon -1
  Last updated: Thu Mar  6 16:39:50 2014
  Last change: Thu Mar  6 16:39:36 2014 via crmd on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node1.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  0 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  5. Install crmsh
  [root@node1 ~]# cd /etc/yum.repos.d/
  [root@node1 yum.repos.d]# wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/network:ha-clustering:Stable.repo
  [root@node1 yum.repos.d]# yum install crmsh
  Loaded plugins: fastestmirror
  Loading mirror speeds from cached hostfile
  * base: mirror.bit.edu.cn
  * centos-6-base: mirror.bit.edu.cn
  * extras: mirror.bit.edu.cn
  * updates: mirror.bit.edu.cn
  Setting up Install Process
  Resolving Dependencies
  --> Running transaction check
  ---> Package crmsh.x86_64 0:1.2.6-6.1 will be installed
  --> Processing Dependency: python-lxml for package: crmsh-1.2.6-6.1.x86_64
  --> Processing Dependency: pssh for package: crmsh-1.2.6-6.1.x86_64
  --> Processing Dependency: python-dateutil for package: crmsh-1.2.6-6.1.x86_64
  --> Running transaction check
  ---> Package pssh.x86_64 0:2.3.1-3.2 will be installed
  ---> Package python-dateutil.noarch 0:1.4.1-6.el6 will be installed
  ---> Package python-lxml.x86_64 0:2.2.3-1.1.el6 will be installed
  --> Finished Dependency Resolution
  

  Dependencies Resolved
  

  ====================================================================================================================================
  Package                       Arch                 Version                        Repository                                  Size
  ====================================================================================================================================
  Installing:
  crmsh                         x86_64               1.2.6-6.1                      network_ha-clustering_Stable               489 k
  Installing for dependencies:
  pssh                          x86_64               2.3.1-3.2                      network_ha-clustering_Stable                50 k
  python-dateutil               noarch               1.4.1-6.el6                    base                                        84 k
  python-lxml                   x86_64               2.2.3-1.1.el6                  base                                       2.0 M
  

  Transaction Summary
  ====================================================================================================================================
  Install       4 Package(s)
  

  Total download size: 2.6 M
  Installed size: 24 M
  Is this ok [y/N]: y
  Downloading Packages:
  (1/4): crmsh-1.2.6-6.1.x86_64.rpm                                                                            | 489 kB     00:02
  (2/4): pssh-2.3.1-3.2.x86_64.rpm                                                                             |  50 kB     00:00
  (3/4): python-dateutil-1.4.1-6.el6.noarch.rpm                                                                |  84 kB     00:00
  (4/4): python-lxml-2.2.3-1.1.el6.x86_64.rpm                                                                  | 2.0 MB     00:08
  ------------------------------------------------------------------------------------------------------------------------------------
  Total                                                                                               144 kB/s | 2.6 MB     00:18
  warning: rpmts_HdrFromFdno: Header V3 RSA/SHA1 Signature, key ID 17280ddf: NOKEY
  Retrieving key from http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/repodata/repomd.xml.key
  Importing GPG key 0x17280DDF:
  Userid: "network OBS Project "
  From  : http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/repodata/repomd.xml.key
  Is this ok [y/N]: y
  Running rpm_check_debug
  Running Transaction Test
  Transaction Test Succeeded
  Running Transaction
  Installing : pssh-2.3.1-3.2.x86_64                                                                                            1/4
  Installing : python-lxml-2.2.3-1.1.el6.x86_64                                                                                 2/4
  Installing : python-dateutil-1.4.1-6.el6.noarch                                                                               3/4
  Installing : crmsh-1.2.6-6.1.x86_64                                                                                           4/4
  Verifying  : python-dateutil-1.4.1-6.el6.noarch                                                                               1/4
  Verifying  : python-lxml-2.2.3-1.1.el6.x86_64                                                                                 2/4
  Verifying  : pssh-2.3.1-3.2.x86_64                                                                                            3/4
  Verifying  : crmsh-1.2.6-6.1.x86_64                                                                                           4/4
  

  Installed:
  crmsh.x86_64 0:1.2.6-6.1
  

  Dependency Installed:
  pssh.x86_64 0:2.3.1-3.2            python-dateutil.noarch 0:1.4.1-6.el6            python-lxml.x86_64 0:2.2.3-1.1.el6
  

  Complete!
  [root@node1 yum.repos.d]#
  

  6. Create an active/standby cluster
  1) Review the existing configuration
  [root@node1 yum.repos.d]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2"
  [root@node1 yum.repos.d]#
  [root@node1 yum.repos.d]# crm configure show xml
  (XML output not reproduced here; it is simply the XML form of the configuration shown above)

  # Validate the configuration
  [root@node1 ~]# crm_verify -L -V
  error: unpack_resources:     Resource start-up disabled since no STONITH resources have been defined
  error: unpack_resources:     Either configure some or disable STONITH with the stonith-enabled option
  error: unpack_resources:     NOTE: Clusters with shared data need STONITH to ensure data integrity
  Errors found during check: config not valid
  

  

  # To disable STONITH, we set the stonith-enabled cluster option to false.
  [root@node1 ~]# crm configure property stonith-enabled=false
  [root@node1 ~]# crm_verify -L
  

  2) Add a resource
  (1) Configure an IP address (the VIP). No matter which node the cluster services are running on, we need one fixed address to reach them. Here 192.168.148.27 is chosen as the floating IP, given the memorable name ClusterIP, and the cluster is told to check it every 30 seconds.
  

  [root@node1 ~]# crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  

  # Now check that the IP resource has been added and that it is in a usable state.
  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false"
  

  

  [root@node1 ~]# crm status
  Last updated: Fri Mar  7 11:06:10 2014
  Last change: Fri Mar  7 10:50:21 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node1.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
  

  [root@node1 ~]# ip a
  1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
  link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
  inet 127.0.0.1/8 scope host lo
  2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
  link/ether 00:50:56:b0:2f:71 brd ff:ff:ff:ff:ff:ff
  inet 192.168.148.33/24 brd 192.168.148.255 scope global eth0
  inet 192.168.148.27/32 brd 192.168.148.255 scope global eth0
  

  (2) Test failover
  # First, find out which node the IP resource is currently running on
  [root@node1 ~]# crm resource status ClusterIP
  resource ClusterIP is running on: node1.cluster.com
  

  # Stop Pacemaker and Corosync on that node
  [root@node1 ~]# service corosync stop
  Signaling Corosync Cluster Engine (corosync) to terminate: [  OK  ]
  Waiting for corosync services to unload:..                 [  OK  ]
  

  # With corosync stopped, check the cluster status from the other node
  [root@node2 ~]# crm status
  Last updated: Fri Mar  7 11:05:20 2014
  Last change: Fri Mar  7 10:50:21 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node1.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
  

  [root@node2 ~]# crm status
  Last updated: Fri Mar  7 11:13:09 2014
  Last change: Fri Mar  7 10:50:21 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition WITHOUT quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node2.cluster.com ]
  OFFLINE: [ node1.cluster.com ]
  

  # There are two things to notice about the cluster state here: first, as expected, node1 is now offline; second, ClusterIP is not running anywhere!
  

  A. For a two-node cluster, set no-quorum-policy to ignore, so that when one node goes down the other can keep running normally.
  

  

  [root@node2 ~]# crm configure property no-quorum-policy=ignore
  [root@node2 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore"
  

  # After a moment, the cluster starts the IP on the remaining node. Note that the cluster still does not have quorum.
  [root@node2 ~]# crm status
  Last updated: Fri Mar  7 11:17:27 2014
  Last change: Fri Mar  7 11:17:02 2014 via cibadmin on node2.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition WITHOUT quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node2.cluster.com ]
  OFFLINE: [ node1.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node2.cluster.com
  

  # Now simulate node recovery: start the Corosync service on node1 and check the cluster status again.
  [root@node1 ~]# service corosync start
  Starting Corosync Cluster Engine (corosync):               [  OK  ]
  

  [root@node1 ~]# crm status
  Last updated: Fri Mar  7 11:19:55 2014
  Last change: Fri Mar  7 11:17:02 2014 via cibadmin on node2.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node2.cluster.com
  [root@node1 ~]#
  # Note: without resource stickiness the cluster is free to move the IP back to node1 now that it has recovered (in this run it happened to stay on node2). Automatic fail-back after recovery is usually unwanted, which is what the next step addresses.
  

  B. Prevent resources from moving back after a node recovers (resource-stickiness)
  Resource stickiness (resource-stickiness) expresses how strongly a resource prefers to stay on the node it is currently running on.
  Value ranges and their effect:
  0: the default. The resource is placed in the most suitable location in the system, which means it is moved whenever a "better" or less loaded node becomes available. This is essentially automatic fail-back, except that the resource may move to a node other than the one it was previously active on.
  Greater than 0: the resource prefers to stay where it is, but will move if a more suitable node becomes available. Higher values mean a stronger preference to stay put.
  Less than 0: the resource prefers to move away from its current location. The larger the absolute value, the stronger that preference.
  INFINITY: unless forced off (node shutdown, node standby, migration-threshold reached, or a configuration change), the resource always stays where it is. This effectively disables automatic fail-back.
  -INFINITY: the resource always moves away from its current location.
  

  

  [root@node1 ~]# crm configure rsc_defaults resource-stickiness=100
  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore"
  rsc_defaults $id="rsc-options" \
  resource-stickiness="100"
  

  [root@node1 ~]# crm status
  Last updated: Fri Mar 14 19:02:39 2014
  Last change: Fri Mar 14 18:53:52 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
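
  The rsc_defaults value above applies to every resource. If you prefer to make only a single resource sticky, the same value can instead be set as a meta attribute on that resource; a minimal sketch:
  [root@node1 ~]# crm resource meta ClusterIP set resource-stickiness 200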
  

  3) Add more resources (services)
  # There is no tomcat resource agent by default, so install the resource-agents package
  # rpm -ivh resource-agents-3.9.5-3.1.x86_64.rpm
  # If dependency problems are reported, try installing it via yum instead (yum resolves dependencies automatically):
  # yum install resource-agents-3.9.5-3.1.x86_64.rpm
  # or
  # yum update resource-agents
  # Then add the tomcat resource (ocf_heartbeat_tomcat — Manages a Tomcat servlet environment instance)
  

  (1) Preparation:
  A. Note the environment variables from the Tomcat installation; they are needed when configuring pacemaker (the Tomcat startup script prints them):
  Using CATALINA_BASE:   /usr/local/apache-tomcat-8.0.8
  Using CATALINA_HOME:   /usr/local/apache-tomcat-8.0.8
  Using CATALINA_TMPDIR: /usr/local/apache-tomcat-8.0.8/temp
  Using JRE_HOME:        /usr
  Using CLASSPATH:       /usr/local/apache-tomcat-8.0.8/bin/bootstrap.jar:/usr/local/apache-tomcat-8.0.8/bin/tomcat-juli.jar
  

  B. Locate the JDK to determine JAVA_HOME
  [root@node1 ~]# cd /usr/java/
  [root@node1 java]# pwd
  /usr/java
  [root@node1 java]# ll
  total 4
  lrwxrwxrwx 1 root root   16 Mar 14 18:43 default -> /usr/java/latest
  drwxr-xr-x 8 root root 4096 Mar 14 18:42 jdk1.7.0_45
  lrwxrwxrwx 1 root root   21 Mar 14 18:43 latest -> /usr/java/jdk1.7.0_45
  

  From this we can see the JDK location, so:
  JAVA_HOME=/usr/java/default
  

  C. Create a symlink to the current Tomcat directory, so that future version upgrades are easier (do this on both nodes)
  [root@node1 ~]# ln -s /usr/local/apache-tomcat-8.0.8 /usr/local/tomcat
  [root@node2 ~]# ln -s /usr/local/apache-tomcat-8.0.8 /usr/local/tomcat
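
  As a last preparation step, confirm that the tomcat agent is actually available after installing resource-agents, and check which parameters your agent version accepts (java_home, catalina_home, statusurl, and so on):
  [root@node1 ~]# crm ra list ocf heartbeat | grep -w tomcat
  [root@node1 ~]# crm ra info ocf:heartbeat:tomcat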
  

  (2) Add the tomcat resource
  [root@node1 ~]# crm configure primitive tomcat ocf:heartbeat:tomcat  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" op monitor timeout="30s" interval="15s" op start interval="0" timeout="90s" op stop interval="0" timeout="120s"
  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  primitive tomcat tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout=30s interval=15s \
  op start interval=0 timeout=90s \
  op stop interval=0 timeout=120s
  property cib-bootstrap-options: \
  dc-version=1.1.10-14.el6_5.2-368c726 \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes=2 \
  stonith-enabled=false \
  no-quorum-policy=ignore
  rsc_defaults rsc-options: \
  resource-stickiness=100
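
  Note that statusurl is what the agent's monitor action polls, so it must return a successful response locally on whichever node hosts the resource; the /design/ path and port 80 here are specific to this deployment. A quick sanity check (expect an HTTP 200):
  [root@node1 ~]# curl -s -o /dev/null -w '%{http_code}\n' http://127.0.0.1:80/design/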
  

  # Also consider adding the rsync daemon to the cluster (for the design center)
  [root@node1 ~]# crm configure primitive rsyncd ocf:heartbeat:rsyncd \
  op monitor depth="0" timeout="20s" interval="60s"
  

  # nfsserver
  [root@node1 ~]# crm configure primitive nfsserver ocf:heartbeat:nfsserver \
  params \
  nfs_init_script="/etc/init.d/nfs" \
  nfs_shared_infodir="/var/lib/nfs" \
  nfs_notify_cmd="/sbin/rpc.statd" \
  nfs_ip="192.168.148.27" \
  op monitor depth="0" timeout="40s" interval="10"
  

  

  (3) Resource constraints
  A. The new resources will not necessarily run on the same node as the IP; add colocation constraints to make sure each one runs together with ClusterIP
  # tomcat
  [root@node1 ~]# crm configure colocation tomcat-with-ip INFINITY: tomcat ClusterIP
  

  # rsyncd
  [root@node1 ~]# crm configure colocation rsyncd-with-ip INFINITY: rsyncd ClusterIP
  

  # nfsserver
  [root@node1 ~]# crm configure colocation nfsserver-with-ip INFINITY: nfsserver ClusterIP
  

  B. Control the start/stop order so that ClusterIP is already active before each resource is started
  # tomcat
  [root@node1 ~]# crm configure order tomcat-after-ip mandatory: ClusterIP tomcat
  

  # rsyncd
  [root@node1 ~]# crm configure order rsyncd-after-ip mandatory: ClusterIP rsyncd
  

  # nfsserver
  [root@node1 ~]# crm configure order nfsserver-after-ip mandatory: ClusterIP nfsserver
  

  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  primitive nfsserver nfsserver \
  params nfs_init_script="/etc/init.d/nfs" nfs_shared_infodir="/var/lib/nfs" nfs_ip=192.168.148.27 \
  op monitor timeout=20s interval=10 depth=0
  primitive tomcat tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout=30s interval=15s \
  op start interval=0 timeout=90s \
  op stop interval=0 timeout=120s
  colocation nfsserver-with-ip inf: nfsserver ClusterIP
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order nfsserver-after-ip Mandatory: ClusterIP nfsserver
  order tomcat-after-ip Mandatory: ClusterIP tomcat
  property cib-bootstrap-options: \
  dc-version=1.1.10-14.el6_5.2-368c726 \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes=2 \
  stonith-enabled=false \
  no-quorum-policy=ignore
  rsc_defaults rsc-options: \
  resource-stickiness=100
  

  # The final, complete configuration:
  [root@node1 heartbeat]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  primitive nfsserver nfsserver \
  params nfs_init_script="/etc/init.d/nfs" nfs_shared_infodir="/var/lib/nfs" nfs_notify_cmd="/sbin/rpc.statd" nfs_ip=192.168.148.27 \
  op monitor timeout=40s interval=10 depth=0 \
  meta target-role=Started
  primitive tomcat tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout=90s interval=15s \
  op start interval=0 timeout=120s \
  op stop interval=0 timeout=120s \
  meta target-role=Started
  colocation nfsserver-with-ip inf: nfsserver ClusterIP
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order nfsserver-after-ip Mandatory: ClusterIP nfsserver
  order tomcat-after-ip Mandatory: ClusterIP tomcat
  property cib-bootstrap-options: \
  dc-version=1.1.10-14.el6_5.2-368c726 \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes=2 \
  stonith-enabled=false \
  no-quorum-policy=ignore \
  last-lrm-refresh=1415113828
  rsc_defaults rsc-options: \
  resource-stickiness=100
  

  (4) Manually move resources within the cluster (useful during maintenance)
  # The resources are currently running on node2
  [root@node1 ~]# crm status
  Last updated: Mon Mar 17 08:32:13 2014
  Last change: Sat Mar 15 11:48:45 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  2 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node2.cluster.com
  tomcat (ocf::heartbeat:tomcat):        Started node2.cluster.com
  

  # Move the resources to node1
  [root@node1 ~]# crm resource move tomcat node1.cluster.com
  [root@node1 ~]# crm status
  Last updated: Tue Mar 11 11:37:50 2014
  Last change: Tue Mar 11 11:37:43 2014 via crm_resource on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  2 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
  tomcat        (ocf::heartbeat:tomcat):        Started node1.cluster.com
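
  Keep in mind that crm resource move works by adding a location constraint pinning the resource to node1. Once maintenance is finished, clear it so the cluster is free to place the resource again (the same unmigrate command appears in section II below):
  [root@node1 ~]# crm resource unmigrate tomcat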
  

  

  II. Basic Pacemaker Operations
  1. Node state transitions
  Switch node node1.cluster.com from online to standby, and then back from standby to online
  # crm node standby node1.cluster.com
  [root@node1 ~]# crm configure show
  node node1.cluster.com \
  attributes standby="on"
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  primitive tomcat ocf:heartbeat:tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout="30s" interval="15s" \
  op start interval="0" timeout="90s" \
  op stop interval="0" timeout="120s"
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order tomcat-after-ip inf: ClusterIP tomcat
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore" \
  last-lrm-refresh="1394178592"
  rsc_defaults $id="rsc-options" \
  resource-stickiness="100"
  

  # crm node online node1.cluster.com
  [root@node1 ~]# crm configure show
  node node1.cluster.com \
  attributes standby="off"
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  primitive tomcat ocf:heartbeat:tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout="30s" interval="15s" \
  op start interval="0" timeout="90s" \
  op stop interval="0" timeout="120s"
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order tomcat-after-ip inf: ClusterIP tomcat
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore" \
  last-lrm-refresh="1394178592"
  rsc_defaults $id="rsc-options" \
  resource-stickiness="100"
  

  2. Resource migration
  Migrate the tomcat resource from node1.cluster.com to node2.cluster.com, then hand it back by clearing the migration constraint
  # crm resource migrate tomcat node2.cluster.com
  # crm resource unmigrate tomcat
  3. Check cluster status
  # crm status
  # crm_mon or crm_mon -1
  

  4. Save, edit, and load the configuration
  # crm configure save <filename>
  # crm configure edit
  # crm configure load <filename>
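
  A usage sketch with an example file name (note that crmsh's configure load expects a method, replace or update, before the file):
  [root@node1 ~]# crm configure save /root/cluster-config.crm
  [root@node1 ~]# crm configure load update /root/cluster-config.crm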
  

  

  

  III. References
  http://clusterlabs.org/doc/en-US/Pacemaker/1.1-crmsh/html/Pacemaker_Explained/ap-lsb.html
  http://www.linux-ha.org/doc/man-pages/re-ra-tomcat.html
  




