
[Experience Sharing] Installing HA (High Availability): Configuring ClusterIP and Tomcat

  #*********************************************************************************************
  # Purpose:
  #    Configure Tomcat high availability, used for the "Mobile+" product HA architecture (VIP + Tomcat)
  # Author:
  #    Shane.Wan (shanyiwan@msn.com)
  # Dependencies:
  #    puppetlabs-release-el-6.noarch.rpm
  #    resource-agents-3.9.5-3.1.x86_64.rpm
  #    http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/network:ha-clustering:Stable.repo
  # Changelog:
  #    15:55 2014/4/10  Changed the HOSTNAME= value in /etc/sysconfig/network; otherwise it reverts to the OS-install default on the next reboot
  #*********************************************************************************************
  I. Basic Environment Configuration
  

  VIP:192.168.148.27
  node1:192.168.148.31
  node2:192.168.148.32
  

  1. Basic configuration
  1) hosts file: make sure hostnames resolve on both nodes
  Note the HOSTNAME setting in /etc/sysconfig/network
  Node1:
  # sed -i 's@\(HOSTNAME=\).*@\1node1.cluster.com@g'  /etc/sysconfig/network
  # hostname node1.cluster.com
  Node2:
  # sed -i 's@\(HOSTNAME=\).*@\1node2.cluster.com@g' /etc/sysconfig/network
  # hostname node2.cluster.com
  

  [root@node1 ~]# cat /etc/hosts
  127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
  ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
  192.168.148.31  node1   node1.cluster.com
  192.168.148.32  node2   node2.cluster.com
  

  [root@node1 ~]# uname -n
  node1.cluster.com
  

  [root@node2 ~]# cat /etc/hosts
  127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
  ::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
  # node info
  192.168.148.31  node1   node1.cluster.com
  192.168.148.32  node2   node2.cluster.com
  [root@node2 ~]# uname -n
  node2.cluster.com
  

  2) Time synchronization between the nodes
  [root@node1 ~]# yum install ntp
  [root@node1 ~]# ntpdate <time-server>
  

  [root@node2 ~]# yum install ntp
  [root@node2 ~]# ntpdate <time-server>
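
  Note: a single ntpdate run only corrects the clock once. To keep the two nodes in sync afterwards, you can either run ntpd or re-run ntpdate from cron; a minimal sketch (repeat on node2, and replace <time-server> with your NTP server as above):
  [root@node1 ~]# chkconfig ntpd on && service ntpd start
  # or re-sync every 30 minutes via cron
  [root@node1 ~]# echo '*/30 * * * * root /usr/sbin/ntpdate <time-server> >/dev/null 2>&1' >> /etc/crontab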
  

  3) Mutual SSH trust between the nodes
  Generate a key pair and copy the public key so that key-based logins are allowed
  [root@node1 ~]# ssh-keygen  -t rsa -f ~/.ssh/id_rsa  -P ''
  [root@node1 ~]# ssh-copy-id -i .ssh/id_rsa.pub root@node2
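
  The two commands above only establish trust from node1 to node2. If you also want to run commands on node1 from node2, repeat the same steps in the opposite direction (optional, but convenient):
  [root@node2 ~]# ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
  [root@node2 ~]# ssh-copy-id -i .ssh/id_rsa.pub root@node1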
  

  4) Disable the firewall (test environment only) and SELinux
  Note: in production, open the corosync communication port between the two servers (by default mcastport: 5405, set in /etc/corosync/corosync.conf)
  [root@node1 ~]# service iptables stop
  [root@node1 ~]# vi /etc/selinux/config
  Disable SELinux:
  SELINUX=disabled
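
  If you keep iptables running in production, allow the corosync traffic on both nodes (UDP; corosync uses mcastport and mcastport-1, i.e. 5405 and 5404 with the configuration used below). A sketch; tighten it to the peer node's source address as needed:
  [root@node1 ~]# iptables -I INPUT -p udp -m udp --dport 5404:5405 -j ACCEPT
  [root@node1 ~]# service iptables save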
  

  2. Install pacemaker and corosync on both nodes
  (1) Configure the repo
  [root@node1 src]# rpm -ivh puppetlabs-release-el-6.noarch.rpm
  Preparing...                ########################################### [100%]
  

  [root@node2 src]# rpm -ivh puppetlabs-release-el-6.noarch.rpm
  Preparing...                ########################################### [100%]
  

  (2) Install pacemaker & corosync
  [root@node1 ~]# yum install pacemaker corosync
  

  [root@node2 ~]# yum install pacemaker corosync
  

  

  3. Software configuration
  1) Corosync configuration
  [root@node1 ~]# cd /etc/corosync/
  [root@node1 corosync]# ll
  total 16
  -rw-r--r--. 1 root root  445 Nov 22 18:37 corosync.conf.example
  -rw-r--r--. 1 root root 1084 Nov 22 18:37 corosync.conf.example.udpu
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 service.d
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 uidgid.d
  

  (1) Configure corosync.conf
  [root@node1 corosync]# cp corosync.conf.example corosync.conf
  # Edit the configuration file (changed lines are marked with a trailing #)
  [root@node1 corosync]# cat corosync.conf
  # Please read the corosync.conf.5 manual page
  compatibility: whitetank
  

  totem {
  version: 2
  secauth: on#
  threads: 0
  interface {
  ringnumber: 0
  bindnetaddr: 192.168.148.0#
  mcastaddr: 226.94.1.1
  mcastport: 5405
  ttl: 1
  }
  }
  

  logging {
  fileline: off
  to_stderr: no
  to_logfile: yes
  to_syslog: yes
  logfile: /var/log/cluster/corosync.log
  debug: off
  timestamp: on
  logger_subsys {
  subsys: AMF
  debug: off
  }
  }
  

  amf {
  mode: disabled
  }
  # enable pacemaker
  service {
  ver: 0
  name: pacemaker
  }
  aisexec {
  user: root
  group: root
  }
  

  (2) Generate the authentication key (this can take a while, depending on machine performance and available entropy)
  [root@node1 corosync]# corosync-keygen
  Corosync Cluster Engine Authentication key generator.
  Gathering 1024 bits for key from /dev/random.
  Press keys on your keyboard to generate entropy.
  Press keys on your keyboard to generate entropy (bits = 152).
  Press keys on your keyboard to generate entropy (bits = 216).
  Press keys on your keyboard to generate entropy (bits = 288).
  Press keys on your keyboard to generate entropy (bits = 352).
  Press keys on your keyboard to generate entropy (bits = 416).
  Press keys on your keyboard to generate entropy (bits = 480).
  Press keys on your keyboard to generate entropy (bits = 544).
  Press keys on your keyboard to generate entropy (bits = 616).
  Press keys on your keyboard to generate entropy (bits = 680).
  Press keys on your keyboard to generate entropy (bits = 752).
  Press keys on your keyboard to generate entropy (bits = 816).
  Press keys on your keyboard to generate entropy (bits = 880).
  Press keys on your keyboard to generate entropy (bits = 952).
  Press keys on your keyboard to generate entropy (bits = 1016).
  Writing corosync key to /etc/corosync/authkey.
  [root@node1 corosync]# ll
  total 24
  -r--------. 1 root root  128 Mar  6 12:24 authkey
  -rw-r--r--. 1 root root  560 Mar  6 11:41 corosync.conf
  -rw-r--r--. 1 root root  445 Nov 22 18:37 corosync.conf.example
  -rw-r--r--. 1 root root 1084 Nov 22 18:37 corosync.conf.example.udpu
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 service.d
  drwxr-xr-x. 2 root root 4096 Nov 22 18:37 uidgid.d
  

  (3) Copy authkey and corosync.conf to node2
  [root@node1 corosync]# scp -p authkey corosync.conf node2:/etc/corosync/
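
  Since secauth is enabled, the nodes cannot form a cluster unless the key is identical and root-only readable on both of them; a quick check on node2 does not hurt (scp -p preserves the 0400 mode and timestamps):
  [root@node1 corosync]# ssh node2 ls -l /etc/corosync/authkey /etc/corosync/corosync.conf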
  

  4. Verify the Corosync installation
  1) Start Corosync
  [root@node1 corosync]# service corosync start
  Starting Corosync Cluster Engine (corosync):               [  OK  ]
  [root@node1 corosync]# corosync-cfgtool -s
  Printing ring status.
  Local node ID 563390656
  RING ID 0
  id      = 192.168.148.31
  status  = ring 0 active with no faults
  

  [root@node1 corosync]# ssh node2 service corosync start
  Starting Corosync Cluster Engine (corosync): [  OK  ]
  [root@node1 corosync]# ssh node2 corosync-cfgtool -s
  Printing ring status.
  Local node ID 898934976
  RING ID 0
  id      = 192.168.148.32
  status  = ring 0 active with no faults
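
  Optionally, make the stack start automatically at boot on both nodes (with ver: 0 in the service block, corosync also starts pacemaker). Some administrators prefer to leave this off and start a recovered node manually after investigating the failure:
  [root@node1 corosync]# chkconfig corosync on
  [root@node1 corosync]# ssh node2 chkconfig corosync on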
  

  

  2) Check the startup logs
  # Check whether the corosync engine started properly
  [root@node1 corosync]# grep -e "Corosync Cluster Engine" -e "configuration file" /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [MAIN  ] Corosync Cluster Engine ('1.4.1'): started and ready to provide service.
  Mar 06 16:28:10 corosync [MAIN  ] Successfully read main configuration file '/etc/corosync/corosync.conf'.
  

  # Check whether the initial membership notifications were sent properly
  [root@node1 corosync]# grep TOTEM /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [TOTEM ] Initializing transport (UDP/IP Multicast).
  Mar 06 16:28:10 corosync [TOTEM ] Initializing transmit/receive security: libtomcrypt SOBER128/SHA1HMAC (mode 0).
  Mar 06 16:28:10 corosync [TOTEM ] The network interface [192.168.148.31] is now up.
  Mar 06 16:28:11 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
  Mar 06 16:29:51 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
  ... (the same membership message repeats every second or two; the duplicate entries up to the last one are omitted here)
  Mar 06 16:31:16 corosync [TOTEM ] A processor joined or left the membership and a new membership was formed.
  

  # Check whether pacemaker started properly
  [root@node1 corosync]# grep pcmk_startup /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [pcmk  ] info: pcmk_startup: CRM: Initialized
  Mar 06 16:28:11 corosync [pcmk  ] Logging: Initialized pcmk_startup
  Mar 06 16:28:11 corosync [pcmk  ] info: pcmk_startup: Maximum core file size is: 18446744073709551615
  Mar 06 16:28:11 corosync [pcmk  ] info: pcmk_startup: Service: 9
  Mar 06 16:28:11 corosync [pcmk  ] info: pcmk_startup: Local hostname: node1.cluster.com
  

  # Check whether any errors occurred during startup (the plugin-related messages below are expected with the legacy corosync 1.x Pacemaker plugin used here and do not prevent the cluster from working, as the following steps show)
  [root@node1 corosync]# grep ERROR: /var/log/cluster/corosync.log
  Mar 06 16:28:10 corosync [pcmk  ] ERROR: process_ais_conf: You have configured a cluster using the Pacemaker plugin for Corosync. The plugin is not supported in this environment and will be removed very soon.
  Mar 06 16:28:10 corosync [pcmk  ] ERROR: process_ais_conf:  Please see Chapter 8 of 'Clusters from Scratch' (http://www.clusterlabs.org/doc) for details on using Pacemaker with CMAN
  

  3) Check the cluster status
  [root@node1 corosync]# crm_mon -1
  Last updated: Thu Mar  6 16:39:50 2014
  Last change: Thu Mar  6 16:39:36 2014 via crmd on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node1.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  0 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  5. Install crmsh
  [root@node1 ~]# cd /etc/yum.repos.d/
  [root@node1 yum.repos.d]# wget http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/network:ha-clustering:Stable.repo
  [root@node1 yum.repos.d]# yum install crmsh
  Loaded plugins: fastestmirror
  Loading mirror speeds from cached hostfile
  * base: mirror.bit.edu.cn
  * centos-6-base: mirror.bit.edu.cn
  * extras: mirror.bit.edu.cn
  * updates: mirror.bit.edu.cn
  Setting up Install Process
  Resolving Dependencies
  --> Running transaction check
  ---> Package crmsh.x86_64 0:1.2.6-6.1 will be installed
  --> Processing Dependency: python-lxml for package: crmsh-1.2.6-6.1.x86_64
  --> Processing Dependency: pssh for package: crmsh-1.2.6-6.1.x86_64
  --> Processing Dependency: python-dateutil for package: crmsh-1.2.6-6.1.x86_64
  --> Running transaction check
  ---> Package pssh.x86_64 0:2.3.1-3.2 will be installed
  ---> Package python-dateutil.noarch 0:1.4.1-6.el6 will be installed
  ---> Package python-lxml.x86_64 0:2.2.3-1.1.el6 will be installed
  --> Finished Dependency Resolution
  

  Dependencies Resolved
  

  ====================================================================================================================================
  Package                       Arch                 Version                        Repository                                  Size
  ====================================================================================================================================
  Installing:
  crmsh                         x86_64               1.2.6-6.1                      network_ha-clustering_Stable               489 k
  Installing for dependencies:
  pssh                          x86_64               2.3.1-3.2                      network_ha-clustering_Stable                50 k
  python-dateutil               noarch               1.4.1-6.el6                    base                                        84 k
  python-lxml                   x86_64               2.2.3-1.1.el6                  base                                       2.0 M
  

  Transaction Summary
  ====================================================================================================================================
  Install       4 Package(s)
  

  Total download size: 2.6 M
  Installed size: 24 M
  Is this ok [y/N]: y
  Downloading Packages:
  (1/4): crmsh-1.2.6-6.1.x86_64.rpm                                                                            | 489 kB     00:02
  (2/4): pssh-2.3.1-3.2.x86_64.rpm                                                                             |  50 kB     00:00
  (3/4): python-dateutil-1.4.1-6.el6.noarch.rpm                                                                |  84 kB     00:00
  (4/4): python-lxml-2.2.3-1.1.el6.x86_64.rpm                                                                  | 2.0 MB     00:08
  ------------------------------------------------------------------------------------------------------------------------------------
  Total                                                                                               144 kB/s | 2.6 MB     00:18
  warning: rpmts_HdrFromFdno: Header V3 RSA/SHA1 Signature, key ID 17280ddf: NOKEY
  Retrieving key from http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/repodata/repomd.xml.key
  Importing GPG key 0x17280DDF:
  Userid: "network OBS Project "
  From  : http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/repodata/repomd.xml.key
  Is this ok [y/N]: y
  Running rpm_check_debug
  Running Transaction Test
  Transaction Test Succeeded
  Running Transaction
  Installing : pssh-2.3.1-3.2.x86_64                                                                                            1/4
  Installing : python-lxml-2.2.3-1.1.el6.x86_64                                                                                 2/4
  Installing : python-dateutil-1.4.1-6.el6.noarch                                                                               3/4
  Installing : crmsh-1.2.6-6.1.x86_64                                                                                           4/4
  Verifying  : python-dateutil-1.4.1-6.el6.noarch                                                                               1/4
  Verifying  : python-lxml-2.2.3-1.1.el6.x86_64                                                                                 2/4
  Verifying  : pssh-2.3.1-3.2.x86_64                                                                                            3/4
  Verifying  : crmsh-1.2.6-6.1.x86_64                                                                                           4/4
  

  Installed:
  crmsh.x86_64 0:1.2.6-6.1
  

  Dependency Installed:
  pssh.x86_64 0:2.3.1-3.2            python-dateutil.noarch 0:1.4.1-6.el6            python-lxml.x86_64 0:2.2.3-1.1.el6
  

  Complete!
  [root@node1 yum.repos.d]#
  

  6. Create an active/standby cluster
  1) Review the existing configuration
  [root@node1 yum.repos.d]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2"
  [root@node1 yum.repos.d]#
  [root@node1 yum.repos.d]# crm configure show xml
  (XML output not reproduced here; it is simply the XML form of the configuration shown above)

  # Validate the configuration
  [root@node1 ~]# crm_verify -L -V
  error: unpack_resources:     Resource start-up disabled since no STONITH resources have been defined
  error: unpack_resources:     Either configure some or disable STONITH with the stonith-enabled option
  error: unpack_resources:     NOTE: Clusters with shared data need STONITH to ensure data integrity
  Errors found during check: config not valid
  

  

  # To disable STONITH, we set the stonith-enabled cluster option to false.
  [root@node1 ~]# crm configure property stonith-enabled=false
  [root@node1 ~]# crm_verify -L
  

  2) Add a resource
  (1) Configure an IP address (the VIP). No matter which node the cluster services are running on, we need one fixed address to reach them. Here 192.168.148.27 is chosen as the floating IP, given the memorable name ClusterIP, and the cluster is told to check it every 30 seconds.
  

  [root@node1 ~]# crm configure primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  

  # Now check that the IP resource has been added and that it is in a usable state.
  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false"
  

  

  [root@node1 ~]# crm status
  Last updated: Fri Mar  7 11:06:10 2014
  Last change: Fri Mar  7 10:50:21 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node1.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
  

  [root@node1 ~]# ip a
  1: lo: <LOOPBACK,UP,LOWER_UP> mtu 16436 qdisc noqueue state UNKNOWN
  link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
  inet 127.0.0.1/8 scope host lo
  2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP qlen 1000
  link/ether 00:50:56:b0:2f:71 brd ff:ff:ff:ff:ff:ff
  inet 192.168.148.33/24 brd 192.168.148.255 scope global eth0
  inet 192.168.148.27/32 brd 192.168.148.255 scope global eth0
  

  (2) Test failover
  # First, find out which node the IP resource is currently running on
  [root@node1 ~]# crm resource status ClusterIP
  resource ClusterIP is running on: node1.cluster.com
  

  # Stop Pacemaker and Corosync on that node
  [root@node1 ~]# service corosync stop
  Signaling Corosync Cluster Engine (corosync) to terminate: [  OK  ]
  Waiting for corosync services to unload:..                 [  OK  ]
  

  # With corosync stopped, check the cluster status from the other node
  [root@node2 ~]# crm status
  Last updated: Fri Mar  7 11:05:20 2014
  Last change: Fri Mar  7 10:50:21 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node1.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
  

  [root@node2 ~]# crm status
  Last updated: Fri Mar  7 11:13:09 2014
  Last change: Fri Mar  7 10:50:21 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition WITHOUT quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node2.cluster.com ]
  OFFLINE: [ node1.cluster.com ]
  

  # There are two things to notice about the cluster state here: first, as expected, node1 is now offline; second, ClusterIP is not running anywhere!
  

  A. For a two-node cluster, set no-quorum-policy to ignore, so that when one node goes down the other can keep running normally.
  

  

  [root@node2 ~]# crm configure property no-quorum-policy=ignore
  [root@node2 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore"
  

  # After a moment, the cluster starts the IP on the remaining node. Note that the cluster still does not have quorum.
  [root@node2 ~]# crm status
  Last updated: Fri Mar  7 11:17:27 2014
  Last change: Fri Mar  7 11:17:02 2014 via cibadmin on node2.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition WITHOUT quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node2.cluster.com ]
  OFFLINE: [ node1.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node2.cluster.com
  

  # Now simulate node recovery: start the Corosync service on node1 and check the cluster status again.
  [root@node1 ~]# service corosync start
  Starting Corosync Cluster Engine (corosync):               [  OK  ]
  

  [root@node1 ~]# crm status
  Last updated: Fri Mar  7 11:19:55 2014
  Last change: Fri Mar  7 11:17:02 2014 via cibadmin on node2.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node2.cluster.com
  [root@node1 ~]#
  # Note: without resource stickiness the cluster is free to move the IP back to node1 now that it has recovered (in this run it happened to stay on node2). Automatic fail-back after recovery is usually unwanted, which is what the next step addresses.
  

  B. Prevent resources from moving back after a node recovers (resource-stickiness)
  Resource stickiness (resource-stickiness) expresses how strongly a resource prefers to stay on the node it is currently running on.
  Value ranges and their effect:
  0: the default. The resource is placed in the most suitable location in the system, which means it is moved whenever a "better" or less loaded node becomes available. This is essentially automatic fail-back, except that the resource may move to a node other than the one it was previously active on.
  Greater than 0: the resource prefers to stay where it is, but will move if a more suitable node becomes available. Higher values mean a stronger preference to stay put.
  Less than 0: the resource prefers to move away from its current location. The larger the absolute value, the stronger that preference.
  INFINITY: unless forced off (node shutdown, node standby, migration-threshold reached, or a configuration change), the resource always stays where it is. This effectively disables automatic fail-back.
  -INFINITY: the resource always moves away from its current location.
  

  

  [root@node1 ~]# crm configure rsc_defaults resource-stickiness=100
  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore"
  rsc_defaults $id="rsc-options" \
  resource-stickiness="100"
  

  [root@node1 ~]# crm status
  Last updated: Fri Mar 14 19:02:39 2014
  Last change: Fri Mar 14 18:53:52 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  1 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
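
  The rsc_defaults value above applies to every resource. If you prefer to make only a single resource sticky, the same value can instead be set as a meta attribute on that resource; a minimal sketch:
  [root@node1 ~]# crm resource meta ClusterIP set resource-stickiness 200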
  

  3) Add more resources (services)
  # There is no tomcat resource agent by default, so install the resource-agents package
  # rpm -ivh resource-agents-3.9.5-3.1.x86_64.rpm
  # If dependency problems are reported, try installing it via yum instead (yum resolves dependencies automatically):
  # yum install resource-agents-3.9.5-3.1.x86_64.rpm
  # or
  # yum update resource-agents
  # Then add the tomcat resource (ocf_heartbeat_tomcat — Manages a Tomcat servlet environment instance)
  

  (1) Preparation:
  A. Note the environment variables from the Tomcat installation; they are needed when configuring pacemaker (the Tomcat startup script prints them):
  Using CATALINA_BASE:   /usr/local/apache-tomcat-8.0.8
  Using CATALINA_HOME:   /usr/local/apache-tomcat-8.0.8
  Using CATALINA_TMPDIR: /usr/local/apache-tomcat-8.0.8/temp
  Using JRE_HOME:        /usr
  Using CLASSPATH:       /usr/local/apache-tomcat-8.0.8/bin/bootstrap.jar:/usr/local/apache-tomcat-8.0.8/bin/tomcat-juli.jar
  

  B. Locate the JDK to determine JAVA_HOME
  [root@node1 ~]# cd /usr/java/
  [root@node1 java]# pwd
  /usr/java
  [root@node1 java]# ll
  total 4
  lrwxrwxrwx 1 root root   16 Mar 14 18:43 default -> /usr/java/latest
  drwxr-xr-x 8 root root 4096 Mar 14 18:42 jdk1.7.0_45
  lrwxrwxrwx 1 root root   21 Mar 14 18:43 latest -> /usr/java/jdk1.7.0_45
  

  From this we can see the JDK location, so:
  JAVA_HOME=/usr/java/default
  

  C. Create a symlink to the current Tomcat directory, so that future version upgrades are easier (do this on both nodes)
  [root@node1 ~]# ln -s /usr/local/apache-tomcat-8.0.8 /usr/local/tomcat
  [root@node2 ~]# ln -s /usr/local/apache-tomcat-8.0.8 /usr/local/tomcat
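
  As a last preparation step, confirm that the tomcat agent is actually available after installing resource-agents, and check which parameters your agent version accepts (java_home, catalina_home, statusurl, and so on):
  [root@node1 ~]# crm ra list ocf heartbeat | grep -w tomcat
  [root@node1 ~]# crm ra info ocf:heartbeat:tomcat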
  

  (2) Add the tomcat resource
  [root@node1 ~]# crm configure primitive tomcat ocf:heartbeat:tomcat  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" op monitor timeout="30s" interval="15s" op start interval="0" timeout="90s" op stop interval="0" timeout="120s"
  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  primitive tomcat tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout=30s interval=15s \
  op start interval=0 timeout=90s \
  op stop interval=0 timeout=120s
  property cib-bootstrap-options: \
  dc-version=1.1.10-14.el6_5.2-368c726 \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes=2 \
  stonith-enabled=false \
  no-quorum-policy=ignore
  rsc_defaults rsc-options: \
  resource-stickiness=100
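
  Note that statusurl is what the agent's monitor action polls, so it must return a successful response locally on whichever node hosts the resource; the /design/ path and port 80 here are specific to this deployment. A quick sanity check (expect an HTTP 200):
  [root@node1 ~]# curl -s -o /dev/null -w '%{http_code}\n' http://127.0.0.1:80/design/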
  

  # Also consider adding the rsync daemon to the cluster (for the design center)
  [root@node1 ~]# crm configure primitive rsyncd ocf:heartbeat:rsyncd \
  op monitor depth="0" timeout="20s" interval="60s"
  

  # nfsserver
  [root@node1 ~]# crm configure primitive nfsserver ocf:heartbeat:nfsserver \
  params \
  nfs_init_script="/etc/init.d/nfs" \
  nfs_shared_infodir="/var/lib/nfs" \
  nfs_notify_cmd="/sbin/rpc.statd" \
  nfs_ip="192.168.148.27" \
  op monitor depth="0" timeout="40s" interval="10"
  

  

  (3) Resource constraints
  A. The new resources will not necessarily run on the same node as the IP; add colocation constraints to make sure each one runs together with ClusterIP
  # tomcat
  [root@node1 ~]# crm configure colocation tomcat-with-ip INFINITY: tomcat ClusterIP
  

  # rsyncd
  [root@node1 ~]# crm configure colocation rsyncd-with-ip INFINITY: rsyncd ClusterIP
  

  # nfsserver
  [root@node1 ~]# crm configure colocation nfsserver-with-ip INFINITY: nfsserver ClusterIP
  

  B. Control the start/stop order so that ClusterIP is already active before each resource is started
  # tomcat
  [root@node1 ~]# crm configure order tomcat-after-ip mandatory: ClusterIP tomcat
  

  # rsyncd
  [root@node1 ~]# crm configure order rsyncd-after-ip mandatory: ClusterIP rsyncd
  

  # nfsserver
  [root@node1 ~]# crm configure order nfsserver-after-ip mandatory: ClusterIP nfsserver
  

  [root@node1 ~]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  primitive nfsserver nfsserver \
  params nfs_init_script="/etc/init.d/nfs" nfs_shared_infodir="/var/lib/nfs" nfs_ip=192.168.148.27 \
  op monitor timeout=20s interval=10 depth=0
  primitive tomcat tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout=30s interval=15s \
  op start interval=0 timeout=90s \
  op stop interval=0 timeout=120s
  colocation nfsserver-with-ip inf: nfsserver ClusterIP
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order nfsserver-after-ip Mandatory: ClusterIP nfsserver
  order tomcat-after-ip Mandatory: ClusterIP tomcat
  property cib-bootstrap-options: \
  dc-version=1.1.10-14.el6_5.2-368c726 \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes=2 \
  stonith-enabled=false \
  no-quorum-policy=ignore
  rsc_defaults rsc-options: \
  resource-stickiness=100
  

  # The final, complete configuration:
  [root@node1 heartbeat]# crm configure show
  node node1.cluster.com
  node node2.cluster.com
  primitive ClusterIP IPaddr2 \
  params ip=192.168.148.27 cidr_netmask=32 \
  op monitor interval=30s
  primitive nfsserver nfsserver \
  params nfs_init_script="/etc/init.d/nfs" nfs_shared_infodir="/var/lib/nfs" nfs_notify_cmd="/sbin/rpc.statd" nfs_ip=192.168.148.27 \
  op monitor timeout=40s interval=10 depth=0 \
  meta target-role=Started
  primitive tomcat tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout=90s interval=15s \
  op start interval=0 timeout=120s \
  op stop interval=0 timeout=120s \
  meta target-role=Started
  colocation nfsserver-with-ip inf: nfsserver ClusterIP
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order nfsserver-after-ip Mandatory: ClusterIP nfsserver
  order tomcat-after-ip Mandatory: ClusterIP tomcat
  property cib-bootstrap-options: \
  dc-version=1.1.10-14.el6_5.2-368c726 \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes=2 \
  stonith-enabled=false \
  no-quorum-policy=ignore \
  last-lrm-refresh=1415113828
  rsc_defaults rsc-options: \
  resource-stickiness=100
  

  (4) Manually move resources within the cluster (useful during maintenance)
  # The resources are currently running on node2
  [root@node1 ~]# crm status
  Last updated: Mon Mar 17 08:32:13 2014
  Last change: Sat Mar 15 11:48:45 2014 via cibadmin on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  2 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node2.cluster.com
  tomcat (ocf::heartbeat:tomcat):        Started node2.cluster.com
  

  # Move the resources to node1
  [root@node1 ~]# crm resource move tomcat node1.cluster.com
  [root@node1 ~]# crm status
  Last updated: Tue Mar 11 11:37:50 2014
  Last change: Tue Mar 11 11:37:43 2014 via crm_resource on node1.cluster.com
  Stack: classic openais (with plugin)
  Current DC: node2.cluster.com - partition with quorum
  Version: 1.1.10-14.el6_5.2-368c726
  2 Nodes configured, 2 expected votes
  2 Resources configured
  

  

  Online: [ node1.cluster.com node2.cluster.com ]
  

  ClusterIP      (ocf::heartbeat:IPaddr2):       Started node1.cluster.com
  tomcat        (ocf::heartbeat:tomcat):        Started node1.cluster.com
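
  Keep in mind that crm resource move works by adding a location constraint pinning the resource to node1. Once maintenance is finished, clear it so the cluster is free to place the resource again (the same unmigrate command appears in section II below):
  [root@node1 ~]# crm resource unmigrate tomcat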
  

  

  II. Basic Pacemaker Operations
  1. Node state transitions
  Switch node node1.cluster.com from online to standby, and then back from standby to online
  # crm node standby node1.cluster.com
  [root@node1 ~]# crm configure show
  node node1.cluster.com \
  attributes standby="on"
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  primitive tomcat ocf:heartbeat:tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout="30s" interval="15s" \
  op start interval="0" timeout="90s" \
  op stop interval="0" timeout="120s"
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order tomcat-after-ip inf: ClusterIP tomcat
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore" \
  last-lrm-refresh="1394178592"
  rsc_defaults $id="rsc-options" \
  resource-stickiness="100"
  

  # crm node online node1.cluster.com
  [root@node1 ~]# crm configure show
  node node1.cluster.com \
  attributes standby="off"
  node node2.cluster.com
  primitive ClusterIP ocf:heartbeat:IPaddr2 \
  params ip="192.168.148.27" cidr_netmask="32" \
  op monitor interval="30s"
  primitive tomcat ocf:heartbeat:tomcat \
  params java_home="/usr/java/default" catalina_home="/usr/local/tomcat" catalina_base="/usr/local/tomcat" statusurl="http://127.0.0.1:80/design/" \
  op monitor timeout="30s" interval="15s" \
  op start interval="0" timeout="90s" \
  op stop interval="0" timeout="120s"
  colocation tomcat-with-ip inf: tomcat ClusterIP
  order tomcat-after-ip inf: ClusterIP tomcat
  property $id="cib-bootstrap-options" \
  dc-version="1.1.10-14.el6_5.2-368c726" \
  cluster-infrastructure="classic openais (with plugin)" \
  expected-quorum-votes="2" \
  stonith-enabled="false" \
  no-quorum-policy="ignore" \
  last-lrm-refresh="1394178592"
  rsc_defaults $id="rsc-options" \
  resource-stickiness="100"
  

  2. Resource migration
  Migrate the tomcat resource from node1.cluster.com to node2.cluster.com, then hand it back by clearing the migration constraint
  # crm resource migrate tomcat node2.cluster.com
  # crm resource unmigrate tomcat
  3. Check cluster status
  # crm status
  # crm_mon or crm_mon -1
  

  4. Save, edit, and load the configuration
  # crm configure save <filename>
  # crm configure edit
  # crm configure load <filename>
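
  A usage sketch with an example file name (note that crmsh's configure load expects a method, replace or update, before the file):
  [root@node1 ~]# crm configure save /root/cluster-config.crm
  [root@node1 ~]# crm configure load update /root/cluster-config.crm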
  

  

  

  III. References
  http://clusterlabs.org/doc/en-US/Pacemaker/1.1-crmsh/html/Pacemaker_Explained/ap-lsb.html
  http://www.linux-ha.org/doc/man-pages/re-ra-tomcat.html
  




