Browse Source

resource-agent package update

Signed-off-by: basebuilder_pel7ppc64bebuilder0 <basebuilder@powerel.org>
master
basebuilder_pel7ppc64bebuilder0 6 years ago
parent
commit
1199308520
  1. 418
      SOURCES/NovaCompute.patch
  2. 69
      SOURCES/bz10005924-default-apache-config.patch
  3. 25
      SOURCES/bz1014641-VirtualDomain-syntax-error.patch
  4. 47
      SOURCES/bz1016140-start-predefined-domains.patch
  5. 29
      SOURCES/bz1029061-virtualdomain-parse-error.patch
  6. 15
      SOURCES/bz1033016-nfsserver-missing-etab.patch
  7. 250
      SOURCES/bz1058102-man-page-updates.patch
  8. 154
      SOURCES/bz1059988-db2-support.patch
  9. 113
      SOURCES/bz1060367-vm-monitor-wo-libvirtd.patch
  10. 178
      SOURCES/bz1060367-vm-monitor-wo-libvirtd_2.patch
  11. 441
      SOURCES/bz1064512-clvmd-agent.patch
  12. 30
      SOURCES/bz1077888-CTDB-fix-logging.patch
  13. 159
      SOURCES/bz1077888-ctdb-updates.patch
  14. 54
      SOURCES/bz1083041-virtual-domain-monitor-lxc-fix.patch
  15. 116
      SOURCES/bz1083231-fs-wait-module-load.patch
  16. 25
      SOURCES/bz1091101-nfs-error-msg-fix.patch
  17. 27
      SOURCES/bz1091101-nfs-rquotad-port-option-fix.patch
  18. 1196
      SOURCES/bz1091101-nfs-updates.patch
  19. 87
      SOURCES/bz1095944-safe-umount-option.patch
  20. 42
      SOURCES/bz1097593-LVM-warn-lvmetad.patch
  21. 32
      SOURCES/bz1105655-virtualdomain-restore-start-stop-default-timeout.patch
  22. 26
      SOURCES/bz1116166-Low-galera-be-very-generous-in-the-promotion-timeout.patch
  23. 41
      SOURCES/bz1116166-Low-galera-do-not-advertise-notify-in-the-usage.patch
  24. 1417
      SOURCES/bz1116166-galera-agent.patch
  25. 25
      SOURCES/bz1116166-galera-do-not-ignore-check_password.patch
  26. 1536
      SOURCES/bz1118029-iscsi-agents.patch
  27. 13
      SOURCES/bz1118029-iscsi-remove-write-back.patch
  28. 39
      SOURCES/bz1118029_iscsi_syntax_fix.patch
  29. 466
      SOURCES/bz1122285-ethmonitor-infiniband.patch
  30. 474
      SOURCES/bz1126073-1-nfsserver-fix-systemd-status-detection.patch
  31. 337
      SOURCES/bz1126073-2-nfsserver-fix-systemd-status-detection.patch
  32. 71
      SOURCES/bz1128933-Fix-ha_log-drop-global-__ha_log_ignore_stderr_once-h.patch
  33. 77
      SOURCES/bz1128933-Fix-ocf_exit_reason-implicit-format-string-s-for-sin.patch
  34. 26
      SOURCES/bz1128933-Fix-shellfuncs-fix-syntax-error-caused-by-exit_reaso.patch
  35. 185
      SOURCES/bz1128933-IPaddr2-exit-reason-support.patch
  36. 102
      SOURCES/bz1128933-VirtualDomain-exit-reason-support.patch
  37. 25
      SOURCES/bz1128933-binary-check-exit-reason-support.patch
  38. 2118
      SOURCES/bz1128933-exit-reason-string-updates.patch
  39. 43
      SOURCES/bz1128933-exportfs-exit-reason-support.patch
  40. 98
      SOURCES/bz1128933-introducing-exit-reason-support.patch
  41. 52
      SOURCES/bz1128933-nfsnotify-exit-reason-support.patch
  42. 97
      SOURCES/bz1128933-nfssserver-exit-reason-support.patch
  43. 69
      SOURCES/bz1135026-docker-handle-invalid-monitor-cmd.patch
  44. 145
      SOURCES/bz1135026-docker-monitor_cmd-arg.patch
  45. 61
      SOURCES/bz1135026-docker-name-arg.patch
  46. 49
      SOURCES/bz1135026-docker-stop-fix.patch
  47. 375
      SOURCES/bz1135026-introducing-docker-agent.patch
  48. 43
      SOURCES/bz1138871-avoid-check-binary-in-validate.patch
  49. 35
      SOURCES/bz1138871-mysql-error-validation-fails-monitor.patch
  50. 26
      SOURCES/bz1138871_mysql_stop_fix.patch
  51. 60
      SOURCES/bz1159328-LVM-check_writethrough.patch
  52. 520
      SOURCES/bz1160365-iface-vlan.patch.patch
  53. 97
      SOURCES/bz1168251-SAPHana-agents-update.patch
  54. 37
      SOURCES/bz1168251-SAPHana-agents-update2.patch
  55. 13
      SOURCES/bz1168251-SAPHana-agents-update3.patch
  56. 3129
      SOURCES/bz1168251-SAPHana-agents.patch
  57. 441
      SOURCES/bz1168251-SAPHana-agents_update4.patch
  58. 204
      SOURCES/bz1170376-galera-no-readonly.patch
  59. 25
      SOURCES/bz1171162-clvmd-opt-fix.patch
  60. 113
      SOURCES/bz1183136-nginx-support.patch
  61. 564
      SOURCES/bz1189187-redis-agent.patch
  62. 56
      SOURCES/bz1198681-clvm-activate-vgs-option.patch
  63. 92
      SOURCES/bz1200756-ipsrcaddr-misconfig.patch
  64. 272
      SOURCES/bz1212632-nagios.patch
  65. 43
      SOURCES/bz1213971-ethmon-opt.patch
  66. 494
      SOURCES/bz1214360-NovaCompute-update1.patch.patch
  67. 49
      SOURCES/bz1214781-lvm-partial-activation-fix.patch.patch
  68. 27
      SOURCES/bz1223615-apache-includes-fix.patch.patch
  69. 49
      SOURCES/bz1227293-dhcpd-chroot-fix.patch.patch
  70. 121
      SOURCES/bz1231032-redis-update.patch.patch
  71. 246
      SOURCES/bz1232376-oracle-agent-update.diff
  72. 133
      SOURCES/bz1242181-virtualdomain-migrate_options.patch
  73. 40
      SOURCES/bz1242558-virtualdomain-may-remove-config-file.patch
  74. 92
      SOURCES/bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch
  75. 45
      SOURCES/bz1249430-1-tomcat-fix-selinux-enforced.patch
  76. 112
      SOURCES/bz1249430-2-tomcat-fix-selinux-enforced.patch
  77. 1188
      SOURCES/bz1250728-send_arp-fix-buffer-overflow-on-infiniband.patch
  78. 33
      SOURCES/bz1251484-redis-client-passwd-support.patch
  79. 135
      SOURCES/bz1260713-1-sapdatabase-process-count-suser.patch
  80. 24
      SOURCES/bz1260713-2-sapdatabase-process-count-suser.patch
  81. 11
      SOURCES/bz1263348-mysql-tmpfile-leak.patch
  82. 37
      SOURCES/bz1265527-sap_redhat_cluster_connector-hostnames-with-dash.patch
  83. 60
      SOURCES/bz1276699-ipaddr2-use-ipv6-dad-for-collision-detection.patch
  84. 728
      SOURCES/bz1282723-novacompute-novaevacuate-fix-evacute-typo.patch
  85. 131
      SOURCES/bz1284526-galera-crash-recovery.patch
  86. 89
      SOURCES/bz1284526-galera-heuristic-recovered.patch
  87. 113
      SOURCES/bz1284526-galera-no-grastate.patch
  88. 31
      SOURCES/bz1287303-novaevacuate-invoke-off-action.patch
  89. 23
      SOURCES/bz1287314-novaevacuate-simplify-nova-check.patch
  90. 1778
      SOURCES/bz1289107-saphana-mcos-support.patch
  91. 101
      SOURCES/bz1296406-virtualdomain-migration_speed-migration_downtime.patch
  92. 33
      SOURCES/bz1299404-galera-custom-host-port.patch
  93. 35
      SOURCES/bz1301189-virtualdomain-fix-locale.patch
  94. 207
      SOURCES/bz1303037-1-portblock.patch
  95. 31
      SOURCES/bz1303037-2-portblock.patch
  96. 45
      SOURCES/bz1303803-Backup-and-restore-rabbitmq-users-during-resource-re.patch
  97. 259
      SOURCES/bz1305549-nova-compute-wait-nova-compute-unfence.patch
  98. 42
      SOURCES/bz1305549-redis-notify-clients-of-master-being-demoted.patch
  99. 20
      SOURCES/bz1307160-virtualdomain-fix-unnecessary-error-when-probing-nonexistent-domain.patch
  100. 136
      SOURCES/bz1316130-systemd-drop-in-clvmd-LVM.patch
  101. Some files were not shown because too many files have changed in this diff Show More

418
SOURCES/NovaCompute.patch

@ -0,0 +1,418 @@ @@ -0,0 +1,418 @@
From bd60deaa906cc5fe1cd46549b1318d2b940395ef Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 11 Jun 2015 08:41:50 -0500
Subject: [PATCH] NovaCompute agent

---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 4 +
heartbeat/NovaCompute | 363 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 368 insertions(+)
create mode 100644 heartbeat/NovaCompute

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 653e818..69acf3a 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -73,6 +73,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_MailTo.7 \
ocf_heartbeat_ManageRAID.7 \
ocf_heartbeat_ManageVE.7 \
+ ocf_heartbeat_NovaCompute.7 \
ocf_heartbeat_Pure-FTPd.7 \
ocf_heartbeat_Raid1.7 \
ocf_heartbeat_Route.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index e4ed4fd..b77c589 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -29,6 +29,8 @@ halibdir = $(libexecdir)/heartbeat
ocfdir = $(OCF_RA_DIR_PREFIX)/heartbeat
+ospdir = $(OCF_RA_DIR_PREFIX)/openstack
+
dtddir = $(datadir)/$(PACKAGE_NAME)
dtd_DATA = ra-api-1.dtd
@@ -50,6 +52,8 @@ send_ua_SOURCES = send_ua.c IPv6addr_utils.c
IPv6addr_LDADD = -lplumb $(LIBNETLIBS)
send_ua_LDADD = $(LIBNETLIBS)
+osp_SCRIPTS = NovaCompute
+
ocf_SCRIPTS = ClusterMon \
CTDB \
Dummy \
diff --git a/heartbeat/NovaCompute b/heartbeat/NovaCompute
new file mode 100644
index 0000000..f71abeb
--- /dev/null
+++ b/heartbeat/NovaCompute
@@ -0,0 +1,363 @@
+#!/bin/sh
+#
+#
+# NovaCompute agent manages compute daemons.
+#
+# Copyright (c) 2015
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+###
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+###
+
+: ${__OCF_ACTION=$1}
+
+#######################################################################
+
+meta_data() { # Emit the OCF metadata (parameters and actions) as XML on stdout
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="NovaCompute" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+OpenStack Nova Compute Server.
+</longdesc>
+<shortdesc lang="en">OpenStack Nova Compute Server</shortdesc>
+
+<parameters>
+
+<parameter name="auth_url" unique="0" required="1">
+<longdesc lang="en">
+Authorization URL for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Authorization URL</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="username" unique="0" required="1">
+<longdesc lang="en">
+Username for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Username</shortdesc>
+<content type="string" default="" />
+</parameter>
+<parameter name="password" unique="0" required="1">
+<longdesc lang="en">
+Password for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Password</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="tenant_name" unique="0" required="1">
+<longdesc lang="en">
+Tenant name for connecting to keystone in admin context.
+Note that with Keystone V3 tenant names are only unique within a domain.
+</longdesc>
+<shortdesc lang="en">Tenant name</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="domain" unique="0" required="0">
+<longdesc lang="en">
+DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
+</longdesc>
+<shortdesc lang="en">DNS domain</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="endpoint_type" unique="0" required="0">
+<longdesc lang="en">
+Nova API location (internal, public or admin URL)
+</longdesc>
+<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="no_shared_storage" unique="0" required="0">
+<longdesc lang="en">
+Disable shared storage recovery for instances. Use at your own risk!
+</longdesc>
+<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="120" />
+<action name="stop" timeout="300" />
+<action name="monitor" timeout="20" interval="10" depth="0"/>
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="5" />
+<action name="notify" timeout="600" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# Install a TERM handler that only logs, so SIGTERM does not end the agent; per the original note this is to test that lrmd still makes us exit.
+trap sigterm_handler TERM
+sigterm_handler() {
+ ocf_log info "They use TERM to bring us down. No such luck."
+ return
+}
+
+nova_usage() { # Print the list of supported actions on stdout
+ cat <<END
+usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+nova_pid() { # Print the first ps column (the PID) of every python nova-compute process; the pattern is quoted so the shell cannot glob-expand it
+    ps axf | grep "python.*nova-compute" | grep -v grep | awk '{print $1}'
+}
+
+nova_start() { # Launch nova-compute as user nova unless nova_monitor already reports it running
+    nova_monitor
+    if [ $? = $OCF_SUCCESS ]; then
+        return $OCF_SUCCESS
+    fi
+
+    export LIBGUESTFS_ATTACH_METHOD=appliance
+    su nova -s /bin/sh -c /usr/bin/nova-compute &
+
+    # Poll once per second instead of spinning; there is no internal limit on how long we wait.
+    ocf_log info "Waiting for nova to start"
+    while ! nova_monitor; do
+        sleep 1
+    done
+    rc=$OCF_SUCCESS
+
+## TEMPORARY disable call to "service enable" that seems to create
+## issues and it is unnecessary since fence_compute doesn't disable
+## the service
+
+# if [ "x${OCF_RESKEY_domain}" != x ]; then
+# export service_host="${NOVA_HOST}.${OCF_RESKEY_domain}"
+# else
+# export service_host="${NOVA_HOST}"
+# fi
+
+# python -c "import os; from novaclient import client as nova_client; nova = nova_client.Client('2', os.environ.get('OCF_RESKEY_username'), os.environ.get('OCF_RESKEY_password'), os.environ.get('OCF_RESKEY_tenant_name'), os.environ.get('OCF_RESKEY_auth_url')); nova.services.enable(os.environ.get('service_host'), 'nova-compute');"
+
+# rc=$?
+# if [ $rc != 0 ]; then
+# ocf_exit_reason "nova.services.enable failed $rc"
+# exit $OCF_NOT_RUNNING
+# fi
+
+    return $OCF_SUCCESS
+}
+
+nova_stop() { # Send TERM to nova-compute, then wait until nova_pid reports no process
+    pid=$(nova_pid)
+    if [ "x$pid" != x ]; then
+        # nova_pid may print several PIDs on separate lines; echo joins them so all reach one kill command
+        su nova -c "kill -TERM $(echo $pid)" -s /bin/bash
+    fi
+
+    while [ "x$pid" != x ]; do
+        sleep 1
+        pid=$(nova_pid)
+    done
+    return $OCF_SUCCESS
+}
+
+nova_monitor() { # OCF_SUCCESS when nova_pid prints anything, OCF_NOT_RUNNING otherwise
+ pid=$(nova_pid)
+ if [ "x$pid" != x ]; then
+ ## TEMPORARY disable call to fence_compute to avoid noise on
+ ## first startup due to nova-compute not being fast enough to populate
+ ## the db and fence_compute checking if node exists and it's enabled
+ #state=$(fence_compute ${fence_options} -o status -n $NOVA_HOST | grep Status)
+ #if [ "x$state" = "xStatus: ON" ]; then
+ return $OCF_SUCCESS
+ #else
+ # ocf_exit_reason "Nova status: $state"
+ # return $OCF_ERR_GENERIC
+ #fi
+ fi
+
+ return $OCF_NOT_RUNNING
+}
+
+nova_notify() { # On a post-stop notification, the first non-stopping active node evacuates every stopped node
+    if [ "x${OCF_RESKEY_CRM_meta_notify_operation}" != "xstop" ]; then
+        return $OCF_SUCCESS
+    elif [ "x${OCF_RESKEY_CRM_meta_notify_type}" != "xpost" ]; then
+        return $OCF_SUCCESS
+    fi
+
+    # Only the first node not stopping performs evacuates for now
+    # Can we allow all of them to do it? It would make this block much simpler.
+    me=$(uname -n | awk -F. '{print $1}')
+    for host in ${OCF_RESKEY_CRM_meta_notify_active_uname}; do
+        # A host is stopping if it matches ANY stop entry, so scan the whole stop list before deciding.
+        stopping=0
+        for stop in ${OCF_RESKEY_CRM_meta_notify_stop_uname}; do
+            [ "$stop" = "$host" ] && stopping=1
+        done
+        [ $stopping = 1 ] && continue
+        if [ "x$(echo "${host}" | awk -F. '{print $1}')" = "x$me" ]; then
+            # Also repeat for any peer NOT in active_uname somehow?
+            for node in $OCF_RESKEY_CRM_meta_notify_stop_uname; do
+                ocf_log info "Performing evacuations for $node"
+                fence_compute ${fence_options} -o reboot -n "$node"
+            done
+        fi
+        # $host is the first non-stopping node: either we just evacuated, or that node will.
+        return $OCF_SUCCESS
+    done
+    return $OCF_SUCCESS
+}
+
+nova_validate() { # Check binaries, nova.conf and parameters; sets fence_options and NOVA_HOST; exits on any error
+    rc=$OCF_SUCCESS
+    fence_options=""
+
+    check_binary openstack-config
+    check_binary fence_compute
+    check_binary nova-compute
+
+    if [ ! -f /etc/nova/nova.conf ]; then
+        ocf_exit_reason "/etc/nova/nova.conf not found"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    if [ -z "${OCF_RESKEY_auth_url}" ]; then
+        ocf_exit_reason "auth_url not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
+
+    if [ -z "${OCF_RESKEY_username}" ]; then
+        ocf_exit_reason "username not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -l ${OCF_RESKEY_username}"
+
+    if [ -z "${OCF_RESKEY_password}" ]; then
+        ocf_exit_reason "password not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -p ${OCF_RESKEY_password}"
+
+    if [ -z "${OCF_RESKEY_tenant_name}" ]; then
+        ocf_exit_reason "tenant_name not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+
+    if [ -n "${OCF_RESKEY_domain}" ]; then
+        fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
+    fi
+
+    if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
+        if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
+            fence_options="${fence_options} --no-shared-storage"
+        fi
+    fi
+
+    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
+        case ${OCF_RESKEY_endpoint_type} in
+            adminURL|publicURL|internalURL) ;;
+            *)
+                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
+                exit $OCF_ERR_CONFIGURED
+                ;;
+        esac
+        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
+    fi
+
+    # we take a chance here and hope that host is either not configured
+    # or configured in nova.conf
+
+    NOVA_HOST=$(openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
+    if [ $? = 1 ]; then
+        if [ "x${OCF_RESKEY_domain}" != x ]; then
+            NOVA_HOST=$(uname -n | awk -F. '{print $1}')
+        else
+            NOVA_HOST=$(uname -n)
+        fi
+    fi
+
+    # We only need to check a configured value, calculated ones are fine (only the exit status is used, so stdout is discarded too)
+    openstack-config --get /etc/nova/nova.conf DEFAULT host >/dev/null 2>&1
+    if [ $? = 0 ]; then
+        if [ "x${OCF_RESKEY_domain}" != x ]; then
+            short_host=$(uname -n | awk -F. '{print $1}')
+            if [ "x$NOVA_HOST" != "x${short_host}" ]; then
+                ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function"
+                rc=$OCF_ERR_CONFIGURED
+            fi
+
+        elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then
+            ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function"
+            rc=$OCF_ERR_CONFIGURED
+        fi
+    fi
+
+    if [ $rc != $OCF_SUCCESS ]; then
+        exit $rc
+    fi
+    return $rc
+}
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) nova_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+# meta-data and usage were answered above without validation; every other action is validated first (nova_validate exits on error).
+nova_validate
+
+case $__OCF_ACTION in
+start) nova_start;;
+stop) nova_stop;;
+monitor) nova_monitor;;
+notify) nova_notify;;
+validate-all) exit $OCF_SUCCESS;; # nova_validate above already passed
+*) nova_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
--
1.8.4.2

69
SOURCES/bz10005924-default-apache-config.patch

@ -0,0 +1,69 @@ @@ -0,0 +1,69 @@
diff --git a/heartbeat/apache b/heartbeat/apache
index 726e8fb..1369804 100755
--- a/heartbeat/apache
+++ b/heartbeat/apache
@@ -63,7 +63,8 @@ fi
LOCALHOST="http://localhost"
HTTPDOPTS="-DSTATUS"
DEFAULT_IBMCONFIG=/opt/IBMHTTPServer/conf/httpd.conf
-DEFAULT_NORMCONFIG="/etc/apache2/httpd.conf"
+DEFAULT_SUSECONFIG="/etc/apache2/httpd.conf"
+DEFAULT_RHELCONFIG="/etc/httpd/conf/httpd.conf"
#
# You can also set
# HTTPD
@@ -82,7 +83,8 @@ CMD=`basename $0`
# assumed if no config file is specified. If this command is
# invoked as *IBM*, then the default config file name is
# $DEFAULT_IBMCONFIG, otherwise the default config file
-# will be $DEFAULT_NORMCONFIG.
+# will be either $DEFAULT_RHELCONFIG or $DEFAULT_SUSECONFIG depending
+# on which is detected.
usage() {
cat <<-!
usage: $0 action
@@ -146,7 +148,7 @@ validate_default_config() {
# the relevant config is generated and valid. We're also taking
# this opportunity to enable mod_status if it's not present.
validate_default_suse_config() {
- if [ "$CONFIGFILE" = "$DEFAULT_NORMCONFIG" ] && \
+ if [ "$CONFIGFILE" = "$DEFAULT_SUSECONFIG" ] && \
grep -Eq '^Include[[:space:]]+/etc/apache2/sysconfig.d/include.conf' "$CONFIGFILE"
then
[ -x "/usr/sbin/a2enmod" ] && ocf_run -q /usr/sbin/a2enmod status
@@ -336,6 +338,16 @@ apache_monitor() {
esac
}
+detect_default_config()
+{   # Print the default config path: the SUSE one if that file exists, the RHEL one otherwise
+    if [ ! -f $DEFAULT_SUSECONFIG ]; then
+        echo $DEFAULT_RHELCONFIG
+    else
+        echo $DEFAULT_SUSECONFIG
+    fi
+}
+
+
apache_meta_data(){
cat <<END
<?xml version="1.0"?>
@@ -373,7 +385,7 @@ This file is parsed to provide defaults for various other
resource agent parameters.
</longdesc>
<shortdesc lang="en">configuration file path</shortdesc>
-<content type="string" default="/etc/apache2/httpd.conf" />
+<content type="string" default="$(detect_default_config)" />
</parameter>
<parameter name="httpd">
@@ -548,7 +560,7 @@ find_httpd_prog() {
if [ "X$OCF_RESKEY_httpd" != X -a "X$HTTPD" != X ]; then
ocf_log info "Using $HTTPD as HTTPD"
fi
- DefaultConfig=$DEFAULT_NORMCONFIG
+ DefaultConfig=$(detect_default_config)
;;
esac
}

25
SOURCES/bz1014641-VirtualDomain-syntax-error.patch

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
From a165410d95a7976e5249530b08d4dbeca7a7df27 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 2 Oct 2013 15:43:56 -0500
Subject: [PATCH] Fix: VirtualDomain: Fixes comparison of uninitialized variable during force stop

---
heartbeat/VirtualDomain | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index b7ac912..04b4390 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -295,7 +295,7 @@ VirtualDomain_Start() {
force_stop()
{
local out ex
- local status
+ local status=0
ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
--
1.7.1

47
SOURCES/bz1016140-start-predefined-domains.patch

@ -0,0 +1,47 @@ @@ -0,0 +1,47 @@
From f00dcaf19467e3d96d9790d386b860b53ca381f9 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 7 Oct 2013 19:37:43 -0500
Subject: [PATCH] High: VirtualDomain: Ensure it is possible to manage a libvirt domain defined outside of VirtualDomain

---
heartbeat/VirtualDomain | 17 +++++++++++++++++
1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index f7ed100..8d5e181 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -257,6 +257,15 @@ VirtualDomain_Status() {
return $rc
}
+verify_undefined() {
+    # Undefine $DOMAIN_NAME if "virsh list --all --name" already lists it; stops at the first match.
+    for dom in $(virsh --connect=${OCF_RESKEY_hypervisor} list --all --name); do
+        [ "$dom" = "$DOMAIN_NAME" ] || continue
+        virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
+        return
+    done
+}
+
VirtualDomain_Start() {
local snapshotimage
@@ -276,6 +285,14 @@ VirtualDomain_Start() {
return $OCF_ERR_GENERIC
fi
+ # Make sure domain is undefined before creating.
+ # The 'create' command guarantees that the domain will be
+ # undefined on shutdown, but requires the domain to be undefined.
+ # if a user defines the domain
+ # outside of this agent, we have to ensure that the domain
+ # is restored to an 'undefined' state before creating.
+ verify_undefined
+
virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
rc=$?
if [ $rc -ne 0 ]; then
--
1.7.1

29
SOURCES/bz1029061-virtualdomain-parse-error.patch

@ -0,0 +1,29 @@ @@ -0,0 +1,29 @@
From bd3b09252eedbeeab2635f82259714975702257e Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 20 Jan 2014 10:29:23 -0600
Subject: [PATCH] High: VirtualDomain: Fixes parsing domain name from xml file.

If the domain xml is not generated by virsh, it is possible
VirtualDomain will not be able to detect the domain's name
from the xml file. This is a result of the parsing command
not taking into account trailing whitespace characters.
---
heartbeat/VirtualDomain | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 3ca4f6d..11c8df9 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -565,7 +565,7 @@ if [ ! -r $OCF_RESKEY_config ]; then
fi
# Retrieve the domain name from the xml file.
-DOMAIN_NAME=`egrep '.*<name>.*</name>$' ${OCF_RESKEY_config} | sed -e 's/.*<name>\(.*\)<\/name>$/\1/' 2>/dev/null`
+DOMAIN_NAME=`egrep '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/' 2>/dev/null`
if [ -z $DOMAIN_NAME ]; then
ocf_log err "This is unexpected. Cannot determine domain name."
exit $OCF_ERR_GENERIC
--
1.8.4.2

15
SOURCES/bz1033016-nfsserver-missing-etab.patch

@ -0,0 +1,15 @@ @@ -0,0 +1,15 @@
diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index 2f62df4..bc326e5 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -307,6 +307,10 @@ prepare_directory ()
[ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak"
[ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown -R rpcuser.rpcuser "$fp/$STATD_DIR"
+ [ -f "$fp/etab" ] || touch "$fp/etab"
+ [ -f "$fp/xtab" ] || touch "$fp/xtab"
+ [ -f "$fp/rmtab" ] || touch "$fp/rmtab"
+
[ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp"
}

250
SOURCES/bz1058102-man-page-updates.patch

@ -0,0 +1,250 @@ @@ -0,0 +1,250 @@
From 3afd24d578006d68746f9ce35321d0ed34df92e2 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 25 Aug 2014 15:01:13 -0500
Subject: [PATCH 3/4] High: doc: Add pcs to man page example section

PCS and CRM SHELL now have their own example sections in the
resource-agent man pages. Below is an example of the CRM SHELL
and PCS examples for the IPaddr2 agent.

EXAMPLE CRM SHELL
The following is an example configuration for a IPaddr2 resource using the crm(8) shell:

primitive p_IPaddr2 ocf:heartbeat:IPaddr2 \
params \
ip=string \
op monitor depth="0" timeout="20s" interval="10s"

EXAMPLE PCS
The following is an example configuration for a IPaddr2 resource using pcs(8)

pcs resource create p_IPaddr2 ocf:heartbeat:IPaddr2 \
ip=string \
op monitor depth="0" timeout="20s" interval="10s"
---
doc/man/ra2refentry.xsl | 141 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 123 insertions(+), 18 deletions(-)

diff --git a/doc/man/ra2refentry.xsl b/doc/man/ra2refentry.xsl
index 41a60aa..ac148ef 100644
--- a/doc/man/ra2refentry.xsl
+++ b/doc/man/ra2refentry.xsl
@@ -50,7 +50,8 @@
<xsl:apply-templates select="$this" mode="description"/>
<xsl:apply-templates select="$this" mode="parameters"/>
<xsl:apply-templates select="$this" mode="actions"/>
- <xsl:apply-templates select="$this" mode="example"/>
+ <xsl:apply-templates select="$this" mode="examplecrmsh"/>
+ <xsl:apply-templates select="$this" mode="examplepcs"/>
<xsl:apply-templates select="$this" mode="seealso"/>
</xsl:template>
@@ -403,10 +404,10 @@
</xsl:template>
- <!-- Mode Example -->
- <xsl:template match="resource-agent" mode="example">
+ <!-- Mode Example CRM Shell-->
+ <xsl:template match="resource-agent" mode="examplecrmsh">
<refsection>
- <title>Example</title>
+ <title>Example CRM Shell</title>
<para>
<xsl:text>The following is an example configuration for a </xsl:text>
<xsl:value-of select="@name"/>
@@ -428,7 +429,7 @@
<xsl:text> \
params \
</xsl:text>
- <xsl:apply-templates select="parameters" mode="example"/>
+ <xsl:apply-templates select="parameters" mode="examplecrmsh"/>
</xsl:when>
<xsl:otherwise>
<xsl:value-of select="@name"/><xsl:text> \</xsl:text>
@@ -440,7 +441,7 @@
<xsl:text>
meta allow-migrate="true" \</xsl:text>
</xsl:if>
- <xsl:apply-templates select="actions" mode="example"/>
+ <xsl:apply-templates select="actions" mode="examplecrmsh"/>
</programlisting>
<!-- Insert a master/slave set definition if the resource
agent supports promotion and demotion -->
@@ -457,15 +458,15 @@
</refsection>
</xsl:template>
- <xsl:template match="parameters" mode="example">
- <xsl:apply-templates select="parameter[@required = 1]" mode="example"/>
+ <xsl:template match="parameters" mode="examplecrmsh">
+ <xsl:apply-templates select="parameter[@required = 1]" mode="examplecrmsh"/>
</xsl:template>
- <xsl:template match="parameter" mode="example">
+ <xsl:template match="parameter" mode="examplecrmsh">
<xsl:text> </xsl:text>
<xsl:value-of select="@name"/>
<xsl:text>=</xsl:text>
- <xsl:apply-templates select="content" mode="example"/>
+ <xsl:apply-templates select="content" mode="examplecrmsh"/>
<xsl:text> \</xsl:text>
<xsl:if test="following-sibling::parameter/@required = 1">
<xsl:text>
@@ -473,7 +474,7 @@
</xsl:if>
</xsl:template>
- <xsl:template match="content" mode="example">
+ <xsl:template match="content" mode="examplecrmsh">
<xsl:choose>
<xsl:when test="@default != ''">
<xsl:text>"</xsl:text>
@@ -486,23 +487,23 @@
</xsl:choose>
</xsl:template>
- <xsl:template match="actions" mode="example">
+ <xsl:template match="actions" mode="examplecrmsh">
<!-- In the CRM shell example, show only the monitor action -->
- <xsl:apply-templates select="action[@name = 'monitor']" mode="example"/>
+ <xsl:apply-templates select="action[@name = 'monitor']" mode="examplecrmsh"/>
</xsl:template>
- <xsl:template match="action" mode="example">
+ <xsl:template match="action" mode="examplecrmsh">
<xsl:text>
op </xsl:text>
<xsl:value-of select="@name"/>
<xsl:text> </xsl:text>
- <xsl:apply-templates select="@*" mode="example"/>
+ <xsl:apply-templates select="@*" mode="examplecrmsh"/>
<xsl:if test="following-sibling::action/@name = 'monitor'">
<xsl:text>\</xsl:text>
</xsl:if>
</xsl:template>
- <xsl:template match="action/@*" mode="example">
+ <xsl:template match="action/@*" mode="examplecrmsh">
<xsl:choose>
<xsl:when test="name() = 'name'"><!-- suppress --></xsl:when>
<xsl:otherwise>
@@ -517,9 +518,113 @@
</xsl:if>
</xsl:template>
- <xsl:template match="longdesc" mode="example"/>
+ <xsl:template match="longdesc" mode="examplecrmsh"/>
- <xsl:template match="shortdesc" mode="example"/>
+ <xsl:template match="shortdesc" mode="examplecrmsh"/>
+
+ <!-- Mode Example PCS-->
+ <xsl:template match="resource-agent" mode="examplepcs">
+ <refsection>
+ <title>Example PCS</title>
+ <para>
+ <xsl:text>The following is an example configuration for a </xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> resource using </xsl:text>
+ <citerefentry><refentrytitle>pcs</refentrytitle><manvolnum>8</manvolnum></citerefentry>
+ </para>
+ <programlisting>
+ <xsl:text>pcs resource create p_</xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="$class"/>
+ <xsl:text>:</xsl:text>
+ <xsl:value-of select="$provider"/>
+ <xsl:text>:</xsl:text>
+ <xsl:choose>
+ <xsl:when test="parameters/parameter[@required = 1]">
+ <xsl:value-of select="@name"/>
+ <xsl:text> \
+</xsl:text>
+ <xsl:apply-templates select="parameters" mode="examplepcs"/>
+ </xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="@name"/><xsl:text> \</xsl:text>
+ </xsl:otherwise>
+ </xsl:choose>
+ <xsl:apply-templates select="actions" mode="examplepcs"/>
+
+ <!-- Insert a master/slave set definition if the resource
+ agent supports promotion and demotion -->
+ <xsl:if test="actions/action/@name = 'promote' and actions/action/@name = 'demote'">
+ <xsl:text>--master</xsl:text>
+ </xsl:if>
+ </programlisting>
+
+ </refsection>
+ </xsl:template>
+
+ <xsl:template match="parameters" mode="examplepcs">
+ <xsl:apply-templates select="parameter[@required = 1]" mode="examplepcs"/>
+ </xsl:template>
+
+ <xsl:template match="parameter" mode="examplepcs">
+ <xsl:text> </xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text>=</xsl:text>
+ <xsl:apply-templates select="content" mode="examplepcs"/>
+ <xsl:text> \</xsl:text>
+ <xsl:if test="following-sibling::parameter/@required = 1">
+ <xsl:text>
+</xsl:text>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template match="content" mode="examplepcs">
+ <xsl:choose>
+ <xsl:when test="@default != ''">
+ <xsl:text>"</xsl:text>
+ <xsl:value-of select="@default"/>
+ <xsl:text>"</xsl:text>
+ </xsl:when>
+ <xsl:otherwise>
+ <replaceable><xsl:value-of select="@type"/></replaceable>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+
+ <xsl:template match="actions" mode="examplepcs">
+ <!-- In the PCS example, show only the monitor action -->
+ <xsl:apply-templates select="action[@name = 'monitor']" mode="examplepcs"/>
+ </xsl:template>
+
+ <xsl:template match="action" mode="examplepcs">
+ <xsl:text>
+ op </xsl:text>
+ <xsl:value-of select="@name"/>
+ <xsl:text> </xsl:text>
+ <xsl:apply-templates select="@*" mode="examplepcs"/>
+ <xsl:if test="following-sibling::action/@name = 'monitor'">
+ <xsl:text>\</xsl:text>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template match="action/@*" mode="examplepcs">
+ <xsl:choose>
+ <xsl:when test="name() = 'name'"><!-- suppress --></xsl:when>
+ <xsl:otherwise>
+ <xsl:value-of select="name()"/>
+ <xsl:text>="</xsl:text>
+ <xsl:value-of select="current()"/>
+ <xsl:text>" </xsl:text>
+ </xsl:otherwise>
+ </xsl:choose>
+ <xsl:if test="following-sibling::*">
+ <xsl:text> </xsl:text>
+ </xsl:if>
+ </xsl:template>
+
+ <xsl:template match="longdesc" mode="examplepcs"/>
+ <xsl:template match="shortdesc" mode="examplepcs"/>
<xsl:template match="resource-agent" mode="seealso">
<refsection>
--
1.8.4.2

154
SOURCES/bz1059988-db2-support.patch

@ -0,0 +1,154 @@ @@ -0,0 +1,154 @@
From c954c6470fe61c73396b45ca75310d146997f81b Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 29 Apr 2015 11:16:18 -0500
Subject: [PATCH 5/6] db2 support

---
heartbeat/db2 | 60 +++++++++++++++++++++++++++++++++++++++++++++--------------
1 file changed, 46 insertions(+), 14 deletions(-)

diff --git a/heartbeat/db2 b/heartbeat/db2
index f9db2f8..fed2d86 100755
--- a/heartbeat/db2
+++ b/heartbeat/db2
@@ -132,6 +132,9 @@ END
db2_validate() {
local db2home db2sql db2instance
+ # db2 uses korn shell
+ check_binary "ksh"
+
# check required instance vars
if [ -z "$OCF_RESKEY_instance" ]
then
@@ -208,6 +211,14 @@ db2_validate() {
return $OCF_SUCCESS
}
+master_score()
+{ # wrapper around crm_master that does nothing when the binary is not installed
+ if ! have_binary "crm_master"; then
+ return
+ fi
+
+ crm_master "$@" # forward every argument unchanged, without word re-splitting
+}
#
# Run the given command as db2 instance user
@@ -380,8 +391,17 @@ db2_check_config_compatibility() {
#
db2_start() {
local output start_cmd db
+ local start_opts="dbpartitionnum $db2node"
+
+ # If we detect that db partitions are not in use, and no
+ # partition is explicitly specified, activate without
+ # partition information. This allows db2 instances without
+ # partition support to be managed.
+ if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -a "$db2sql/db2nodes.cfg" ]; then
+ start_opts=""
+ fi
- if output=$(runasdb2 db2start dbpartitionnum $db2node)
+ if output=$(runasdb2 db2start $start_opts)
then
ocf_log info "DB2 instance $instance($db2node) started: $output"
else
@@ -473,10 +493,15 @@ db2_start() {
#
db2_stop_bg() {
local rc output
+ local stop_opts="dbpartitionnum $db2node"
rc=$OCF_SUCCESS
- if output=$(runasdb2 db2stop force dbpartitionnum $db2node)
+ if [ -z "$OCF_RESKEY_dbpartitionnum" ] && ! [ -a "$db2sql/db2nodes.cfg" ]; then
+ stop_opts=""
+ fi
+
+ if output=$(runasdb2 db2stop force $stop_opts)
then
ocf_log info "DB2 instance $instance($db2node) stopped: $output"
else
@@ -502,13 +527,13 @@ db2_stop() {
local stop_timeout grace_timeout stop_bg_pid i must_kill
# remove master score
- crm_master -D -l reboot
+ master_score -D -l reboot
# be very early here in order to avoid stale data
rm -f $STATE_FILE
- if ! db2_instance_status
- then
+ db2_instance_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
ocf_log info "DB2 instance $instance already stopped"
return $OCF_SUCCESS
fi
@@ -585,7 +610,12 @@ db2_instance_status() {
local pscount
pscount=$(runasdb2 $db2bin/db2nps $db2node | cut -c9- | grep ' db2[^ ]' | wc -l)
- test $pscount -ge 4
+ if [ $pscount -ge 4 ]; then
+ return $OCF_SUCCESS;
+ elif [ $pscount -ge 1 ]; then
+ return $OCF_ERR_GENERIC # some db2 processes, but fewer than 4: report an error
+ fi
+ return $OCF_NOT_RUNNING
}
#
@@ -626,12 +656,14 @@ db2_hadr_status() {
#
db2_monitor() {
local CMD output hadr db
+ local rc
- if ! db2_instance_status
- then
+ db2_instance_status
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
# instance is dead remove master score
- crm_master -D -l reboot
- exit $OCF_NOT_RUNNING
+ master_score -D -l reboot
+ exit $rc
fi
[ $db2node = 0 ] || return 0
@@ -667,22 +699,22 @@ db2_monitor() {
ocf_log err "DB2 message: $output"
# dead primary, remove master score
- crm_master -D -l reboot
+ master_score -D -l reboot
return $OCF_ERR_GENERIC
esac
fi
ocf_log debug "DB2 database $instance($db2node)/$db appears to be working"
- ocf_is_ms && crm_master -v 10000 -l reboot
+ ocf_is_ms && master_score -v 10000 -l reboot
;;
Standby/*Peer)
- crm_master -v 8000 -l reboot
+ master_score -v 8000 -l reboot
;;
Standby/*)
ocf_log warn "DB2 database $instance($db2node)/$db in status $hadr can never be promoted"
- crm_master -D -l reboot
+ master_score -D -l reboot
;;
*)
--
1.8.4.2

113
SOURCES/bz1060367-vm-monitor-wo-libvirtd.patch

@ -0,0 +1,113 @@ @@ -0,0 +1,113 @@
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 6f80981..b159c2c 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -65,10 +65,10 @@ for this virtual domain.
<longdesc lang="en">
Hypervisor URI to connect to. See the libvirt documentation for
details on supported URI formats. The default is system dependent.
-Determine your systems default uri by running 'virsh --quiet uri'
+Determine the system's default uri by running 'virsh --quiet uri'.
</longdesc>
<shortdesc lang="en">Hypervisor URI</shortdesc>
-<content type="string" />
+<content type="string"/>
</parameter>
<parameter name="force_stop" unique="0" required="0">
@@ -202,15 +202,44 @@ update_utilization() {
fi
}
+# attempt to check domain status outside of libvirt using the emulator process
+pid_status()
+{
+ local rc=$OCF_ERR_GENERIC
+ local emulator
+
+ emulator=$(basename $(egrep '[[:space:]]*<emulator>.*</emulator>[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*<emulator>\(.*\)<\/emulator>[[:space:]]*$/\1/'))
+
+ case "$emulator" in
+ qemu-kvm|qemu-system-*)
+ ps awx | grep -E "[q]emu-(kvm|system).*-name $DOMAIN_NAME " > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ # domain exists and is running
+ ocf_log debug "Virtual domain $DOMAIN_NAME is currently running."
+ rc=$OCF_SUCCESS
+ else
+ # domain pid does not exist on local machine
+ ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running."
+ rc=$OCF_NOT_RUNNING
+ fi
+ ;;
+ # This can be expanded to check for additional emulators
+ *)
+ ;;
+ esac
+
+ return $rc
+}
+
VirtualDomain_Status() {
local try=0
rc=$OCF_ERR_GENERIC
status="no state"
while [ "$status" = "no state" ]; do
try=$(($try + 1 ))
- status="`virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1`"
+ status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1|tr 'A-Z' 'a-z')
case "$status" in
- *"error:"*"Domain not found"*|"shut off")
+ *"error:"*"domain not found"*|"shut off")
# shut off: domain is defined, but not started, will not happen if
# domain is created but not defined
# Domain not found: domain is not defined and thus not started
@@ -226,7 +255,7 @@ VirtualDomain_Status() {
ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
rc=$OCF_SUCCESS
;;
- ""|*"Failed to reconnect to the hypervisor"*|"no state")
+ ""|*"failed to "*"connect to the hypervisor"*|"no state")
# Empty string may be returned when virsh does not
# receive a reply from libvirtd.
# "no state" may occur when the domain is currently
@@ -240,6 +269,14 @@ VirtualDomain_Status() {
# the domain if necessary.
ocf_log error "Virtual domain $DOMAIN_NAME has no state during stop operation, bailing out."
return $OCF_ERR_GENERIC;
+ elif [ "$__OCF_ACTION" = "monitor" ]; then
+ pid_status
+ rc=$?
+ if [ $rc -ne $OCF_ERR_GENERIC ]; then
+ # we've successfully determined the domains status outside of libvirt
+ return $rc
+ fi
+
else
# During all other actions, we just wait and try
# again, relying on the CRM/LRM to time us out if
@@ -312,11 +349,11 @@ force_stop()
local status=0
ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
- out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1)
+ out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z')
ex=$?
echo >&2 "$out"
case $ex$out in
- *"error:"*"domain is not running"*|*"error:"*"Domain not found"*)
+ *"error:"*"domain is not running"*|*"error:"*"domain not found"*)
: ;; # unexpected path to the intended outcome, all is well
[!0]*)
return $OCF_ERR_GENERIC ;;
@@ -544,8 +581,8 @@ case $1 in
;;
esac
-OCF_RESKEY_hypervisor_default="$(virsh --quiet uri)"
-: ${OCF_RESKEY_hypervisor=${OCF_RESKEY_hypervisor_default}}
+# Grab the virsh uri default, but only if hypervisor isn't set
+: ${OCF_RESKEY_hypervisor=$(virsh --quiet uri)}
# Set options to be passed to virsh:
VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"

178
SOURCES/bz1060367-vm-monitor-wo-libvirtd_2.patch

@ -0,0 +1,178 @@ @@ -0,0 +1,178 @@
diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index b159c2c..3a6b6a9 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -21,11 +21,13 @@ OCF_RESKEY_force_stop_default=0
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))
+OCF_RESKEY_CRM_meta_timeout_default=90000
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
: ${OCF_RESKEY_migrateport=${OCF_RESKEY_migrateport_default}}
+: ${OCF_RESKEY_CRM_meta_timeout=${OCF_RESKEY_CRM_meta_timeout_default}}
#######################################################################
## I'd very much suggest to make this RA use bash,
@@ -165,8 +167,8 @@ Restore state on start/stop
</parameters>
<actions>
-<action name="start" timeout="90" />
-<action name="stop" timeout="90" />
+<action name="start" timeout="$OCF_RESKEY_CRM_meta_timeout_default" />
+<action name="stop" timeout="$OCF_RESKEY_CRM_meta_timeout_default" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="migrate_from" timeout="60" />
@@ -183,9 +185,17 @@ set_util_attr() {
local cval outp
cval=$(crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>/dev/null)
+ if [ $? -ne 0 ] && [ -z "$cval" ]; then
+ crm_resource -Q -r $OCF_RESOURCE_INSTANCE -z -g $attr 2>&1 | grep -e "not connected" > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ ocf_log debug "Unable to set utilization attribute, cib is not available"
+ return
+ fi
+ fi
+
if [ "$cval" != "$val" ]; then
- outp=`crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1` ||
- ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp"
+ outp=$(crm_resource -r $OCF_RESOURCE_INSTANCE -z -p $attr -v $val 2>&1) ||
+ ocf_log warn "crm_resource failed to set utilization attribute $attr: $outp"
fi
}
@@ -193,22 +203,49 @@ update_utilization() {
local dom_cpu dom_mem
if ocf_is_true "$OCF_RESKEY_autoset_utilization_cpu"; then
- dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} | awk '/CPU\(s\)/{print $2}')
+ dom_cpu=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/CPU\(s\)/{print $2}')
test -n "$dom_cpu" && set_util_attr cpu $dom_cpu
fi
if ocf_is_true "$OCF_RESKEY_autoset_utilization_hv_memory"; then
- dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} | awk '/Max memory/{printf("%d", $3/1024)}')
+ dom_mem=$(LANG=C virsh $VIRSH_OPTIONS dominfo ${DOMAIN_NAME} 2>/dev/null | awk '/Max memory/{printf("%d", $3/1024)}')
test -n "$dom_mem" && set_util_attr hv_memory "$dom_mem"
fi
}
+get_emulator()
+{
+ local emulator=""
+
+ emulator=$(virsh $VIRSH_OPTIONS dumpxml $DOMAIN_NAME 2>/dev/null | sed -n -e 's/[[:space:]]*<emulator>\(.*\)<\/emulator>[[:space:]]*$/\1/p')
+ if [ -z "$emulator" ] && [ -a "$EMULATOR_STATE" ]; then
+ emulator=$(cat $EMULATOR_STATE)
+ fi
+ if [ -z "$emulator" ]; then
+ emulator=$(cat ${OCF_RESKEY_config} | sed -n -e 's/[[:space:]]*<emulator>\(.*\)<\/emulator>[[:space:]]*$/\1/p')
+ fi
+
+ if [ -n "$emulator" ]; then
+ basename $emulator
+ else
+ ocf_log error "Unable to determine emulator for $DOMAIN_NAME"
+ fi
+}
+
+update_emulator_cache()
+{
+ local emulator
+
+ emulator=$(get_emulator)
+ if [ -n "$emulator" ]; then
+ echo $emulator > $EMULATOR_STATE
+ fi
+}
+
# attempt to check domain status outside of libvirt using the emulator process
pid_status()
{
local rc=$OCF_ERR_GENERIC
- local emulator
-
- emulator=$(basename $(egrep '[[:space:]]*<emulator>.*</emulator>[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*<emulator>\(.*\)<\/emulator>[[:space:]]*$/\1/'))
+ local emulator=$(get_emulator)
case "$emulator" in
qemu-kvm|qemu-system-*)
@@ -237,13 +274,13 @@ VirtualDomain_Status() {
status="no state"
while [ "$status" = "no state" ]; do
try=$(($try + 1 ))
- status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1|tr 'A-Z' 'a-z')
+ status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
case "$status" in
*"error:"*"domain not found"*|"shut off")
# shut off: domain is defined, but not started, will not happen if
# domain is created but not defined
# Domain not found: domain is not defined and thus not started
- ocf_log debug "Virtual domain $DOMAIN_NAME is currently $status."
+ ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)"
rc=$OCF_NOT_RUNNING
;;
running|paused|idle|blocked|"in shutdown")
@@ -282,12 +319,13 @@ VirtualDomain_Status() {
# again, relying on the CRM/LRM to time us out if
# this takes too long.
ocf_log info "Virtual domain $DOMAIN_NAME currently has no state, retrying."
- sleep 1
fi
+ sleep 1
;;
*)
# any other output is unexpected.
ocf_log error "Virtual domain $DOMAIN_NAME has unknown status \"$status\"!"
+ sleep 1
;;
esac
done
@@ -295,7 +333,7 @@ VirtualDomain_Status() {
}
verify_undefined() {
- for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name`; do
+ for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null`; do
if [ "$dom" = "$DOMAIN_NAME" ]; then
virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
return
@@ -340,6 +378,7 @@ VirtualDomain_Start() {
while ! VirtualDomain_Monitor; do
sleep 1
done
+
return $OCF_SUCCESS
}
@@ -530,6 +569,7 @@ VirtualDomain_Monitor() {
done
fi
+ update_emulator_cache
update_utilization
return ${rc}
@@ -582,7 +622,7 @@ case $1 in
esac
# Grab the virsh uri default, but only if hypervisor isn't set
-: ${OCF_RESKEY_hypervisor=$(virsh --quiet uri)}
+: ${OCF_RESKEY_hypervisor=$(virsh --quiet uri 2>/dev/null)}
# Set options to be passed to virsh:
VIRSH_OPTIONS="--connect=${OCF_RESKEY_hypervisor} --quiet"
@@ -608,6 +648,8 @@ if [ -z $DOMAIN_NAME ]; then
exit $OCF_ERR_GENERIC
fi
+EMULATOR_STATE="${HA_RSCTMP}/VirtualDomain-${DOMAIN_NAME}-emu.state"
+
case $1 in
start)
VirtualDomain_Start

441
SOURCES/bz1064512-clvmd-agent.patch

@ -0,0 +1,441 @@ @@ -0,0 +1,441 @@
From 61dd0f9ca20b0f252996b6f610b4473ba83ca97a Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 12 Feb 2014 12:36:21 -0500
Subject: [PATCH] High: clvm: Introducing clvmd resource agent

---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/clvm | 396 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 398 insertions(+)
create mode 100755 heartbeat/clvm

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 3bf569a..49b5c58 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -94,6 +94,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_anything.7 \
ocf_heartbeat_apache.7 \
ocf_heartbeat_asterisk.7 \
+ ocf_heartbeat_clvm.7 \
ocf_heartbeat_conntrackd.7 \
ocf_heartbeat_db2.7 \
ocf_heartbeat_dhcpd.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index bc95f89..2c3056d 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -61,6 +61,7 @@ ocf_SCRIPTS = ClusterMon \
asterisk \
nginx \
AudibleAlarm \
+ clvm \
conntrackd \
db2 \
dhcpd \
diff --git a/heartbeat/clvm b/heartbeat/clvm
new file mode 100755
index 0000000..3e7701d
--- /dev/null
+++ b/heartbeat/clvm
@@ -0,0 +1,396 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+. ${OCF_FUNCTIONS_DIR}/ocf-directories
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="clvm" version="0.9">
+<version>1.0</version>
+
+<longdesc lang="en">
+This agent manages the clvmd daemon.
+</longdesc>
+<shortdesc lang="en">clvmd</shortdesc>
+
+<parameters>
+<parameter name="with_cmirrord" unique="0" required="0">
+<longdesc lang="en">
+Start with cmirrord (cluster mirror log daemon).
+</longdesc>
+<shortdesc lang="en">activate cmirrord</shortdesc>
+<content type="boolean" default="false" />
+</parameter>
+
+<parameter name="daemon_options" unique="0">
+<longdesc lang="en">
+Options to clvmd. Refer to clvmd.8 for detailed descriptions.
+</longdesc>
+<shortdesc lang="en">Daemon Options</shortdesc>
+<content type="string" default="-d0"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="90" />
+<action name="stop" timeout="90" />
+<action name="monitor" timeout="90" interval="30" depth="0" />
+<action name="reload" timeout="90" />
+<action name="meta-data" timeout="10" />
+<action name="validate-all" timeout="20" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+: ${OCF_RESKEY_daemon_options:="-d0"}
+
+sbindir=$HA_SBIN_DIR
+if [ -z $sbindir ]; then
+ sbindir=/usr/sbin
+fi
+DAEMON="clvmd"
+CMIRROR="cmirrord"
+DAEMON_PATH="${sbindir}/clvmd"
+CMIRROR_PATH="${sbindir}/cmirrord"
+LOCK_FILE="/var/lock/subsys/$DAEMON"
+LVM_VGCHANGE=${sbindir}/vgchange
+LVM_VGDISPLAY=${sbindir}/vgdisplay
+LVM_VGSCAN=${sbindir}/vgscan
+
+# Leaving this in for legacy. We do not want to advertise that
+# the ability to set options via sysconfig exists; we want
+# to expand the OCF style options as necessary instead.
+[ -f /etc/sysconfig/cluster ] && . /etc/sysconfig/cluster
+[ -f /etc/sysconfig/$DAEMON ] && . /etc/sysconfig/$DAEMON
+
+CLVMD_TIMEOUT="90"
+if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+ CLVMD_TIMEOUT=$(($OCF_RESKEY_CRM_meta_timeout/1000))
+fi
+
+clvmd_usage()
+{
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+clvmd_validate()
+{
+ # check_binary will exit with OCF_ERR_INSTALLED
+ # when binary is missing
+ check_binary "pgrep"
+ check_binary $DAEMON_PATH
+ if ocf_is_true $OCF_RESKEY_with_cmirrord; then
+ check_binary $CMIRROR_PATH
+ fi
+
+ if [ "$__OCF_ACTION" != "monitor" ]; then
+ check_binary "killall"
+ check_binary $LVM_VGCHANGE
+ check_binary $LVM_VGDISPLAY
+ check_binary $LVM_VGSCAN
+ fi
+
+ # Future validation checks here.
+ return $OCF_SUCCESS
+}
+
+check_process()
+{
+ local binary=$1
+ local pidfile="${HA_RSCTMP}/${binary}-${OCF_RESOURCE_INSTANCE}.pid"
+ local pid
+
+ ocf_log debug "Checking status for ${binary}."
+ if [ -e "$pidfile" ]; then
+ cat /proc/$(cat $pidfile)/cmdline 2>/dev/null | grep -a "${binary}" > /dev/null 2>&1
+ if [ $? -eq 0 ];then
+ # shortcut without requiring pgrep to search through all procs
+ return $OCF_SUCCESS
+ fi
+ fi
+
+ pid=$(pgrep ${binary})
+ case $? in
+ 0)
+ ocf_log info "PID file (pid:${pid} at $pidfile) created for ${binary}."
+ echo "$pid" > $pidfile
+ return $OCF_SUCCESS;;
+ 1)
+ rm -f "$pidfile" > /dev/null 2>&1
+ ocf_log info "$binary is not running"
+ return $OCF_NOT_RUNNING;;
+ *)
+ rm -f "$pidfile" > /dev/null 2>&1
+ ocf_log err "Error encountered detecting pid status of $binary"
+ return $OCF_ERR_GENERIC;;
+ esac
+}
+
+clvmd_status()
+{ # monitor: returns check_process' status for clvmd (and cmirrord, when enabled)
+ local rc mirror_rc
+ clvmd_validate
+ rc=$? # capture now: the ocf_log call below would overwrite $?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Unable to monitor, Environment validation failed."
+ return $rc
+ fi
+
+ check_process $DAEMON
+ rc=$?
+ mirror_rc=$rc
+
+ if ocf_is_true $OCF_RESKEY_with_cmirrord; then
+ check_process $CMIRROR
+ mirror_rc=$?
+ fi
+
+ # If these ever don't match, return error to force recovery
+ if [ $mirror_rc -ne $rc ]; then
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $rc
+}
+
+# NOTE: replace this with vgs, once display filter per attr is implemented.
+clustered_vgs() {
+ ${LVM_VGDISPLAY} 2>/dev/null | awk 'BEGIN {RS="VG Name"} {if (/Clustered/) print $1;}'
+}
+
+wait_for_process()
+{
+ local binary=$1
+ local timeout=$2
+ local count=0
+
+ ocf_log info "Waiting for $binary to exit"
+ usleep 500000
+ while [ $count -le $timeout ]; do
+ check_process $binary
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ ocf_log info "$binary terminated"
+ return $OCF_SUCCESS
+ fi
+ sleep 1
+ count=$((count+1))
+ done
+
+ return $OCF_ERR_GENERIC
+}
+
+time_left()
+{ # report, as the exit status, the seconds left until $1 (never less than $2)
+ local end=$1
+ local default=$2
+ local result=$(( $end - $SECONDS ))
+
+ if [ $result -lt $default ]; then
+ result=$default
+ fi
+ # an exit status wraps modulo 256, so cap it rather than let 256 turn into 0
+ [ $result -gt 255 ] && result=255
+ return $result
+}
+
+clvmd_stop()
+{ # stop: deactivate clustered VGs, then terminate clvmd and (when enabled) cmirrord
+ local LVM_VGS
+ local rc=$OCF_SUCCESS
+ local end=$(( $SECONDS + $CLVMD_TIMEOUT ))
+
+ clvmd_status
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ return $OCF_SUCCESS
+ fi
+
+ check_process $DAEMON
+ if [ $? -ne $OCF_NOT_RUNNING ]; then
+ LVM_VGS="$(clustered_vgs)"
+
+ if [ -n "$LVM_VGS" ]; then
+ ocf_log info "Deactivating clustered VG(s):"
+ ocf_run ${LVM_VGCHANGE} -anl $LVM_VGS
+ if [ $? -ne 0 ]; then
+ ocf_log error "Failed to deactivate volume groups, cluster vglist = $LVM_VGS"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ ocf_log info "Signaling $DAEMON to exit"
+ killall -TERM $DAEMON
+ if [ $? != 0 ]; then
+ ocf_log error "Failed to signal -TERM to $DAEMON"
+ return $OCF_ERR_GENERIC
+ fi
+
+ wait_for_process $DAEMON $CLVMD_TIMEOUT
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "$DAEMON failed to exit"
+ return $rc
+ fi
+
+ rm -f $LOCK_FILE
+ fi
+
+ check_process $CMIRROR
+ if [ $? -ne $OCF_NOT_RUNNING ] && ocf_is_true $OCF_RESKEY_with_cmirrord; then
+ local timeout
+ ocf_log info "Signaling $CMIRROR to exit"
+ killall -INT $CMIRROR
+
+ time_left $end 10; timeout=$?
+ wait_for_process $CMIRROR $timeout
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ killall -KILL $CMIRROR
+ time_left $end 10; timeout=$?
+ wait_for_process $CMIRROR $timeout # time_left reports via exit status, not stdout
+ rc=$?
+ fi
+ fi
+
+ return $rc
+}
+
+start_process()
+{ # launch $1 with the remaining arguments as its options, unless check_process finds it running
+ local binary_path=$1; shift
+ local opts="$*" # join every remaining word so a multi-word option list is not truncated
+ local rc
+ check_process "$(basename $binary_path)"
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_log info "Starting $binary_path: "
+ ocf_run $binary_path $opts
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log error "Failed to launch $binary_path, exit code $rc"
+ exit $OCF_ERR_GENERIC
+ fi
+ fi
+
+ return $OCF_SUCCESS
+}
+
+clvmd_activate_all()
+{
+ # Activate all volume groups by leaving the
+ # "volume group name" parameter empty
+ ocf_run ${LVM_VGCHANGE} -aay
+ if [ $? -ne 0 ]; then
+ ocf_log info "Failed to activate VG(s):"
+ clvmd_stop
+ return $OCF_ERR_GENERIC
+ fi
+ return $OCF_SUCCESS
+}
+
+clvmd_start()
+{ # start: launch cmirrord (when enabled) and clvmd, rescan VGs, activate them, report status
+ local rc=0
+ local CLVMDOPTS="-T${CLVMD_TIMEOUT} $OCF_RESKEY_daemon_options"
+
+ clvmd_validate || rc=$? # keep the failure code: ocf_log below would overwrite $?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ ocf_log error "Unable to start, Environment validation failed."
+ return $rc
+ fi
+
+ clvmd_status
+ if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_log debug "$DAEMON already started"
+ clvmd_activate_all
+ return $?;
+ fi
+
+ # if either of these fail, script will exit OCF_ERR_GENERIC
+ if ocf_is_true $OCF_RESKEY_with_cmirrord; then
+ start_process $CMIRROR_PATH
+ fi
+ start_process $DAEMON_PATH "$CLVMDOPTS"
+
+ # Refresh local cache.
+ #
+ # It's possible that new PVs were added to this, or other VGs
+ # while this node was down. So we run vgscan here to avoid
+ # any potential "Missing UUID" messages with subsequent
+ # LVM commands.
+
+ # The following step would be better and more informative to the user:
+ # 'action "Refreshing VG(s) local cache:" ${LVM_VGSCAN}'
+ # but it could show warnings such as:
+ # 'clvmd not running on node x-y-z Unable to obtain global lock.'
+ # and the action would be shown as FAILED when in reality it didn't.
+ # Ideally vgscan should have a startup mode that would not print
+ # unnecessary warnings.
+
+ ${LVM_VGSCAN} > /dev/null 2>&1
+ touch $LOCK_FILE
+
+ clvmd_activate_all
+
+ clvmd_status
+ return $?
+}
+
+case $__OCF_ACTION in
+ meta-data) meta_data
+ exit $OCF_SUCCESS;;
+
+ start) clvmd_start;;
+
+ stop) clvmd_stop;;
+
+ monitor) clvmd_status;;
+
+ validate-all) clvmd_validate;;
+
+ usage|help) clvmd_usage;;
+
+ *) clvmd_usage
+ exit $OCF_ERR_UNIMPLEMENTED;;
+esac
+
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
+
--
1.8.4.2

30
SOURCES/bz1077888-CTDB-fix-logging.patch

@ -0,0 +1,30 @@ @@ -0,0 +1,30 @@
diff --git a/heartbeat/CTDB b/heartbeat/CTDB
index b23ffae..3e36dd0 100755
--- a/heartbeat/CTDB
+++ b/heartbeat/CTDB
@@ -572,10 +572,22 @@ ctdb_start() {
# Use logfile by default, or syslog if asked for
- local log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
- if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
- log_option="--syslog"
- elif [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
+ # --logging supported from v4.3.0 and --logfile / --syslog support
+ # has been removed from newer versions
+ version=$(ctdb version | awk '{print $NF}')
+ ocf_version_cmp "$version" "4.2.14"
+ if [ "$?" -eq "2" ]; then
+ log_option="--logging=file:$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--logging=syslog"
+ fi
+ else
+ log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--syslog"
+ fi
+ fi
+ if [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
# ensure the logfile's directory exists, otherwise ctdb will fail to start
mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile)
fi

159
SOURCES/bz1077888-ctdb-updates.patch

@ -0,0 +1,159 @@ @@ -0,0 +1,159 @@
From f681e6798d3a5ead5a0e077d6e73343b266ef56f Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 29 Apr 2015 11:18:25 -0500
Subject: [PATCH 6/6] CTDB fixes

---
heartbeat/CTDB | 61 +++++++++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 48 insertions(+), 13 deletions(-)

diff --git a/heartbeat/CTDB b/heartbeat/CTDB
index d1e8d03..1cf9d8c 100755
--- a/heartbeat/CTDB
+++ b/heartbeat/CTDB
@@ -72,6 +72,19 @@
#######################################################################
# Default parameter values:
+# Some distros' ctdb packages store the persistent db in /var/lib/ctdb,
+# others store it in /var/ctdb. This attempts to detect the correct default
+# directory.
+var_prefix="/var/lib/ctdb"
+if [ ! -d "$var_prefix" ] && [ -d "/var/ctdb" ]; then
+ var_prefix="/var/ctdb"
+fi
+
+run_prefix="/run" # runtime directory prefix; fall back to /var/run when /run is absent
+if [ ! -d "$run_prefix" ] && [ -d "/var/run" ]; then
+ run_prefix="/var/run"
+fi
+
: ${OCF_RESKEY_ctdb_manages_samba:=no}
: ${OCF_RESKEY_ctdb_manages_winbind:=no}
: ${OCF_RESKEY_ctdb_service_smb:=""}
@@ -84,9 +97,10 @@
: ${OCF_RESKEY_ctdb_config_dir:=/etc/ctdb}
: ${OCF_RESKEY_ctdb_binary:=/usr/bin/ctdb}
: ${OCF_RESKEY_ctdbd_binary:=/usr/sbin/ctdbd}
-: ${OCF_RESKEY_ctdb_socket:=/var/lib/ctdb/ctdb.socket}
-: ${OCF_RESKEY_ctdb_dbdir:=/var/lib/ctdb}
+: ${OCF_RESKEY_ctdb_dbdir:=${var_prefix}}
: ${OCF_RESKEY_ctdb_logfile:=/var/log/ctdb/log.ctdb}
+: ${OCF_RESKEY_ctdb_rundir:=${run_prefix}/ctdb}
+: ${OCF_RESKEY_ctdb_socket:=${OCF_RESKEY_ctdb_rundir}/ctdbd.socket}
: ${OCF_RESKEY_ctdb_debuglevel:=2}
: ${OCF_RESKEY_smb_conf:=/etc/samba/smb.conf}
@@ -104,12 +118,13 @@ meta_data() {
<longdesc lang="en">
This resource agent manages CTDB, allowing one to use Clustered Samba in a
-Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2) on
+Linux-HA/Pacemaker cluster. You need a shared filesystem (e.g. OCFS2 or GFS2) on
which the CTDB lock will be stored. Create /etc/ctdb/nodes containing a list
of private IP addresses of each node in the cluster, then configure this RA
-as a clone. To have CTDB manage Samba, set ctdb_manages_samba="yes".
-Note that this option will be deprecated in future, in favour of configuring
-a separate Samba resource.
+as a clone. This agent expects the samba and winbind resources
+to be managed outside of CTDB's control as a separate set of resources controlled
+by the cluster manager. The optional support for enabling CTDB management of these
+daemons will be deprecated.
For more information see http://linux-ha.org/wiki/CTDB_(resource_agent)
</longdesc>
@@ -235,7 +250,7 @@ Full path to the domain socket that ctdbd will create, used for
local clients to attach and communicate with the ctdb daemon.
</longdesc>
<shortdesc lang="en">CTDB socket location</shortdesc>
-<content type="string" default="/var/lib/ctdb/ctdb.socket" />
+<content type="string" default="${OCF_RESKEY_ctdb_socket}" />
</parameter>
<parameter name="ctdb_dbdir" unique="1" required="0">
@@ -244,7 +259,7 @@ The directory to put the local CTDB database files in.
Persistent database files will be put in ctdb_dbdir/persistent.
</longdesc>
<shortdesc lang="en">CTDB database directory</shortdesc>
-<content type="string" default="/var/lib/ctdb" />
+<content type="string" default="${OCF_RESKEY_ctdb_dbdir}" />
</parameter>
<parameter name="ctdb_logfile" unique="0" required="0">
@@ -256,6 +271,15 @@ value "syslog".
<content type="string" default="/var/log/ctdb/log.ctdb" />
</parameter>
+<parameter name="ctdb_rundir" unique="0" required="0">
+<longdesc lang="en">
+Full path to ctdb runtime directory, used for storage of socket
+lock state.
+</longdesc>
+<shortdesc lang="en">CTDB runtime directory location</shortdesc>
+<content type="string" default="${OCF_RESKEY_ctdb_rundir}" />
+</parameter>
+
<parameter name="ctdb_debuglevel" unique="0" required="0">
<longdesc lang="en">
What debug level to run at (0-10). Higher means more verbose.
@@ -538,7 +562,16 @@ ctdb_start() {
# Use logfile by default, or syslog if asked for
local log_option="--logfile=$OCF_RESKEY_ctdb_logfile"
- [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ] && log_option="--syslog"
+ if [ "$OCF_RESKEY_ctdb_logfile" = "syslog" ]; then
+ log_option="--syslog"
+ elif [ ! -d "$(dirname $OCF_RESKEY_ctdb_logfile)" ]; then
+ # ensure the logfile's directory exists, otherwise ctdb will fail to start
+ mkdir -p $(dirname $OCF_RESKEY_ctdb_logfile)
+ fi
+
+ # ensure ctdb's rundir exists, otherwise it will fail to start
+ mkdir -p $OCF_RESKEY_ctdb_rundir 2>/dev/null
+
# public addresses file (should not be present, but need to set for correctness if it is)
local pub_addr_option=""
[ -f "${OCF_RESKEY_ctdb_config_dir}/public_addresses" ] && \
@@ -562,7 +595,7 @@ ctdb_start() {
if [ $? -ne 0 ]; then
# cleanup smb.conf
cleanup_smb_conf
-
+
ocf_exit_reason "Failed to execute $OCF_RESKEY_ctdbd_binary."
return $OCF_ERR_GENERIC
else
@@ -589,10 +622,10 @@ ctdb_start() {
fi
done
fi
-
+
# ctdbd will (or can) actually still be running at this point, so kill it
ctdb_stop
-
+
ocf_exit_reason "Timeout waiting for CTDB to stabilize"
return $OCF_ERR_GENERIC
}
@@ -601,7 +634,7 @@ ctdb_start() {
ctdb_stop() {
# Do nothing if already stopped
pkill -0 -f $OCF_RESKEY_ctdbd_binary || return $OCF_SUCCESS
-
+
# Tell it to die nicely
invoke_ctdb shutdown >/dev/null 2>&1
rv=$?
@@ -645,6 +678,8 @@ ctdb_monitor() {
if [ $? -ne 0 ]; then
if echo $status | grep -qs 'Connection refused'; then
return $OCF_NOT_RUNNING
+ elif echo $status | grep -qs 'No such file or directory'; then
+ return $OCF_NOT_RUNNING
else
ocf_exit_reason "CTDB status call failed: $status"
return $OCF_ERR_GENERIC
--
1.8.4.2

54
SOURCES/bz1083041-virtual-domain-monitor-lxc-fix.patch

@ -0,0 +1,54 @@ @@ -0,0 +1,54 @@
From 4e2576c0b339537790e253c11d9dfcf99b7b114d Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Tue, 1 Apr 2014 17:28:39 -0400
Subject: [PATCH] Low: VirtualDomain: Allow monitoring of lxc domains without
libvirtd

---
heartbeat/VirtualDomain | 20 ++++++++++++++------
1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index b0cdd5f..692a2ef 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -249,15 +249,17 @@ pid_status()
case "$emulator" in
qemu-kvm|qemu-system-*)
+ rc=$OCF_NOT_RUNNING
ps awx | grep -E "[q]emu-(kvm|system).*-name $DOMAIN_NAME " > /dev/null 2>&1
if [ $? -eq 0 ]; then
- # domain exists and is running
- ocf_log debug "Virtual domain $DOMAIN_NAME is currently running."
rc=$OCF_SUCCESS
- else
- # domain pid does not exist on local machine
- ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running."
- rc=$OCF_NOT_RUNNING
+ fi
+ ;;
+ libvirt_lxc)
+ rc=$OCF_NOT_RUNNING
+ ps awx | grep -E "[l]ibvirt_lxc.*-name $DOMAIN_NAME " > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ rc=$OCF_SUCCESS
fi
;;
# This can be expanded to check for additional emulators
@@ -265,6 +267,12 @@ pid_status()
;;
esac
+ if [ $rc -eq $OCF_SUCCESS ]; then
+ ocf_log debug "Virtual domain $DOMAIN_NAME is currently running."
+ elif [ $rc -eq $OCF_NOT_RUNNING ]; then
+ ocf_log debug "Virtual domain $DOMAIN_NAME is currently not running."
+ fi
+
return $rc
}
--
1.8.4.2

116
SOURCES/bz1083231-fs-wait-module-load.patch

@ -0,0 +1,116 @@ @@ -0,0 +1,116 @@
From d0ecd287511e49891245c68cd323e8f232aa033b Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 6 Aug 2014 14:05:18 -0400
Subject: [PATCH] High: Filesystem: when loading kernel modules wait for
filesystem to initialize

When the Filesystem agent is managing a filesystem type that
is not present in /proc/filesystems, the agent attempts to
load the kernel module for that filesystem.

This patch improves on that logic by
1. verifying that modprobe worked
2. giving the module a brief period of time to initialize.

Item 2 is important because there is a brief period
of time between when modprobe returns loading the gfs2
module, and when gfs2 will show up in the /proc/filesystems
list. Without retrying the search of the /proc/filesystems
file, a gfs2 filesystem may fail to start correctly because
it will look like the filesystem isn't supported.
---
heartbeat/Filesystem | 71 +++++++++++++++++++++++++++++++++++++++-------------
1 file changed, 53 insertions(+), 18 deletions(-)

diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 9209818..9892b39 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -450,6 +450,58 @@ is_fsck_needed() {
esac
}
+# Return $OCF_SUCCESS if $FSTYPE is usable (after a modprobe and up to 5 retries if needed), else $OCF_ERR_INSTALLED.
+fstype_supported()
+{
+	local support="$FSTYPE"
+
+	if [ "X${HOSTOS}" = "XOpenBSD" ];then
+		# skip checking /proc/filesystems for obsd
+		return $OCF_SUCCESS
+	fi
+
+	if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then
+		: No FSTYPE specified, rely on the system has the right file-system support already
+		return $OCF_SUCCESS
+	fi
+
+	# support fuse-filesystems (e.g. GlusterFS)
+	case $FSTYPE in
+		glusterfs) support="fuse";;
+	esac
+
+	grep -w "$support"'$' /proc/filesystems >/dev/null
+	if [ $? -eq 0 ]; then
+		# found the fs type
+		return $OCF_SUCCESS
+	fi
+
+	# if here, we should attempt to load the module and then
+	# check if the filesystem support exists again.
+	$MODPROBE $support >/dev/null
+	if [ $? -ne 0 ]; then
+		ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems and failed to load kernel module"
+		return $OCF_ERR_INSTALLED
+	fi
+
+	# It is possible for the module to load and not be completely initialized
+	# before we check /proc/filesystems again. Give this a few tries before
+	# giving up entirely.
+	for try in $(seq 5); do
+		grep -w "$support"'$' /proc/filesystems >/dev/null
+		if [ $? -eq 0 ] ; then
+			# yes. found the filesystem after doing the modprobe
+			return $OCF_SUCCESS
+		fi
+		ocf_log debug "Unable to find support for $FSTYPE in /proc/filesystems after modprobe, trying again"
+		sleep 1
+	done
+
+	ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems"
+	return $OCF_ERR_INSTALLED
+}
+
+
#
# START: Start up the filesystem
#
@@ -472,24 +524,7 @@ Filesystem_start()
return $OCF_SUCCESS
fi
- if [ "X${HOSTOS}" != "XOpenBSD" ];then
- if [ -z "$FSTYPE" -o "$FSTYPE" = none ]; then
- : No FSTYPE specified, rely on the system has the right file-system support already
- else
- local support="$FSTYPE"
- # support fuse-filesystems (e.g. GlusterFS)
- case $FSTYPE in
- glusterfs) support="fuse";;
- esac
- grep -w "$support"'$' /proc/filesystems >/dev/null ||
- $MODPROBE $support >/dev/null
- grep -w "$support"'$' /proc/filesystems >/dev/null
- if [ $? -ne 0 ] ; then
- ocf_log err "Couldn't find filesystem $FSTYPE in /proc/filesystems"
- return $OCF_ERR_INSTALLED
- fi
- fi
- fi
+ fstype_supported || exit $OCF_ERR_INSTALLED
# Check the filesystem & auto repair.
# NOTE: Some filesystem types don't need this step... Please modify
--
1.8.4.2

25
SOURCES/bz1091101-nfs-error-msg-fix.patch

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
From 5475e17858d143747e69b1bf9e8d230e74642561 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 11 Jul 2014 11:22:20 -0400
Subject: [PATCH] Low: nfsnotify: fixes error message output

---
heartbeat/nfsnotify | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/nfsnotify b/heartbeat/nfsnotify
index 2e242de..2d0bbfc 100755
--- a/heartbeat/nfsnotify
+++ b/heartbeat/nfsnotify
@@ -269,7 +269,7 @@ v3notify_start()
ocf_log info "sending notifications with source address $ip"
$SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -v $ip -P "$cur_statd"
if [ $? -ne 0 ]; then
- ocf_log err "sm-notify with source host set to, $source_host, failed. view syslog for more information"
+ ocf_log err "sm-notify with source host set to, $ip, failed. view syslog for more information"
return $OCF_ERR_GENERIC
fi
done
--
1.8.4.2

27
SOURCES/bz1091101-nfs-rquotad-port-option-fix.patch

@ -0,0 +1,27 @@ @@ -0,0 +1,27 @@
From 8042f21aaefd0616df4b0ef1df2f8e3f301786c4 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 16 Jul 2014 11:18:56 -0400
Subject: [PATCH] Low: nfsserver: only set rquotad options when port is set

---
heartbeat/nfsserver | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index e44da1c..ac921f3 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -428,7 +428,9 @@ set_env_args()
set_arg "LOCKD_TCPPORT" "$OCF_RESKEY_lockd_tcp_port" "$tmpconfig" "true"
# rquotad_port
- set_arg "RPCRQUOTADOPTS" "-p $OCF_RESKEY_rquotad_port" "$tmpconfig" "true"
+ if [ -n "$OCF_RESKEY_rquotad_port" ]; then
+ set_arg "RPCRQUOTADOPTS" "-p $OCF_RESKEY_rquotad_port" "$tmpconfig" "true"
+ fi
# override local nfs config. preserve previous local config though.
if [ -s $tmpconfig ]; then
--
1.8.4.2

1196
SOURCES/bz1091101-nfs-updates.patch

File diff suppressed because it is too large Load Diff

87
SOURCES/bz1095944-safe-umount-option.patch

@ -0,0 +1,87 @@ @@ -0,0 +1,87 @@
diff --git a/heartbeat/Filesystem b/heartbeat/Filesystem
index 9209818..6a852df 100755
--- a/heartbeat/Filesystem
+++ b/heartbeat/Filesystem
@@ -196,6 +196,26 @@ Only set this to "true" if you know what you are doing!
<content type="boolean" default="$OCF_RESKEY_force_clones_default" />
</parameter>
+<parameter name="force_unmount">
+<longdesc lang="en">
+This option allows specifying how to handle processes that are
+currently accessing the mount directory.
+
+"true" : Default value, kill processes accessing mount point
+"safe" : Kill processes accessing mount point using methods that
+ avoid functions that could potentially block during process
+ detection
+"false" : Do not kill any processes.
+
+The 'safe' option uses shell logic to walk the /proc directory
+for pids using the mount point while the default option uses the
+fuser cli tool. fuser is known to perform operations that can potentially
+block if unresponsive nfs mounts are in use on the system.
+</longdesc>
+<shortdesc lang="en">Kill processes before unmount</shortdesc>
+<content type="boolean" default="true" />
+</parameter>
+
</parameters>
<actions>
@@ -701,6 +721,25 @@ Filesystem_notify() {
done
}
+# Print the PIDs of processes using directory $1; the lookup method depends on $FORCE_UNMOUNT (true, "safe", or anything else = none).
+get_pids()
+{
+	local dir=$1
+	local procs mmap_procs
+
+	if ocf_is_true "$FORCE_UNMOUNT"; then
+		if [ "X${HOSTOS}" = "XOpenBSD" ];then
+			fstat | grep $dir | awk '{print $3}'
+		else
+			$FUSER -m $dir 2>/dev/null
+		fi
+	elif [ "$FORCE_UNMOUNT" = "safe" ]; then
+		procs=$(find /proc/[0-9]*/ -type l -lname "${dir}/*" -or -lname "${dir}" 2>/dev/null | awk -F/ '{print $3}')
+		mmap_procs=$(grep " ${dir}" /proc/[0-9]*/maps 2>/dev/null | awk -F/ '{print $3}')
+		printf '%s\n%s\n' "${procs}" "${mmap_procs}" | sort | uniq	# printf, not echo -e: echo's -e handling differs between shells
+	fi
+}
+
signal_processes() {
local dir=$1
local sig=$2
@@ -708,15 +747,9 @@ signal_processes() {
# fuser returns a non-zero return code if none of the
# specified files is accessed or in case of a fatal
# error.
- pids=$(
- if [ "X${HOSTOS}" = "XOpenBSD" ];then
- fstat | grep $dir | awk '{print $3}'
- else
- $FUSER -m $dir 2>/dev/null
- fi
- )
+ pids=$(get_pids "$dir")
if [ -z "$pids" ]; then
- ocf_log info "No processes on $dir were signalled"
+ ocf_log info "No processes on $dir were signalled. force_unmount is set to '$FORCE_UNMOUNT'"
return
fi
for pid in $pids; do
@@ -1002,6 +1035,11 @@ if [ $# -ne 1 ]; then
fi
# Check the OCF_RESKEY_ environment variables...
+FORCE_UNMOUNT="yes"
+if [ -n "${OCF_RESKEY_force_unmount}" ]; then
+ FORCE_UNMOUNT=$OCF_RESKEY_force_unmount
+fi
+
DEVICE=$OCF_RESKEY_device
FSTYPE=$OCF_RESKEY_fstype
if [ ! -z "$OCF_RESKEY_options" ]; then

42
SOURCES/bz1097593-LVM-warn-lvmetad.patch

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
From 4f6ebfc537b2d3671112a54873081685d47066db Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 18 Jul 2014 12:31:55 -0400
Subject: [PATCH] Low: LVM: Warn users about the danger of lvmetad

---
heartbeat/LVM | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/heartbeat/LVM b/heartbeat/LVM
index 4378cd3..27cdfbd 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -545,6 +545,25 @@ LVM_validate_all() {
check_binary $AWK
##
+ # lvmetad is a daemon that caches lvm metadata to improve the
+ # performance of LVM commands. This daemon should never be used when
+ # volume groups exist that are being managed by the cluster. The lvmetad
+ # daemon introduces a response lag, where certain LVM commands look like
+ # they have completed (like vg activation) when in fact the command
+ # is still in progress by the lvmetad. This can cause reliability issues
+ # when managing volume groups in the cluster. For example, if you have a
+ # volume group that is a dependency for another application, it is possible
+ # the cluster will think the volume group is activated and attempt to start
+ # the application before the volume group is really accessible... lvmetad is bad.
+ ##
+ lvm dumpconfig global/use_lvmetad | grep 'use_lvmetad.*=.*1' > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ # for now warn users that lvmetad is enabled and that they should disable it. In the
+ # future we may want to consider refusing to start, or killing the lvmetad daemon.
+ ocf_log warn "Disable lvmetad in lvm.conf. lvmetad should never be enabled in a clustered environment. Set use_lvmetad=0 and kill the lvmetad process"
+ fi
+
+ ##
# Off-the-shelf tests...
##
VGOUT=`vgck ${VOLUME} 2>&1`
--
1.8.4.2

32
SOURCES/bz1105655-virtualdomain-restore-start-stop-default-timeout.patch

@ -0,0 +1,32 @@ @@ -0,0 +1,32 @@
From 458c003e7f6f0caa2e1c7f4386e458a039500427 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 19 Jun 2014 14:52:36 -0500
Subject: [PATCH] High: VirtualDomain: restore advertised start and stop
timeout values to a sane value.

The meta_timeout default value is 90000 milliseconds. That value
was used in the xml output to represent the default start and stop
timeout which is reflected in seconds... not milliseconds. A
90000 second timeout doesn't make sense as a default.
---
heartbeat/VirtualDomain | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index c44c090..b0cdd5f 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -167,8 +167,8 @@ Restore state on start/stop
</parameters>
<actions>
-<action name="start" timeout="$OCF_RESKEY_CRM_meta_timeout_default" />
-<action name="stop" timeout="$OCF_RESKEY_CRM_meta_timeout_default" />
+<action name="start" timeout="90" />
+<action name="stop" timeout="90" />
<action name="status" depth="0" timeout="30" interval="10" />
<action name="monitor" depth="0" timeout="30" interval="10" />
<action name="migrate_from" timeout="60" />
--
1.8.4.2

26
SOURCES/bz1116166-Low-galera-be-very-generous-in-the-promotion-timeout.patch

@ -0,0 +1,26 @@ @@ -0,0 +1,26 @@
From 54c26715a8eb5688081ea6e26cabe54d9de762d7 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 30 Jul 2014 13:03:14 -0500
Subject: [PATCH 6/6] Low: galera: be very generous in the promotion timeout to
allow SST to complete on large databases

---
heartbeat/galera | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index a361d7b..994aad0 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -235,7 +235,7 @@ Cluster check user password
<action name="monitor" depth="0" timeout="30" interval="20" />
<action name="monitor" role="Master" depth="0" timeout="30" interval="10" />
<action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
-<action name="promote" timeout="120" />
+<action name="promote" timeout="300" />
<action name="demote" timeout="120" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
--
1.8.4.2

41
SOURCES/bz1116166-Low-galera-do-not-advertise-notify-in-the-usage.patch

@ -0,0 +1,41 @@ @@ -0,0 +1,41 @@
From bc1e7bdcedc1bb1bf473787f373261452e37e337 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 30 Jul 2014 12:59:46 -0500
Subject: [PATCH 5/6] Low: galera: do not advertise notify in the usage

---
heartbeat/galera | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index 386daaf..a361d7b 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -79,7 +79,7 @@ fi
usage() {
cat <<UEND
-usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote|notify)
+usage: $0 (start|stop|validate-all|meta-data|monitor|promote|demote)
$0 manages a galera Database as an HA resource.
@@ -237,7 +237,6 @@ Cluster check user password
<action name="monitor" role="Slave" depth="0" timeout="30" interval="30" />
<action name="promote" timeout="120" />
<action name="demote" timeout="120" />
-<action name="notify" timeout="90" />
<action name="validate-all" timeout="5" />
<action name="meta-data" timeout="5" />
</actions>
@@ -683,7 +682,6 @@ case "$1" in
monitor) galera_monitor;;
promote) galera_promote;;
demote) galera_demote;;
- notify) galera_notify;;
validate-all) exit $OCF_SUCCESS;;
*) usage
--
1.8.4.2

1417
SOURCES/bz1116166-galera-agent.patch

File diff suppressed because it is too large Load Diff

25
SOURCES/bz1116166-galera-do-not-ignore-check_password.patch

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
From e5c5c087ecf152bd69f5795024bfc655394c3c18 Mon Sep 17 00:00:00 2001
From: Andreas Kurz <andreas.kurz@gmail.com>
Date: Thu, 18 Sep 2014 23:06:36 +0200
Subject: [PATCH 1/6] High: galera: do not ignore specified check_password

---
heartbeat/galera | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index 54654f8..386daaf 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -672,7 +672,7 @@ fi
MYSQL_OPTIONS_CHECK="-nNE --user=${OCF_RESKEY_check_user}"
if [ -n "${OCF_RESKEY_check_passwd}" ]; then
- MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${MYSQL_PASSWORD}"
+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi
# What kind of method was invoked?
--
1.8.4.2

1536
SOURCES/bz1118029-iscsi-agents.patch

File diff suppressed because it is too large Load Diff

13
SOURCES/bz1118029-iscsi-remove-write-back.patch

@ -0,0 +1,13 @@ @@ -0,0 +1,13 @@
diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit
index b9c1139..ffd66ff 100755
--- a/heartbeat/iSCSILogicalUnit
+++ b/heartbeat/iSCSILogicalUnit
@@ -362,7 +362,7 @@ iSCSILogicalUnit_start() {
lio-t)
# For lio, we first have to create a target device, then
# add it to the Target Portal Group as an LU.
- ocf_run targetcli /backstores/block create name=${OCF_RESOURCE_INSTANCE} dev=${OCF_RESKEY_path} write_back=False || exit $OCF_ERR_GENERIC
+ ocf_run targetcli /backstores/block create name=${OCF_RESOURCE_INSTANCE} dev=${OCF_RESKEY_path} || exit $OCF_ERR_GENERIC
if [ -n "${OCF_RESKEY_scsi_sn}" ]; then
echo ${OCF_RESKEY_scsi_sn} > /sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/wwn/vpd_unit_serial
fi

39
SOURCES/bz1118029_iscsi_syntax_fix.patch

@ -0,0 +1,39 @@ @@ -0,0 +1,39 @@
From 3a9d34b37c3959c75b60a3598ed0786e5c48a7b3 Mon Sep 17 00:00:00 2001
From: jprades <jprades@presenzia.net>
Date: Wed, 17 Sep 2014 17:54:10 -0400
Subject: [PATCH] High: iSCSILogicalUnit: fixes syntax errors

---
heartbeat/iSCSILogicalUnit | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/heartbeat/iSCSILogicalUnit b/heartbeat/iSCSILogicalUnit
index c4cee0d..b9c1139 100755
--- a/heartbeat/iSCSILogicalUnit
+++ b/heartbeat/iSCSILogicalUnit
@@ -419,11 +419,11 @@ iSCSILogicalUnit_stop() {
${initiator} ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
fi
done
- lun_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_#{${OCF_RESKEY_lun}/"
+ lun_configfs_path="/sys/kernel/config/target/iscsi/${OCF_RESKEY_target_iqn}/tpgt_1/lun/lun_${OCF_RESKEY_lun}/"
if [ -e "${lun_configfs_path}" ]; then
ocf_run lio_node --dellun=${OCF_RESKEY_target_iqn} 1 ${OCF_RESKEY_lun} || exit $OCF_ERR_GENERIC
fi
- block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESKEY_INSTANCE}/udev_path"
+ block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path"
if [ -e "${block_configfs_path}" ]; then
ocf_run tcm_node --freedev=iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE} || exit $OCF_ERR_GENERIC
fi
@@ -478,7 +478,7 @@ iSCSILogicalUnit_monitor() {
[ -e ${configfs_path} ] && [ `cat ${configfs_path}` = "${OCF_RESKEY_path}" ] && return $OCF_SUCCESS
# if we aren't activated, is a block device still left over?
- block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESKEY_INSTANCE}/udev_path"
+ block_configfs_path="/sys/kernel/config/target/core/iblock_${OCF_RESKEY_lio_iblock}/${OCF_RESOURCE_INSTANCE}/udev_path"
[ -e ${block_configfs_path} ] && ocf_log warn "existing block without an active lun: ${block_configfs_path}"
[ -e ${block_configfs_path} ] && return $OCF_ERR_GENERIC
--
1.8.4.2

466
SOURCES/bz1122285-ethmonitor-infiniband.patch

@ -0,0 +1,466 @@ @@ -0,0 +1,466 @@
From feffc766c48a1010c1bf4f8b1db74795d06dbd50 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 25 Aug 2014 14:57:09 -0500
Subject: [PATCH 2/4] ethmonitor updates

---
heartbeat/ethmonitor | 290 +++++++++++++++++++++++++++++++++------------------
1 file changed, 187 insertions(+), 103 deletions(-)

diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index b85d7fc..a447391 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -1,14 +1,14 @@
#!/bin/sh
#
-# OCF Resource Agent compliant script.
-# Monitor the vitality of a local network interface.
+# OCF Resource Agent compliant script.
+# Monitor the vitality of a local network interface.
#
# Based on the work by Robert Euhus and Lars Marowsky-Brée.
#
# Transfered from Ipaddr2 into ethmonitor by Alexander Krauth
#
# Copyright (c) 2011 Robert Euhus, Alexander Krauth, Lars Marowsky-Brée
-# All Rights Reserved.
+# All Rights Reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
@@ -29,12 +29,12 @@
# along with this program; if not, write the Free Software Foundation,
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
#
-# OCF parameters are as below
+# OCF parameters are as below
#
# OCF_RESKEY_interface
# OCF_RESKEY_multiplicator
# OCF_RESKEY_name
-# OCF_RESKEY_repeat_count
+# OCF_RESKEY_repeat_count
# OCF_RESKEY_repeat_interval
# OCF_RESKEY_pktcnt_timeout
# OCF_RESKEY_arping_count
@@ -70,10 +70,13 @@ The resource configuration requires a monitor operation, because the monitor doe
In addition to the resource configuration, you need to configure some location constraints, based on a CIB attribute value.
The name of the attribute value is configured in the 'name' option of this RA.
-Example constraint configuration:
+Example constraint configuration using crmsh
location loc_connected_node my_resource_grp \
rule $id="rule_loc_connected_node" -INF: ethmonitor eq 0
+Example constraint configuration using pcs. Only allow 'my_resource' to run on nodes where eth0 ethernet device is available.
+pcs constraint location my_resource rule score=-INFINITY ethmonitor-eth0 ne 1
+
The ethmonitor works in 3 different modes to test the interface vitality.
1. call ip to see if the link status is up (if link is down -> error)
2. call ip and watch the RX counter (if packages come around in a certain time -> success)
@@ -157,14 +160,30 @@ Maximum number of IPs from ARP cache list to check for ARP REQUEST (arping) answ
<content type="integer" default="5"/>
</parameter>
+<parameter name="infiniband_device">
+<longdesc lang="en">
+For interfaces that are infiniband devices.
+</longdesc>
+<shortdesc lang="en">infiniband device</shortdesc>
+<content type="string" />
+</parameter>
+
+<parameter name="infiniband_port">
+<longdesc lang="en">
+For infiniband devices, this is the port to monitor.
+</longdesc>
+<shortdesc lang="en">infiniband port</shortdesc>
+<content type="integer" />
+</parameter>
+
</parameters>
<actions>
-<action name="start" timeout="20s" />
-<action name="stop" timeout="20s" />
-<action name="status" depth="0" timeout="20s" interval="10s" />
-<action name="monitor" depth="0" timeout="20s" interval="10s" />
-<action name="meta-data" timeout="5s" />
-<action name="validate-all" timeout="20s" />
+<action name="start" timeout="60s" />
+<action name="stop" timeout="20s" />
+<action name="status" depth="0" timeout="60s" interval="10s" />
+<action name="monitor" depth="0" timeout="60s" interval="10s" />
+<action name="meta-data" timeout="5s" />
+<action name="validate-all" timeout="20s" />
</actions>
</resource-agent>
END
@@ -173,7 +192,7 @@ END
}
#
-# Return true, if the interface exists
+# Return true, if the interface exists
#
is_interface() {
#
@@ -181,14 +200,25 @@ is_interface() {
#
local iface=`$IP2UTIL -o -f inet addr show | grep " $1 " \
| cut -d ' ' -f2 | sort -u | grep -v '^ipsec[0-9][0-9]*$'`
- [ "$iface" != "" ]
+ [ "$iface" != "" ]
+}
+
+# Succeed when the ibstatus output for the configured device (and port, if set) contains ACTIVE.
+infiniband_status()
+{
+	local ib_target="$OCF_RESKEY_infiniband_device"
+
+	# narrow the query to "device:port" when a port is configured
+	[ -z "$OCF_RESKEY_infiniband_port" ] || ib_target="${ib_target}:${OCF_RESKEY_infiniband_port}"
+
+	ibstatus ${ib_target} | grep -q ACTIVE
}
if_init() {
local rc
if [ X"$OCF_RESKEY_interface" = "X" ]; then
- ocf_log err "Interface name (the interface parameter) is mandatory"
+ ocf_exit_reason "Interface name (the interface parameter) is mandatory"
exit $OCF_ERR_CONFIGURED
fi
@@ -196,60 +226,67 @@ if_init() {
if is_interface $NIC
then
- case "$NIC" in
- *:*) ocf_log err "Do not specify a virtual interface : $OCF_RESKEY_interface"
- exit $OCF_ERR_CONFIGURED;;
- *) ;;
- esac
+ case "$NIC" in
+ *:*) ocf_exit_reason "Do not specify a virtual interface : $OCF_RESKEY_interface"
+ exit $OCF_ERR_CONFIGURED;;
+ *) ;;
+ esac
else
- case $__OCF_ACTION in
- validate-all) ocf_log err "Interface $NIC does not exist"
- exit $OCF_ERR_CONFIGURED;;
- *) ocf_log warn "Interface $NIC does not exist"
- ## It might be a bond interface which is temporarily not available, therefore we want to continue here
- ;;
- esac
+ case $__OCF_ACTION in
+ validate-all)
+ ocf_exit_reason "Interface $NIC does not exist"
+ exit $OCF_ERR_CONFIGURED;;
+ *)
+ ## It might be a bond interface which is temporarily not available, therefore we want to continue here
+ ocf_log warn "Interface $NIC does not exist"
+ ;;
+ esac
fi
: ${OCF_RESKEY_multiplier:="1"}
if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then
- ocf_log err "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]"
+ ocf_exit_reason "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]"
exit $OCF_ERR_CONFIGURED
fi
ATTRNAME=${OCF_RESKEY_name:-"ethmonitor-$NIC"}
- REP_COUNT=${OCF_RESKEY_repeat_count:-5}
+ REP_COUNT=${OCF_RESKEY_repeat_count:-5}
if ! ocf_is_decimal "$REP_COUNT" -o [ $REP_COUNT -lt 1 ]; then
- ocf_log err "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]"
+ ocf_exit_reason "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]"
exit $OCF_ERR_CONFIGURED
- fi
+ fi
REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10}
if ! ocf_is_decimal "$REP_INTERVAL_S"; then
- ocf_log err "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]"
+ ocf_exit_reason "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]"
exit $OCF_ERR_CONFIGURED
fi
: ${OCF_RESKEY_pktcnt_timeout:="5"}
if ! ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then
- ocf_log err "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]"
+ ocf_exit_reason "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]"
exit $OCF_ERR_CONFIGURED
fi
: ${OCF_RESKEY_arping_count:="1"}
if ! ocf_is_decimal "$OCF_RESKEY_arping_count"; then
- ocf_log err "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]"
+ ocf_exit_reason "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]"
exit $OCF_ERR_CONFIGURED
fi
: ${OCF_RESKEY_arping_timeout:="1"}
if ! ocf_is_decimal "$OCF_RESKEY_arping_timeout"; then
- ocf_log err "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_count]"
+ ocf_exit_reason "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_count]"
exit $OCF_ERR_CONFIGURED
fi
: ${OCF_RESKEY_arping_cache_entries:="5"}
if ! ocf_is_decimal "$OCF_RESKEY_arping_cache_entries"; then
- ocf_log err "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]"
+ ocf_exit_reason "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]"
exit $OCF_ERR_CONFIGURED
fi
- return $OCF_SUCCESS
+
+ if [ -n "$OCF_RESKEY_infiniband_device" ]; then
+ #ibstatus is required if an infiniband_device is provided
+ check_binary ibstatus
+ fi
+ return $OCF_SUCCESS
}
# get the link status on $NIC
@@ -277,7 +314,7 @@ watch_pkt_counter () {
for n in `seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))`; do
sleep 0.1
RX_PACKETS_NEW="`get_rx_packets`"
- ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW"
+ ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW"
if [ "$RX_PACKETS_OLD" -ne "$RX_PACKETS_NEW" ]; then
ocf_log debug "we received some packets."
return 0
@@ -308,7 +345,7 @@ do_arping () {
}
#
-# Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level
+# Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level
#
# 09: check for nonempty ARP cache
# 10: watch for packet counter changes
@@ -322,21 +359,47 @@ do_arping () {
# the tests for higher check levels are run.
#
if_check () {
+ local arp_list
# always check link status first
link_status="`get_link_status`"
ocf_log debug "link_status: $link_status (1=up, 0=down)"
- [ $link_status -eq 0 ] && return $OCF_NOT_RUNNING
+
+ if [ $link_status -eq 0 ]; then
+ ocf_log notice "link_status: DOWN"
+ return $OCF_NOT_RUNNING
+ fi
+
+ # if this is an infiniband device, try ibstatus script
+ if [ -n "$OCF_RESKEY_infiniband_device" ]; then
+ if infiniband_status; then
+ return $OCF_SUCCESS
+ fi
+ ocf_log info "Infiniband device $OCF_RESKEY_infiniband_device is not available, check ibstatus for more information"
+ return $OCF_NOT_RUNNING
+ fi
# watch for packet counter changes
- ocf_log debug "watch for packet counter changes"
- watch_pkt_counter && return $OCF_SUCCESS
+ ocf_log debug "watch for packet counter changes"
+ watch_pkt_counter
+ if [ $? -eq 0 ]; then
+ return $OCF_SUCCESS
+ else
+ ocf_log debug "No packets received during packet watch timeout"
+ fi
# check arping ARP cache entries
- ocf_log debug "check arping ARP cache entries"
- for ip in `get_arp_list`; do
+ ocf_log debug "check arping ARP cache entries"
+ arp_list=`get_arp_list`
+ for ip in `echo $arp_list`; do
do_arping $ip && return $OCF_SUCCESS
done
+ # if we get here, the ethernet device is considered not running.
+ # provide some logging information
+ if [ -z "$arp_list" ]; then
+ ocf_log info "No ARP cache entries found to arping"
+ fi
+
# watch for packet counter changes in promiscios mode
# ocf_log debug "watch for packet counter changes in promiscios mode"
# be sure switch off promiscios mode in any case
@@ -362,67 +425,89 @@ END
}
set_cib_value() {
- local score=`expr $1 \* $OCF_RESKEY_multiplier`
- attrd_updater -n $ATTRNAME -v $score -q
- local rc=$?
- case $rc in
- 0) ocf_log debug "attrd_updater: Updated $ATTRNAME = $score" ;;
- *) ocf_log warn "attrd_updater: Could not update $ATTRNAME = $score: rc=$rc";;
- esac
- return $rc
+ local score=`expr $1 \* $OCF_RESKEY_multiplier`
+ attrd_updater -n $ATTRNAME -v $score -q
+ local rc=$?
+ case $rc in
+ 0) ocf_log debug "attrd_updater: Updated $ATTRNAME = $score" ;;
+ *) ocf_log warn "attrd_updater: Could not update $ATTRNAME = $score: rc=$rc";;
+ esac
+ return $rc
}
if_monitor() {
- ha_pseudo_resource $OCF_RESOURCE_INSTANCE monitor
- local pseudo_status=$?
- if [ $pseudo_status -ne $OCF_SUCCESS ]; then
- exit $pseudo_status
- fi
-
- local mon_rc=$OCF_NOT_RUNNING
- local attr_rc=$OCF_NOT_RUNNING
- local runs=0
- local start_time
- local end_time
- local sleep_time
- while [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]
- do
- start_time=`date +%s%N`
- if_check
- mon_rc=$?
- REP_COUNT=$(( $REP_COUNT - 1 ))
- if [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]; then
- ocf_log warn "Monitoring of $OCF_RESOURCE_INSTANCE failed, $REP_COUNT retries left."
- end_time=`date +%s%N`
- sleep_time=`echo "scale=9; ( $start_time + ( $REP_INTERVAL_S * 1000000000 ) - $end_time ) / 1000000000" | bc -q 2> /dev/null`
- sleep $sleep_time 2> /dev/null
- runs=$(($runs + 1))
- fi
-
- if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then
- ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error"
- fi
- done
-
- ocf_log debug "Monitoring return code: $mon_rc"
- if [ $mon_rc -eq $OCF_SUCCESS ]; then
- set_cib_value 1
- attr_rc=$?
- else
- ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed."
- set_cib_value 0
- attr_rc=$?
- fi
-
- ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors.
- ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself.
- exit $attr_rc
+ ha_pseudo_resource $OCF_RESOURCE_INSTANCE monitor
+ local pseudo_status=$?
+ if [ $pseudo_status -ne $OCF_SUCCESS ]; then
+ exit $pseudo_status
+ fi
+
+ local mon_rc=$OCF_NOT_RUNNING
+ local attr_rc=$OCF_NOT_RUNNING
+ local runs=0
+ local start_time
+ local end_time
+ local sleep_time
+ while [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]
+ do
+ start_time=`date +%s%N`
+ if_check
+ mon_rc=$?
+ REP_COUNT=$(( $REP_COUNT - 1 ))
+ if [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]; then
+ ocf_log warn "Monitoring of $OCF_RESOURCE_INSTANCE failed, $REP_COUNT retries left."
+ end_time=`date +%s%N`
+ sleep_time=`echo "scale=9; ( $start_time + ( $REP_INTERVAL_S * 1000000000 ) - $end_time ) / 1000000000" | bc -q 2> /dev/null`
+ sleep $sleep_time 2> /dev/null
+ runs=$(($runs + 1))
+ fi
+
+ if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then
+ ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error"
+ fi
+ done
+
+ ocf_log debug "Monitoring return code: $mon_rc"
+ if [ $mon_rc -eq $OCF_SUCCESS ]; then
+ set_cib_value 1
+ attr_rc=$?
+ else
+ ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed."
+ set_cib_value 0
+ attr_rc=$?
+ fi
+
+ ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors.
+ ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself.
+ exit $attr_rc
+}
+
+# Stop action: delete the $ATTRNAME node attribute, then stop the pseudo resource; returns the latter's status.
+if_stop() {
+	attrd_updater -D -n $ATTRNAME
+	ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop
}
+# Start action: start the pseudo resource, then run one monitor pass as part of the start.
+if_start() {
+	local start_rc
+
+	ha_pseudo_resource $OCF_RESOURCE_INSTANCE start
+	start_rc=$?
+	if [ $start_rc -ne $OCF_SUCCESS ]; then
+		ocf_exit_reason "Failure to create ethmonitor state file"
+		return $start_rc
+	fi
+
+	# perform the first monitor during the start operation; its status is this function's status
+	if_monitor
+}
+
+
if_validate() {
- check_binary $IP2UTIL
- check_binary arping
- if_init
+ check_binary $IP2UTIL
+ check_binary arping
+ if_init
}
case $__OCF_ACTION in
@@ -436,18 +521,17 @@ esac
if_validate
case $__OCF_ACTION in
-start) ha_pseudo_resource $OCF_RESOURCE_INSTANCE start
+start) if_start
exit $?
;;
-stop) attrd_updater -D -n $ATTRNAME
- ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop
+stop) if_stop
exit $?
;;
monitor|status) if_monitor
exit $?
;;
validate-all) exit $?
- ;;
+ ;;
*) if_usage
exit $OCF_ERR_UNIMPLEMENTED
;;
--
1.8.4.2

474
SOURCES/bz1126073-1-nfsserver-fix-systemd-status-detection.patch

@@ -0,0 +1,474 @@
diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver
--- a/heartbeat/nfsserver 2016-02-05 09:04:19.350003826 +0100
+++ b/heartbeat/nfsserver 2016-02-05 09:04:58.463395839 +0100
@@ -208,9 +208,9 @@
</parameters>
<actions>
-<action name="start" timeout="90" />
-<action name="stop" timeout="60s" />
-<action name="monitor" depth="0" timeout="30s" interval="10" />
+<action name="start" timeout="40" />
+<action name="stop" timeout="20s" />
+<action name="monitor" depth="0" timeout="20s" interval="10" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
@@ -327,11 +327,12 @@
nfs_exec()
{
local cmd=$1
+ local svc=$2
set_exec_mode
case $EXEC_MODE in
1) ${OCF_RESKEY_nfs_init_script} $cmd;;
- 2) systemctl $cmd nfs-server.service ;;
+ 2) systemctl $cmd ${svc}.service ;;
esac
}
@@ -353,21 +354,117 @@
nfsserver_monitor ()
{
+ # Skip trying to start processes once before failing
+ # when run from nfsserver_start ()
+ if [ "$1" == "fromstart" ]; then
+ ocf_log info "fromstart"
+ fromstart=1
+ else
+ tries=1
+ fi
+
+ # systemd
+ if [ "$EXEC_MODE" -eq "2" ]; then
+ ocf_log info "Status: rpcbind"
+ rpcinfo &> /dev/null
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
+ nfsserver_start frommonitor
+ rc=$?
+ let tries=$tries-1
+ fi
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "rpcbind is not running"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+
+ ocf_log info "Status: nfs-mountd"
+ rpcinfo -t localhost 100005 &> /dev/null
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
+ nfsserver_start frommonitor
+ rc=$?
+ let tries=$tries-1
+ fi
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "nfs-mountd is not running"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+
+ ocf_log info "Status: nfs-idmapd"
+ fn=`mktemp`
+ nfs_exec status nfs-idmapd > $fn 2>&1
+ rc=$?
+ ocf_log debug "$(cat $fn)"
+ rm -f $fn
+ if [ "$rc" -ne "0" ]; then
+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
+ nfsserver_start frommonitor
+ rc=$?
+ ocf_log info "Tried to start services: rc: $rc"
+ let tries=$tries-1
+ fi
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "nfs-idmapd is not running"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+
+ ocf_log info "Status: rpc-statd"
+ rpcinfo -t localhost 100024 &> /dev/null
+ rc=$?
+ if [ "$rc" -ne "0" ]; then
+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
+ nfsserver_start frommonitor
+ rc=$?
+ let tries=$tries-1
+ fi
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "rpc-statd is not running"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+ fi
+
fn=`mktemp`
- nfs_exec status > $fn 2>&1
+ nfs_exec status nfs-server > $fn 2>&1
rc=$?
ocf_log debug "$(cat $fn)"
rm -f $fn
- #Adapte LSB status code to OCF return code
+ tfn="/proc/fs/nfsd/threads"
+ if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then
+ if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
+ nfsserver_start frommonitor
+ rc=$?
+ let tries=$tries-1
+ fi
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads"
+ return $OCF_NOT_RUNNING
+ fi
+ fi
+
+ #Adapt LSB status code to OCF return code
if [ $rc -eq 0 ]; then
# don't report success if nfs servers are up
# without locking daemons.
v3locking_exec "status"
rc=$?
if [ $rc -ne 0 ]; then
- ocf_exit_reason "NFS server is up, but the locking daemons are down"
- rc=$OCF_ERR_GENERIC
+ if [ ! "$fromstart" ] && [ $tries -gt "0" ]; then
+ nfsserver_start frommonitor
+ rc=$?
+ let tries=$tries-1
+ fi
+ if [ "$rc" -ne "0" ]; then
+ ocf_exit_reason "NFS server is up, but the locking daemons are down"
+ rc=$OCF_ERR_GENERIC
+ fi
fi
return $rc
elif [ $rc -eq 3 ]; then
@@ -391,12 +488,7 @@
# only write to the tmp /etc/sysconfig/nfs if sysconfig exists.
# otherwise this distro does not support setting these options.
if [ -d "/etc/sysconfig" ]; then
- # replace if the value exists, append otherwise
- if grep "^\s*${key}=" $file ; then
- sed -i "s/\s*${key}=.*$/${key}=\"${value}\"/" $file
- else
- echo "${key}=\"${value}\"" >> $file
- fi
+ echo "${key}=\"${value}\"" >> $file
elif [ "$requires_sysconfig" = "true" ]; then
ocf_log warn "/etc/sysconfig/nfs not found, unable to set port and nfsd args."
fi
@@ -409,11 +501,6 @@
local tmpconfig=$(mktemp ${HA_RSCTMP}/nfsserver-tmp-XXXXX)
local statd_args
- if [ -f "$NFS_SYSCONFIG" ]; then
- ## Take the $NFS_SYSCONFIG file as our skeleton
- cp $NFS_SYSCONFIG $tmpconfig
- fi
-
# nfsd args
set_arg "RPCNFSDARGS" "$OCF_RESKEY_nfsd_args" "$tmpconfig" "true"
@@ -444,20 +531,14 @@
# override local nfs config. preserve previous local config though.
if [ -s $tmpconfig ]; then
- cat $NFS_SYSCONFIG | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1
+ cat $NFS_SYSCONFIG | grep -e "$NFS_SYSCONFIG_AUTOGEN_TAG"
if [ $? -ne 0 ]; then
# backup local nfs config if it doesn't have our HA autogen tag in it.
mv -f $NFS_SYSCONFIG $NFS_SYSCONFIG_LOCAL_BACKUP
fi
-
- cat $tmpconfig | grep -q -e "$NFS_SYSCONFIG_AUTOGEN_TAG" > /dev/null 2>&1
- if [ $? -ne 0 ]; then
- echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG
- echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG
- cat $tmpconfig >> $NFS_SYSCONFIG
- else
- cat $tmpconfig > $NFS_SYSCONFIG
- fi
+ echo "# $NFS_SYSCONFIG_AUTOGEN_TAG" > $NFS_SYSCONFIG
+ echo "# local config backup stored here, '$NFS_SYSCONFIG_LOCAL_BACKUP'" >> $NFS_SYSCONFIG
+ cat $tmpconfig >> $NFS_SYSCONFIG
fi
rm -f $tmpconfig
}
@@ -476,14 +557,13 @@
[ -d "$fp/$STATD_DIR/sm" ] || mkdir -p "$fp/$STATD_DIR/sm"
[ -d "$fp/$STATD_DIR/sm.ha" ] || mkdir -p "$fp/$STATD_DIR/sm.ha"
[ -d "$fp/$STATD_DIR/sm.bak" ] || mkdir -p "$fp/$STATD_DIR/sm.bak"
- [ -n "`id -u rpcuser 2>/dev/null`" -a "`id -g rpcuser 2>/dev/null`" ] &&
- chown -R rpcuser.rpcuser "$fp/$STATD_DIR"
+ [ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown -R rpcuser.rpcuser "$fp/$STATD_DIR"
[ -f "$fp/etab" ] || touch "$fp/etab"
[ -f "$fp/xtab" ] || touch "$fp/xtab"
[ -f "$fp/rmtab" ] || touch "$fp/rmtab"
- dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 >/dev/null 2>&1
+ dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 &> /dev/null
[ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state"
[ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp"
}
@@ -563,15 +643,15 @@
terminate()
{
- local pids
- local i=0
+ declare pids
+ declare i=0
while : ; do
pids=$(binary_status $1)
[ -z "$pids" ] && return 0
kill $pids
sleep 1
- i=$((i + 1))
+ ((i++))
[ $i -gt 3 ] && return 1
done
}
@@ -579,22 +659,22 @@
killkill()
{
- local pids
- local i=0
+ declare pids
+ declare i=0
while : ; do
pids=$(binary_status $1)
[ -z "$pids" ] && return 0
kill -9 $pids
sleep 1
- i=$((i + 1))
+ ((i++))
[ $i -gt 3 ] && return 1
done
}
stop_process()
{
- local process=$1
+ declare process=$1
ocf_log info "Stopping $process"
if terminate $process; then
@@ -665,9 +745,14 @@
nfsserver_start ()
{
+ # Skip monitor check when run from nfsserver_monitor ()
+ if [ "$1" == "frommonitor" ]; then
+ frommonitor=1
+ fi
+
local rc;
- if nfsserver_monitor; then
+ if [ ! "$frommonitor" ] && nfsserver_monitor fromstart; then
ocf_log debug "NFS server is already started"
return $OCF_SUCCESS
fi
@@ -693,11 +778,32 @@
modprobe nfsd
fi
+ # systemd
+ if [ "$EXEC_MODE" -eq "2" ]; then
+ nfs_exec start rpcbind
+ local i=10
+ while [ "$i" -gt 0 ]; do
+ ocf_log info "Start: rpcbind i: $i"
+ rpcinfo &> /dev/null
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break;
+ fi
+ sleep 1
+ let i=$i-1
+ done
+ if [ "$i" -eq 0 ]; then
+ ocf_exit_reason "Failed to start rpcbind"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
# check to see if we need to start rpc.statd
v3locking_exec "status"
if [ $? -ne $OCF_SUCCESS ]; then
v3locking_exec "start"
rc=$?
+ ocf_log info "Start: v3locking: $rc"
if [ $rc -ne 0 ]; then
ocf_exit_reason "Failed to start NFS server locking daemons"
return $rc
@@ -706,8 +812,65 @@
ocf_log info "rpc.statd already up"
fi
+ # systemd
+ if [ "$EXEC_MODE" -eq "2" ]; then
+ nfs_exec start nfs-mountd
+ local i=10
+ while [ "$i" -gt 0 ]; do
+ ocf_log info "Start: nfs-mountd i: $i"
+ rpcinfo -t localhost 100005 &> /dev/null
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break;
+ fi
+ sleep 1
+ let i=$i-1
+ done
+ if [ "$i" -eq 0 ]; then
+ ocf_exit_reason "Failed to start nfs-mountd"
+ return $OCF_ERR_GENERIC
+ fi
+
+ nfs_exec start nfs-idmapd
+ local i=10
+ while [ "$i" -gt 0 ]; do
+ ocf_log info "Start: nfs-idmapd i: $i"
+ fn=`mktemp`
+ nfs_exec status nfs-idmapd > $fn 2>&1
+ rc=$?
+ ocf_log debug "$(cat $fn)"
+ rm -f $fn
+ if [ "$rc" -eq "0" ]; then
+ break;
+ fi
+ sleep 1
+ let i=$i-1
+ done
+ if [ "$i" -eq 0 ]; then
+ ocf_exit_reason "Failed to start nfs-idmapd"
+ return $OCF_ERR_GENERIC
+ fi
+
+ nfs_exec start rpc-statd
+ local i=10
+ while [ "$i" -gt 0 ]; do
+ ocf_log info "Start: rpc-statd i: $i"
+ rpcinfo -t localhost 100024 &> /dev/null
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ break;
+ fi
+ sleep 1
+ let i=$i-1
+ done
+ if [ "$i" -eq 0 ]; then
+ ocf_exit_reason "Failed to start rpc-statd"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
fn=`mktemp`
- nfs_exec start > $fn 2>&1
+ nfs_exec start nfs-server > $fn 2>&1
rc=$?
ocf_log debug "$(cat $fn)"
rm -f $fn
@@ -717,6 +880,12 @@
return $rc
fi
+ tfn="/proc/fs/nfsd/threads"
+ if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then
+ ocf_exit_reason "Failed to start NFS server: /proc/fs/nfsd/threads"
+ return $OCF_ERR_GENERIC
+ fi
+
notify_locks
ocf_log info "NFS server started"
@@ -733,24 +902,71 @@
cp -rpf $STATD_PATH/sm $STATD_PATH/sm.bak /var/lib/nfs/state $STATD_PATH/sm.ha > /dev/null 2>&1
fn=`mktemp`
- nfs_exec stop > $fn 2>&1
+ nfs_exec stop nfs-server > $fn 2>&1
rc=$?
ocf_log debug "$(cat $fn)"
rm -f $fn
+ if [ $rc -ne 0 ]; then
+ ocf_exit_reason "Failed to stop NFS server"
+ return $rc
+ fi
+
+ # systemd
+ if [ "$EXEC_MODE" -eq "2" ]; then
+ ocf_log info "Stop: threads"
+ tfn="/proc/fs/nfsd/threads"
+ if [ -f "$tfn" ] && [ "$(cat $tfn)" -gt "0" ]; then
+ ocf_exit_reason "NFS server failed to stop: /proc/fs/nfsd/threads"
+ return $OCF_ERR_GENERIC
+ fi
+
+ nfs_exec stop rpc-statd &> /dev/null
+ ocf_log info "Stop: rpc-statd"
+ rpcinfo -t localhost 100024 &> /dev/null
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop rpc-statd"
+ return $OCF_ERR_GENERIC
+ fi
+
+ nfs_exec stop nfs-idmapd &> /dev/null
+ ocf_log info "Stop: nfs-idmapd"
+ fn=`mktemp`
+ nfs_exec status nfs-idmapd > $fn 2>&1
+ rc=$?
+ ocf_log debug "$(cat $fn)"
+ rm -f $fn
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop nfs-idmapd"
+ return $OCF_ERR_GENERIC
+ fi
+
+ nfs_exec stop nfs-mountd &> /dev/null
+ ocf_log info "Stop: nfs-mountd"
+ rpcinfo -t localhost 100005 &> /dev/null
+ rc=$?
+ if [ "$rc" -eq "0" ]; then
+ ocf_exit_reason "Failed to stop nfs-mountd"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
v3locking_exec "stop"
if [ $? -ne 0 ]; then
ocf_exit_reason "Failed to stop NFS locking daemons"
rc=$OCF_ERR_GENERIC
fi
- if [ $rc -eq 0 ]; then
- unbind_tree
- ocf_log info "NFS server stopped"
- else
- ocf_exit_reason "Failed to stop NFS server"
+ # systemd
+ if [ "$EXEC_MODE" -eq "2" ]; then
+ nfs_exec stop rpcbind &> /dev/null
+ ocf_log info "Stop: rpcbind"
fi
- return $rc
+
+ unbind_tree
+ ocf_log info "NFS server stopped"
+ return 0
}
nfsserver_validate ()

337
SOURCES/bz1126073-2-nfsserver-fix-systemd-status-detection.patch

@@ -0,0 +1,337 @@
diff -uNr a/heartbeat/nfsserver b/heartbeat/nfsserver
--- a/heartbeat/nfsserver 2016-07-21 12:40:55.417326145 +0200
+++ b/heartbeat/nfsserver 2016-07-21 12:04:49.000000000 +0200
@@ -352,45 +352,22 @@
nfsserver_monitor ()
{
- # Skip trying to start processes once before failing
- # when run from nfsserver_start ()
- if [ "$1" == "fromstart" ]; then
- ocf_log info "fromstart"
- fromstart=1
- else
- tries=1
- fi
-
# systemd
if [ "$EXEC_MODE" -eq "2" ]; then
ocf_log info "Status: rpcbind"
- rpcinfo &> /dev/null
+ rpcinfo > /dev/null 2>&1
rc=$?
if [ "$rc" -ne "0" ]; then
- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
- nfsserver_start frommonitor
- rc=$?
- let tries=$tries-1
- fi
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "rpcbind is not running"
- return $OCF_NOT_RUNNING
- fi
+ ocf_exit_reason "rpcbind is not running"
+ return $OCF_NOT_RUNNING
fi
ocf_log info "Status: nfs-mountd"
- rpcinfo -t localhost 100005 &> /dev/null
+ rpcinfo -t localhost 100005 > /dev/null 2>&1
rc=$?
if [ "$rc" -ne "0" ]; then
- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
- nfsserver_start frommonitor
- rc=$?
- let tries=$tries-1
- fi
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "nfs-mountd is not running"
- return $OCF_NOT_RUNNING
- fi
+ ocf_exit_reason "nfs-mountd is not running"
+ return $OCF_NOT_RUNNING
fi
ocf_log info "Status: nfs-idmapd"
@@ -400,31 +377,16 @@
ocf_log debug "$(cat $fn)"
rm -f $fn
if [ "$rc" -ne "0" ]; then
- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
- nfsserver_start frommonitor
- rc=$?
- ocf_log info "Tried to start services: rc: $rc"
- let tries=$tries-1
- fi
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "nfs-idmapd is not running"
- return $OCF_NOT_RUNNING
- fi
+ ocf_exit_reason "nfs-idmapd is not running"
+ return $OCF_NOT_RUNNING
fi
ocf_log info "Status: rpc-statd"
- rpcinfo -t localhost 100024 &> /dev/null
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
rc=$?
if [ "$rc" -ne "0" ]; then
- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
- nfsserver_start frommonitor
- rc=$?
- let tries=$tries-1
- fi
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "rpc-statd is not running"
- return $OCF_NOT_RUNNING
- fi
+ ocf_exit_reason "rpc-statd is not running"
+ return $OCF_NOT_RUNNING
fi
fi
@@ -436,15 +398,8 @@
tfn="/proc/fs/nfsd/threads"
if [ ! -f "$tfn" ] || [ "$(cat $tfn)" -le "0" ]; then
- if [ ! "$fromstart" ] && [ "$tries" -gt "0" ]; then
- nfsserver_start frommonitor
- rc=$?
- let tries=$tries-1
- fi
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads"
- return $OCF_NOT_RUNNING
- fi
+ ocf_exit_reason "NFS server not running: /proc/fs/nfsd/threads"
+ return $OCF_NOT_RUNNING
fi
#Adapt LSB status code to OCF return code
@@ -454,15 +409,8 @@
v3locking_exec "status"
rc=$?
if [ $rc -ne 0 ]; then
- if [ ! "$fromstart" ] && [ $tries -gt "0" ]; then
- nfsserver_start frommonitor
- rc=$?
- let tries=$tries-1
- fi
- if [ "$rc" -ne "0" ]; then
- ocf_exit_reason "NFS server is up, but the locking daemons are down"
- rc=$OCF_ERR_GENERIC
- fi
+ ocf_exit_reason "NFS server is up, but the locking daemons are down"
+ rc=$OCF_ERR_GENERIC
fi
return $rc
elif [ $rc -eq 3 ]; then
@@ -561,7 +509,7 @@
[ -f "$fp/xtab" ] || touch "$fp/xtab"
[ -f "$fp/rmtab" ] || touch "$fp/rmtab"
- dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 &> /dev/null
+ dd if=/dev/urandom of=$fp/$STATD_DIR/state bs=1 count=4 > /dev/null 2>&1
[ -n "`id -u rpcuser`" -a "`id -g rpcuser`" ] && chown rpcuser.rpcuser "$fp/$STATD_DIR/state"
[ $SELINUX_ENABLED -eq 0 ] && chcon -R "$SELINUX_LABEL" "$fp"
}
@@ -656,15 +604,15 @@
terminate()
{
- declare pids
- declare i=0
+ local pids
+ local i=0
while : ; do
pids=$(binary_status $1)
[ -z "$pids" ] && return 0
kill $pids
sleep 1
- ((i++))
+ i=$((i + 1))
[ $i -gt 3 ] && return 1
done
}
@@ -672,22 +620,22 @@
killkill()
{
- declare pids
- declare i=0
+ local pids
+ local i=0
while : ; do
pids=$(binary_status $1)
[ -z "$pids" ] && return 0
kill -9 $pids
sleep 1
- ((i++))
+ i=$((i + 1))
[ $i -gt 3 ] && return 1
done
}
stop_process()
{
- declare process=$1
+ local process=$1
ocf_log info "Stopping $process"
if terminate $process; then
@@ -758,14 +706,9 @@
nfsserver_start ()
{
- # Skip monitor check when run from nfsserver_monitor ()
- if [ "$1" == "frommonitor" ]; then
- frommonitor=1
- fi
-
local rc;
- if [ ! "$frommonitor" ] && nfsserver_monitor fromstart; then
+ if nfsserver_monitor; then
ocf_log debug "NFS server is already started"
return $OCF_SUCCESS
fi
@@ -796,21 +739,17 @@
# systemd
if [ "$EXEC_MODE" -eq "2" ]; then
nfs_exec start rpcbind
- local i=10
- while [ "$i" -gt 0 ]; do
+ local i=1
+ while : ; do
ocf_log info "Start: rpcbind i: $i"
- rpcinfo &> /dev/null
+ rpcinfo > /dev/null 2>&1
rc=$?
if [ "$rc" -eq "0" ]; then
break;
fi
sleep 1
- let i=$i-1
+ i=$((i + 1))
done
- if [ "$i" -eq 0 ]; then
- ocf_exit_reason "Failed to start rpcbind"
- return $OCF_ERR_GENERIC
- fi
fi
# check to see if we need to start rpc.statd
@@ -830,25 +769,21 @@
# systemd
if [ "$EXEC_MODE" -eq "2" ]; then
nfs_exec start nfs-mountd
- local i=10
- while [ "$i" -gt 0 ]; do
+ local i=1
+ while : ; do
ocf_log info "Start: nfs-mountd i: $i"
- rpcinfo -t localhost 100005 &> /dev/null
+ rpcinfo -t localhost 100005 > /dev/null 2>&1
rc=$?
if [ "$rc" -eq "0" ]; then
break;
fi
sleep 1
- let i=$i-1
+ i=$((i + 1))
done
- if [ "$i" -eq 0 ]; then
- ocf_exit_reason "Failed to start nfs-mountd"
- return $OCF_ERR_GENERIC
- fi
nfs_exec start nfs-idmapd
- local i=10
- while [ "$i" -gt 0 ]; do
+ local i=1
+ while : ; do
ocf_log info "Start: nfs-idmapd i: $i"
fn=`mktemp`
nfs_exec status nfs-idmapd > $fn 2>&1
@@ -859,29 +794,21 @@
break;
fi
sleep 1
- let i=$i-1
+ i=$((i + 1))
done
- if [ "$i" -eq 0 ]; then
- ocf_exit_reason "Failed to start nfs-idmapd"
- return $OCF_ERR_GENERIC
- fi
nfs_exec start rpc-statd
- local i=10
- while [ "$i" -gt 0 ]; do
+ local i=1
+ while : ; do
ocf_log info "Start: rpc-statd i: $i"
- rpcinfo -t localhost 100024 &> /dev/null
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
rc=$?
if [ "$rc" -eq "0" ]; then
break;
fi
sleep 1
- let i=$i-1
+ i=$((i + 1))
done
- if [ "$i" -eq 0 ]; then
- ocf_exit_reason "Failed to start rpc-statd"
- return $OCF_ERR_GENERIC
- fi
fi
fn=`mktemp`
@@ -936,16 +863,16 @@
return $OCF_ERR_GENERIC
fi
- nfs_exec stop rpc-statd &> /dev/null
+ nfs_exec stop rpc-statd > /dev/null 2>&1
ocf_log info "Stop: rpc-statd"
- rpcinfo -t localhost 100024 &> /dev/null
+ rpcinfo -t localhost 100024 > /dev/null 2>&1
rc=$?
if [ "$rc" -eq "0" ]; then
ocf_exit_reason "Failed to stop rpc-statd"
return $OCF_ERR_GENERIC
fi
- nfs_exec stop nfs-idmapd &> /dev/null
+ nfs_exec stop nfs-idmapd > /dev/null 2>&1
ocf_log info "Stop: nfs-idmapd"
fn=`mktemp`
nfs_exec status nfs-idmapd > $fn 2>&1
@@ -957,9 +884,9 @@
return $OCF_ERR_GENERIC
fi
- nfs_exec stop nfs-mountd &> /dev/null
+ nfs_exec stop nfs-mountd > /dev/null 2>&1
ocf_log info "Stop: nfs-mountd"
- rpcinfo -t localhost 100005 &> /dev/null
+ rpcinfo -t localhost 100005 > /dev/null 2>&1
rc=$?
if [ "$rc" -eq "0" ]; then
ocf_exit_reason "Failed to stop nfs-mountd"
@@ -975,8 +902,11 @@
# systemd
if [ "$EXEC_MODE" -eq "2" ]; then
- nfs_exec stop rpcbind &> /dev/null
+ nfs_exec stop rpcbind > /dev/null 2>&1
ocf_log info "Stop: rpcbind"
+
+ nfs_exec stop rpc-gssd > /dev/null 2>&1
+ ocf_log info "Stop: rpc-gssd"
fi
unbind_tree

71
SOURCES/bz1128933-Fix-ha_log-drop-global-__ha_log_ignore_stderr_once-h.patch

@@ -0,0 +1,71 @@
From 2364eff6a6837ae4418f1876f7f29459fdeec3bb Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 22 Sep 2014 15:26:59 +0200
Subject: [PATCH 4/6] Fix: ha_log: drop global __ha_log_ignore_stderr_once hack

Use a helper function instead,
which understands --ignore-stderr as its first parameter.
---
heartbeat/ocf-shellfuncs.in | 23 +++++++++--------------
1 file changed, 9 insertions(+), 14 deletions(-)

diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index c370fca..fd916e7 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -43,14 +43,6 @@ unset LANGUAGE; export LANGUAGE
__SCRIPT_NAME=`basename $0`
-# This is internal to shellfuncs.
-# When set, ha_log can be used in a way that guarantees
-# that stderr will not be printed to. This allows us to
-# use ocf_exit_reason to print a string to stderr and use
-# ha_log to print the same string to the other log facilities
-# without having duplicate messages sent to stderr.
-__ha_log_ignore_stderr_once=""
-
if [ -z "$OCF_ROOT" ]; then
: ${OCF_ROOT=@OCF_ROOT_DIR@}
fi
@@ -189,12 +181,11 @@ set_logtag() {
fi
}
-ha_log() {
- local ignore_stderr="$__ha_log_ignore_stderr_once"
+__ha_log() {
+ local ignore_stderr=false
local loglevel
- # always reset this variable
- __ha_log_ignore_stderr_once=""
+ [ "x$1" = "x--ignore-stderr" ] && ignore_stderr=true && shift
[ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
# if we're connected to a tty, then output to stderr
@@ -257,6 +248,11 @@ ha_log() {
fi
}
+ha_log()
+{
+ __ha_log "$@"
+}
+
ha_debug() {
if [ "x${HA_debug}" = "x0" ] ; then
@@ -383,8 +379,7 @@ ocf_exit_reason()
msg=$(printf "${fmt}" "$@")
printf >&2 "%s%s\n" "$cookie" "$msg"
- __ha_log_ignore_stderr_once="true"
- ha_log "ERROR: $msg"
+ __ha_log --ignore-stderr "ERROR: $msg"
}
#
--
1.8.4.2

77
SOURCES/bz1128933-Fix-ocf_exit_reason-implicit-format-string-s-for-sin.patch

@@ -0,0 +1,77 @@
From de3c26d6333a00210de8d112cdb90dc8c2e19367 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Mon, 22 Sep 2014 14:58:58 +0200
Subject: [PATCH 3/6] Fix: ocf_exit_reason: implicit format string "%s" for
single argument version

Also, don't use $msg as the format string; print it via "%s%s" "$cookie" "$msg" instead.
Otherwise, depending on the presence of % sequences in $msg,
you would get different output on stderr and via ha_log.

Without the patch:

( OCF_ROOT=$PWD dash -c '. heartbeat/ocf-shellfuncs.in ; ocf_exit_reason "0.x% Bugs less"' )
dash: 372: printf: % B: invalid directive
ocf-exit-reason:0.x
( OCF_ROOT=$PWD dash -c '. heartbeat/ocf-shellfuncs.in ; ocf_exit_reason "0.x% bugs less"' )
ocf-exit-reason:0.xugs less

With this patch:

( OCF_ROOT=$PWD dash -c '. heartbeat/ocf-shellfuncs.in ; ocf_exit_reason "0.x% Bugs less"' )
ocf-exit-reason:0.x% Bugs less
( OCF_ROOT=$PWD dash -c '. heartbeat/ocf-shellfuncs.in ; ocf_exit_reason "0.x% bugs less"' )
ocf-exit-reason:0.x% bugs less
---
heartbeat/ocf-shellfuncs.in | 27 +++++++++++++++++++--------
1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 9ba8e26..c370fca 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -356,22 +356,33 @@ ocf_log() {
ocf_exit_reason()
{
local cookie="$OCF_EXIT_REASON_PREFIX"
- local fmt="$1"
+ local fmt
local msg
- if [ $# -lt 1 ]; then
- ocf_log err "Not enough arguments [$#] to ocf_log_exit_msg."
- fi
+ # No argument is likely not intentional.
+ # Just one argument implies a printf format string of just "%s".
+ # "Least surprise" in case some interpolated string from variable
+ # expansion or other contains a percent sign.
+ # More than one argument: first argument is going to be the format string.
+ case $# in
+ 0) ocf_log err "Not enough arguments to ocf_log_exit_msg." ;;
+ 1) fmt="%s" ;;
+
+ *) fmt=$1
+ shift
+ case $fmt in
+ *%*) : ;; # ok, does look like a format string
+ *) ocf_log warn "Does not look like format string: [$fmt]" ;;
+ esac ;;
+ esac
+
if [ -z "$cookie" ]; then
# use a default prefix
cookie="ocf-exit-reason:"
fi
- shift
-
msg=$(printf "${fmt}" "$@")
-
- printf >&2 "%s${msg}\n" "$cookie"
+ printf >&2 "%s%s\n" "$cookie" "$msg"
__ha_log_ignore_stderr_once="true"
ha_log "ERROR: $msg"
}
--
1.8.4.2

26
SOURCES/bz1128933-Fix-shellfuncs-fix-syntax-error-caused-by-exit_reaso.patch

@@ -0,0 +1,26 @@
From da05792dae917d67b529a27b0605166774bb21b9 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Sun, 21 Sep 2014 11:19:07 -0400
Subject: [PATCH 2/6] Fix: shellfuncs: fix syntax error caused by exit_reason
support for dash shell.

---
heartbeat/ocf-shellfuncs.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index ff7c32d..9ba8e26 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -356,7 +356,7 @@ ocf_log() {
ocf_exit_reason()
{
local cookie="$OCF_EXIT_REASON_PREFIX"
- local fmt=$1
+ local fmt="$1"
local msg
if [ $# -lt 1 ]; then
--
1.8.4.2

185
SOURCES/bz1128933-IPaddr2-exit-reason-support.patch

@@ -0,0 +1,185 @@
From a8adbaa0716f0fa39e41293fe81530686f64e2c8 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 1 Aug 2014 15:31:38 -0400
Subject: [PATCH] High: IPaddr2: support ocf_exit_reason

---
heartbeat/IPaddr2 | 40 +++++++++++++++++++++-------------------
1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
index b645288..2791ea0 100755
--- a/heartbeat/IPaddr2
+++ b/heartbeat/IPaddr2
@@ -342,12 +342,12 @@ ip_init() {
local rc
if [ X`uname -s` != "XLinux" ]; then
- ocf_log err "IPaddr2 only supported Linux."
+ ocf_exit_reason "IPaddr2 only supported Linux."
exit $OCF_ERR_INSTALLED
fi
if [ X"$OCF_RESKEY_ip" = "X" ]; then
- ocf_log err "IP address (the ip parameter) is mandatory"
+ ocf_exit_reason "IP address (the ip parameter) is mandatory"
exit $OCF_ERR_CONFIGURED
fi
@@ -359,7 +359,7 @@ ip_init() {
then
: YAY!
else
- ocf_log err "You must be root for $__OCF_ACTION operation."
+ ocf_exit_reason "You must be root for $__OCF_ACTION operation."
exit $OCF_ERR_PERM
fi
@@ -382,14 +382,14 @@ ip_init() {
IP_INC_NO=`expr ${OCF_RESKEY_CRM_meta_clone:-0} + 1`
if ocf_is_true ${OCF_RESKEY_lvs_support} && [ $IP_INC_GLOBAL -gt 1 ]; then
- ocf_log err "LVS and load sharing do not go together well"
+ ocf_exit_reason "LVS and load sharing do not go together well"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_decimal "$IP_INC_GLOBAL" && [ $IP_INC_GLOBAL -gt 0 ]; then
:
else
- ocf_log err "Invalid OCF_RESKEY_incarnations_max_global [$IP_INC_GLOBAL], should be positive integer"
+ ocf_exit_reason "Invalid meta-attribute clone_max [$IP_INC_GLOBAL], should be positive integer"
exit $OCF_ERR_CONFIGURED
fi
@@ -397,20 +397,20 @@ ip_init() {
if [ $? -ne 0 ];then
FAMILY=inet
if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
- ocf_log err "IPv4 does not support lvs_ipv6_addrlabel"
+ ocf_exit_reason "IPv4 does not support lvs_ipv6_addrlabel"
exit $OCF_ERR_CONFIGURED
fi
else
FAMILY=inet6
if ocf_is_true $OCF_RESKEY_lvs_support ;then
- ocf_log err "The IPv6 does not support lvs_support"
+ ocf_exit_reason "The IPv6 does not support lvs_support"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_true $OCF_RESKEY_lvs_ipv6_addrlabel ;then
if ocf_is_decimal "$OCF_RESKEY_lvs_ipv6_addrlabel_value" && [ $OCF_RESKEY_lvs_ipv6_addrlabel_value -ge 0 ]; then
:
else
- ocf_log err "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer"
+ ocf_exit_reason "Invalid lvs_ipv6_addrlabel_value [$OCF_RESKEY_lvs_ipv6_addrlabel_value], should be positive integer"
exit $OCF_ERR_CONFIGURED
fi
fi
@@ -446,7 +446,7 @@ ip_init() {
ocf_log warn "[$FINDIF] failed"
exit $OCF_SUCCESS
else
- ocf_log err "[$FINDIF] failed"
+ ocf_exit_reason "[$FINDIF] failed"
exit $rc
fi
fi
@@ -769,7 +769,8 @@ END
}
ip_start() {
- if [ -z "$NIC" ]; then # no nic found or specified
+ if [ -z "$NIC" ]; then
+ ocf_exit_reason "No nic found or specified"
exit $OCF_ERR_CONFIGURED
fi
@@ -799,7 +800,7 @@ ip_start() {
--local-node $IP_INC_NO \
--hashmode $IP_CIP_HASH
if [ $? -ne 0 ]; then
- ocf_log err "iptables failed"
+ ocf_exit_reason "iptables failed"
exit $OCF_ERR_GENERIC
fi
fi
@@ -822,7 +823,7 @@ ip_start() {
add_interface $OCF_RESKEY_ip $NETMASK ${BRDCAST:-none} $NIC $IFLABEL
if [ $? -ne 0 ]; then
- ocf_log err "$CMD failed."
+ ocf_exit_reason "$CMD failed."
exit $OCF_ERR_GENERIC
fi
fi
@@ -897,6 +898,7 @@ ip_stop() {
if [ "$ip_del_if" = "yes" ]; then
delete_interface $OCF_RESKEY_ip $NIC $NETMASK
if [ $? -ne 0 ]; then
+ ocf_exit_reason "Unable to remove IP [${OCF_RESKEY_ip} from interface [ $NIC ]"
exit $OCF_ERR_GENERIC
fi
@@ -940,7 +942,7 @@ set_send_arp_program() {
ARP_SEND_FUN=run_send_ib_arp
;;
*)
- ocf_log err "unrecognized arp_sender value: $OCF_RESKEY_arp_sender"
+ ocf_exit_reason "unrecognized arp_sender value: $OCF_RESKEY_arp_sender"
exit $OCF_ERR_CONFIGURED
;;
esac
@@ -975,21 +977,21 @@ ip_validate() {
if ocf_is_true "$OCF_RESKEY_unique_clone_address" &&
! ocf_is_true "$OCF_RESKEY_CRM_meta_globally_unique"; then
- ocf_log err "unique_clone_address makes sense only with meta globally_unique set"
+ ocf_exit_reason "unique_clone_address makes sense only with meta globally_unique set"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_decimal "$OCF_RESKEY_arp_interval" && [ $OCF_RESKEY_arp_interval -gt 0 ]; then
:
else
- ocf_log err "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]"
+ ocf_exit_reason "Invalid OCF_RESKEY_arp_interval [$OCF_RESKEY_arp_interval]"
exit $OCF_ERR_CONFIGURED
fi
if ocf_is_decimal "$OCF_RESKEY_arp_count" && [ $OCF_RESKEY_arp_count -gt 0 ]; then
:
else
- ocf_log err "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]"
+ ocf_exit_reason "Invalid OCF_RESKEY_arp_count [$OCF_RESKEY_arp_count]"
exit $OCF_ERR_CONFIGURED
fi
@@ -1001,13 +1003,13 @@ ip_validate() {
sourceip|sourceip-sourceport|sourceip-sourceport-destport)
;;
*)
- ocf_log err "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]"
+ ocf_exit_reason "Invalid OCF_RESKEY_clusterip_hash [$IP_CIP_HASH]"
exit $OCF_ERR_CONFIGURED
;;
esac
if ocf_is_true ${OCF_RESKEY_lvs_support}; then
- ecf_log err "LVS and load sharing not advised to try"
+ ocf_exit_reason "LVS and load sharing not advised to try"
exit $OCF_ERR_CONFIGURED
fi
@@ -1020,7 +1022,7 @@ ip_validate() {
esac
if [ $valid -eq 0 ]; then
- ocf_log err "Invalid IF_MAC [$IF_MAC]"
+ ocf_exit_reason "Invalid IF_MAC [$IF_MAC]"
exit $OCF_ERR_CONFIGURED
fi
--
1.8.4.2

102
SOURCES/bz1128933-VirtualDomain-exit-reason-support.patch

@@ -0,0 +1,102 @@
From 0501ed8086e054d9b076719c5bd131edbc95db5b Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 1 Aug 2014 16:06:22 -0400
Subject: [PATCH] High: VirtualDomain: exit reason support

---
heartbeat/VirtualDomain | 19 ++++++++++---------
1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
index 3a6b6a9..c44c090 100755
--- a/heartbeat/VirtualDomain
+++ b/heartbeat/VirtualDomain
@@ -356,7 +356,7 @@ VirtualDomain_Start() {
rm -f $snapshotimage
return $OCF_SUCCESS
fi
- ocf_log error "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory."
+ ocf_exit_reason "Failed to restore ${DOMAIN_NAME} from state file in ${OCF_RESKEY_snapshot} directory."
return $OCF_ERR_GENERIC
fi
@@ -371,7 +371,7 @@ VirtualDomain_Start() {
virsh $VIRSH_OPTIONS create ${OCF_RESKEY_config}
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log error "Failed to start virtual domain ${DOMAIN_NAME}."
+ ocf_exit_reason "Failed to start virtual domain ${DOMAIN_NAME}."
return $OCF_ERR_GENERIC
fi
@@ -395,6 +395,7 @@ force_stop()
*"error:"*"domain is not running"*|*"error:"*"domain not found"*)
: ;; # unexpected path to the intended outcome, all is well
[!0]*)
+ ocf_exit_reason "forced stop failed"
return $OCF_ERR_GENERIC ;;
0*)
while [ $status != $OCF_NOT_RUNNING ]; do
@@ -525,14 +526,14 @@ VirtualDomain_Migrate_To() {
virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri}
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log err "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc"
+ ocf_exit_reason "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc"
return $OCF_ERR_GENERIC
else
ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."
return $OCF_SUCCESS
fi
else
- ocf_log err "$DOMAIN_NAME: migrate_to: Not active locally!"
+ ocf_exit_reason "$DOMAIN_NAME: migrate_to: Not active locally!"
return $OCF_ERR_GENERIC
fi
}
@@ -560,7 +561,7 @@ VirtualDomain_Monitor() {
# A monitor script returned a non-success exit
# code. Stop iterating over the list of scripts, log a
# warning message, and propagate $OCF_ERR_GENERIC.
- ocf_log warn "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
+ ocf_exit_reason "Monitor command \"${script}\" for domain ${DOMAIN_NAME} returned ${script_rc} with output: ${script_output}"
rc=$OCF_ERR_GENERIC
break
else
@@ -582,13 +583,13 @@ VirtualDomain_Validate_All() {
done
if [ -z $OCF_RESKEY_config ]; then
- ocf_log error "Missing configuration parameter \"config\"."
+ ocf_exit_reason "Missing configuration parameter \"config\"."
return $OCF_ERR_CONFIGURED
fi
if ocf_is_true $OCF_RESKEY_force_stop; then
if [ -n "$OCF_RESKEY_snapshot" ]; then
- ocf_log error "The 'force_stop' and 'snapshot' options can not be used together."
+ ocf_exit_reason "The 'force_stop' and 'snapshot' options can not be used together."
return $OCF_ERR_CONFIGURED
fi
fi
@@ -601,7 +602,7 @@ VirtualDomain_Validate_All() {
elif [ "$__OCF_ACTION" = "stop" ]; then
ocf_log info "Configuration file $OCF_RESKEY_config not readable, resource considered stopped."
else
- ocf_log error "Configuration file $OCF_RESKEY_config does not exist or is not readable."
+ ocf_exit_reason "Configuration file $OCF_RESKEY_config does not exist or is not readable."
return $OCF_ERR_INSTALLED
fi
fi
@@ -644,7 +645,7 @@ fi
# Retrieve the domain name from the xml file.
DOMAIN_NAME=`egrep '[[:space:]]*<name>.*</name>[[:space:]]*$' ${OCF_RESKEY_config} | sed -e 's/[[:space:]]*<name>\(.*\)<\/name>[[:space:]]*$/\1/' 2>/dev/null`
if [ -z $DOMAIN_NAME ]; then
- ocf_log err "This is unexpected. Cannot determine domain name."
+ ocf_exit_reason "Unable to determine domain name."
exit $OCF_ERR_GENERIC
fi
--
1.8.4.2

25
SOURCES/bz1128933-binary-check-exit-reason-support.patch

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
From 6029211e47a83cec4a6c4e44a967e967cb0b92fb Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 1 Aug 2014 13:13:05 -0400
Subject: [PATCH] High: ocf-binaries: have 'check_binary' set exit reason

---
heartbeat/ocf-binaries.in | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/ocf-binaries.in b/heartbeat/ocf-binaries.in
index a78a348..cbf70db 100644
--- a/heartbeat/ocf-binaries.in
+++ b/heartbeat/ocf-binaries.in
@@ -56,7 +56,7 @@ check_binary () {
if ! have_binary "$1"; then
if [ "$OCF_NOT_RUNNING" = 7 ]; then
# Chances are we have a fully setup OCF environment
- ocf_log err "Setup problem: couldn't find command: $1"
+ ocf_exit_reason "Setup problem: couldn't find command: $1"
else
echo "Setup problem: couldn't find command: $1"
fi
--
1.8.4.2

2118
SOURCES/bz1128933-exit-reason-string-updates.patch

File diff suppressed because it is too large Load Diff

43
SOURCES/bz1128933-exportfs-exit-reason-support.patch

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
From e334f036ab02ec6cdf4cf463e26d4f32e592f15c Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 15 Aug 2014 11:03:36 -0500
Subject: [PATCH] High: exportfs: support exit reason string

---
heartbeat/exportfs | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/heartbeat/exportfs b/heartbeat/exportfs
index 471da24..3f91037 100755
--- a/heartbeat/exportfs
+++ b/heartbeat/exportfs
@@ -239,7 +239,7 @@ exportfs_monitor ()
ocf_log info "Directory ${OCF_RESKEY_directory} is not exported to ${OCF_RESKEY_clientspec} (stopped)."
return $OCF_NOT_RUNNING;;
*)
- ocf_log err "Unable to determine export status for ${OCF_RESKEY_directory}."
+ ocf_exit_reason "Unable to determine export status for ${OCF_RESKEY_directory}."
return $OCF_ERR_GENERIC;;
esac
}
@@ -340,7 +340,7 @@ exportfs_stop ()
ocf_log info "Un-exported file system"
return $OCF_SUCCESS
else
- ocf_log err "Failed to un-export file system"
+ ocf_exit_reason "Failed to un-export file system"
exit $OCF_ERR_GENERIC
fi
}
@@ -348,7 +348,7 @@ exportfs_stop ()
exportfs_validate_all ()
{
if [ ! -d $OCF_RESKEY_directory ]; then
- ocf_log err "$OCF_RESKEY_directory does not exist or is not a directory"
+ ocf_exit_reason "$OCF_RESKEY_directory does not exist or is not a directory"
return $OCF_ERR_INSTALLED
fi
}
--
1.8.4.2

98
SOURCES/bz1128933-introducing-exit-reason-support.patch

@ -0,0 +1,98 @@ @@ -0,0 +1,98 @@
From 0dfe07cbd9e74e0f7f3c85a42085972bf24e1d24 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 15 Aug 2014 10:50:06 -0500
Subject: [PATCH] Introducing exit reason string support

---
heartbeat/ocf-shellfuncs.in | 48 ++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/heartbeat/ocf-shellfuncs.in b/heartbeat/ocf-shellfuncs.in
index 254da57..ff7c32d 100644
--- a/heartbeat/ocf-shellfuncs.in
+++ b/heartbeat/ocf-shellfuncs.in
@@ -43,6 +43,14 @@ unset LANGUAGE; export LANGUAGE
__SCRIPT_NAME=`basename $0`
+# This is internal to shellfuncs.
+# When set, ha_log can be used in a way that guarantees
+# that stderr will not be printed to. This allows us to
+# use ocf_exit_reason to print a string to stderr and use
+# ha_log to print the same string to the other log facilities
+# without having duplicate messages sent to stderr.
+__ha_log_ignore_stderr_once=""
+
if [ -z "$OCF_ROOT" ]; then
: ${OCF_ROOT=@OCF_ROOT_DIR@}
fi
@@ -182,12 +190,20 @@ set_logtag() {
}
ha_log() {
+ local ignore_stderr="$__ha_log_ignore_stderr_once"
local loglevel
+
+ # always reset this variable
+ __ha_log_ignore_stderr_once=""
+
[ none = "$HA_LOGFACILITY" ] && HA_LOGFACILITY=""
# if we're connected to a tty, then output to stderr
if tty >/dev/null; then
if [ "x$HA_debug" = "x0" -a "x$loglevel" = xdebug ] ; then
return 0
+ elif [ "$ignore_stderr" = "true" ]; then
+ # something already printed this error to stderr, so ignore
+ return 0
fi
if [ "$HA_LOGTAG" ]; then
echo "$HA_LOGTAG: $*"
@@ -226,7 +242,7 @@ ha_log() {
echo "$HA_LOGTAG: "`hadate`"${*}" >> $HA_LOGFILE
fi
if
- [ -z "$HA_LOGFACILITY" -a -z "$HA_LOGFILE" ]
+ [ -z "$HA_LOGFACILITY" -a -z "$HA_LOGFILE" ] && ! [ "$ignore_stderr" = "true" ]
then
: appending to stderr
echo `hadate`"${*}" >&2
@@ -331,6 +347,36 @@ ocf_log() {
}
#
+# ocf_exit_reason: print exit error string to stderr
+# Usage: Allows the OCF script to provide a string
+# describing why the exit code was returned.
+# Arguments: reason - required, The string that represents why the error
+# occured.
+#
+ocf_exit_reason()
+{
+ local cookie="$OCF_EXIT_REASON_PREFIX"
+ local fmt=$1
+ local msg
+
+ if [ $# -lt 1 ]; then
+ ocf_log err "Not enough arguments [$#] to ocf_log_exit_msg."
+ fi
+ if [ -z "$cookie" ]; then
+ # use a default prefix
+ cookie="ocf-exit-reason:"
+ fi
+
+ shift
+
+ msg=$(printf "${fmt}" "$@")
+
+ printf >&2 "%s${msg}\n" "$cookie"
+ __ha_log_ignore_stderr_once="true"
+ ha_log "ERROR: $msg"
+}
+
+#
# ocf_deprecated: Log a deprecation warning
# Usage: ocf_deprecated [param-name]
# Arguments: param-name optional, name of a boolean resource
--
1.8.4.2

52
SOURCES/bz1128933-nfsnotify-exit-reason-support.patch

@ -0,0 +1,52 @@ @@ -0,0 +1,52 @@
From 566544cb98bc4e373ac75fa8c6281ef031a673ca Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 1 Aug 2014 13:13:39 -0400
Subject: [PATCH] High: nfsnotify: set exit reason strings in nfsnotify agent

---
heartbeat/nfsnotify | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/heartbeat/nfsnotify b/heartbeat/nfsnotify
index 2d0bbfc..5f72d58 100755
--- a/heartbeat/nfsnotify
+++ b/heartbeat/nfsnotify
@@ -152,7 +152,7 @@ check_statd_pidfile()
return $OCF_SUCCESS
fi
- ocf_log err "$(cat $pidfile) for $binary is no longer running, sm-notify needs to re-notify clients"
+ ocf_exit_reason "$(cat $pidfile) for $binary is no longer running, sm-notify needs to re-notify clients"
return $OCF_ERR_GENERIC
fi
@@ -179,7 +179,7 @@ write_statd_pid()
return $OCF_NOT_RUNNING;;
*)
rm -f "$pidfile" > /dev/null 2>&1
- ocf_log err "Error encountered detecting pid status of $binary"
+ ocf_exit_reason "Error encountered detecting pid status of $binary"
return $OCF_ERR_GENERIC;;
esac
}
@@ -243,7 +243,7 @@ v3notify_start()
ocf_log info "sending notifications on default source address."
$SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -P $cur_statd
if [ $? -ne 0 ]; then
- ocf_log err "sm-notify failed, view syslog for more information."
+ ocf_exit_reason "sm-notify execution failed, view syslog for more information"
return $OCF_ERR_GENERIC
fi
@@ -269,7 +269,7 @@ v3notify_start()
ocf_log info "sending notifications with source address $ip"
$SM_NOTIFY_BINARY -f $OCF_RESKEY_notify_args -v $ip -P "$cur_statd"
if [ $? -ne 0 ]; then
- ocf_log err "sm-notify with source host set to, $ip, failed. view syslog for more information"
+ ocf_exit_reason "sm-notify with source host set to [ $ip ] failed. view syslog for more information"
return $OCF_ERR_GENERIC
fi
done
--
1.8.4.2

97
SOURCES/bz1128933-nfssserver-exit-reason-support.patch

@ -0,0 +1,97 @@ @@ -0,0 +1,97 @@
From dab933121dfff2b4e9c141c141a196ddc40e9d56 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Fri, 1 Aug 2014 13:21:11 -0400
Subject: [PATCH] High: nfsserver: support exit string in nfsserver agent

---
heartbeat/nfsserver | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/heartbeat/nfsserver b/heartbeat/nfsserver
index ac921f3..de1a802 100755
--- a/heartbeat/nfsserver
+++ b/heartbeat/nfsserver
@@ -317,7 +317,7 @@ set_exec_mode()
fi
fi
- ocf_log err "No init script or systemd unit file detected for nfs server"
+ ocf_exit_reason "No init script or systemd unit file detected for nfs server"
exit $OCF_ERR_INSTALLED
}
@@ -366,7 +366,7 @@ nfsserver_monitor ()
v3locking_exec "status"
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log error "NFS server is up, but the locking daemons are down"
+ ocf_exit_reason "NFS server is up, but the locking daemons are down"
rc=$OCF_ERR_GENERIC
fi
return $rc
@@ -682,7 +682,7 @@ nfsserver_start ()
v3locking_exec "start"
rc=$?
if [ $rc -ne 0 ]; then
- ocf_log error "Failed to start NFS server locking daemons"
+ ocf_exit_reason "Failed to start NFS server locking daemons"
return $rc
fi
else
@@ -696,7 +696,7 @@ nfsserver_start ()
rm -f $fn
if [ $rc -ne 0 ]; then
- ocf_log err "Failed to start NFS server"
+ ocf_exit_reason "Failed to start NFS server"
return $rc
fi
@@ -723,16 +723,16 @@ nfsserver_stop ()
v3locking_exec "stop"
if [ $? -ne 0 ]; then
- ocf_log err "Failed to stop NFS locking daemons"
+ ocf_exit_reason "Failed to stop NFS locking daemons"
rc=$OCF_ERR_GENERIC
fi
if [ $rc -eq 0 ]; then
unbind_tree
ocf_log info "NFS server stopped"
- return $OCF_SUCCESS
+ else
+ ocf_exit_reason "Failed to stop NFS server"
fi
- ocf_log err "Failed to stop NFS server"
return $rc
}
@@ -746,13 +746,13 @@ nfsserver_validate ()
if [ -n "$OCF_RESKEY_CRM_meta_clone" ] && [ -n "$OCF_RESKEY_nfs_shared_infodir" ]; then
- ocf_log err "This RA does not support clone mode when a shared info directory is in use."
+ ocf_exit_reason "This RA does not support clone mode when a shared info directory is in use."
exit $OCF_ERR_CONFIGURED
fi
if [ -n "$OCF_RESKEY_nfs_smnotify_retry_time" ]; then
if ! ocf_is_decimal "$OCF_RESKEY_nfs_smnotify_retry_time"; then
- ocf_log err "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]"
+ ocf_exit_reason "Invalid nfs_smnotify_retry_time [$OCF_RESKEY_nfs_smnotify_retry_time]"
exit $OCF_ERR_CONFIGURED
fi
fi
@@ -760,7 +760,7 @@ nfsserver_validate ()
case ${OCF_RESKEY_nfs_notify_cmd##*/} in
sm-notify|rpc.statd) ;;
*)
- ocf_log err "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]"
+ ocf_exit_reason "Invalid nfs_notify_cmd [$OCF_RESKEY_nfs_notify_cmd]"
exit $OCF_ERR_CONFIGURED
;;
esac
--
1.8.4.2

69
SOURCES/bz1135026-docker-handle-invalid-monitor-cmd.patch

@ -0,0 +1,69 @@ @@ -0,0 +1,69 @@
From c25542d8808640fae7fad39e27e95e83ffde2e31 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 27 Oct 2014 18:22:27 -0400
Subject: [PATCH] Low: docker: indicate when monitor_cmd is not available after
startup

---
heartbeat/docker | 29 ++++++++++++++++++++++-------
1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/heartbeat/docker b/heartbeat/docker
index 929b26b..a0dcee4 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -168,15 +168,28 @@ END
monitor_cmd_exec()
{
local rc=$OCF_SUCCESS
- if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
- out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
- rc=$?
- if [ $rc -ne 0 ]; then
- ocf_log info "monitor cmd failed with exit code $rc"
- ocf_log info "stdout/stderr: $out"
- rc=$OCF_ERR_GENERIC
+ local out
+
+ if [ -z "$OCF_RESKEY_monitor_cmd" ]; then
+ return $rc
+ fi
+
+ out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log info "monitor cmd exit code = $rc"
+ ocf_log info "stdout/stderr: $out"
+
+ if [ $rc -eq 127 ]; then
+ ocf_exit_reason "monitor_cmd, ${OCF_RESKEY_monitor_cmd} , not found within container."
+ # there is no recovering from this, exit immediately
+ exit $OCF_ERR_ARGS
fi
+ rc=$OCF_ERR_GENERIC
+ else
+ ocf_log info "monitor cmd passed: exit code = $rc"
fi
+
return $rc
}
@@ -288,6 +301,7 @@ docker_start()
monitor_cmd_exec
if [ $? -eq $OCF_SUCCESS ]; then
+ ocf_log notice "Container $CONTAINER started successfully"
return $OCF_SUCCESS
fi
@@ -365,6 +379,7 @@ docker_validate()
fi
if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
+ ocf_log info "checking for nsenter, which is required when 'monitor_cmd' is specified"
check_binary nsenter
fi
--
1.8.4.2

145
SOURCES/bz1135026-docker-monitor_cmd-arg.patch

@ -0,0 +1,145 @@ @@ -0,0 +1,145 @@
From 804b68824372f98e23b858f6284160c1f2b0e19f Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Sat, 25 Oct 2014 20:54:14 -0400
Subject: [PATCH 2/2] High: docker: monitor_cmd option for executing status
script within container

---
heartbeat/docker | 76 +++++++++++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 67 insertions(+), 9 deletions(-)

diff --git a/heartbeat/docker b/heartbeat/docker
index cdf4e82..929b26b 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -106,6 +106,20 @@ it has initialized.
<content type="string"/>
</parameter>
+<parameter name="monitor_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specifiy the full path of a command to launch within the container to check
+the health of the container. This command must return 0 to indicate that
+the container is healthy. A non-zero return code will indicate that the
+container has failed and should be recovered.
+
+The command is executed using nsenter. In the future 'docker exec' will
+be used once it is more widely supported.
+</longdesc>
+<shortdesc lang="en">monitor command</shortdesc>
+<content type="string"/>
+</parameter>
+
<parameter name="force_kill" required="0" unique="0">
<longdesc lang="en">
Kill a container immediately rather than waiting for it to gracefully
@@ -150,6 +164,22 @@ Expects to have a fully populated OCF RA-compliant environment set.
END
}
+
+monitor_cmd_exec()
+{
+ local rc=$OCF_SUCCESS
+ if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
+ out=$(echo "$OCF_RESKEY_monitor_cmd" | nsenter --target $(docker inspect --format {{.State.Pid}} ${CONTAINER}) --mount --uts --ipc --net --pid 2>&1)
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ ocf_log info "monitor cmd failed with exit code $rc"
+ ocf_log info "stdout/stderr: $out"
+ rc=$OCF_ERR_GENERIC
+ fi
+ fi
+ return $rc
+}
+
container_exists()
{
docker inspect $CONTAINER > /dev/null 2>&1
@@ -171,7 +201,7 @@ remove_container()
ocf_run docker rm $CONTAINER
}
-docker_monitor()
+docker_simple_status()
{
local val
@@ -195,11 +225,25 @@ docker_monitor()
return $OCF_NOT_RUNNING
}
+docker_monitor()
+{
+ local rc=0
+
+ docker_simple_status
+ rc=$?
+
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+ monitor_cmd_exec
+}
+
docker_start()
{
local run_opts="-d --name=${CONTAINER}"
# check to see if the container has already started
- docker_monitor
+ docker_simple_status
if [ $? -eq $OCF_SUCCESS ]; then
return $OCF_SUCCESS
fi
@@ -233,19 +277,29 @@ docker_start()
return $OCF_ERR_GENERIC
fi
- docker_monitor
- if [ $? -ne $OCF_SUCCESS ]; then
- ocf_exit_reason "Newly created docker container exited after start"
- return $OCF_ERR_GENERIC
- fi
- return $OCF_SUCCESS
+ # wait for monitor to pass before declaring that the container is started
+ while true; do
+ docker_simple_status
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_exit_reason "Newly created docker container exited after start"
+ return $OCF_ERR_GENERIC
+ fi
+
+ monitor_cmd_exec
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+
+ ocf_exit_reason "waiting on monitor_cmd to pass after start"
+ sleep 1
+ done
}
docker_stop()
{
local timeout=60
- docker_monitor
+ docker_simple_status
if [ $? -eq $OCF_NOT_RUNNING ]; then
remove_container
return $OCF_SUCCESS
@@ -310,6 +364,10 @@ docker_validate()
exit $OCF_ERR_CONFIGURED
fi
+ if [ -n "$OCF_RESKEY_monitor_cmd" ]; then
+ check_binary nsenter
+ fi
+
image_exists
if [ $? -ne 0 ]; then
ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
--
1.8.4.2

61
SOURCES/bz1135026-docker-name-arg.patch

@ -0,0 +1,61 @@ @@ -0,0 +1,61 @@
From 0f1b107a50dd2ba51277f6962dd0c28dfb8976fc Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Sat, 25 Oct 2014 20:23:55 -0400
Subject: [PATCH 1/2] High: docker: replace 'container' argument with 'name'

I realized that the 'container' argument means something special in
pacemaker. In order to avoid confusion, the 'container' argument for
this agent has been changed to 'name'. Anyone using 'container' as
an argument right now will not be affected. The option still works, but
it is now deprecated.
---
heartbeat/docker | 19 ++++++++++++++++---
1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/heartbeat/docker b/heartbeat/docker
index 37a449b..cdf4e82 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -59,7 +59,7 @@ The docker image to base this container off of.
<content type="string"/>
</parameter>
-<parameter name="container" required="0" unique="0">
+<parameter name="name" required="0" unique="0">
<longdesc lang="en">
The name to give the created container. By default this will
be that resource's instance name.
@@ -87,6 +87,11 @@ users to do things such as setting a custom entry point and injecting
environment variables into the newly created container. Note the '-d'
option is supplied regardless of this value to force containers to run
in the background.
+
+NOTE: Do not explicitly specify the --name argument in the run_opts. This
+agent will set --name using either the resource's instance or the name
+provided in the 'name' argument of this agent.
+
</longdesc>
<shortdesc lang="en">run options</shortdesc>
<content type="string"/>
@@ -314,8 +319,16 @@ docker_validate()
return $OCF_SUCCESS
}
-: ${OCF_RESKEY_container=${OCF_RESOURCE_INSTANCE}}
-CONTAINER=$OCF_RESKEY_container
+: ${OCF_RESKEY_name=${OCF_RESOURCE_INSTANCE}}
+
+if [ -n "$OCF_RESKEY_container" ]; then
+ # we'll keep the container attribute around for a bit in order not to break
+ # any existing deployments. The 'name' attribute is prefered now though.
+ CONTAINER=$OCF_RESKEY_container
+ ocf_log warn "The 'container' attribute is depreciated"
+else
+ CONTAINER=$OCF_RESKEY_name
+fi
case $__OCF_ACTION in
meta-data) meta_data
--
1.8.4.2

49
SOURCES/bz1135026-docker-stop-fix.patch

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
From 05fb27218f3b8a78bff0b0e668c8d38feeb93dca Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 23 Oct 2014 14:20:14 -0400
Subject: [PATCH] High: docker: properly remove stale container during stop
when 'reuse' is not enabled

---
heartbeat/docker | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/heartbeat/docker b/heartbeat/docker
index 546c423..37a449b 100755
--- a/heartbeat/docker
+++ b/heartbeat/docker
@@ -157,6 +157,11 @@ remove_container()
return 0
fi
+ container_exists
+ if [ $? -ne 0 ]; then
+ # don't attempt to remove a container that doesn't exist
+ return 0
+ fi
ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
ocf_run docker rm $CONTAINER
}
@@ -210,7 +215,10 @@ docker_start()
if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
ocf_log info "starting existing container $CONTAINER."
ocf_run docker start $CONTAINER
- else
+ else
+ # make sure any previous container matching our container name is cleaned up first.
+ # we already know at this point it wouldn't be running
+ remove_container
ocf_log info "running container $CONTAINER for the first time"
ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
fi
@@ -234,6 +242,7 @@ docker_stop()
local timeout=60
docker_monitor
if [ $? -eq $OCF_NOT_RUNNING ]; then
+ remove_container
return $OCF_SUCCESS
fi
--
1.8.4.2

375
SOURCES/bz1135026-introducing-docker-agent.patch

@ -0,0 +1,375 @@ @@ -0,0 +1,375 @@
From 6d4180b5ed46cda544e008b242f024b2ab143a83 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 23 Oct 2014 09:37:18 -0500
Subject: [PATCH] introducing docker agent

---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/docker | 330 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 332 insertions(+)
create mode 100755 heartbeat/docker

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index e97c7e9..ee29756 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -98,6 +98,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_conntrackd.7 \
ocf_heartbeat_db2.7 \
ocf_heartbeat_dhcpd.7 \
+ ocf_heartbeat_docker.7 \
ocf_heartbeat_eDir88.7 \
ocf_heartbeat_ethmonitor.7 \
ocf_heartbeat_exportfs.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index aab521f..f763533 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -65,6 +65,7 @@ ocf_SCRIPTS = ClusterMon \
conntrackd \
db2 \
dhcpd \
+ docker \
Delay \
eDir88 \
EvmsSCC \
diff --git a/heartbeat/docker b/heartbeat/docker
new file mode 100755
index 0000000..546c423
--- /dev/null
+++ b/heartbeat/docker
@@ -0,0 +1,330 @@
+#!/bin/sh
+#
+# The docker HA resource agent creates and launches a docker container
+# based off a supplied docker image. Containers managed by this agent
+# are both created and removed upon the agent's start and stop actions.
+#
+# Copyright (c) 2014 David Vossel <dvossel@redhat.com>
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+#######################################################################
+
+meta_data()
+{
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="docker" version="0.9">
+<version>1.0</version>
+
+<longdesc lang="en">
+The docker HA resource agent creates and launches a docker container
+based off a supplied docker image. Containers managed by this agent
+are both created and removed upon the agent's start and stop actions.
+</longdesc>
+<shortdesc lang="en">Docker container resource agent.</shortdesc>
+
+<parameters>
+<parameter name="image" required="1" unique="0">
+<longdesc lang="en">
+The docker image to base this container off of.
+</longdesc>
+<shortdesc lang="en">docker image</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="container" required="0" unique="0">
+<longdesc lang="en">
+The name to give the created container. By default this will
+be that resource's instance name.
+</longdesc>
+<shortdesc lang="en">docker container name</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="allow_pull" unique="0">
+<longdesc lang="en">
+Allow the image to be pulled from the configured docker registry when
+the image does not exist locally. NOTE, this can drastically increase
+the time required to start the container if the image repository is
+pulled over the network.
+</longdesc>
+<shortdesc lang="en">Allow pulling non-local images</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+<parameter name="run_opts" required="0" unique="0">
+<longdesc lang="en">
+Add options to be appended to the 'docker run' command which is used
+when creating the container during the start action. This option allows
+users to do things such as setting a custom entry point and injecting
+environment variables into the newly created container. Note the '-d'
+option is supplied regardless of this value to force containers to run
+in the background.
+</longdesc>
+<shortdesc lang="en">run options</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="run_cmd" required="0" unique="0">
+<longdesc lang="en">
+Specifiy a command to launch within the container once
+it has initialized.
+</longdesc>
+<shortdesc lang="en">run command</shortdesc>
+<content type="string"/>
+</parameter>
+
+<parameter name="force_kill" required="0" unique="0">
+<longdesc lang="en">
+Kill a container immediately rather than waiting for it to gracefully
+shutdown
+</longdesc>
+<shortdesc lang="en">force kill</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+<parameter name="reuse" required="0" unique="0">
+<longdesc lang="en">
+Allow the container to be reused after stopping the container. By default
+containers are removed after stop. With the reuse option containers
+will persist after the container stops.
+</longdesc>
+<shortdesc lang="en">reuse container</shortdesc>
+<content type="boolean"/>
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="90" />
+<action name="stop" timeout="90" />
+<action name="monitor" timeout="30" interval="30" depth="0" />
+<action name="meta-data" timeout="5" />
+<action name="validate-all" timeout="30" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+REQUIRE_IMAGE_PULL=0
+
+docker_usage()
+{
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+container_exists()
+{
+ docker inspect $CONTAINER > /dev/null 2>&1
+}
+
+remove_container()
+{
+ if ocf_is_true "$OCF_RESKEY_reuse"; then
+ # never remove the container if we have reuse enabled.
+ return 0
+ fi
+
+ ocf_log notice "Cleaning up inactive container, ${CONTAINER}."
+ ocf_run docker rm $CONTAINER
+}
+
+docker_monitor()
+{
+ local val
+
+ container_exists
+ if [ $? -ne 0 ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ # retrieve the 'Running' attribute for the container
+ val=$(docker inspect --format {{.State.Running}} $CONTAINER 2>/dev/null)
+ if [ $? -ne 0 ]; then
+ #not running as a result of container not being found
+ return $OCF_NOT_RUNNING
+ fi
+
+ if ocf_is_true "$val"; then
+ # container exists and is running
+ return $OCF_SUCCESS
+ fi
+
+ return $OCF_NOT_RUNNING
+}
+
+docker_start()
+{
+ local run_opts="-d --name=${CONTAINER}"
+ # check to see if the container has already started
+ docker_monitor
+ if [ $? -eq $OCF_SUCCESS ]; then
+ return $OCF_SUCCESS
+ fi
+
+ if [ -n "$OCF_RESKEY_run_opts" ]; then
+ run_opts="$run_opts $OCF_RESKEY_run_opts"
+ fi
+
+ if [ $REQUIRE_IMAGE_PULL -eq 1 ]; then
+ ocf_log notice "Beginning pull of image, ${OCF_RESKEY_image}"
+ docker pull "${OCF_RESKEY_image}"
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "failed to pull image ${OCF_RESKEY_image}"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_reuse" && container_exists; then
+ ocf_log info "starting existing container $CONTAINER."
+ ocf_run docker start $CONTAINER
+ else
+ ocf_log info "running container $CONTAINER for the first time"
+ ocf_run docker run $run_opts $OCF_RESKEY_image $OCF_RESKEY_run_cmd
+ fi
+
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "docker failed to launch container"
+ return $OCF_ERR_GENERIC
+ fi
+
+ docker_monitor
+ if [ $? -ne $OCF_SUCCESS ]; then
+ ocf_exit_reason "Newly created docker container exited after start"
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+docker_stop()
+{
+ local timeout=60
+ docker_monitor
+ if [ $? -eq $OCF_NOT_RUNNING ]; then
+ return $OCF_SUCCESS
+ fi
+
+ if [ -n "$OCF_RESKEY_CRM_meta_timeout" ]; then
+ timeout=$((($OCF_RESKEY_CRM_meta_timeout/1000) -10 ))
+ if [ $timeout -lt 10 ]; then
+ timeout=10
+ fi
+ fi
+
+ if ocf_is_true "$OCF_RESKEY_force_kill"; then
+ ocf_run docker kill $CONTAINER
+ else
+ ocf_log debug "waiting $timeout second[s] before killing container"
+ ocf_run docker stop -t=$timeout $CONTAINER
+ fi
+
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to stop container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ remove_container
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "Failed to remove stopped container, ${CONTAINER}, based on image, ${OCF_RESKEY_image}."
+ return $OCF_ERR_GENERIC
+ fi
+
+ return $OCF_SUCCESS
+}
+
+image_exists()
+{
+ local res=1
+
+
+ echo "${OCF_RESKEY_image}" | grep -q ":"
+ if [ $? -eq 0 ]; then
+ docker images | awk '{print $1 ":" $2}' | grep "^${OCF_RESKEY_image}\$" > /dev/null 2>&1
+ else
+ docker images | awk '{print $1}' | grep "^${OCF_RESKEY_image}\$" > /dev/null 2>&1
+ fi
+ if [ $? -eq 0 ]; then
+ return 0
+ fi
+ if ocf_is_true "$OCF_RESKEY_allow_pull"; then
+ REQUIRE_IMAGE_PULL=1
+ ocf_log notice "Image (${OCF_RESKEY_image}) does not exist locally but will be pulled during start"
+ return 0
+ fi
+ # image not found.
+ return 1
+}
+
+docker_validate()
+{
+ check_binary docker
+ if [ -z "$OCF_RESKEY_image" ]; then
+ ocf_exit_reason "'image' option is required"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ image_exists
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "base image, ${OCF_RESKEY_image}, could not be found."
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+: ${OCF_RESKEY_container=${OCF_RESOURCE_INSTANCE}}
+CONTAINER=$OCF_RESKEY_container
+
+case $__OCF_ACTION in
+meta-data) meta_data
+ exit $OCF_SUCCESS;;
+start)
+ docker_validate
+ docker_start;;
+stop) docker_stop;;
+monitor) docker_monitor;;
+validate-all) docker_validate;;
+usage|help) docker_usage
+ exit $OCF_SUCCESS
+ ;;
+*) docker_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
+
--
1.8.4.2

43
SOURCES/bz1138871-avoid-check-binary-in-validate.patch

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
From 328b228321e71260f9c0ea4b926b43f208aef158 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Tue, 7 Oct 2014 16:11:28 -0400
Subject: [PATCH 2/2] High: mysql-common: avoid use of check_binary in common
validation function.

Since the environment validation exit code needs to be interpreted
differently now for monitor operations, we need to avoid functions like
'check_binary' that exit the process immediately upon failure. Instead
we should use 'have_binary' in this situation.

This allows the mysql agent to work properly in a scenario where the entire
mysql install resides on shared storage.
---
heartbeat/mysql-common.sh | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/heartbeat/mysql-common.sh b/heartbeat/mysql-common.sh
index a02f8cd..310f487 100755
--- a/heartbeat/mysql-common.sh
+++ b/heartbeat/mysql-common.sh
@@ -94,8 +94,16 @@ CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INST
mysql_common_validate()
{
- check_binary $OCF_RESKEY_binary
- check_binary $OCF_RESKEY_client_binary
+
+ if ! have_binary "$OCF_RESKEY_binary"; then
+ ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_binary"
+ return $OCF_ERR_INSTALLED;
+ fi
+
+ if ! have_binary "$OCF_RESKEY_client_binary"; then
+ ocf_exit_reason "Setup problem: couldn't find command: $OCF_RESKEY_client_binary"
+ return $OCF_ERR_INSTALLED;
+ fi
if [ ! -f $OCF_RESKEY_config ]; then
ocf_exit_reason "Config $OCF_RESKEY_config doesn't exist";
--
1.8.4.2

35
SOURCES/bz1138871-mysql-error-validation-fails-monitor.patch

@ -0,0 +1,35 @@ @@ -0,0 +1,35 @@
From 6ac8332d16837a3481341316e61962e6f78694dd Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Tue, 7 Oct 2014 16:11:19 -0400
Subject: [PATCH 1/2] High: mysql: report error when validation fails during
monitor yet pid is still active

---
heartbeat/mysql | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/heartbeat/mysql b/heartbeat/mysql
index 6cfe0a0..d895369 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -1007,7 +1007,16 @@ LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
case "$1" in
stop) ;;
- monitor) exit $OCF_NOT_RUNNING;;
+ monitor)
+ mysql_common_status "info"
+ if [ $? -eq $OCF_SUCCESS ]; then
+ # if validation fails and pid is active, always treat this as an error
+ ocf_exit_reason "environment validation failed, active pid is in unknown state."
+ exit $OCF_ERR_GENERIC
+ fi
+ # validation failed and pid is not active, it's safe to say this instance is inactive.
+ exit $OCF_NOT_RUNNING;;
+
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
esac
--
1.8.4.2

26
SOURCES/bz1138871_mysql_stop_fix.patch

@ -0,0 +1,26 @@ @@ -0,0 +1,26 @@
From 42a016eb56d79f287190f3abe68c2a7e1b3ca50b Mon Sep 17 00:00:00 2001
From: John Ruemker <jruemker@redhat.com>
Date: Wed, 17 Sep 2014 18:02:03 -0400
Subject: [PATCH] High: mysql: do not report success on 'stop' if validation
fails

---
heartbeat/mysql | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/mysql b/heartbeat/mysql
index dc862f5..6cfe0a0 100755
--- a/heartbeat/mysql
+++ b/heartbeat/mysql
@@ -1006,7 +1006,7 @@ rc=$?
LSB_STATUS_STOPPED=3
if [ $rc -ne 0 ]; then
case "$1" in
- stop) exit $OCF_SUCCESS;;
+ stop) ;;
monitor) exit $OCF_NOT_RUNNING;;
status) exit $LSB_STATUS_STOPPED;;
*) exit $rc;;
--
1.8.4.2

60
SOURCES/bz1159328-LVM-check_writethrough.patch

@ -0,0 +1,60 @@ @@ -0,0 +1,60 @@
From 8d25da64ab9dee8545a0c52f7db08213a03ea106 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Tue, 28 Feb 2017 15:46:40 +0100
Subject: [PATCH] LVM: add check_writethrough parameter

---
heartbeat/LVM | 19 +++++++++++++++++++
1 file changed, 19 insertions(+)

diff --git a/heartbeat/LVM b/heartbeat/LVM
index 90a900b..5b265f5 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -29,6 +29,8 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+OCF_RESKEY_check_writethrough_default="false"
+
#######################################################################
@@ -106,6 +108,14 @@ logical volumes.
<content type="string" default="false" />
</parameter>
+<parameter name="check_writethrough" unique="0" required="0">
+<longdesc lang="en">
+If set to true, check if cache_mode is set to writethrough.
+</longdesc>
+<shortdesc lang="en">Check if cache_mode is set to writethrough</shortdesc>
+<content type="string" default="${OCF_RESKEY_check_writethrough_default}" />
+</parameter>
+
</parameters>
<actions>
@@ -583,6 +593,13 @@ LVM_validate_all() {
exit $OCF_ERR_GENERIC
fi
+ if ocf_is_true "$OCF_RESKEY_check_writethrough"; then
+ if ! lvs --noheadings -o cache_mode "$OCF_RESKEY_volgrpname" | grep -q "writethrough"; then
+ ocf_exit_reason "LVM cache is not in writethrough mode."
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fi
+
##
# If exclusive activation is not enabled, then
# further checking of proper setup is not necessary
@@ -690,6 +707,8 @@ if [ -n "$OCF_RESKEY_tag" ]; then
OUR_TAG=$OCF_RESKEY_tag
fi
+: ${OCF_RESKEY_check_writethrough=${OCF_RESKEY_check_writethrough_default}}
+
# What kind of method was invoked?
case "$1" in

520
SOURCES/bz1160365-iface-vlan.patch.patch

@ -0,0 +1,520 @@ @@ -0,0 +1,520 @@
From 0305c97abc49d0f7a93b3602a745805f7e8776d3 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 25 Jun 2015 16:23:45 -0500
Subject: [PATCH 1/3] bz1160365-iface-vlan.patch

---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/iface-vlan | 475 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 477 insertions(+)
create mode 100755 heartbeat/iface-vlan

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 653e818..091ec24 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -107,6 +107,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_iSCSILogicalUnit.7 \
ocf_heartbeat_iSCSITarget.7 \
ocf_heartbeat_ids.7 \
+ ocf_heartbeat_iface-vlan.7 \
ocf_heartbeat_iscsi.7 \
ocf_heartbeat_jboss.7 \
ocf_heartbeat_lxc.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index e4ed4fd..6df4080 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -76,6 +76,7 @@ ocf_SCRIPTS = ClusterMon \
fio \
galera \
ids \
+ iface-vlan \
iscsi \
ICP \
IPsrcaddr \
diff --git a/heartbeat/iface-vlan b/heartbeat/iface-vlan
new file mode 100755
index 0000000..bc8583c
--- /dev/null
+++ b/heartbeat/iface-vlan
@@ -0,0 +1,475 @@
+#!/bin/sh
+#
+# OCF Resource Agent compliant iface-vlan script.
+#
+# Implements network VLAN interface management
+#
+# Copyright (C) 2013 Red Hat, Inc. All rights reserved.
+# Author: Fabio M. Di Nitto <fdinitto@redhat.com>
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# TODO:
+#
+# OCF parameters are as below
+# OCF_RESKEY_vlan_interface
+# OCF_RESKEY_vlan_id
+# OCF_RESKEY_vlan_name
+# OCF_RESKEY_vlan_reorder_hdr
+# OCF_RESKEY_vlan_gvrp
+# OCF_RESKEY_vlan_mvrp
+# OCF_RESKEY_vlan_loose_binding
+#
+
+#######################################################################
+# Initialization:
+
+# Locate and load the shared OCF shell function library.
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults
+OCF_RESKEY_vlan_reorder_hdr_default=1
+OCF_RESKEY_vlan_gvrp_default=0
+OCF_RESKEY_vlan_mvrp_default=0
+OCF_RESKEY_vlan_loose_binding_default=0
+# The default VLAN device name is "<vlan_interface>.<vlan_id>", built from
+# whatever those two parameters hold at this point.
+OCF_RESKEY_vlan_name_default=${OCF_RESKEY_vlan_interface}.${OCF_RESKEY_vlan_id}
+
+# Apply a default only when the parameter is unset ('=' form: a parameter
+# that is set but empty is left empty).
+: ${OCF_RESKEY_vlan_name=${OCF_RESKEY_vlan_name_default}}
+: ${OCF_RESKEY_vlan_reorder_hdr=${OCF_RESKEY_vlan_reorder_hdr_default}}
+: ${OCF_RESKEY_vlan_gvrp=${OCF_RESKEY_vlan_gvrp_default}}
+
+# don't set defaults for mvrp or loose binding since both
+# are rather new kernel features and they might not be supported
+#: ${OCF_RESKEY_vlan_mvrp=${OCF_RESKEY_vlan_mvrp_default}}
+#: ${OCF_RESKEY_vlan_loose_binding=${OCF_RESKEY_vlan_loose_binding_default}}
+
+#######################################################################
+
+vlan_usage() {
+    # Print the command-line synopsis of this agent on stdout.
+    cat <<USAGE
+usage: $0 {start|stop|status|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+USAGE
+}
+
+vlan_meta_data() {
+    # Print the OCF resource-agent metadata (XML) for iface-vlan on stdout.
+    #
+    # The here-document is unquoted, so shell variables inside it are
+    # expanded when the metadata is printed. The help text refers to the ip
+    # utility through $IP2UTIL, the same variable the rest of this agent uses
+    # to run 'link' commands.
+    # NOTE(review): the previous text used $IPADDR2, which this script never
+    # assigns; confirm no sourced library defines it before relying on it.
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="iface-vlan">
+ <version>1.0</version>
+
+ <longdesc lang="en">
+ This resource manages VLAN network interfaces.
+ It can add, remove, configure VLANs.
+ </longdesc>
+
+ <shortdesc lang="en">
+ Manages VLAN network interfaces.
+ </shortdesc>
+
+ <parameters>
+ <parameter name="vlan_interface" unique="1" required="1">
+ <longdesc lang="en">
+ Define the interface where VLAN should be attached.
+ </longdesc>
+ <shortdesc lang="en">
+ Network interface.
+ </shortdesc>
+ <content type="string"/>
+ </parameter>
+
+ <parameter name="vlan_id" unique="1" required="1">
+ <longdesc lang="en">
+ Define the VLAN ID. It has to be a value between 0 and 4094.
+ </longdesc>
+ <shortdesc lang="en">
+ Define the VLAN ID.
+ </shortdesc>
+ <content type="integer"/>
+ </parameter>
+
+ <parameter name="vlan_name" unique="1">
+ <longdesc lang="en">
+ Define the name of the VLAN interface (max 15 characters).
+ </longdesc>
+ <shortdesc lang="en">
+ Name of the VLAN.
+ </shortdesc>
+ <content type="string" default="vlan_interface.vlan_id" />
+ </parameter>
+
+ <parameter name="vlan_reorder_hdr" unique="0">
+ <longdesc lang="en">
+ Enable or disable header reordering.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable or disable header reordering.
+ </shortdesc>
+ <content type="boolean" default="${OCF_RESKEY_vlan_reorder_hdr_default}"/>
+ </parameter>
+
+ <parameter name="vlan_gvrp" unique="0">
+ <longdesc lang="en">
+ Enable or disable GARP VLAN registration protocol.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable or disable gvrp.
+ </shortdesc>
+ <content type="boolean" default="${OCF_RESKEY_vlan_gvrp_default}"/>
+ </parameter>
+
+ <parameter name="vlan_mvrp" unique="0">
+ <longdesc lang="en">
+ Enable or disable Multiple VLAN Registration Protocol.
+ Please note that most distributions do not ship a version of iproute2
+ that supports mvrp yet, even if the kernel has support for it.
+ Check output of $IP2UTIL link add type vlan --help in the FLAG
+ section to verify if mvrp support is available.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable or disable mvrp.
+ </shortdesc>
+ <content type="boolean" default="${OCF_RESKEY_vlan_mvrp_default}"/>
+ </parameter>
+
+ <parameter name="vlan_loose_binding" unique="0">
+ <longdesc lang="en">
+ Enable or disable VLAN loose bind. By default the VLAN interface
+ admin status (UP/DOWN) follows the underneath interface status.
+ Enabling loose bind allows the VLAN to disconnect from the
+ interface status. Be very careful that enabling loose binding
+ could invalidate this agent monitor operations.
+ Please note that most distributions do not ship a version of iproute2
+ that supports loose_binding yet, even if the kernel has support for it.
+ Check output of $IP2UTIL link add type vlan --help in the FLAG
+ section to verify if loose_binding support is available.
+ </longdesc>
+ <shortdesc lang="en">
+ Enable or disable loose binding.
+ </shortdesc>
+ <content type="boolean" default="${OCF_RESKEY_vlan_loose_binding_default}"/>
+ </parameter>
+ </parameters>
+
+ <actions>
+ <action name="start" timeout="30s" />
+ <action name="stop" timeout="20s" />
+ <action name="status" timeout="20s" depth="0" interval="10s" />
+ <action name="monitor" timeout="20s" depth="0" interval="10s" />
+ <action name="meta-data" timeout="5s" />
+ <action name="validate-all" timeout="20s" />
+ </actions>
+</resource-agent>
+END
+}
+
+# check if the interface is admin up/down
+
+iface_is_up() {
+    # $1: interface name. Returns 0 when the flag list printed by
+    # 'ip -o link show' contains UP, 1 otherwise. The sed stage keeps only
+    # the text between '<' and '>' and removes LOWER_UP, so that LOWER_UP
+    # alone cannot satisfy the match.
+    $IP2UTIL -o link show $1 | \
+        sed -e 's#.*<##g' -e 's#>.*##' -e 's#LOWER_UP##g' | \
+        grep -q UP && return 0
+    return 1
+}
+
+# check if the slaves have link layer up/down
+# see kernel network documentation on meaning of LOWER_UP flag
+# for more in depth explanation on how it works
+# NOTE: this check is not reliable in virt environment
+# since interfaces are always LOWER_UP. There is no way
+# from the guest to know if the host has disconnected somehow
+
+iface_lower_is_up() {
+    # $1: interface name. Returns 0 when 'ip -o link show' reports the
+    # LOWER_UP flag for it, 1 otherwise.
+    $IP2UTIL -o link show $1 | grep -q LOWER_UP && return 0
+    return 1
+}
+
+vlan_validate() {
+    # Validate the agent parameters and the local environment.
+    # Logs the first problem found and returns 1; returns 0 when the
+    # parent interface, VLAN id and VLAN name are all acceptable.
+    check_binary $IP2UTIL
+
+    if [ -z "$OCF_RESKEY_vlan_interface" ]; then
+        ocf_log err "Invalid OCF_RESKEY_vlan_interface: value cannot be empty"
+        return 1
+    fi
+
+    # Interface names are limited by
+    # /usr/include/linux/if.h:#define IFNAMSIZ 16
+    # which includes the terminating 0 byte, hence at most 15 characters.
+    if [ "${#OCF_RESKEY_vlan_interface}" -gt 15 ]; then
+        ocf_log err "Invalid OCF_RESKEY_vlan_interface: name is too long"
+        return 1
+    fi
+
+    if [ ! -d "/sys/class/net" ]; then
+        ocf_log err "Unable to find sysfs network class in /sys"
+        return 1
+    fi
+
+    if [ ! -e "/sys/class/net/$OCF_RESKEY_vlan_interface" ]; then
+        ocf_log err "Invalid OCF_RESKEY_vlan_interface: $OCF_RESKEY_vlan_interface does not exists"
+        return 1
+    fi
+
+    if [ -z "$OCF_RESKEY_vlan_id" ]; then
+        ocf_log err "Invalid OCF_RESKEY_vlan_id: value cannot be empty"
+        return 1
+    fi
+    # The upper bound is written as a positive test ("id <= 4094 must hold")
+    # so that a failure of the test itself, e.g. a value too large for the
+    # shell to compare, rejects the id instead of letting it through.
+    if ! ocf_is_decimal "$OCF_RESKEY_vlan_id" || \
+        ! [ "$OCF_RESKEY_vlan_id" -le "4094" ]; then
+        ocf_log err "Invalid OCF_RESKEY_vlan_id: must be a decimal value (0 to 4094 included)"
+        return 1
+    fi
+
+    # Same IFNAMSIZ limit as for the parent interface name.
+    if [ "${#OCF_RESKEY_vlan_name}" -gt 15 ]; then
+        ocf_log err "Invalid OCF_RESKEY_vlan_name: name is too long"
+        return 1
+    fi
+
+    return 0
+}
+
+vlan_check() {
+    # Report the state of the VLAN device as an OCF status code.
+    #
+    # Device missing from sysfs: remove a leftover lock file if there is
+    # one and report OCF_NOT_RUNNING.
+    if [ ! -e "/sys/class/net/$OCF_RESKEY_vlan_name" ]; then
+        if [ -e "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" ]; then
+            if ! error="$(rm -f "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1)"; then
+                ocf_log err "Unable to remove stale lock file for vlan $OCF_RESKEY_vlan_name: $error"
+                return $OCF_ERR_GENERIC
+            fi
+        fi
+        return $OCF_NOT_RUNNING
+    fi
+
+    # Device present but no lock file from this agent: treat as an error.
+    [ -e "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" ] || return $OCF_ERR_GENERIC
+
+    # Parent interface administratively down: only a warning when loose
+    # binding is enabled, otherwise an error.
+    if ! iface_is_up $OCF_RESKEY_vlan_interface; then
+        if ! ocf_is_true "$OCF_RESKEY_vlan_loose_binding"; then
+            ocf_log err "Interface $OCF_RESKEY_vlan_interface is administratively down"
+            return $OCF_ERR_GENERIC
+        fi
+        ocf_log warn "Interface $OCF_RESKEY_vlan_interface is administratively down"
+    fi
+
+    iface_is_up $OCF_RESKEY_vlan_name || {
+        ocf_log err "VLAN $OCF_RESKEY_vlan_name is administratively down"
+        return $OCF_ERR_GENERIC
+    }
+
+    iface_lower_is_up $OCF_RESKEY_vlan_name || {
+        ocf_log err "VLAN $OCF_RESKEY_vlan_name has no active link-layer"
+        return $OCF_ERR_GENERIC
+    }
+
+    return $OCF_SUCCESS
+}
+
+# we need a simpler stop version to clean after us if start fails
+# without involving any error checking
+# rolling back in case of failure is otherwise complex
+
+vlan_force_stop() {
+ # Best-effort cleanup: delete the VLAN device (output discarded, result
+ # ignored) and remove this agent's lock file. No state checks are made;
+ # the function's status is that of the final rm.
+ $IP2UTIL link delete "$OCF_RESKEY_vlan_name" >/dev/null 2>&1
+ rm -f "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1
+}
+
+vlan_start() {
+    # Create the VLAN device on top of the parent interface and bring both up.
+    #
+    # Returns the vlan_check status unchanged unless it is OCF_NOT_RUNNING
+    # (so an already running VLAN yields OCF_SUCCESS), OCF_ERR_GENERIC when
+    # any step fails, and OCF_SUCCESS once the device is up and the lock
+    # file has been created. No rollback is done here.
+
+    # check if the vlan already exists
+    vlan_check
+    ret=$?
+    if [ "$ret" != "$OCF_NOT_RUNNING" ]; then
+        return $ret
+    fi
+
+    # make sure kernel module is loaded
+    if [ ! -e /proc/net/vlan ]; then
+        error="$(modprobe 8021q 2>&1)"
+        if [ "$?" != "0" ]; then
+            ocf_log err "Unable to load kernel 8021q driver: $error"
+            return $OCF_ERR_GENERIC
+        fi
+    fi
+
+    # generate options: each flag is only passed to ip when the matching
+    # parameter is non-empty, so unset parameters keep the kernel default
+    VLANOPTS=""
+
+    if [ -n "$OCF_RESKEY_vlan_reorder_hdr" ]; then
+        if ocf_is_true "$OCF_RESKEY_vlan_reorder_hdr"; then
+            VLANOPTS="reorder_hdr on"
+        else
+            VLANOPTS="reorder_hdr off"
+        fi
+    fi
+
+    if [ -n "$OCF_RESKEY_vlan_gvrp" ]; then
+        if ocf_is_true "$OCF_RESKEY_vlan_gvrp"; then
+            VLANOPTS="$VLANOPTS gvrp on"
+        else
+            VLANOPTS="$VLANOPTS gvrp off"
+        fi
+    fi
+
+    if [ -n "$OCF_RESKEY_vlan_mvrp" ]; then
+        if ocf_is_true "$OCF_RESKEY_vlan_mvrp"; then
+            VLANOPTS="$VLANOPTS mvrp on"
+        else
+            VLANOPTS="$VLANOPTS mvrp off"
+        fi
+    fi
+
+    if [ -n "$OCF_RESKEY_vlan_loose_binding" ]; then
+        if ocf_is_true "$OCF_RESKEY_vlan_loose_binding"; then
+            VLANOPTS="$VLANOPTS loose_binding on"
+        else
+            VLANOPTS="$VLANOPTS loose_binding off"
+        fi
+    fi
+
+    # create the VLAN ($VLANOPTS is intentionally unquoted: it must split
+    # into separate words)
+    error="$($IP2UTIL link add link "$OCF_RESKEY_vlan_interface" name "$OCF_RESKEY_vlan_name" type vlan id "$OCF_RESKEY_vlan_id" $VLANOPTS 2>&1)"
+    if [ "$?" != "0" ]; then
+        ocf_log err "Unable to create VLAN $OCF_RESKEY_vlan_name: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # set the parent interface up
+    error="$($IP2UTIL link set dev "$OCF_RESKEY_vlan_interface" up 2>&1)"
+    if [ "$?" != "0" ]; then
+        ocf_log err "Unable to set interface $OCF_RESKEY_vlan_interface up: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # set the vlan up
+    error="$($IP2UTIL link set dev "$OCF_RESKEY_vlan_name" up 2>&1)"
+    if [ "$?" != "0" ]; then
+        ocf_log err "Unable to set VLAN $OCF_RESKEY_vlan_name up: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # the lock file marks the device as created by this agent (see vlan_check)
+    error="$(touch "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1)"
+    if [ "$?" != "0" ]; then
+        ocf_log err "Unable to create lock file for VLAN $OCF_RESKEY_vlan_name: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+vlan_stop() {
+    # Take the VLAN device down, delete it and remove the lock file.
+    # An already stopped VLAN counts as success; any other non-success
+    # status from vlan_check is returned unchanged.
+    vlan_check
+    ret=$?
+    case "$ret" in
+        "$OCF_NOT_RUNNING")
+            return $OCF_SUCCESS
+            ;;
+        "$OCF_SUCCESS")
+            ;;
+        *)
+            return $ret
+            ;;
+    esac
+
+    # set vlan down
+    if ! error="$($IP2UTIL link set dev "$OCF_RESKEY_vlan_name" down 2>&1)"; then
+        ocf_log err "Unable to set VLAN $OCF_RESKEY_vlan_name down: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # delete vlan
+    if ! error="$($IP2UTIL link delete "$OCF_RESKEY_vlan_name" 2>&1)"; then
+        ocf_log err "Unable to delete VLAN $OCF_RESKEY_vlan_name: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    # drop the lock file
+    if ! error="$(rm -f "$HA_RSCTMP/iface-vlan.$OCF_RESKEY_vlan_name" 2>&1)"; then
+        ocf_log err "Unable to remove lock file for VLAN $OCF_RESKEY_vlan_name: $error"
+        return $OCF_ERR_GENERIC
+    fi
+
+    return $OCF_SUCCESS
+}
+
+# Informational actions are answered first, before any validation, and exit
+# immediately.
+case $__OCF_ACTION in
+ meta-data)
+ vlan_meta_data
+ exit $OCF_SUCCESS
+ ;;
+ usage|help)
+ vlan_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Make sure the directory used for lock files exists.
+if [ ! -d "$HA_RSCTMP" ]; then
+ ocf_log debug "$HA_RSCTMP not found, we are probably being executed manually"
+ mkdir -p "$HA_RSCTMP"
+fi
+
+# Every remaining action requires a valid configuration.
+if [ -n "$__OCF_ACTION" ] && ! vlan_validate; then
+ exit $OCF_ERR_CONFIGURED
+fi
+
+# start and stop are only allowed for root.
+case $__OCF_ACTION in
+ start|stop)
+ if ! ocf_is_root; then
+ ocf_log err "You must be root for $__OCF_ACTION operation."
+ exit $OCF_ERR_PERM
+ fi
+ ;;
+esac
+
+# Run the requested action.
+case $__OCF_ACTION in
+ start)
+ vlan_start
+ ret=$?
+ # any non-success result from vlan_start triggers a forced cleanup
+ if [ "$ret" != "$OCF_SUCCESS" ]; then
+ vlan_force_stop
+ fi
+ exit $ret
+ ;;
+ stop)
+ vlan_stop
+ exit $?
+ ;;
+ status|monitor)
+ vlan_check
+ exit $?
+ ;;
+ validate-all)
+ # vlan_validate above does the trick
+ # (no explicit exit: the script runs off its end after this case)
+ ;;
+ *)
+ vlan_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+# vi:sw=4:ts=8:
--
1.8.4.2

97
SOURCES/bz1168251-SAPHana-agents-update.patch

@ -0,0 +1,97 @@ @@ -0,0 +1,97 @@
diff --git a/heartbeat/SAPHana b/heartbeat/SAPHana
index f4db17a..412152b 100644
--- a/heartbeat/SAPHana
+++ b/heartbeat/SAPHana
@@ -137,7 +137,7 @@ function saphana_meta_data() {
<shortdesc lang="en">Manages two SAP HANA instances in system replication (SR).</shortdesc>
<longdesc lang="en">
The SAPHanaSR resource agent manages two SAP Hana instances (databases) which are configured
-in system replication. This first version is limitted to the scale-up scenario. Scale-Up is
+in system replication. This first version is limitted to the scale-up scenario. Scale-Out is
not supported in this version.
Managing the two SAP HANA instances means that the resource agent controls the start/stop of the
@@ -231,7 +231,9 @@ The resource agent uses the following four interfaces provided by SAP:
<parameter name="SAPHanaFilter" unique="0" required="0">
<shortdesc lang="en">Define SAPHana resource agent messages to be printed</shortdesc>
<longdesc lang="en">Define SAPHana resource agent messages to be printed.
- This parameter should only be set of been requested by SUSE support. The default is sufficient for normal operation.
+ This parameter should only be set if requested by support. The default is sufficient for normal operation.
+ Values: ra-act-lpa-dec-flow
+ You could specify any combination of the above values like "ra-act-flow"
</longdesc>
<content type="string" default="" />
</parameter>
@@ -480,7 +482,7 @@ function get_crm_master()
# globals: sr_name(w), remoteHost(w), otherNodes(w)
# globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w)
# globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w)
-# globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w)
+# globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w)
# saphana_init : Define global variables with default values, if optional parameters are not set
#
function saphana_init() {
@@ -497,6 +499,8 @@ function saphana_init() {
super_ocf_log debug "DBG: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)"
sid=$(echo "$SID" | tr [:upper:] [:lower:])
sidadm="${sid}adm"
+ # TODO PRIO3: Do we need a parameter for the RA to be able to adjust hdbSrQueryTimeout?
+ hdbSrQueryTimeout=180
# DONE: PRIO4: SAPVIRHOST might be different to NODENAME
# DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? Answer: Yes
# try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
@@ -827,7 +831,7 @@ function analyze_hana_sync_status()
super_ocf_log err "ACT: Secure store users are missing (see best practice manual how to setup the users)"
rc=$OCF_ERR_CONFIGURED
fi
- hana_sync_status=$(timeout 60 $DIR_EXECUTABLE/hdbsql -a -x -U $secUser $query_state); sqlrc=$?
+ hana_sync_status=$(timeout $hdbSrQueryTimeout $DIR_EXECUTABLE/hdbsql -a -x -U $secUser $query_state); sqlrc=$?
hana_sync_status=$(echo $hana_sync_status | dequote)
super_ocf_log debug "DBG: hdbsql rc=$sqlrc hana_sync_status=\"$hana_sync_status\""
if [ "$sqlrc" -eq 0 -a "$hana_sync_status" != "" ]; then
@@ -846,10 +850,10 @@ function analyze_hana_sync_status()
# TODO: PRIO9: for first we assume there is only ONE secondary site (like ROT)
# TODO: PRIO3: should we loop over all cluster nodes fetching their roles-attribute? To minimize sql-queries?
#
- all_secondary_hosts=$(timeout 60 hdbsql -a -x -U $secUser $query_secondaries ); sqlrc=$?
+ all_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_secondaries ); sqlrc=$?
all_secondary_hosts=$(echo $all_secondary_hosts | dequote);
if [ "$sqlrc" -eq 0 ]; then
- all_broken_secondary_hosts=$(timeout 60 hdbsql -a -x -U $secUser $query_failed_secondaries); sqlrc=$?
+ all_broken_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_failed_secondaries); sqlrc=$?
all_broken_secondary_hosts=$(echo $all_broken_secondary_hosts | dequote);
if [ "$sqlrc" -eq 0 ]; then
if [ -n "$all_broken_secondary_hosts" ]; then
@@ -869,9 +873,9 @@ function analyze_hana_sync_status()
fi
fi
else
- # return codes 19: license error -> set SFAIL!
case "$sqlrc" in
19 )
+ # return codes 19: license error -> set SFAIL!
# DONE: PRIO1: We should NOT set SFAIL, if HDB is exactly broken now
# When HDB breaks during monitor this could prevent a prositive remote failover
super_ocf_log warn "ACT: Was not able to fetch HANA SYNC STATUS - set sync status to SFAIL for ALL OTHER cluster hosts"
diff --git a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology
index 19fbbb4..082ad29 100644
--- a/heartbeat/SAPHanaTopology
+++ b/heartbeat/SAPHanaTopology
@@ -123,7 +123,7 @@ function sht_meta_data() {
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPHanaTopology">
- <version>0.149.3</version>
+ <version>0.149.4</version>
<shortdesc lang="en">Analyzes SAP HANA System Replication Topology.</shortdesc>
<longdesc lang="en">This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to
all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases.
@@ -172,7 +172,7 @@ SAPHanaTopology scans the output table of landscapeHostConfiguration.py to ident
<shortdesc lang="en">Define type of SAPHanaTopology RA messages to be printed</shortdesc>
<longdesc lang="en">Define type of SAPHanaTopology RA messages to be printed.
Define SAPHana resource agent messages to be printed.
- This parameter should only be set of been requested by SUSE support. The default is sufficient for normal operation.
+ This parameter should only be set if requested by support. The default is sufficient for normal operation.
Values: ra-act-lpa-dec-flow
You could specify any combination of the above values like "ra-act-flow"
</longdesc>

37
SOURCES/bz1168251-SAPHana-agents-update2.patch

@ -0,0 +1,37 @@ @@ -0,0 +1,37 @@
diff --git a/heartbeat/SAPHana b/heartbeat/SAPHana
index 412152b..1ff6a7d 100644
--- a/heartbeat/SAPHana
+++ b/heartbeat/SAPHana
@@ -356,7 +356,8 @@ function get_hana_attribute()
local attr_node=$1
local attr_name=$2
local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter
- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q; rc=$?
+ local attr_default=${4:-}
+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default"; rc=$?
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -373,9 +374,10 @@ function set_hana_attribute()
local attr_value=$2
local attr_name=$3
local attr_store=${4:-reboot} # DONE: PRIO5 get this (optional) from parameter
+ local attr_default=${5:-}
local rc=1
local attr_old=""
- attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store); get_rc=$?
+ attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$?
if [ "$attr_old" != "$attr_value" ]; then
super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc "
crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store; rc=$?
@@ -578,8 +580,8 @@ function saphana_init() {
remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]});
if [ -z "$remoteHost" ]; then
if [ ${#otherNodes[@]} -eq 1 ]; then # we are a 2 node cluster, lets assume the other is the remote-host
- remoteHost=${otherNodes[0]}
- remoteNode=$remoteHost
+ remoteNode=${otherNodes[0]}
+ remoteHost=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_VHOST[@]} "$remoteNode");
super_ocf_log debug "DBG: auto-guess remoteHost=$remoteHost"
else
super_ocf_log debug "DBG: Could not auto-guess remoteHost out of list (${otherNodes[@]})"

13
SOURCES/bz1168251-SAPHana-agents-update3.patch

@ -0,0 +1,13 @@ @@ -0,0 +1,13 @@
--- a/heartbeat/SAPHana 2015-05-07 07:47:41.654914103 -0500
+++ b/heartbeat/SAPHana 2015-05-07 07:47:06.164755744 -0500
@@ -1733,8 +1733,8 @@
analyze_hana_sync_status
;;
esac
- rem_role=$(get_hana_attribute ${remoteHost} ${ATTR_NAME_HANA_ROLES[@]})
- rem_clone_status=$(get_hana_attribute ${remoteHost} ${ATTR_NAME_HANA_CLONE_STATE[@]})
+ rem_role=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_ROLES[@]})
+ rem_clone_status=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_CLONE_STATE[@]})
if [ "$promote_attr" = "DEMOTED" -a "$rem_clone_status" = "PROMOTED" ]; then
case "$rem_role" in
[234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster

3129
SOURCES/bz1168251-SAPHana-agents.patch

File diff suppressed because it is too large Load Diff

441
SOURCES/bz1168251-SAPHana-agents_update4.patch

@ -0,0 +1,441 @@ @@ -0,0 +1,441 @@
diff --git a/heartbeat/SAPHana b/heartbeat/SAPHana
index 1913dc3..ed0443b 100644
--- a/heartbeat/SAPHana
+++ b/heartbeat/SAPHana
@@ -48,6 +48,8 @@ HANA_STATE_SECONDARY=1
HANA_STATE_STANDALONE=2
HANA_STATE_DEFECT=3
+debug_attributes=0
+
SH=/bin/sh
#
@@ -132,19 +134,19 @@ function saphana_meta_data() {
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPHana">
-<version>0.149.4</version>
+<version>0.149.7</version>
<shortdesc lang="en">Manages two SAP HANA instances in system replication (SR).</shortdesc>
<longdesc lang="en">
The SAPHanaSR resource agent manages two SAP Hana instances (databases) which are configured
-in system replication. This first version is limitted to the scale-up scenario. Scale-Out is
+in system replication. This first version is limited to the scale-up scenario. Scale-Out is
not supported in this version.
Managing the two SAP HANA instances means that the resource agent controls the start/stop of the
instances. In addition the resource agent is able to monitor the SAP HANA databases to check their
availability on landscape host configuration level. For this monitoring the resource agent relies on interfaces
provided by SAP. A third task of the resource agent is to also check the synchronisation status
-of the two SAP HANA databases. If the synchronisation is not "SOK", than the cluster avoids to
+of the two SAP HANA databases. If the synchronisation is not "SOK", then the cluster avoids to
failover to the secondary side, if the primary fails. This is to improve the data consistency.
The resource agent uses the following four interfaces provided by SAP:
@@ -162,7 +164,7 @@ The resource agent uses the following four interfaces provided by SAP:
3. hdbnsutil
The interface hdbnsutil is used to check the "topology" of the system replication as well as the current configuration
- (primary/secondary) of a SAP HANA database instance. A second task of the interface is the posibility to run a
+ (primary/secondary) of a SAP HANA database instance. A second task of the interface is the possibility to run a
system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register).
4. hdbsql / systemReplicationStatus
@@ -198,7 +200,7 @@ The resource agent uses the following four interfaces provided by SAP:
</parameter>
<parameter name="AUTOMATED_REGISTER" unique="0" required="0">
<shortdesc lang="en">Define, if a former primary should automatically be registered.</shortdesc>
- <longdesc lang="en">The parameter AUTOMATED_REGISTER defines, wether a former primary instance should
+ <longdesc lang="en">The parameter AUTOMATED_REGISTER defines, whether a former primary instance should
be registered automatically by the resource agent during cluster/resource start, if the DUPLICATE_PRIMARY_TIMEOUT is expired... TDB
</longdesc>
<content type="boolean" default="false" />
@@ -207,7 +209,7 @@ The resource agent uses the following four interfaces provided by SAP:
<shortdesc lang="en">Time difference needed between to primary time stamps, if a dual-primary situation occurs</shortdesc>
<longdesc lang="en">Time difference needed between to primary time stamps,
if a dual-primary situation occurs. If the time difference is
- less than the time gap, than the cluster hold one or both instances in a "WAITING" status. This is to give a admin
+ less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin
a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After
this registration to the new primary all data will be overwritten by the system replication.
</longdesc>
@@ -316,7 +318,7 @@ function remoteHost2remoteNode()
# descript: is_clone : find out if we are configured to run in a Master/Slave configuration
# rc: 0: it is a clone, 1: it is not a clone
#
-# DONE: PRIO2: For the first shippment (scale-out) we need to limit the clones to 2
+# DONE: PRIO2: For the first shipment (scale-out) we need to limit the clones to 2
#
function is_clone() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
@@ -356,8 +358,14 @@ function get_hana_attribute()
local attr_node=$1
local attr_name=$2
local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter
- local attr_default=${4:-}
- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default"; rc=$?
+ local attr_default=${5:-}
+ local attr_val=""
+ attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default"); rc=$?
+ if [ $debug_attributes -eq 1 ]; then
+ dstr=$(date)
+ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE
+ fi
+ echo "$attr_val"
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -381,6 +389,10 @@ function set_hana_attribute()
if [ "$attr_old" != "$attr_value" ]; then
super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc "
crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store; rc=$?
+ if [ $debug_attributes -eq 1 ]; then
+ dstr=$(date)
+ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE
+ fi
else
super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}"
rc=0
@@ -448,7 +460,7 @@ scoring_crm_master()
local roles="$1"
local sync="$2"
local skip=0
- local myScore=-1
+ local myScore=""
for scan in "${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}"; do
if [ $skip -eq 0 ]; then
read rolePatt syncPatt score <<< $scan
@@ -461,7 +473,10 @@ scoring_crm_master()
fi
done
super_ocf_log debug "DBG: scoring_crm_master adjust score $myScore"
- set_crm_master $myScore
+ # TODO: PRIO1: Do not score if we did not find our role/sync at this moment - bsc#919925
+ if [ -n "$myScore" ]; then
+ set_crm_master $myScore
+ fi
}
#
@@ -1068,6 +1083,27 @@ function saphana_start_primary()
case "$lpa_dec" in
0 ) # LPA says start-up
lpa_advice="start"
+ # TODO: PRIO1: We need to do a special handling for remote being a 234-Secondary in SR Status SOK
+ # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover )
+ # then lpa_advice="wait"
+ remoteRole=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_ROLES[@]})
+ remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]})
+ super_ocf_log info "DEC: saphana_primary - checking remoteStatus"
+ if ocf_is_true "${PreferSiteTakeover}"; then
+ remoteStatus="$remoteRole:$remoteSync"
+ case "$remoteStatus" in
+ [234]:S:*:SOK | [234]:S:*:PRIM )
+ lpa_advice="wait"
+ # TODO: PRIO3: Split WAIT into WAIT4TAKEOVER
+ super_ocf_log info "DEC: saphana_primary - waiting for secondary to takeover (SOK, PreferSiteTakover)"
+ ;;
+ * )
+ super_ocf_log info "DEC: saphana_primary - remoteStatus is: $remoteStatus"
+ ;;
+ esac
+ else
+ super_ocf_log info "DEC: saphana_primary - PreferSiteTakeover set to false"
+ fi
;;
1) # LPA says register!
lpa_advice="register"
@@ -1075,7 +1111,7 @@ function saphana_start_primary()
2) # LPA says wait for second LPT
lpa_advice="wait"
;;
- 3 | 4 ) # LPA says something is completely wrong - FAIL resource
+ 3 | 4 ) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss
lpa_advice="fail"
;;
* ) # LPA failed with an unkonown status - FAIL resource
@@ -1098,7 +1134,7 @@ function saphana_start_primary()
super_ocf_log info "LPA: landcape: UP, LPA: start ==> keep running"
LPTloc=$(date '+%s')
lpa_set_lpt $LPTloc
- rc=$OCF_SUCCSESS
+ rc=$OCF_SUCCESS
;;
1 ) # landcape says we are down, lets start and adjust scores and return code
super_ocf_log info "LPA: landcape: DOWN, LPA: start ==> start instance"
@@ -1149,7 +1185,7 @@ function saphana_start_primary()
case "$lss" in
2 | 3 | 4 ) # as we ARE up we just keep it up
# TODO: PRIO3: I now change from "just keep it up to take that down"
- # TODO: PRIO3: OCF_SUCCSESS, OCF_NOT_RUNNING or OCF_ERR_xxxx ?
+ # TODO: PRIO3: OCF_SUCCESS, OCF_NOT_RUNNING or OCF_ERR_xxxx ?
set_crm_master -9000
#scoring_crm_master "$my_role" "$my_sync"
rc=$OCF_ERR_GENERIC
@@ -1159,7 +1195,7 @@ function saphana_start_primary()
# TODO: PRIO3: Check, if WAITING is correct here
set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]}
set_crm_master -9000
- rc=$OCF_SUCCSESS
+ rc=$OCF_SUCCESS
;;
esac
;;
@@ -1277,7 +1313,7 @@ function saphana_start_secondary()
super_ocf_log info "ACT: PRIMARY seams to be down now ==> WAITING"
set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]}
set_crm_master -INFINITY
- rc=$OCF_SUCCSESS
+ rc=$OCF_SUCCESS
fi
else
lpa_set_lpt 30
@@ -1286,7 +1322,7 @@ function saphana_start_secondary()
super_ocf_log info "ACT: wait_for_primary_master ==> WAITING"
set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]}
set_crm_master -INFINITY
- rc=$OCF_SUCCSESS
+ rc=$OCF_SUCCESS
fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
@@ -1453,7 +1489,8 @@ function lpa_init_lpt() {
# LPTlocal > LPTremore ===> rc=0 (start)
# LPTRemote > LPTlocal ===> rc=1 (register)
# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait)
-# LPTRemote is not initialized (0)
+# LPTRemote is not initialized or node not known in cluster (crm_mon -l) (0)
+# TODO: PRIO1: Need to introduce a return-code 3 for the remote side's lpa not being ready
# THEN:
# WAIT ==> like STALEMATE-HANDLING ===> rc=2 (wait)
#
@@ -1625,7 +1662,6 @@ function saphana_monitor_primary()
else
super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING"
fi
-
return $OCF_SUCCESS
fi
promoted=0;
@@ -1853,11 +1889,11 @@ function saphana_monitor_secondary()
scoring_crm_master "$my_role" "$my_sync"
;;
"SFAIL" ) # This is currently NOT a possible node to promote
- super_ocf_log info "DEC: secondary with sync status FAILED ==> EXCLUDE as posible takeover node"
+ super_ocf_log info "DEC: secondary with sync status FAILED ==> EXCLUDE as possible takeover node"
set_crm_master -INFINITY
;;
"*" ) # Unknown sync status
- super_ocf_log info "DEC: secondary with sync status UKNOWN/UNDEFINED ==> EXCLUDE as posible takeover node"
+ super_ocf_log info "DEC: secondary with sync status UKNOWN/UNDEFINED ==> EXCLUDE as possible takeover node"
set_crm_master -INFINITY
;;
esac
@@ -1889,10 +1925,12 @@ function saphana_monitor_clone() {
local rc=$OCF_ERR_GENERIC
local promoted=0
local init_attribute=0
+ local lpaRc=0
+ local mRc=0
+ local myMaster=-1
my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]})
my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]})
- lpa_check_lpt_status # TODO: PRIO3 : remove that line later - its only to call lpa_check_lpt_status much more often for checking
if ocf_is_probe; then
super_ocf_log debug "DBG: PROBE ONLY"
@@ -1904,6 +1942,16 @@ function saphana_monitor_clone() {
#
check_for_primary; primary_status=$?
if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
+ # FIX: bsc#919925 Leaving Node Maintenance stops HANA Resource Agent
+ # TODO: PRIO1: Maybe we need an lpa-check here too
+ if ocf_is_probe; then
+ myMaster=$(get_crm_master); mRc=$?
+ if [ $mRc -ne 0 ]; then
+ set_crm_master 5
+ elif [ $myMaster -eq -1 ]; then
+ set_crm_master 5
+ fi
+ fi
saphana_monitor_primary; rc=$?
else
if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
diff --git a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology
index 082ad29..1d4887f 100644
--- a/heartbeat/SAPHanaTopology
+++ b/heartbeat/SAPHanaTopology
@@ -14,6 +14,7 @@
# Support: linux@sap.com
# License: GNU General Public License (GPL)
# Copyright: (c) 2014 SUSE Linux Products GmbH
+# (c) 2015 SUSE Linux GmbH
#
# An example usage:
# See usage() function below for more details...
@@ -39,6 +40,8 @@ HANA_STATE_SECONDARY=1
HANA_STATE_STANDALONE=2
HANA_STATE_DEFECT=3
+debug_attributes=0
+
SH=/bin/sh
#
@@ -123,7 +126,7 @@ function sht_meta_data() {
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPHanaTopology">
- <version>0.149.4</version>
+ <version>0.149.6</version>
<shortdesc lang="en">Analyzes SAP HANA System Replication Topology.</shortdesc>
<longdesc lang="en">This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to
all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases.
@@ -205,7 +208,13 @@ function get_hana_attribute()
local attr_node=$1
local attr_name=$2
local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter
- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q; rc=$?
+ local attr_val=""
+ attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q); rc=$?
+ if [ $debug_attributes -eq 1 ]; then
+ dstr=$(date)
+ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE
+ fi
+ echo "$attr_val"
if [ $rc -ne 0 ]; then
super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -G -n "$attr_name" -l $attr_store -q"
fi
@@ -230,6 +239,10 @@ function set_hana_attribute()
attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store); get_rc=$?
if [ "$attr_old" != "$attr_value" ]; then
super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc "
+ if [ $debug_attributes -eq 1 ]; then
+ dstr=$(date)
+ echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE
+ fi
crm_attribute -N $attr_node -v "$attr_value" -n "$attr_name" -l $attr_store; rc=$?
if [ $rc -ne 0 ]; then
super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store"
@@ -377,18 +390,32 @@ function sht_init() {
*openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');;
*cman* ) nodelist=$(crm_node -l);;
esac
+ #### SAP-CALL
hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null)
super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)"
site=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site)
+ if [ $debug_attributes -eq 1 ]; then
+ dstr=$(date)
+ echo "$dstr: SAPHanaTopology: srmode=$srmode" >> /var/log/fhATTRIBUTE
+ fi
+ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site)
super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
#
# filter all non-cluster mappings
#
- hanaRemoteHost=$(for n1 in $nodelist; do for n2 in $MAPPING; do if [ "$n1" == "$n2" ]; then echo $n1; fi; done; done )
- super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
+ # DONE: PRIO2: Need mapping between HANA HOSTS not cluster NODES
+ local hanaVHost
+ hanaRemoteHost=$(for n1 in $nodelist; do
+ hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]})
+ for n2 in $MAPPING; do
+ if [ "$hanaVHost" == "$n2" ]; then
+ echo $hanaVHost;
+ fi;
+ done;
+ done )
+ super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
+ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS"
return $OCF_SUCCESS
}
@@ -422,6 +449,7 @@ function check_for_primary() {
super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>"
dump=$( echo $node_status | hexdump -C );
super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>"
+ #### SAP-CALL
node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status"
@@ -440,6 +468,7 @@ function check_for_primary() {
#
function start_saphostagent()
{
+ ### SAP-CALL
if [ -x "${HOSTEXEC_PATH}" ]; then
${HOSTEXEC_PATH} pf=${HOSTEXEC_PROFILE_PATH}
fi
@@ -453,9 +482,10 @@ function start_saphostagent()
#
function stop_saphostagent()
{
- if [ -x "${HOSTEXEC_PATH}" ]; then
- ${HOSTEXEC_PATH} -stop
- fi
+ ### SAP-CALL
+ if [ -x "${HOSTEXEC_PATH}" ]; then
+ ${HOSTEXEC_PATH} -stop
+ fi
}
#
@@ -586,7 +616,7 @@ function sht_validate() {
#
function sht_start_clone() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
- local rc=$OCF_NOT_RUNNING
+ local rc=$OCF_NOT_RUNNING
sht_start; rc=$?
return $rc
}
@@ -666,27 +696,30 @@ function sht_monitor_clone() {
# DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API?
# try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691
# We rely on the following format: SID is word#4, NR is work#6, vHost is word#8
+ #### SAP-CALL
vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \
| awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr 2>/dev/null )
- super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
+ # super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)"
if [ -n "$vName" ]; then
set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]}
else
vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]})
fi
#site=$(get_site_name)
+ #### SAP-CALL
hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?"
hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName})
#if [ -z "$MAPPING" ]; then
# super_ocf_log info "ACT: Did not find remote Host at this moment"
#fi
- # FH TODO PRIO1: TRY TO GET RID OF "ATTR_NAME_HANA_REMOTEHOST"
+ # FH TODO PRIO3: TRY TO GET RID OF "ATTR_NAME_HANA_REMOTEHOST"
if [ -n "$hanaRemoteHost" ]; then
set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]}
fi
set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]}
- set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]}
- set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]}
+ if [ -n "$site" ]; then
+ set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]}
+ fi
case "$hanaPrim" in
P ) ;;
S ) # only secondary may propargate its sync status
@@ -701,7 +734,6 @@ function sht_monitor_clone() {
done
;;
esac
- #ATTR_NAME_HANA_STATUS # TODO: PRIO5: For SCALE-OUT: Fill that attribute later
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}

204
SOURCES/bz1170376-galera-no-readonly.patch

@ -0,0 +1,204 @@ @@ -0,0 +1,204 @@
diff --git a/heartbeat/galera b/heartbeat/galera
index 994aad0..d74a70d 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -342,6 +342,14 @@ is_readonly()
master_exists()
{
+ if [ "$__OCF_ACTION" = "demote" ]; then
+ # We don't want to detect master instances during demote.
+ # 1. we could be detecting ourselves as being master, which is no longer the case.
+ # 2. we could be detecting other master instances that are in the process of shutting down.
+ # by not detecting other master instances in "demote" we are deferring this check
+ # to the next recurring monitor operation which will be much more accurate
+ return 1
+ fi
# determine if a master instance is already up and is healthy
crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
return $?
@@ -441,20 +449,24 @@ galera_promote()
extra_opts="--wsrep-cluster-address=gcomm://"
else
ocf_exit_reason "Failure, Attempted to promote Master instance of $OCF_RESOURCE_INSTANCE before bootstrap node has been detected."
+ clear_last_commit
return $OCF_ERR_GENERIC
fi
-
fi
- # make sure the read only instance is stopped
- mysql_common_stop
- rc=$?
- if [ $rc -ne $OCF_SUCCESS ] && [ $rc -ne $OCF_NOT_RUNNING ]; then
- ocf_exit_reason "Failed to stop read-only galera instance during promotion to Master"
- return $rc
+ galera_monitor
+ if [ $? -eq $OCF_RUNNING_MASTER ]; then
+ if ocf_is_true $bootstrap; then
+ promote_everyone
+ clear_bootstrap_node
+ ocf_log info "boostrap node already up, promoting the rest of the galera instances."
+ fi
+ clear_last_commit
+ return $OCF_SUCCESS
fi
- sleep 4
+ # last commit is no longer relevant once promoted
+ clear_last_commit
mysql_common_prepare_dirs
mysql_common_start "$extra_opts"
@@ -492,9 +504,6 @@ galera_promote()
wait_for_sync
fi
- # last commit is no longer relevant once promoted
- clear_last_commit
-
ocf_log info "Galera started"
return $OCF_SUCCESS
}
@@ -510,14 +519,14 @@ galera_demote()
# if this node was previously a bootstrap node, that is no longer the case.
clear_bootstrap_node
+ clear_last_commit
- # start again in slave mode so the new last commit is recorded
+ # record last commit by "starting" galera. start is just detection of the last sequence number
galera_start
}
galera_start()
{
- local extra_opts='--read-only=true'
local last_commit
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
@@ -526,22 +535,39 @@ galera_start()
return $OCF_ERR_CONFIGURED
fi
- mysql_common_prepare_dirs
- mysql_common_start "$extra_opts"
-
- is_readonly
- if [ $? -ne 0 ]; then
- ocf_exit_reason "Slave instance did not start correctly in read-only mode, Make sure local galera.cnf does not have wsrep_cluster_address set."
+ galera_monitor
+ if [ $? -eq $OCF_RUNNING_MASTER ]; then
+ ocf_exit_reason "master galera instance started outside of the cluster's control"
return $OCF_ERR_GENERIC
fi
- ocf_log info "attempting to detect last commit version"
- while [ -z "$last_commit" ]; do
- last_commit=$(get_status_variable "wsrep_last_committed")
- if [ -z "$last_commit" ]; then
- sleep 1
+ mysql_common_prepare_dirs
+
+ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
+ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
+ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
+ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
+ local tmp=$(mktemp)
+ ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
+ --pid-file=$OCF_RESKEY_pid \
+ --socket=$OCF_RESKEY_socket \
+ --datadir=$OCF_RESKEY_datadir \
+ --user=$OCF_RESKEY_user \
+ --wsrep-recover > $tmp 2>&1
+
+ last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
+ rm -f $tmp
+
+ if [ "$last_commit" = "-1" ]; then
+ last_commit="0"
fi
- done
+ fi
+
+ if [ -z "$last_commit" ]; then
+ ocf_exit_reason "Unable to detect last known write sequence number"
+ clear_last_commit
+ return $OCF_ERR_GENERIC
+ fi
ocf_log info "Last commit version found: $last_commit"
set_last_commit $last_commit
@@ -567,28 +593,40 @@ galera_monitor()
if ocf_is_probe; then
status_loglevel="info"
fi
-
+
mysql_common_status $status_loglevel
rc=$?
- # If status returned an error, return that immediately
- if [ $rc -ne $OCF_SUCCESS ]; then
+ if [ $rc -eq $OCF_NOT_RUNNING ]; then
+ last_commit=$(get_last_commit $node)
+ if [ -n "$last_commit" ]; then
+ # if last commit is set, this instance is considered started in slave mode
+ rc=$OCF_SUCCESS
+ master_exists
+ if [ $? -ne 0 ]; then
+ detect_first_master
+ else
+ # a master instance exists and is healthy, promote this
+ # local read only instance
+ # so it can join the master galera cluster.
+ set_master_score
+ fi
+ fi
+ return $rc
+ elif [ $rc -ne $OCF_SUCCESS ]; then
return $rc
fi
+ # if we make it here, mysql is running. Check cluster status now.
+
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
ocf_exit_reason "local node <${NODENAME}> is started, but is not a member of the wsrep_cluster_address <${OCF_RESKEY_wsrep_cluster_address}>"
return $OCF_ERR_GENERIC
fi
- is_readonly
- if [ $? -ne 0 ]; then
- is_primary
- if [ $? -ne 0 ]; then
- ocf_exit_reason "local node <${NODENAME}> is neither in primary mode nor in read_only mode. Unknown state."
- return $OCF_ERR_GENERIC
- fi
+ is_primary
+ if [ $? -eq 0 ]; then
if ocf_is_probe; then
# restore master score during probe
@@ -596,18 +634,10 @@ galera_monitor()
set_master_score
fi
rc=$OCF_RUNNING_MASTER
- else
- master_exists
- if [ $? -ne 0 ]; then
- detect_first_master
- else
- # a master instance exists and is healthy, promote this
- # local read only instance
- # so it can join the master galera cluster.
- set_master_score
- fi
+ else
+ ocf_exit_reason "local node <${NODENAME}> is started, but not in primary mode. Unknown state."
+ rc=$OCF_ERR_GENERIC
fi
- # TODO look at what is done in the wait script
return $rc
}

25
SOURCES/bz1171162-clvmd-opt-fix.patch

@ -0,0 +1,25 @@ @@ -0,0 +1,25 @@
From e0f3e2190cfef76b9d7383a0009b678ed2ef4b17 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 29 Apr 2015 11:08:55 -0500
Subject: [PATCH 1/6] bz1171162-clvmd-opt-fix

---
heartbeat/clvm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/clvm b/heartbeat/clvm
index dcefcca..a1e2bc4 100755
--- a/heartbeat/clvm
+++ b/heartbeat/clvm
@@ -370,7 +370,7 @@ clvmd_start()
if ocf_is_true $OCF_RESKEY_with_cmirrord; then
start_process $CMIRROR_PATH
fi
- start_process $DAEMON_PATH $CLVMDOPTS
+ start_process $DAEMON_PATH "$CLVMDOPTS"
# Refresh local cache.
#
--
1.8.4.2

113
SOURCES/bz1183136-nginx-support.patch

@ -0,0 +1,113 @@ @@ -0,0 +1,113 @@
From d828c825c58f2da4b4edd6548c5fd254842a0add Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 29 Apr 2015 11:15:18 -0500
Subject: [PATCH 4/6] nginx agent support

---
heartbeat/nginx | 27 ++++++++++++---------------
1 file changed, 12 insertions(+), 15 deletions(-)

diff --git a/heartbeat/nginx b/heartbeat/nginx
index 65fd8f2..fadc545 100755
--- a/heartbeat/nginx
+++ b/heartbeat/nginx
@@ -31,7 +31,7 @@
# OCF_RESKEY_status10regex
# OCF_RESKEY_status10url
# OCF_RESKEY_client
-# OCF_RESKEY_testurl
+# OCF_RESKEY_test20url
# OCF_RESKEY_test20regex
# OCF_RESKEY_test20conffile
# OCF_RESKEY_test20name
@@ -416,7 +416,7 @@ start_nginx() {
return $OCF_SUCCESS
fi
if
- ocf_run $NGINXD -t -c $CONFIGFILE
+ ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE
then
: Configuration file $CONFIGFILE looks OK
else
@@ -442,7 +442,7 @@ start_nginx() {
[ $ec -eq $OCF_NOT_RUNNING ]
then
tries=`expr $tries + 1`
- ocf_log info "Waiting for $NGINXD -c $CONFIGFILE to come up (try $tries)"
+ ocf_log info "Waiting for $NGINXD $OPTIONS -c $CONFIGFILE to come up (try $tries)"
true
else
false
@@ -727,25 +727,25 @@ For example, you can set this paramter to "wget" if you prefer that to curl.
<content type="string" />
</parameter>
-<parameter name="testurl">
+<parameter name="test20url">
<longdesc lang="en">
URL to test. If it does not start with "http", then it's
considered to be relative to the document root address.
</longdesc>
-<shortdesc lang="en">Level 10 monitor url</shortdesc>
+<shortdesc lang="en">Level 20 monitor url</shortdesc>
<content type="string" />
</parameter>
<parameter name="test20regex">
<longdesc lang="en">
-Regular expression to match in the output of testurl.
+Regular expression to match in the output of test20url.
Case insensitive.
</longdesc>
<shortdesc lang="en">Level 20 monitor regular expression</shortdesc>
<content type="string" />
</parameter>
-<parameter name="testconffile">
+<parameter name="test20conffile">
<longdesc lang="en">
A file which contains a more complex test configuration. Could be useful if
you have to check more than one web application or in case sensitive
@@ -785,14 +785,11 @@ Extra options to apply when starting nginx.
</parameters>
<actions>
-<action name="start" timeout="40s" />
+<action name="start" timeout="60s" />
<action name="stop" timeout="60s" />
<action name="reload" timeout="40s" />
<action name="status" timeout="30s" />
-<action name="monitor" timeout="30s" depth="0" interval="10s" />
-<action name="monitor" timeout="30s" depth="10" interval="30s" />
-<action name="monitor" timeout="45s" depth="20" />
-<action name="monitor" timeout="60s" depth="30" />
+<action name="monitor" timeout="30s" depth="0" interval="20s" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="5" />
</actions>
@@ -838,11 +835,11 @@ validate_all_nginx() {
exit $OCF_ERR_CONFIGURED
fi
if
- ocf_run $NGINXD -t -c $CONFIGFILE
+ ocf_run $NGINXD $OPTIONS -t -c $CONFIGFILE
then
: Cool $NGINXD likes $CONFIGFILE
else
- ocf_log err "$NGINXD -t -c $CONFIGFILE reported a configuration error."
+ ocf_log err "$NGINXD $OPTIONS -t -c $CONFIGFILE reported a configuration error."
return $OCF_ERR_CONFIGURED
fi
return $OCF_SUCCESS
@@ -859,7 +856,7 @@ then
OPTIONS="$OCF_RESKEY_options"
CLIENT=${OCF_RESKEY_client}
TESTREGEX=${OCF_RESKEY_status10regex:-'Reading: [0-9]+ Writing: [0-9]+ Waiting: [0-9]+'}
- TESTURL="$OCF_RESKEY_status10url"
+ TESTURL="$OCF_RESKEY_test20url"
TESTREGEX20=${OCF_RESKEY_test20regex}
TESTCONFFILE="$OCF_RESKEY_test20conffile"
TESTNAME="$OCF_RESKEY_test20name"
--
1.8.4.2

564
SOURCES/bz1189187-redis-agent.patch

@ -0,0 +1,564 @@ @@ -0,0 +1,564 @@
From d83b9a9394ef69ca2801c84dee46094a224ca654 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 5 Mar 2015 13:47:58 -0600
Subject: [PATCH] redis agent support

---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/redis | 519 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 521 insertions(+)
create mode 100644 heartbeat/redis

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 43d60d9..653e818 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -125,6 +125,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_pound.7 \
ocf_heartbeat_proftpd.7 \
ocf_heartbeat_rabbitmq-cluster.7 \
+ ocf_heartbeat_redis.7 \
ocf_heartbeat_rsyncd.7 \
ocf_heartbeat_rsyslog.7 \
ocf_heartbeat_scsi2reservation.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 3bcf2d9..e4ed4fd 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -105,6 +105,7 @@ ocf_SCRIPTS = ClusterMon \
rabbitmq-cluster \
Raid1 \
Route \
+ redis \
rsyncd \
rsyslog \
SAPDatabase \
diff --git a/heartbeat/redis b/heartbeat/redis
new file mode 100644
index 0000000..6b479b2
--- /dev/null
+++ b/heartbeat/redis
@@ -0,0 +1,519 @@
+#!/bin/bash
+
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
+
+: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
+: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
+: ${OCF_RESKEY_user:=redis}
+: ${OCF_RESKEY_rundir:=/var/run/redis}
+: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
+: ${OCF_RESKEY_socket_name:=redis.sock}
+: ${OCF_RESKEY_port:=6379}
+
+if [ -z "$OCF_RESKEY_config" ]; then
+ if [ -f "/etc/redis.conf" ]; then
+ OCF_RESKEY_config="/etc/redis.conf"
+ else
+ OCF_RESKEY_config="/etc/redis/redis.conf"
+ fi
+fi
+
+CHECK_SLAVE_STATE=0
+
+REDIS_SERVER="$OCF_RESKEY_bin"
+REDIS_CLIENT="$OCF_RESKEY_client_bin"
+REDIS_CONFIG="$OCF_RESKEY_config"
+REDIS_USER="$OCF_RESKEY_user"
+REDIS_RUNDIR="$OCF_RESKEY_rundir"
+REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
+REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
+REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
+
+function meta_data() {
+ cat <<EOI
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="redis">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent script for redis server.
+
+This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
+When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
+</longdesc>
+
+<shortdesc lang="en">Redis server</shortdesc>
+
+<parameters>
+<parameter name="bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-server\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_bin}" />
+</parameter>
+
+<parameter name="client_bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-cli\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_client_bin}" />
+</parameter>
+
+<parameter name="config" unique="1" required="0">
+<longdesc lang="en">
+Path to 'redis.conf'
+</longdesc>
+<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
+<content type="string" default="${OCF_RESKEY_config}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User to run redis as
+</longdesc>
+<shortdesc lang="en">Redis user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user}" />
+</parameter>
+
+<parameter name="rundir" unique="1" required="0">
+<longdesc lang="en">
+Directory to store socket and pid file in
+</longdesc>
+<shortdesc lang="en">Redis var/run dir</shortdesc>
+<content type="string" default="${OCF_RESKEY_rundir}"/>
+</parameter>
+
+<parameter name="pidfile_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the pidfile. Will be created in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis pidfile name</shortdesc>
+<content type="string" default="${OCF_RESKEY_pidfile_name}"/>
+</parameter>
+
+<parameter name="socket_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the socket. Will be created in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis socket name</shortdesc>
+<content type="string" default="${OCF_RESKEY_socket_name}"/>
+</parameter>
+
+<parameter name="port" unique="0" required="0">
+<longdesc lang="en">
+Port for replication client to connect to on remote server
+</longdesc>
+<shortdesc lang="en">Replication port</shortdesc>
+<content type="string" default="${OCF_RESKEY_port}"/>
+</parameter>
+
+<parameter name="wait_last_known_master" unique="0" required="0">
+<longdesc lang="en">
+During redis cluster bootstrap, wait for the last known master to be
+promoted before allowing any other instances in the cluster to be
+promoted. This lessens the risk of data loss when persistent data
+is in use.
+</longdesc>
+<shortdesc lang="en">Wait for last known master</shortdesc>
+<content type="boolean" default="false"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120" />
+<action name="stop" timeout="120" />
+<action name="status" timeout="60" />
+<action name="monitor" depth="0" timeout="60" interval="45" />
+<action name="monitor" role="Master" depth="0" timeout="60" interval="20" />
+<action name="monitor" role="Slave" depth="0" timeout="60" interval="60" />
+<action name="promote" timeout="120" />
+<action name="demote" timeout="120" />
+<action name="notify" timeout="90" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+EOI
+}
+
+INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"
+MASTER_HOST=""
+MASTER_ACTIVE_CACHED=""
+MASTER_ACTIVE=""
+
+master_is_active()
+{
+ if [ -z "$MASTER_ACTIVE_CACHED" ]; then
+ # determine if a master instance is already up and is healthy
+ crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
+ MASTER_ACTIVE=$?
+ MASTER_ACTIVE_CACHED="true"
+ fi
+ return $MASTER_ACTIVE
+}
+
+# Record $1 as the current master: cache it in MASTER_HOST and store it
+# through the CRM_ATTR_REPL_INFO crm_attribute command line.
+# Returns the exit status of that crm_attribute call.
+function set_master()
+{
+ MASTER_HOST="$1"
+ ${CRM_ATTR_REPL_INFO} -v "$1" -q
+}
+
+# Print the recorded master node name on stdout. The value is queried via
+# CRM_ATTR_REPL_INFO only while MASTER_HOST is still empty; query errors
+# are discarded, so the output may be an empty line.
+function last_known_master()
+{
+    [ -n "$MASTER_HOST" ] || MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)"
+    echo "$MASTER_HOST"
+}
+
+# Run crm_master with "-l reboot", forwarding every argument unchanged.
+function crm_master_reboot() {
+ "${HA_SBIN_DIR}/crm_master" -l reboot "$@"
+}
+
+# Print the promotion score for this node.
+#   $1  preferred base score (the redis 'slave_priority' value), may be empty
+#   $2  number of connected clients, added on top of the base score
+# With an empty base score the node gets 1000 if it is the last known
+# master and 1 otherwise.
+function calculate_score()
+{
+    perf_score="$1"
+    connected_clients="$2"
+
+    # Only honour the preferred score from slave_priority if we are not
+    # waiting for the last known master. Otherwise we want the agent to
+    # have complete control over the scoring.
+    if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
+        perf_score=""
+        connected_clients="0"
+    fi
+
+    if [[ -z "$perf_score" ]]; then
+        perf_score=1
+        if [[ "$(last_known_master)" == "$NODENAME" ]]; then
+            perf_score=1000
+        fi
+    fi
+
+    perf_score=$(( perf_score + connected_clients ))
+    echo "$perf_score"
+}
+
+function set_score()
+{
+ local score="$1"
+
+ if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then
+ local last_master="$(last_known_master)"
+ if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then
+ ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
+ return
+ fi
+ fi
+
+ ocf_log debug "monitor: Setting master score to '$score'"
+ crm_master_reboot -v "$score"
+}
+
+# Run redis-cli against the local unix socket with the given arguments and
+# strip carriage returns from its output.
+# NOTE(review): as a plain pipeline the return status is sed's, not
+# redis-cli's (unless pipefail is enabled elsewhere) - callers must
+# inspect the output rather than the exit code.
+function redis_client() {
+ ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $@"
+ "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
+}
+
+# Process-level liveness check that does not talk to redis.
+# Returns OCF_NOT_RUNNING when the pidfile is missing or the pid it holds
+# is not among the pids reported by pidof for REDIS_SERVER; otherwise
+# OCF_SUCCESS.
+function simple_status() {
+    local pid
+
+    [ -f "$REDIS_PIDFILE" ] || return $OCF_NOT_RUNNING
+
+    pid="$(<"$REDIS_PIDFILE")"
+    if ! pidof "$REDIS_SERVER" | grep -q "\<$pid\>"; then
+        return $OCF_NOT_RUNNING
+    fi
+
+    ocf_log debug "monitor: redis-server running under pid $pid"
+    return $OCF_SUCCESS
+}
+
+# Full health check.
+# Returns:
+#   the simple_status code  when the process-level check does not succeed
+#   OCF_ERR_GENERIC         when `redis-cli info` yields no role, or (only
+#                           with CHECK_SLAVE_STATE=1) the replication link
+#                           is not up / points at a different master
+#   OCF_RUNNING_MASTER      when run as a master/slave resource and redis
+#                           reports role "master"
+#   OCF_SUCCESS             otherwise
+# Side effects: may set the master score, and on a probe that finds a
+# master records this node as the master.
+function monitor() {
+ local res
+
+ simple_status
+ res=$?
+ if (( res != OCF_SUCCESS )); then
+ return $res
+ fi
+
+ # Parse the "key:value" lines of `redis-cli info` into the info array,
+ # skipping section headers ("# ...") and lines without a colon.
+ typeset -A info
+ while read line; do
+ [[ "$line" == "#"* ]] && continue
+ [[ "$line" != *":"* ]] && continue
+ IFS=':' read -r key value <<< "$line"
+ info[$key]="$value"
+ done < <(redis_client info)
+ if [[ -z "${info[role]}" ]]; then
+ ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if ocf_is_ms; then
+ # Here we see if a score has already been set.
+ # If score isn't set we use the redis setting 'slave_priority'.
+ # If that isn't set, we default to 1000 for a master, and 1 for slave.
+ # We then add 1 for each connected client
+ score="$(crm_master_reboot --get-value --quiet 2>/dev/null)"
+ if [[ -z "$score" ]]; then
+ score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
+ set_score "$score"
+ fi
+
+ if [[ "${info[role]}" == "master" ]]; then
+ if ocf_is_probe; then
+ set_master "$NODENAME"
+ fi
+ return $OCF_RUNNING_MASTER
+ fi
+
+ # Replication checks only run when the caller asked for them (demote
+ # sets CHECK_SLAVE_STATE=1).
+ if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
+ if [[ "${info[master_link_status]}" != "up" ]]; then
+ ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
+ return $OCF_ERR_GENERIC
+ fi
+ if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
+ ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+ fi
+ return $OCF_SUCCESS
+}
+
+# Start redis-server as REDIS_USER and wait until it answers `info` with
+# loading finished. For master/slave resources the instance is then put
+# into slave mode via demote.
+# Returns OCF_SUCCESS when redis is (already) running, OCF_ERR_GENERIC when
+# the process disappears while waiting, otherwise the failing monitor code.
+# The wait loop has no timeout of its own; it relies on the start
+# operation timing out.
+function start() {
+    monitor
+    status=$?
+
+    if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+        ocf_log info "start: redis is already running"
+        return $OCF_SUCCESS
+    fi
+
+    [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
+    chown -R "$REDIS_USER" "$REDIS_RUNDIR"
+
+    ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
+    output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
+
+    while true; do
+        # wait for redis to start
+        typeset -A info
+        while read line; do
+            [[ "$line" == "#"* ]] && continue
+            [[ "$line" != *":"* ]] && continue
+            IFS=':' read -r key value <<< "$line"
+            info[$key]="$value"
+        done < <(redis_client info)
+
+        # Compare as strings: inside (( )) an unset info[loading] (redis-cli
+        # could not connect yet) evaluates to 0 and would end the wait
+        # before redis is actually listening.
+        if [[ "${info[loading]}" == "0" ]]; then
+            break
+        elif [[ "${info[loading]}" == "1" ]]; then
+            sleep "${info[loading_eta_seconds]}"
+        elif pidof "$REDIS_SERVER" >/dev/null; then
+            # unknown error, but the process still exists.
+            # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
+            # See https://github.com/antirez/redis/issues/2368
+            # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
+            sleep 1
+        else
+            # Keep the launcher output in the log; it is the only hint
+            # about why the process went away.
+            ocf_log err "start: Unknown error waiting for redis to start. output=${output//$'\n'/; }"
+            return $OCF_ERR_GENERIC
+        fi
+    done
+
+    ocf_is_ms && demote # pacemaker expects resources to start in slave mode
+
+    monitor
+    status=$?
+    if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log err "start: Unknown error starting redis. output=${output//$'\n'/; }"
+    return $status
+}
+
+# Stop redis: send SIGTERM to the pid from the pidfile and poll once per
+# second until simple_status reports OCF_NOT_RUNNING. The master score is
+# deleted in every case. Always returns OCF_SUCCESS; there is no internal
+# timeout on the wait.
+function stop() {
+    monitor
+    status=$?
+
+    if (( status != OCF_NOT_RUNNING )); then
+        pid="$(<"$REDIS_PIDFILE")"
+        kill -TERM "$pid"
+
+        until simple_status; status=$?; (( status == OCF_NOT_RUNNING )); do
+            sleep 1
+        done
+    else
+        ocf_log info "stop: redis is already stopped"
+    fi
+
+    crm_master_reboot -D
+    return $OCF_SUCCESS
+}
+
+# Promote this instance to master with `slaveof no one` and record the
+# node as master. Returns OCF_SUCCESS when the instance is (or already
+# was) master, OCF_ERR_GENERIC when it was not running as a slave or the
+# follow-up monitor does not report a master.
+function promote() {
+    monitor
+    status=$?
+
+    if (( status != OCF_RUNNING_MASTER )); then
+        if (( status != OCF_SUCCESS )); then
+            ocf_log err "promote: Node is not running as a slave"
+            return $OCF_ERR_GENERIC
+        fi
+
+        redis_client slaveof no one
+
+        monitor
+        status=$?
+        if (( status != OCF_RUNNING_MASTER )); then
+            ocf_log err "promote: Unknown error while promoting to master (status=$status)"
+            return $OCF_ERR_GENERIC
+        fi
+    else
+        ocf_log info "promote: Already running as master"
+    fi
+
+    set_master "$NODENAME"
+    return $OCF_SUCCESS
+}
+
+# Put this instance into slave mode, replicating from the last known
+# master on REDIS_REPLICATION_PORT.
+# Returns OCF_SUCCESS once monitor reports a healthy slave,
+# OCF_NOT_RUNNING when redis is not running, and OCF_ERR_GENERIC when the
+# slave state is not reached within 20 one-second polls.
+function demote() {
+ local master_host
+ local master_port
+
+ # Ask monitor to also verify the replication link and master host.
+ CHECK_SLAVE_STATE=1
+ monitor
+ status=$?
+
+ if (( status == OCF_SUCCESS )); then
+ ocf_log info "demote: Already running as slave"
+ return $OCF_SUCCESS
+ elif (( status == OCF_NOT_RUNNING )); then
+ ocf_log err "demote: Failed to demote, redis not running."
+ return $OCF_NOT_RUNNING
+ fi
+
+ master_host="$(last_known_master)"
+ master_port="${REDIS_REPLICATION_PORT}"
+
+ # The elected master has to remain a slave during startup.
+ # During this period a placeholder master host is assigned.
+ # In both placeholder cases the replication checks are switched off
+ # again, since no real link can come up.
+ if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
+ CHECK_SLAVE_STATE=0
+ master_host="no-such-master"
+ elif ! master_is_active; then
+ # no master has been promoted yet. we'll be notified when the
+ # master starts.
+ CHECK_SLAVE_STATE=0
+ master_host="no-such-master"
+ fi
+
+ ocf_log info "demote: Setting master to '$master_host'"
+
+ redis_client slaveof "$master_host" "$master_port"
+
+ # wait briefly for the slave to connect to the master
+ for (( c=1; c <= 20; c++ ))
+ do
+ monitor
+ status=$?
+ if (( status == OCF_SUCCESS )); then
+ return $OCF_SUCCESS
+ fi
+ sleep 1
+ done
+
+ ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
+ return $OCF_ERR_GENERIC
+}
+
+# Handle cluster notifications. After a demote or promote anywhere in the
+# cluster, an instance that monitor reports as a plain slave re-runs
+# demote so that it follows the newly appointed master.
+# Always returns OCF_SUCCESS.
+function notify() {
+    mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+
+    if [[ "$mode" == "post-demote" || "$mode" == "post-promote" ]]; then
+        # change the master
+        monitor
+        status=$?
+        if (( status == OCF_SUCCESS )); then
+            # We're a slave: calling demote updates the slave's
+            # connection to the newly appointed Master instance.
+            demote
+        fi
+    fi
+
+    return $OCF_SUCCESS
+}
+
+function validate() {
+ if [[ -x "$REDIS_SERVER" ]]; then
+ ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
+ return $OCF_ERR_INSTALLED
+ fi
+ if [[ -x "$REDIS_CLIENT" ]]; then
+ ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
+ return $OCF_ERR_INSTALLED
+ fi
+ if [[ -f "$REDIS_CONFIG" ]]; then
+ ocf_log err "validate: $REDIS_CONFIG does not exist"
+ return $OCF_ERR_CONFIGURED
+ fi
+ if ! getent passwd "$REDIS_USER" &>/dev/null; then
+ ocf_log err "validate: $REDIS_USER is not a valid user"
+ return $OCF_ERR_CONFIGURED
+ fi
+}
+
+NODENAME=$(ocf_local_nodename)
+
+ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"
+
+case "${1:-$__OCF_ACTION}" in
+ status|monitor)
+ monitor
+ ;;
+ start)
+ start
+ ;;
+ stop)
+ stop
+ ;;
+ restart)
+ stop && start
+ ;;
+ promote)
+ promote
+ ;;
+ demote)
+ demote
+ ;;
+ notify)
+ notify
+ ;;
+ meta-data)
+ meta_data
+ ;;
+ validate-all)
+ validate
+ ;;
+ *)
+ echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+status=$?
+ocf_log debug "exit_status=$status"
+exit $status
--
1.8.4.2

56
SOURCES/bz1198681-clvm-activate-vgs-option.patch

@ -0,0 +1,56 @@ @@ -0,0 +1,56 @@
From b5ac7d0e49bb3b967c3865438067a95606db959a Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 27 Apr 2015 16:35:03 -0400
Subject: [PATCH] High: clvm: activate_vgs option for enable/disable of
automatic vg activation

---
heartbeat/clvm | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)

diff --git a/heartbeat/clvm b/heartbeat/clvm
index 9d312cc..23e6f9f 100755
--- a/heartbeat/clvm
+++ b/heartbeat/clvm
@@ -60,6 +60,18 @@ Options to clvmd. Refer to clvmd.8 for detailed descriptions.
<shortdesc lang="en">Daemon Options</shortdesc>
<content type="string" default="-d0"/>
</parameter>
+
+<parameter name="activate_vgs" unique="0">
+<longdesc lang="en">
+Whether or not to activate all cluster volume groups after starting
+the clvmd or not. Note that clustered volume groups will always be
+deactivated before the clvmd stops regardless of what this option
+is set to.
+</longdesc>
+<shortdesc lang="en">Activate volume groups</shortdesc>
+<content type="boolean" default="true"/>
+</parameter>
+
</parameters>
<actions>
@@ -77,6 +89,7 @@ END
#######################################################################
: ${OCF_RESKEY_daemon_options:="-d0"}
+: ${OCF_RESKEY_activate_vgs:="true"}
sbindir=$HA_SBIN_DIR
if [ -z $sbindir ]; then
@@ -322,6 +335,11 @@ start_process()
clvmd_activate_all()
{
+
+ if ! ocf_is_true "$OCF_RESKEY_activate_vgs"; then
+ ocf_log info "skipping vg activation, activate_vgs is set to $OCF_RESKEY_activate_vgs"
+ return $OCF_SUCCESS
+ fi
# Activate all volume groups by leaving the
# "volume group name" parameter empty
ocf_run ${LVM_VGCHANGE} -aay
--
1.8.4.2

92
SOURCES/bz1200756-ipsrcaddr-misconfig.patch

@ -0,0 +1,92 @@ @@ -0,0 +1,92 @@
From 3c383f3dbb3b5351b25d33aa6e516ab8fc04a26a Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Tue, 28 Apr 2015 11:47:21 -0500
Subject: [PATCH] High: IPsrcaddr: return correct error code during stop when
misconfigured

---
heartbeat/IPsrcaddr | 45 +++++++++++++++++++++++++++++++--------------
1 file changed, 31 insertions(+), 14 deletions(-)

diff --git a/heartbeat/IPsrcaddr b/heartbeat/IPsrcaddr
index 8163c0c..33c5be6 100755
--- a/heartbeat/IPsrcaddr
+++ b/heartbeat/IPsrcaddr
@@ -387,15 +387,27 @@ ip_status() {
srca_validate_all() {
- check_binary $AWK
- check_binary $IFCONFIG
+ if [ -z "$OCF_RESKEY_ipaddress" ]; then
+ # usage
+ ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+
+ if ! [ "x$SYSTYPE" = "xLinux" ]; then
+ # checks after this point are only relevant for linux.
+ return $OCF_SUCCESS
+ fi
+
+ check_binary $AWK
+ check_binary $IFCONFIG
# The IP address should be in good shape
if CheckIP "$ipaddress"; then
:
else
ocf_exit_reason "Invalid IP address [$ipaddress]"
- exit $OCF_ERR_CONFIGURED
+ return $OCF_ERR_CONFIGURED
fi
if ocf_is_probe; then
@@ -407,8 +419,9 @@ srca_validate_all() {
:
else
ocf_exit_reason "We are not serving [$ipaddress], hence can not make it a preferred source address"
- exit $OCF_ERR_INSTALLED
+ return $OCF_ERR_INSTALLED
fi
+ return $OCF_SUCCESS
}
if
@@ -430,18 +443,22 @@ case $1 in
;;
esac
-if
- [ -z "$OCF_RESKEY_ipaddress" ]
-then
-# usage
- ocf_exit_reason "Please set OCF_RESKEY_ipaddress to the preferred source IP address!"
- exit $OCF_ERR_CONFIGURED
-fi
-
ipaddress="$OCF_RESKEY_ipaddress"
-if [ "x$SYSTYPE" = "xLinux" ]; then
- srca_validate_all
+srca_validate_all
+rc=$?
+if [ $rc -ne $OCF_SUCCESS ]; then
+ case $1 in
+ # if we can't validate the configuration during a stop, that
+ # means the resource isn't configured correctly. There's no way
+ # to actually stop the resource in this situation because there's
+ # no way it could have even started. Return success here
+ # to indicate that the resource is not running, otherwise the
+ # stop action will fail causing the node to be fenced just because
+ # of a misconfiguration.
+ stop) exit $OCF_SUCCESS;;
+ *) exit $rc;;
+ esac
fi
findif_out=`$FINDIF -C`
--
1.8.4.2

272
SOURCES/bz1212632-nagios.patch

@ -0,0 +1,272 @@ @@ -0,0 +1,272 @@
diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am
--- a/doc/man/Makefile.am 2016-06-06 10:32:26.889194520 +0200
+++ b/doc/man/Makefile.am 2016-06-06 10:33:28.850643243 +0200
@@ -118,6 +118,7 @@
ocf_heartbeat_lxc.7 \
ocf_heartbeat_mysql.7 \
ocf_heartbeat_mysql-proxy.7 \
+ ocf_heartbeat_nagios.7 \
ocf_heartbeat_named.7 \
ocf_heartbeat_nfsnotify.7 \
ocf_heartbeat_nfsserver.7 \
diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
--- a/heartbeat/Makefile.am 2016-06-06 10:32:26.889194520 +0200
+++ b/heartbeat/Makefile.am 2016-06-06 10:33:02.418878409 +0200
@@ -97,6 +97,7 @@
ManageVE \
mysql \
mysql-proxy \
+ nagios \
named \
nfsnotify \
nfsserver \
diff -uNr a/heartbeat/nagios b/heartbeat/nagios
--- a/heartbeat/nagios 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/nagios 2016-06-06 10:33:02.418878409 +0200
@@ -0,0 +1,246 @@
+#!/bin/sh
+#
+# License: GNU General Public License (GPL)
+# (c) 2015 T.J. Yang, O. Albrigtsen
+# and Linux-HA contributors
+#
+# -----------------------------------------------------------------------------
+# O C F R E S O U R C E S C R I P T S P E C I F I C A T I O N
+# -----------------------------------------------------------------------------
+#
+# NAME
+# nagios : OCF resource agent script for Nagios Server
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Defaults
+OCF_RESKEY_user_default="nagios"
+OCF_RESKEY_group_default="nagios"
+OCF_RESKEY_binary_default="/usr/sbin/nagios"
+OCF_RESKEY_config_default="/etc/nagios/nagios.cfg"
+OCF_RESKEY_log_default="/var/log/nagios/nagios.log"
+OCF_RESKEY_retention_default="/var/log/nagios/retention.dat"
+OCF_RESKEY_command_default="/var/log/nagios/rw/nagios.cmd"
+OCF_RESKEY_pid_default="/var/run/nagios.pid"
+
+: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
+: ${OCF_RESKEY_group=${OCF_RESKEY_group_default}}
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_config=${OCF_RESKEY_config_default}}
+: ${OCF_RESKEY_log=${OCF_RESKEY_log_default}}
+: ${OCF_RESKEY_retention=${OCF_RESKEY_retention_default}}
+: ${OCF_RESKEY_command=${OCF_RESKEY_command_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+
+# Print the list of supported actions and a short description of each
+# to stdout. 'status' is listed because the dispatcher accepts it as an
+# alias of 'monitor'.
+nagios_usage() {
+ cat <<END
+ usage: $0 (start|stop|status|validate-all|meta-data|help|usage|monitor)
+ $0 manages a Nagios instance as an OCF HA resource.
+ The 'start' operation starts the instance.
+ The 'stop' operation stops the instance.
+ The 'status' operation reports whether the instance is running
+ The 'monitor' operation reports whether the instance seems to be working
+ The 'validate-all' operation reports whether the parameters are valid
+END
+}
+
+nagios_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="nagios">
+<version>0.75</version>
+
+<longdesc lang="en">OCF Resource script for Nagios 3.x or 4.x. It manages a Nagios instance as a HA resource.</longdesc>
+<shortdesc lang="en">Nagios resource agent</shortdesc>
+
+<parameters>
+
+<parameter name="user">
+ <longdesc lang="en">User running Nagios daemon (for file permissions)</longdesc>
+ <shortdesc lang="en">Nagios user</shortdesc>
+ <content type="string" default="${OCF_RESKEY_user_default}" />
+</parameter>
+
+<parameter name="group">
+ <longdesc lang="en">Group running Nagios daemon (for file permissions)</longdesc>
+ <shortdesc lang="en">Nagios group</shortdesc>
+ <content type="string" default="${OCF_RESKEY_group_default}" />
+</parameter>
+
+<parameter name="binary">
+ <longdesc lang="en">Location of the Nagios binary</longdesc>
+ <shortdesc lang="en">Nagios binary</shortdesc>
+ <content type="string" default="${OCF_RESKEY_binary_default}" />
+</parameter>
+
+<parameter name="config">
+ <longdesc lang="en">Configuration file</longdesc>
+ <shortdesc lang="en">Nagios config</shortdesc>
+ <content type="string" default="${OCF_RESKEY_config_default}" />
+</parameter>
+
+<parameter name="log">
+ <longdesc lang="en">Location of the Nagios log</longdesc>
+ <shortdesc lang="en">Nagios log</shortdesc>
+ <content type="string" default="${OCF_RESKEY_log_default}" />
+</parameter>
+
+<parameter name="retention">
+ <longdesc lang="en">Location of the Nagios retention file</longdesc>
+ <shortdesc lang="en">Nagios retention file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_retention_default}" />
+</parameter>
+
+<parameter name="command">
+ <longdesc lang="en">Location of the Nagios external command file</longdesc>
+ <shortdesc lang="en">Nagios command file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_command_default}" />
+</parameter>
+
+<parameter name="pid">
+ <longdesc lang="en">Location of the Nagios pid/lock</longdesc>
+ <shortdesc lang="en">Nagios pid file</shortdesc>
+ <content type="string" default="${OCF_RESKEY_pid_default}" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="status" timeout="20" />
+<action name="monitor" depth="0" timeout="20" interval="10" start-delay="10" />
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="20" />
+</actions>
+</resource-agent>
+END
+}
+
+
+nagios_start() {
+ nagios_validate_all
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ return $rc
+ fi
+
+
+ # if resource is already running,no need to continue code after this.
+ if nagios_monitor; then
+ ocf_log info "Nagios is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # Remove ${OCF_RESKEY_pid} if it exists
+ rm -f ${OCF_RESKEY_pid}
+
+ ocf_run -q touch ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid}
+ chown ${OCF_RESKEY_user}:${OCF_RESKEY_group} ${OCF_RESKEY_log} ${OCF_RESKEY_retention} ${OCF_RESKEY_pid}
+ rm -f ${OCF_RESKEY_command}
+ [ -x /sbin/restorecon ] && /sbin/restorecon ${OCF_RESKEY_pid}
+ ocf_run -q ${OCF_RESKEY_binary} -d ${OCF_RESKEY_config}
+
+ while ! nagios_monitor; do
+ sleep 1
+ done
+
+ if [ $? -eq "0" ]; then
+ ocf_log info "Nagios started"
+ return ${OCF_SUCCESS}
+ fi
+
+ return $OCF_SUCCESS
+}
+
+# Stop Nagios: signal the pid stored in the pid file and poll once per
+# second until nagios_monitor no longer succeeds. If it was not running,
+# only the pid file is removed. Always returns OCF_SUCCESS.
+nagios_stop() {
+    nagios_monitor
+    if [ "$?" -eq "$OCF_SUCCESS" ]; then
+        kill $(cat ${OCF_RESKEY_pid})
+
+        # Wait for process to stop
+        while nagios_monitor; do
+            sleep 1
+        done
+    else
+        # Currently not running. Nothing to do.
+        ocf_log info "Resource is already stopped"
+        rm -f ${OCF_RESKEY_pid}
+    fi
+
+    return $OCF_SUCCESS
+}
+
+# Map the ocf_pidfile_status result for the Nagios pid file to an OCF
+# return code: 0 -> OCF_SUCCESS, 1 or 2 -> OCF_NOT_RUNNING, anything
+# else -> OCF_ERR_GENERIC. The code is also left in the global rc.
+nagios_monitor(){
+    ocf_pidfile_status ${OCF_RESKEY_pid} > /dev/null 2>&1
+    case "$?" in
+        0)   rc=$OCF_SUCCESS ;;
+        1|2) rc=$OCF_NOT_RUNNING ;;
+        *)   rc=$OCF_ERR_GENERIC ;;
+    esac
+    return $rc
+}
+
+nagios_validate_all(){
+ check_binary ${OCF_RESKEY_binary}
+
+ if [ ! -f ${OCF_RESKEY_config} ]; then
+ ocf_exit_reason "Configuration file ${OCF_RESKEY_config} not found"
+ return ${OCF_ERR_INSTALLED}
+ fi
+
+ ${OCF_RESKEY_binary} -v ${OCF_RESKEY_config} > /dev/null 2>&1;
+ if [ $? -ne "0" ]; then
+ ocf_exit_reason "Configuration check failed"
+ return ${OCF_ERR_INSTALLED}
+ fi
+}
+
+
+# **************************** MAIN SCRIPT ************************************
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) nagios_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) nagios_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# This OCF agent script need to be run as root user.
+if ! ocf_is_root; then
+ echo "$0 agent script need to be run as root user."
+ ocf_log debug "$0 agent script need to be run as root user."
+ exit $OCF_ERR_GENERIC
+fi
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) nagios_start;;
+stop) nagios_stop;;
+status|monitor) nagios_monitor;;
+validate-all) nagios_validate_all;;
+*) nagios_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+exit $rc
+
+# End of this script

43
SOURCES/bz1213971-ethmon-opt.patch

@ -0,0 +1,43 @@ @@ -0,0 +1,43 @@
From 3e969507468bea12e1d126b31b222ad248780a80 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Wed, 29 Apr 2015 11:13:26 -0500
Subject: [PATCH 3/6] ethmonitor link_status_only option

---
heartbeat/ethmonitor | 13 +++++++++++++
1 file changed, 13 insertions(+)

diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
index a447391..d0ec4ef 100755
--- a/heartbeat/ethmonitor
+++ b/heartbeat/ethmonitor
@@ -176,6 +176,14 @@ For infiniband devices, this is the port to monitor.
<content type="integer" />
</parameter>
+<parameter name="link_status_only">
+<longdesc lang="en">
+Only report success based on link status. Do not perform RX counter or arping related connectivity tests.
+</longdesc>
+<shortdesc lang="en">link status check only</shortdesc>
+<content type="boolean" default="false" />
+</parameter>
+
</parameters>
<actions>
<action name="start" timeout="60s" />
@@ -378,6 +386,11 @@ if_check () {
return $OCF_NOT_RUNNING
fi
+ # if using link_status_only, skip RX count and arping related tests
+ if ocf_is_true "$OCF_RESKEY_link_status_only"; then
+ return $OCF_SUCCESS
+ fi
+
# watch for packet counter changes
ocf_log debug "watch for packet counter changes"
watch_pkt_counter
--
1.8.4.2

494
SOURCES/bz1214360-NovaCompute-update1.patch.patch

@ -0,0 +1,494 @@ @@ -0,0 +1,494 @@
From 8c92227bce9cc4fe177eea5b2f7c9016e96434f9 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 29 Jun 2015 13:03:17 -0500
Subject: [PATCH 1/3] bz1214360-NovaCompute-update1.patch

---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 3 +-
heartbeat/NovaCompute | 73 ++++++------
heartbeat/NovaEvacuate | 311 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 352 insertions(+), 36 deletions(-)
create mode 100755 heartbeat/NovaEvacuate

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 42a57fe..d32426b 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -74,6 +74,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_ManageRAID.7 \
ocf_heartbeat_ManageVE.7 \
ocf_heartbeat_NovaCompute.7 \
+ ocf_heartbeat_NovaEvacuate.7 \
ocf_heartbeat_Pure-FTPd.7 \
ocf_heartbeat_Raid1.7 \
ocf_heartbeat_Route.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 0bebf97..1034632 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -52,7 +52,8 @@ send_ua_SOURCES = send_ua.c IPv6addr_utils.c
IPv6addr_LDADD = -lplumb $(LIBNETLIBS)
send_ua_LDADD = $(LIBNETLIBS)
-osp_SCRIPTS = NovaCompute
+osp_SCRIPTS = NovaCompute \
+ NovaEvacuate
ocf_SCRIPTS = ClusterMon \
CTDB \
diff --git a/heartbeat/NovaCompute b/heartbeat/NovaCompute
index f71abeb..09eee38 100644
--- a/heartbeat/NovaCompute
+++ b/heartbeat/NovaCompute
@@ -107,15 +107,26 @@ Disable shared storage recovery for instances. Use at your own risk!
<content type="boolean" default="0" />
</parameter>
+<parameter name="evacuation_delay" unique="0" required="0">
+<longdesc lang="en">
+How long to wait for nova to finish evacuating instances elsewhere
+before starting nova-compute. Only used when the agent detects
+evacuations might be in progress.
+
+You may need to increase the start timeout when increasing this value.
+</longdesc>
+<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
+<content type="integer" default="120" />
+</parameter>
+
</parameters>
<actions>
-<action name="start" timeout="120" />
+<action name="start" timeout="600" />
<action name="stop" timeout="300" />
<action name="monitor" timeout="20" interval="10" depth="0"/>
<action name="validate-all" timeout="20" />
<action name="meta-data" timeout="5" />
-<action name="notify" timeout="600" />
</actions>
</resource-agent>
END
@@ -132,7 +143,7 @@ sigterm_handler() {
nova_usage() {
cat <<END
-usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
+usage: $0 {start|stop|monitor|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
@@ -148,6 +159,26 @@ nova_start() {
return $OCF_SUCCESS
fi
+ state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+ if [ "x$state" = x ]; then
+ : never been fenced
+
+ elif [ "x$state" = xno ]; then
+ : has been evacuated, however it could have been 1s ago
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
+ sleep ${OCF_RESKEY_evacuation_delay}
+
+ else
+ ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
+ while [ "x$state" != "xno" ]; do
+ state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+ sleep 5
+ done
+
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
+ sleep ${OCF_RESKEY_evacuation_delay}
+ fi
+
export LIBGUESTFS_ATTACH_METHOD=appliance
su nova -s /bin/sh -c /usr/bin/nova-compute &
@@ -212,33 +243,7 @@ nova_monitor() {
}
nova_notify() {
- if [ "x${OCF_RESKEY_CRM_meta_notify_operation}" != "xstop" ]; then
- return $OCF_SUCCESS
- elif [ "x${OCF_RESKEY_CRM_meta_notify_type}" != "xpost" ]; then
- return $OCF_SUCCESS
- fi
-
- # Only the first node not stopping performs evacuates for now
- # Can we allow all of them to do it? It would make this block much simpler.
- for host in ${OCF_RESKEY_CRM_meta_notify_active_uname}; do
- for stop in ${OCF_RESKEY_CRM_meta_notify_stop_uname}; do
- if [ "$stop" = "$host" ]; then
- : $host is one of the nodes that is stopping
-
- elif [ "x$(echo ${host} | awk -F. '{print $1}')" != "x$(uname -n | awk -F. '{print $1}')" ]; then
- : We are not the first non-stopping node
- return $OCF_SUCCESS
-
- else
- # Also repeat for any peer NOT in active_uname somehow?
- for node in $OCF_RESKEY_CRM_meta_notify_stop_uname; do
- ocf_log info "Performing evacuations for $node"
- fence_compute ${fence_options} -o reboot -n $node
- done
- return $OCF_SUCCESS
- fi
- done
- done
+ return $OCF_SUCCESS
}
nova_validate() {
@@ -246,7 +251,6 @@ nova_validate() {
fence_options=""
check_binary openstack-config
- check_binary fence_compute
check_binary nova-compute
if [ ! -f /etc/nova/nova.conf ]; then
@@ -337,6 +341,7 @@ nova_validate() {
return $rc
}
+: ${OCF_RESKEY_evacuation_delay=120}
case $__OCF_ACTION in
meta-data) meta_data
exit $OCF_SUCCESS
@@ -346,12 +351,10 @@ usage|help) nova_usage
;;
esac
-nova_validate
-
case $__OCF_ACTION in
-start) nova_start;;
+start) nova_validate; nova_start;;
stop) nova_stop;;
-monitor) nova_monitor;;
+monitor) nova_validate; nova_monitor;;
notify) nova_notify;;
validate-all) exit $OCF_SUCCESS;;
*) nova_usage
diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
new file mode 100755
index 0000000..f9a24f1
--- /dev/null
+++ b/heartbeat/NovaEvacuate
@@ -0,0 +1,311 @@
+#!/bin/sh
+#
+#
+# NovaEvacuate agent evacuates instances from compute nodes flagged for evacuation.
+#
+# Copyright (c) 2015
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+###
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+###
+
+: ${__OCF_ACTION=$1}
+
+#######################################################################
+
+meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="NovaEvacuate" version="1.0">
+<version>1.0</version>
+
+<longdesc lang="en">
+Facility for tracking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged.
+</longdesc>
+<shortdesc lang="en">Evacuator for OpenStack Nova Compute Server</shortdesc>
+
+<parameters>
+
+<parameter name="auth_url" unique="0" required="1">
+<longdesc lang="en">
+Authorization URL for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Authorization URL</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="username" unique="0" required="1">
+<longdesc lang="en">
+Username for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Username</shortdesc>
+</parameter>
+
+<parameter name="password" unique="0" required="1">
+<longdesc lang="en">
+Password for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Password</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="tenant_name" unique="0" required="1">
+<longdesc lang="en">
+Tenant name for connecting to keystone in admin context.
+Note that with Keystone V3 tenant names are only unique within a domain.
+</longdesc>
+<shortdesc lang="en">Tenant name</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="endpoint_type" unique="0" required="0">
+<longdesc lang="en">
+Nova API location (internal, public or admin URL)
+</longdesc>
+<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="no_shared_storage" unique="0" required="0">
+<longdesc lang="en">
+Disable shared storage recovery for instances. Use at your own risk!
+</longdesc>
+<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="20" />
+<action name="stop" timeout="20" />
+<action name="monitor" timeout="600" interval="10" depth="0"/>
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# don't exit on TERM, to test that lrmd makes sure that we do exit
+trap sigterm_handler TERM
+sigterm_handler() {
+ ocf_log info "They use TERM to bring us down. No such luck."
+ return
+}
+
+evacuate_usage() {
+ cat <<END
+usage: $0 {start|stop|monitor|validate-all|meta-data}
+
+Expects to have a fully populated OCF RA-compliant environment set.
+END
+}
+
+evacuate_stop() {
+ rm -f "$statefile"
+ return $OCF_SUCCESS
+}
+
+evacuate_start() {
+ touch "$statefile"
+ # Do not invoke monitor here so that the start timeout can be low
+ return $?
+}
+
+update_evacuation() {
+ attrd_updater -p -n evacute -Q -N ${1} -v ${2}
+ arc=$?
+ if [ ${arc} != 0 ]; then
+ ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}"
+ fi
+ return ${arc}
+}
+
+handle_evacuations() {
+ while [ $# -gt 0 ]; do
+ node=$1
+ state=$2
+ shift; shift;
+ need_evacuate=0
+
+ case $state in
+ "") ;;
+ no) ocf_log debug "$node is either fine or already handled";;
+ yes) need_evacuate=1;;
+ *@*)
+ where=$(echo $state | awk -F@ '{print $1}')
+ when=$(echo $state | awk -F@ '{print $2}')
+ now=$(date +%s)
+
+ if [ $(($now - $when)) -gt 60 ]; then
+ ocf_log info "Processing partial evacuation of $node by $where at $when"
+ need_evacuate=1
+ else
+ # Give some time for any in-flight evacuations to either complete or fail
+ # Nova won't react well if there are two overlapping requests
+ ocf_log info "Deferring processing partial evacuation of $node by $where at $when"
+ fi
+ ;;
+ esac
+
+ if [ $need_evacuate = 1 ]; then
+ found=0
+ ocf_log notice "Initiating evacuation of $node"
+
+ for known in $(fence_compute ${fence_options} -o list | tr -d ','); do
+ if [ ${known} = ${node} ]; then
+ found=1
+ break
+ fi
+ done
+
+ if [ $found = 0 ]; then
+ ocf_log info "Nova does not know about ${node}"
+ # Dont mark as no because perhaps nova is unavailable right now
+ continue
+ fi
+
+ update_evacuation ${node} "$(uname -n)@$(date +%s)"
+ if [ $? != 0 ]; then
+ return $OCF_SUCCESS
+ fi
+
+ fence_compute ${fence_options} -o reboot -n $node
+ rc=$?
+
+ if [ $rc = 0 ]; then
+ update_evacuation ${node} no
+ ocf_log notice "Completed evacuation of $node"
+ else
+ ocf_log warn "Evacuation of $node failed: $rc"
+ update_evacuation ${node} yes
+ fi
+ fi
+ done
+
+ return $OCF_SUCCESS
+}
+
+evacuate_monitor() {
+ if [ ! -f "$statefile" ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
+ handle_evacuations $(attrd_updater -n evacute -A | tr '="' ' ' | awk '{print $4" "$6}')
+ return $OCF_SUCCESS
+}
+
+evacuate_validate() {
+ rc=$OCF_SUCCESS
+ fence_options=""
+
+ check_binary fence_compute
+
+ # Is the state directory writable?
+ state_dir=$(dirname $statefile)
+ touch "$state_dir/$$"
+ if [ $? != 0 ]; then
+ ocf_exit_reason "Invalid state directory: $state_dir"
+ return $OCF_ERR_ARGS
+ fi
+ rm -f "$state_dir/$$"
+
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
+ ocf_exit_reason "auth_url not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
+
+ if [ -z "${OCF_RESKEY_username}" ]; then
+ ocf_exit_reason "username not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -l ${OCF_RESKEY_username}"
+
+ if [ -z "${OCF_RESKEY_password}" ]; then
+ ocf_exit_reason "password not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -p ${OCF_RESKEY_password}"
+
+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then
+ ocf_exit_reason "tenant_name not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+
+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+
+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
+ fence_options="${fence_options} --no-shared-storage"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
+ case ${OCF_RESKEY_endpoint_type} in
+ adminURL|publicURL|internalURL) ;;
+ *)
+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
+ exit $OCF_ERR_CONFIGURED
+ ;;
+ esac
+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
+ fi
+
+ if [ $rc != $OCF_SUCCESS ]; then
+ exit $rc
+ fi
+ return $rc
+}
+
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
+
+case $__OCF_ACTION in
+start) evacuate_validate; evacuate_start;;
+stop) evacuate_stop;;
+monitor) evacuate_validate; evacuate_monitor;;
+meta-data) meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) evacuate_usage
+ exit $OCF_SUCCESS
+ ;;
+validate-all) exit $OCF_SUCCESS;;
+*) evacuate_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
--
1.8.4.2

49
SOURCES/bz1214781-lvm-partial-activation-fix.patch.patch

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
From 4e8c08a6d966f4e3deca03f4f1c4b5904939e640 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 25 Jun 2015 16:27:47 -0500
Subject: [PATCH 2/3] bz1214781-lvm-partial-activation-fix.patch

---
heartbeat/LVM | 26 ++++++++++++++++++++++++--
1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/heartbeat/LVM b/heartbeat/LVM
index 58cbe83..4b9c167 100755
--- a/heartbeat/LVM
+++ b/heartbeat/LVM
@@ -568,8 +568,30 @@ LVM_validate_all() {
##
VGOUT=`vgck ${VOLUME} 2>&1`
if [ $? -ne 0 ]; then
- ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
- exit $OCF_ERR_GENERIC
+ # Inconsistency might be due to missing physical volumes, which doesn't
+ # automatically mean we should fail. If partial_activation=true then
+ # we should let start try to handle it, or if no PVs are listed as
+ # "unknown device" then another node may have marked a device missing
+ # where we have access to all of them and can start without issue.
+ if vgs -o pv_attr --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'm' > /dev/null 2>&1; then
+ if vgs -o pv_name --noheadings $OCF_RESKEY_volgrpname 2>/dev/null | grep 'unknown device' > /dev/null 2>&1; then
+ if ! ocf_is_true "$OCF_RESKEY_partial_activation" ; then
+ # We are missing devices and cannot activate partially
+ ocf_exit_reason "Volume group [$VOLUME] has devices missing. Consider partial_activation=true to attempt to activate partially"
+ exit $OCF_ERR_GENERIC
+ else
+ # We are missing devices but are allowed to activate partially.
+ # Assume that caused the vgck failure and carry on
+ ocf_log warn "Volume group inconsistency detected with missing device(s) and partial_activation enabled. Proceeding with requested action."
+ fi
+ fi
+ # else the vg is partial but all devices are accounted for, so another
+ # node must have marked the device missing. Proceed.
+ else
+ # vgck failure was for something other than missing devices
+ ocf_exit_reason "Volume group [$VOLUME] does not exist or contains error! ${VGOUT}"
+ exit $OCF_ERR_GENERIC
+ fi
fi
##
--
1.8.4.2

27
SOURCES/bz1223615-apache-includes-fix.patch.patch

@ -0,0 +1,27 @@ @@ -0,0 +1,27 @@
From 72482ca1e117f426378a700a8b1e01443e0fb597 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 25 Jun 2015 16:30:20 -0500
Subject: [PATCH 3/3] bz1223615-apache-includes-fix.patch

---
heartbeat/apache-conf.sh | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/heartbeat/apache-conf.sh b/heartbeat/apache-conf.sh
index dc3426f..a3c8930 100644
--- a/heartbeat/apache-conf.sh
+++ b/heartbeat/apache-conf.sh
@@ -24,7 +24,9 @@ apachecat() {
function procline() {
split($0,a);
if( a[1]~/^[Ii]nclude$/ ) {
- procinclude(a[2]);
+ includedir=a[2];
+ gsub("\"","",includedir);
+ procinclude(includedir);
} else {
if( a[1]=="ServerRoot" ) {
rootdir=a[2];
--
1.8.4.2

49
SOURCES/bz1227293-dhcpd-chroot-fix.patch.patch

@ -0,0 +1,49 @@ @@ -0,0 +1,49 @@
From 6f8a0aa5c0f6c1e4965e4ce10d62ba83ae9f834e Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 29 Jun 2015 13:10:42 -0500
Subject: [PATCH 3/3] bz1227293-dhcpd-chroot-fix.patch

---
heartbeat/dhcpd | 12 ++++++++++--
1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/heartbeat/dhcpd b/heartbeat/dhcpd
index 67b529e..89a9578 100755
--- a/heartbeat/dhcpd
+++ b/heartbeat/dhcpd
@@ -38,6 +38,14 @@ OCF_RESKEY_leases_default="/db/dhcpd.leases"
OCF_RESKEY_interface_default=""
OCF_RESKEY_includes_default=""
+# On some systems, the chrooted default is slightly different.
+# Lets do our best to support both by default.
+if [ ! -d "$OCF_RESKEY_chrooted_path_default" ]; then
+ if [ -d "/var/lib/dhcpd" ]; then
+ OCF_RESKEY_chrooted_path_default="/var/lib/dhcpd"
+ fi
+fi
+
: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
: ${OCF_RESKEY_user=${OCF_RESKEY_user_default}}
@@ -302,7 +310,7 @@ dhcpd_initialize_chroot() {
{ ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
done
- libdir=$(basename $(echo /var/lib/dhcp/lib*))
+ libdir=$(basename $(echo ${OCF_RESKEY_chrooted_path}/lib*))
if test -x /usr/bin/ldd ; then
get_ldd_deps()
{
@@ -327,7 +335,7 @@ dhcpd_initialize_chroot() {
done | sort -u`
for i in $cplibs ; do
if [ -s "$i" ]; then
- cp -pL "$i" "/var/lib/dhcp/$libdir/" ||
+ cp -pL "$i" "${OCF_RESKEY_chrooted_path}/$libdir/" ||
{ ocf_exit_reason "could not copy $i to chroot jail"; return $OCF_ERR_GENERIC; }
fi
done
--
1.8.4.2

121
SOURCES/bz1231032-redis-update.patch.patch

@ -0,0 +1,121 @@ @@ -0,0 +1,121 @@
From c982683ac8c2de64f69c5f47727242c65e00df90 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Mon, 29 Jun 2015 13:07:14 -0500
Subject: [PATCH 2/3] bz1231032-redis-update.patch

---
heartbeat/redis | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
1 file changed, 46 insertions(+), 5 deletions(-)

diff --git a/heartbeat/redis b/heartbeat/redis
index 6b479b2..b63a2b9 100644
--- a/heartbeat/redis
+++ b/heartbeat/redis
@@ -20,6 +20,7 @@ fi
CHECK_SLAVE_STATE=0
+REDIS_CHECK_DUMP="/usr/bin/redis-check-dump"
REDIS_SERVER="$OCF_RESKEY_bin"
REDIS_CLIENT="$OCF_RESKEY_client_bin"
REDIS_CONFIG="$OCF_RESKEY_config"
@@ -29,6 +30,17 @@ REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
+if ! [ -f $REDIS_CHECK_DUMP ]; then
+ REDIS_CHECK_DUMP="$(which redis-check-dump 2>/dev/null)"
+fi
+
+if [ -f "$REDIS_CONFIG" ]; then
+ REDIS_DUMP_DIR="$(cat $REDIS_CONFIG | grep "^\s*dir\s" | awk '{ print $2 }' 2>/dev/null)"
+ REDIS_DUMP_FILE="$(cat $REDIS_CONFIG | grep "^\s*dbfilename\s" | awk '{ print $2 }' 2>/dev/null)"
+fi
+: ${REDIS_DUMP_DIR:=/var/lib/redis/}
+: ${REDIS_DUMP_FILE:=dump.rdb}
+
function meta_data() {
cat <<EOI
<?xml version="1.0"?>
@@ -289,6 +301,14 @@ function monitor() {
return $OCF_SUCCESS
}
+function check_dump_file()
+{
+ if ! have_binary "$REDIS_CHECK_DUMP"; then
+ return 0
+ fi
+ $REDIS_CHECK_DUMP ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE} 2>&1
+}
+
function start() {
monitor
status=$?
@@ -301,6 +321,16 @@ function start() {
[[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
chown -R "$REDIS_USER" "$REDIS_RUNDIR"
+ # check for 0 byte database dump file. This is an unrecoverable start
+ # condition that we can avoid by deleting the 0 byte database file.
+ if [ -f "${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}" ]; then
+ local size; size="$(stat --format "%s" ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE})" # declared separately so $? below is stat's status, not local's
+ if [ "$?" -eq "0" ] && [ "$size" -eq "0" ]; then
+ ocf_log notice "Detected 0 byte ${REDIS_DUMP_FILE}, deleting zero length file to avoid start failure."
+ rm -f ${REDIS_DUMP_DIR}/${REDIS_DUMP_FILE}
+ fi
+ fi
+
ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
@@ -325,7 +355,8 @@ function start() {
# It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
sleep 1
else
- ocf_log err "start: Unknown error waiting for redis to start"
+ check_output="$(check_dump_file)"
+ ocf_log err "start: Unknown error waiting for redis to start. redis-check-dump output=${check_output//$'\n'/; }"
return $OCF_ERR_GENERIC
fi
done
@@ -338,7 +369,8 @@ function start() {
return $OCF_SUCCESS
fi
- ocf_log err "start: Unknown error starting redis. output=${output//$'\n'/; }"
+ check_output="$(check_dump_file)"
+ ocf_log err "start: Unknown error starting redis. redis-server output=${output//$'\n'/; } redis-check-dump output=${check_output//$'\n'/; }"
return $status
}
@@ -427,14 +459,23 @@ function demote() {
redis_client slaveof "$master_host" "$master_port"
- # wait briefly for the slave to connect to the master
- for (( c=1; c <= 20; c++ ))
- do
+ # Wait forever for the slave to connect to the master and finish the
+ # sync. Timeout is controlled by Pacemaker "op start timeout=XX".
+ #
+ # hint: redis master_link_status will only come "up" when
+ # the SYNC with the master has completed.
+ # This can take an arbitrary time (data) and should
+ # only be parametrized by the start operation timeout
+ # by the administrator, not by this resource agent code
+ while true; do
+ # Wait infinite if replication is syncing
+ # Then start/demote operation timeout determines timeout
monitor
status=$?
if (( status == OCF_SUCCESS )); then
return $OCF_SUCCESS
fi
+
sleep 1
done
--
1.8.4.2

246
SOURCES/bz1232376-oracle-agent-update.diff

@ -0,0 +1,246 @@ @@ -0,0 +1,246 @@
diff --git a/heartbeat/oracle b/heartbeat/oracle
index 5ecc2f3..c629eb6 100755
--- a/heartbeat/oracle
+++ b/heartbeat/oracle
@@ -27,6 +27,9 @@
# OCF_RESKEY_ipcrm (optional; defaults to "instance")
# OCF_RESKEY_clear_backupmode (optional; default to "false")
# OCF_RESKEY_shutdown_method (optional; default to "checkpoint/abort")
+# OCF_RESKEY_monuser (optional; defaults to "OCFMON")
+# OCF_RESKEY_monpassword (optional; defaults to "OCFMON")
+# OCF_RESKEY_monprofile (optional; defaults to "OCFMONPROFILE")
#
# Initialization:
@@ -56,6 +59,11 @@ oracle_usage() {
!
}
+# Defaults
+OCF_RESKEY_monuser_default="OCFMON"
+OCF_RESKEY_monpassword_default="OCFMON"
+OCF_RESKEY_monprofile_default="OCFMONPROFILE"
+
oracle_meta_data() {
cat <<END
<?xml version="1.0"?>
@@ -100,6 +108,39 @@ If this does not work for you, just set it explicitely.
<content type="string" default="" />
</parameter>
+<parameter name="monuser" unique="0">
+<longdesc lang="en">
+Monitoring user name. Every connection as
+sysdba is logged in an audit log. This can
+result in a large number of new files created.
+A new user is created (if it doesn't exist) in
+the start action and subsequently used in monitor.
+It should have very limited rights. Make sure
+that the password for this user does not expire.
+</longdesc>
+<shortdesc lang="en">monuser</shortdesc>
+<content type="string" default="$OCF_RESKEY_monuser_default" />
+</parameter>
+
+<parameter name="monpassword" unique="0">
+<longdesc lang="en">
+Password for the monitoring user. Make sure
+that the password for this user does not expire.
+</longdesc>
+<shortdesc lang="en">monpassword</shortdesc>
+<content type="string" default="$OCF_RESKEY_monpassword_default" />
+</parameter>
+
+<parameter name="monprofile" unique="0">
+<longdesc lang="en">
+Profile used by the monitoring user. If the
+profile does not exist, it will be created
+with a non-expiring password.
+</longdesc>
+<shortdesc lang="en">monprofile</shortdesc>
+<content type="string" default="$OCF_RESKEY_monprofile_default" />
+</parameter>
+
<parameter name="ipcrm" unique="0">
<longdesc lang="en">
Sometimes IPC objects (shared memory segments and semaphores)
@@ -216,7 +257,7 @@ execsql() {
if [ "$US" = "$ORACLE_OWNER" ]; then
sqlplus -S /nolog
else
- su - $ORACLE_OWNER -c ". $ORA_ENVF; sqlplus -S /nolog"
+ su - $ORACLE_OWNER -s /bin/sh -c ". $ORA_ENVF; sqlplus -S /nolog"
fi
}
@@ -250,7 +291,7 @@ dbasql() {
runsql "connect / as sysdba" $*
}
monsql() {
- runsql "connect $MONUSR/$MONUSR" $*
+ runsql "connect $MONUSR/\"$MONPWD\"" $*
}
# use dbasql_one if the query should result in a single line output
# at times people stuff commands in oracle .profile
@@ -325,22 +366,73 @@ getipc() {
echo "oradebug tracefile_name"
echo "oradebug ipc"
}
+show_mon_profile() {
+ echo "select PROFILE from dba_profiles where PROFILE='$MONPROFILE';"
+}
+mk_mon_profile() {
+ cat<<EOF
+create profile $MONPROFILE limit FAILED_LOGIN_ATTEMPTS UNLIMITED PASSWORD_LIFE_TIME UNLIMITED;
+EOF
+}
show_mon_user() {
echo "select USERNAME, ACCOUNT_STATUS from dba_users where USERNAME='$MONUSR';"
}
mk_mon_user() {
cat<<EOF
-create user $MONUSR identified by $MONUSR;
+create user $MONUSR identified by "$MONPWD" profile $MONPROFILE;
grant create session to $MONUSR;
grant select on v_\$instance to $MONUSR;
EOF
}
-check_mon_user() {
+show_mon_user_profile() {
+ echo "select PROFILE from dba_users where USERNAME='$MONUSR';"
+}
+set_mon_user_profile() {
+ echo "alter user $MONUSR profile $MONPROFILE;"
+}
+reset_mon_user_password() {
+ echo "alter user $MONUSR identified by \"$MONPWD\";" # password quoted the same way mk_mon_user and monsql quote it
+}
+check_mon_profile() {
local output
- dbasql show_mon_user | grep -w "^$MONUSR" >/dev/null &&
+ output=`dbasql show_mon_profile`
+ if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
return 0
+ fi
+ output=`dbasql mk_mon_profile show_mon_profile`
+ if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
+ return 0
+ else
+ ocf_log err "could not create $MONPROFILE oracle profile"
+ ocf_log err "sqlplus output: $output"
+ return 1
+ fi
+}
+check_mon_user() {
+ local output
+ local output2
+
+ output=`dbasql show_mon_user`
+ if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
+ if echo "$output" | grep -w "EXPIRED" >/dev/null; then
+ dbasql reset_mon_user_password
+ fi
+ output=`dbasql show_mon_user_profile`
+ if echo "$output" | grep -iw "^$MONPROFILE" >/dev/null; then
+ return 0
+ else
+ output=`dbasql set_mon_user_profile`
+ output2=`dbasql show_mon_user_profile`
+ if echo "$output2" | grep -iw "^$MONPROFILE" >/dev/null; then
+ return 0
+ fi
+ ocf_log err "could not set profile for $MONUSR oracle user"
+ ocf_log err "sqlplus output: $output( $output2 )"
+ return 1
+ fi
+ fi
output=`dbasql mk_mon_user show_mon_user`
- if echo "$output" | grep -w "^$MONUSR" >/dev/null; then
+ if echo "$output" | grep -iw "^$MONUSR" >/dev/null; then
return 0
else
ocf_log err "could not create $MONUSR oracle user"
@@ -417,7 +509,7 @@ ipcdesc() {
}
rmipc() {
local what=$1 id=$2
- ipcs -$what | filteroraipc | grep -w $id >/dev/null 2>&1 ||
+ ipcs -$what | filteroraipc | grep -iw $id >/dev/null 2>&1 ||
return
ocf_log info "Removing `ipcdesc $what` $id."
ipcrm -$what $id
@@ -447,6 +539,8 @@ is_proc_running() {
# instance in OPEN state?
instance_live() {
local status=`monsql_one dbstat`
+ [ "$status" = OPEN ] && return 0
+ status=`dbasql_one dbstat`
if [ "$status" = OPEN ]; then
return 0
else
@@ -473,7 +567,7 @@ ora_cleanup() {
}
oracle_getconfig() {
- ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user" "$OCF_RESKEY_tns_admin"
+ ora_common_getconfig "$OCF_RESKEY_sid" "$OCF_RESKEY_home" "$OCF_RESKEY_user"
clear_backupmode=${OCF_RESKEY_clear_backupmode:-"false"}
shutdown_method=${OCF_RESKEY_shutdown_method:-"checkpoint/abort"}
@@ -493,7 +587,7 @@ oracle_getconfig() {
oracle_start() {
local status output
if is_proc_running; then
- status="`monsql_one dbstat`"
+ status="`dbasql_one dbstat`"
case "$status" in
"OPEN")
: nothing to be done, we can leave right now
@@ -541,6 +635,11 @@ oracle_start() {
fi
output=`dbasql dbopen`
+ # check/create the monitor profile
+ if ! check_mon_profile; then
+ return $OCF_ERR_GENERIC
+ fi
+
# check/create the monitor user
if ! check_mon_user; then
return $OCF_ERR_GENERIC
@@ -650,7 +749,12 @@ show_procs() {
proc_pids() { show_procs | awk '{print $1}'; }
PROCS_CLEANUP_TIME="30"
-MONUSR="OCFMON"
+MONUSR=${OCF_RESKEY_monuser:-$OCF_RESKEY_monuser_default}
+MONPWD=${OCF_RESKEY_monpassword:-$OCF_RESKEY_monpassword_default}
+MONPROFILE=${OCF_RESKEY_monprofile:-$OCF_RESKEY_monprofile_default} # honor the user-supplied monprofile, fall back to the default
+
+MONUSR=$(echo $MONUSR | awk '{print toupper($0)}')
+MONPROFILE=$(echo $MONPROFILE | awk '{print toupper($0)}')
OCF_REQUIRED_PARAMS="sid"
OCF_REQUIRED_BINARIES="sqlplus"
ocf_rarun $*
diff --git a/heartbeat/oralsnr b/heartbeat/oralsnr
index 2409017..a91eeab 100755
--- a/heartbeat/oralsnr
+++ b/heartbeat/oralsnr
@@ -158,7 +158,7 @@ runasdba() {
(
echo ". $ORA_ENVF"
cat
- ) | su - $ORACLE_OWNER
+ ) | su -s $SH - $ORACLE_OWNER
fi
}
@@ -268,7 +268,7 @@ oralsnr_validate_all() {
# used in ora-common.sh
show_procs() {
ps -e -o pid,user,args |
- grep '[t]nslsnr' | grep -w "$listener" | grep -w "$ORACLE_OWNER"
+ grep '[t]nslsnr' | grep -i -w "$listener" | grep -w "$ORACLE_OWNER"
}
proc_pids() { show_procs | awk '{print $1}'; }
PROCS_CLEANUP_TIME="10"

133
SOURCES/bz1242181-virtualdomain-migrate_options.patch

@ -0,0 +1,133 @@ @@ -0,0 +1,133 @@
diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
--- a/heartbeat/VirtualDomain 2016-02-29 10:54:21.870787072 +0100
+++ b/heartbeat/VirtualDomain 2016-02-29 14:02:23.260696550 +0100
@@ -106,11 +106,28 @@
Note: Be sure this composed host name is locally resolveable and the
associated IP is reachable through the favored network.
+
+See also the migrate_options parameter below.
</longdesc>
<shortdesc lang="en">Migration network host name suffix</shortdesc>
<content type="string" default="" />
</parameter>
+<parameter name="migrate_options" unique="0" required="0">
+<longdesc lang="en">
+Extra virsh options for the guest live migration. You can also specify
+here --migrateuri if the calculated migrate URI is unsuitable for your
+environment. If --migrateuri is set then migration_network_suffix
+and migrateport are effectively ignored. Use "%n" as the placeholder
+for the target node name.
+
+Please refer to the libvirt documentation for details on guest
+migration.
+</longdesc>
+<shortdesc lang="en">live migrate options</shortdesc>
+<content type="string" />
+</parameter>
+
<parameter name="monitor_scripts" unique="0" required="0">
<longdesc lang="en">
To additionally monitor services within the virtual domain, add this
@@ -485,14 +502,45 @@
force_stop
}
+mk_migrateuri() {
+ local target_node
+ local migrate_target
+ local hypervisor
+
+ target_node="$OCF_RESKEY_CRM_meta_migrate_target"
+
+ # A typical migration URI via a special migration network looks
+ # like "tcp://bar-mig:49152". The port would be randomly chosen
+ # by libvirt from the range 49152-49215 if omitted, at least since
+ # version 0.7.4 ...
+ if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
+ hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"
+ # Hostname might be a FQDN
+ migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
+ case $hypervisor in
+ qemu)
+ # For quite ancient libvirt versions a migration port is needed
+ # and the URI must not contain the "//". Newer versions can handle
+ # the "bad" URI.
+ echo "tcp:${migrate_target}:${OCF_RESKEY_migrateport}"
+ ;;
+ xen)
+ echo "xenmigr://${migrate_target}"
+ ;;
+ *)
+ ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
+ ;;
+ esac
+ fi
+}
+
VirtualDomain_Migrate_To() {
+ local rc
local target_node
local remoteuri
local transport_suffix
local migrateuri
- local migrateport
- local migrate_target
- local hypervisor
+ local migrate_opts
target_node="$OCF_RESKEY_CRM_meta_migrate_target"
@@ -503,38 +551,26 @@
if [ -n "${OCF_RESKEY_migration_transport}" ]; then
transport_suffix="+${OCF_RESKEY_migration_transport}"
fi
- # A typical migration URI via a special migration network looks
- # like "tcp://bar-mig:49152". The port would be randomly chosen
- # by libvirt from the range 49152-49215 if omitted, at least since
- # version 0.7.4 ...
- if [ -n "${OCF_RESKEY_migration_network_suffix}" ]; then
- hypervisor="${OCF_RESKEY_hypervisor%%[+:]*}"
- # Hostname might be a FQDN
- migrate_target=$(echo ${target_node} | sed -e "s,^\([^.]\+\),\1${OCF_RESKEY_migration_network_suffix},")
- case $hypervisor in
- qemu)
- # For quiet ancient libvirt versions a migration port is needed
- # and the URI must not contain the "//". Newer versions can handle
- # the "bad" URI.
- migrateuri="tcp:${migrate_target}:${OCF_RESKEY_migrateport}"
- ;;
- xen)
- migrateuri="xenmigr://${migrate_target}"
- ;;
- *)
- ocf_log warn "$DOMAIN_NAME: Migration via dedicated network currently not supported for ${hypervisor}."
- ;;
- esac
+
+ # User defined migrateuri (with %n replaced by the target node) or do we make one?
+ migrate_opts="$OCF_RESKEY_migrate_options"
+ if echo "$migrate_opts" | fgrep -qs -- "--migrateuri="; then
+ migrateuri=`echo "$migrate_opts" |
+ sed "s/.*--migrateuri=\([^ ]*\).*/\1/;s/%n/$target_node/g"`
+ migrate_opts=`echo "$migrate_opts" |
+ sed "s/\(.*\)--migrateuri=[^ ]*\(.*\)/\1\2/"`
+ else
+ migrateuri=`mk_migrateuri`
+ fi
# Scared of that sed expression? So am I. :-)
remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")
# OK, we know where to connect to. Now do the actual migration.
- ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using remote hypervisor URI ${remoteuri} ${migrateuri})."
- virsh ${VIRSH_OPTIONS} migrate --live $DOMAIN_NAME ${remoteuri} ${migrateuri}
+ ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
+ virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri
rc=$?
if [ $rc -ne 0 ]; then
- ocf_exit_reason "$DOMAIN_NAME: live migration to ${remoteuri} ${migrateuri} failed: $rc"
+ ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc"
return $OCF_ERR_GENERIC
else
ocf_log info "$DOMAIN_NAME: live migration to ${target_node} succeeded."

40
SOURCES/bz1242558-virtualdomain-may-remove-config-file.patch

@ -0,0 +1,40 @@ @@ -0,0 +1,40 @@
diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
--- a/heartbeat/VirtualDomain 2015-11-20 11:52:58.314263831 +0100
+++ b/heartbeat/VirtualDomain 2015-11-20 11:53:55.247196256 +0100
@@ -340,13 +340,32 @@
return $rc
}
+# virsh undefine removes configuration files if they are in
+# directories which are managed by libvirt. such directories
+# include also subdirectories of /etc (for instance
+# /etc/libvirt/*) which may be surprising. VirtualDomain didn't
+# include the undefine call before, hence this wasn't an issue
+# before.
+#
+# There seems to be no way to find out which directories are
+# managed by libvirt.
+#
verify_undefined() {
- for dom in `virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null`; do
- if [ "$dom" = "$DOMAIN_NAME" ]; then
+ local tmpf
+ if virsh --connect=${OCF_RESKEY_hypervisor} list --all --name 2>/dev/null | grep -wqs "$DOMAIN_NAME"
+ then
+ tmpf=$(mktemp -t vmcfgsave.XXXXXX)
+ if [ ! -r "$tmpf" ]; then
+ ocf_log warn "unable to create temp file, disk full?"
+ # we must undefine the domain
virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
- return
+ else
+ cp -p $OCF_RESKEY_config $tmpf
+ virsh $VIRSH_OPTIONS undefine $DOMAIN_NAME > /dev/null 2>&1
+ [ -f $OCF_RESKEY_config ] || cp -f $tmpf $OCF_RESKEY_config
+ rm -f $tmpf
fi
- done
+ fi
}
VirtualDomain_Start() {

92
SOURCES/bz1247303-rabbitmq-cluster-forget-stopped-cluster-nodes.patch

@ -0,0 +1,92 @@ @@ -0,0 +1,92 @@
diff -uNr a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
--- a/heartbeat/rabbitmq-cluster 2016-02-22 11:09:48.989128414 +0100
+++ b/heartbeat/rabbitmq-cluster 2016-02-22 11:10:12.011835745 +0100
@@ -39,7 +39,14 @@
RMQ_LOG_DIR="/var/log/rabbitmq"
NODENAME=$(ocf_local_nodename)
+# this attr represents the current active local rmq node name.
+# when rmq stops or the node is fenced, this attr disappears
RMQ_CRM_ATTR_COOKIE="rmq-node-attr-${OCF_RESOURCE_INSTANCE}"
+# this attr represents the last known active local rmq node name
+# when rmq stops or the node is fenced, the attr stays forever so
+# we can continue to map an offline pcmk node to its rmq node name
+# equivalent.
+RMQ_CRM_ATTR_COOKIE_LAST_KNOWN="rmq-node-attr-last-known-${OCF_RESOURCE_INSTANCE}"
meta_data() {
cat <<END
@@ -79,7 +86,7 @@
rmq_usage() {
cat <<END
-usage: $0 {start|stop|monitor|migrate_to|migrate_from|validate-all|meta-data}
+usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
Expects to have a fully populated OCF RA-compliant environment set.
END
@@ -116,8 +123,13 @@
exit $OCF_ERR_GENERIC
fi
- # store the pcmknode to rmq node mapping as an attribute
+ # store the pcmknode to rmq node mapping as a transient attribute. This allows
+ # us to retrieve the join list with a simple xpath.
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "$RMQ_CRM_ATTR_COOKIE" -v "$node_name"
+
+ # store the pcmknode to rmq node mapping as a permanent attribute as well. this lets
+ # us continue to map offline nodes to their equivalent rmq node name
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l forever --name "$RMQ_CRM_ATTR_COOKIE_LAST_KNOWN" -v "$node_name"
}
rmq_delete_nodename()
@@ -262,6 +274,41 @@
return $OCF_SUCCESS
}
+
+rmq_notify() {
+ node_list="${OCF_RESKEY_CRM_meta_notify_stop_uname}"
+ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+
+
+ # When notifications are on, this agent is going to "forget" nodes once they
+ # leave the cluster. This is thought to resolve some issues where rabbitmq
+ # blocks trying to sync with an offline node after a fencing action occurs.
+ if ! [ "${mode}" = "post-stop" ]; then
+ return $OCF_SUCCESS
+ fi
+
+ rmq_monitor
+ if [ $? -ne $OCF_SUCCESS ]; then
+ # only run forget when we are for sure active
+ return $OCF_SUCCESS
+ fi
+
+ # forget each stopped rmq instance in the provided pcmk node in the list.
+ for node in $(echo "$node_list"); do
+ local rmq_node="$(${HA_SBIN_DIR}/crm_attribute -N $node -l forever --query --name $RMQ_CRM_ATTR_COOKIE_LAST_KNOWN -q)"
+ if [ -z "$rmq_node" ]; then
+ ocf_log warn "Unable to map pcmk node $node to a known rmq node."
+ continue
+ fi
+ ocf_log notice "Forgetting stopped node $rmq_node"
+ $RMQ_CTL forget_cluster_node $rmq_node
+ if [ $? -ne 0 ]; then
+ ocf_log warn "Unable to forget offline node $rmq_node."
+ fi
+ done
+ return $OCF_SUCCESS
+}
+
rmq_start() {
local join_list=""
local rc
@@ -357,6 +404,7 @@
stop) rmq_stop;;
monitor) rmq_monitor;;
validate-all) rmq_validate;;
+notify) rmq_notify;;
usage|help) rmq_usage
exit $OCF_SUCCESS
;;

45
SOURCES/bz1249430-1-tomcat-fix-selinux-enforced.patch

@ -0,0 +1,45 @@ @@ -0,0 +1,45 @@
diff --git a/heartbeat/tomcat b/heartbeat/tomcat
index 8b7fe31..07a7ce4 100755
--- a/heartbeat/tomcat
+++ b/heartbeat/tomcat
@@ -49,6 +49,13 @@
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+# Use runuser if available for SELinux.
+if [ -x /sbin/runuser ]; then
+ SU=runuser
+else
+ SU=su
+fi
+
############################################################################
# Usage
usage()
@@ -143,7 +150,7 @@ monitor_tomcat()
start_rotatelogs()
{
# -s is required because tomcat5.5's login shell is /bin/false
- su - -s /bin/sh $RESOURCE_TOMCAT_USER \
+ $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \
-c "$ROTATELOGS -l \"$CATALINA_BASE/logs/catalina_%F.log\" $CATALINA_ROTATETIME" \
< "$CATALINA_OUT" > /dev/null 2>&1 &
}
@@ -154,7 +161,7 @@ rotate_catalina_out()
{
# Check catalina_%F.log is writable or not.
CURRENT_ROTATELOG_SUFFIX=`date +"%F"`
- su - -s /bin/sh $RESOURCE_TOMCAT_USER \
+ $SU - -s /bin/sh $RESOURCE_TOMCAT_USER \
-c "touch \"$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log\"" > /dev/null 2>&1
if [ $? -ne 0 ]; then
ocf_exit_reason "$CATALINA_BASE/logs/catalina_$CURRENT_ROTATELOG_SUFFIX.log is not writable."
@@ -205,7 +212,7 @@ attemptTomcatCommand()
if [ "$RESOURCE_TOMCAT_USER" = root ]; then
"$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1
else
- tomcatCommand $@ | su - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
+ tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
fi
if [ -n "$REDIRECT_DEFAULT_CONFIG" ]; then

112
SOURCES/bz1249430-2-tomcat-fix-selinux-enforced.patch

@ -0,0 +1,112 @@ @@ -0,0 +1,112 @@
From a1860a5bbe5c63c6a34d9160a8aacffc61a89dcf Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 16 Sep 2016 14:25:28 +0200
Subject: [PATCH] tomcat: use systemd where available due to newer versions not
generating PID-file

---
heartbeat/tomcat | 44 ++++++++++++++++++++++++++++++++++++++++----
1 file changed, 40 insertions(+), 4 deletions(-)

diff --git a/heartbeat/tomcat b/heartbeat/tomcat
index 07a7ce4..813d280 100755
--- a/heartbeat/tomcat
+++ b/heartbeat/tomcat
@@ -56,6 +56,10 @@ else
SU=su
fi
+if which systemctl > /dev/null 2>&1; then
+ SYSTEMD=1
+fi
+
############################################################################
# Usage
usage()
@@ -90,6 +94,10 @@ isrunning_tomcat()
#
isalive_tomcat()
{
+ if ocf_is_true $SYSTEMD; then
+ systemctl is-active tomcat@${TOMCAT_NAME} > /dev/null 2>&1
+ return $?
+ fi
# As the server stops, the PID file disappears. To avoid race conditions,
# we will have remembered the PID of a running instance on script entry.
local pid=$rememberedPID
@@ -184,9 +192,31 @@ rotate_catalina_out()
}
############################################################################
+# Create systemd configuration
+create_systemd_config()
+{
+cat<<-EOF > /etc/sysconfig/tomcat@${TOMCAT_NAME}
+JAVA_HOME=${JAVA_HOME}
+JAVA_OPTS="${JAVA_OPTS}"
+CATALINA_HOME=${CATALINA_HOME}
+CATALINA_BASE=${CATALINA_BASE}
+CATALINA_OUT=${CATALINA_OUT}
+CATALINA_OPTS="${CATALINA_OPTS}"
+CATALINA_TMPDIR="${CATALINA_TMPDIR}"
+JAVA_ENDORSED_DIRS="${JAVA_ENDORSED_DIRS}"
+LOGGING_CONFIG="${LOGGING_CONFIG}"
+LOGGING_MANAGER="${LOGGING_MANAGER}"
+TOMCAT_CFG=${TOMCAT_CFG}
+EOF
+}
+
+############################################################################
# Tomcat Command
tomcatCommand()
{
+ if ocf_is_true $SYSTEMD; then
+ systemctl $@ tomcat@${TOMCAT_NAME}
+ else
cat<<-END_TOMCAT_COMMAND
export JAVA_HOME=${JAVA_HOME}
export JAVA_OPTS="${JAVA_OPTS}"
@@ -202,6 +232,7 @@ cat<<-END_TOMCAT_COMMAND
export TOMCAT_CFG=${TOMCAT_CFG}
$TOMCAT_START_SCRIPT $@
END_TOMCAT_COMMAND
+ fi
}
attemptTomcatCommand()
{
@@ -209,7 +240,9 @@ attemptTomcatCommand()
export TOMCAT_CFG=$(mktemp ${HA_RSCTMP}/tomcat-tmp-XXXXX.cfg)
fi
- if [ "$RESOURCE_TOMCAT_USER" = root ]; then
+ if ocf_is_true $SYSTEMD; then
+ tomcatCommand $@
+ elif [ "$RESOURCE_TOMCAT_USER" = root ]; then
"$TOMCAT_START_SCRIPT" $@ >> "$TOMCAT_CONSOLE" 2>&1
else
tomcatCommand $@ | $SU - -s /bin/sh "$RESOURCE_TOMCAT_USER" >> "$TOMCAT_CONSOLE" 2>&1
@@ -224,6 +257,9 @@ attemptTomcatCommand()
# Start Tomcat
start_tomcat()
{
+ if ocf_is_true $SYSTEMD; then
+ create_systemd_config
+ fi
cd "$CATALINA_HOME/bin"
validate_all_tomcat || exit $?
@@ -334,11 +370,11 @@ Resource script for Tomcat. It manages a Tomcat instance as a cluster resource.
<parameters>
<parameter name="tomcat_name" unique="1" >
-<longdesc lang="en"><![CDATA[
+<longdesc lang="en">
The name of the resource, added as a Java parameter in JAVA_OPTS:
--Dname=<tomcat_name> to Tomcat process on start. Used to ensure
+-Dname=&lt;tomcat_name&gt; to Tomcat process on start. Used to ensure
process is still running and must be unique.
-]]></longdesc>
+</longdesc>
<shortdesc>The name of the resource</shortdesc>
<content type="string" default="" />
</parameter>

1188
SOURCES/bz1250728-send_arp-fix-buffer-overflow-on-infiniband.patch

File diff suppressed because it is too large Load Diff

33
SOURCES/bz1251484-redis-client-passwd-support.patch

@ -0,0 +1,33 @@ @@ -0,0 +1,33 @@
commit fe53056f225fadae184a0ab79f1f96430854812f
Author: David Vossel <dvossel@redhat.com>
Date: Thu Aug 13 14:11:30 2015 -0400

High: redis: use required client pass word when set

diff --git a/heartbeat/redis b/heartbeat/redis
index e1d0795..65abb2a 100755
--- a/heartbeat/redis
+++ b/heartbeat/redis
@@ -218,7 +218,11 @@ function set_score()
function redis_client() {
ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $@"
- "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
+ if [ -n "$clientpasswd" ]; then
+ "$REDIS_CLIENT" -s "$REDIS_SOCKET" -a "$clientpasswd" "$@" | sed 's/\r//'
+ else
+ "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
+ fi
}
function simple_status() {
@@ -487,6 +491,9 @@ function validate() {
}
NODENAME=$(ocf_local_nodename)
+if [ -f "$REDIS_CONFIG" ]; then
+ clientpasswd="$(sed -n -e 's/^\s*requirepass\s*\(.*\)\s*$/\1/p' < "$REDIS_CONFIG" | tail -n 1)" # last requirepass line in the config is used
+fi
ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"

135
SOURCES/bz1260713-1-sapdatabase-process-count-suser.patch

@ -0,0 +1,135 @@ @@ -0,0 +1,135 @@
From fe55f9b909d81a0093dbfb1f00083706cf5d2cf1 Mon Sep 17 00:00:00 2001
From: Alexander Krauth <alexander.krauth@basf.com>
Date: Fri, 19 Feb 2016 18:00:58 +0100
Subject: [PATCH] High: SAPDatabase: Add support for Oracle 12c

To work with Oracle 12c the agent needs an option
to pass the new Database Username to the resource.

Example configuration:

primitive oracle-database SAPDatabase \
params \
SID=HAO \
DBTYPE=ORA \
DBOSUSER=oracle \
STRICT_MONITORING=1 \
op monitor interval=120 timeout=60
---
heartbeat/SAPDatabase | 12 +++++++++++-
heartbeat/sapdb.sh | 35 ++++++++++++++++++++++++++---------
2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase
index de7959f..641bd40 100755
--- a/heartbeat/SAPDatabase
+++ b/heartbeat/SAPDatabase
@@ -18,6 +18,7 @@
# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default)
# OCF_RESKEY_DBTYPE (mandatory, one of the following values: ORA,ADA,DB6,SYB,HDB)
# OCF_RESKEY_DBINSTANCE (optional, Database instance name, if not equal to SID)
+# OCF_RESKEY_DBOSUSER (optional, the Linux user that owns the database processes on operating system level)
# OCF_RESKEY_STRICT_MONITORING (optional, activate application level monitoring - with Oracle a failover will occur in case of an archiver stuck)
# OCF_RESKEY_AUTOMATIC_RECOVER (optional, automatic startup recovery, default is false)
# OCF_RESKEY_MONITOR_SERVICES (optional, default is to monitor all database services)
@@ -69,7 +70,7 @@ meta_data() {
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPDatabase">
-<version>2.06</version>
+<version>2.14</version>
<shortdesc lang="en">Manages a SAP database instance as an HA resource.</shortdesc>
<longdesc lang="en">
@@ -115,6 +116,11 @@ Usually you can leave this empty. Then the default: /usr/sap/hostctrl/exe is use
<shortdesc lang="en">Database instance name, if not equal to SID</shortdesc>
<content type="string" default="" />
</parameter>
+ <parameter name="DBOSUSER" unique="1" required="0">
+ <longdesc lang="en">The parameter can be set, if the database processes on operating system level are not executed with the default user of the used database type. Defaults: ADA=taken from /etc/opt/sdb, DB6=db2SID, ORA=oraSID and oracle, SYB=sybSID, HDB=SIDadm</longdesc>
+ <shortdesc lang="en">the Linux user that owns the database processes on operating system level</shortdesc>
+ <content type="string" default="" />
+ </parameter>
<parameter name="NETSERVICENAME" unique="0" required="0">
<longdesc lang="en">Deprecated - do not use anymore. This parameter will be deleted in one of the next releases.</longdesc>
<shortdesc lang="en">deprecated - do not use anymore</shortdesc>
@@ -305,6 +311,10 @@ DBTYPE=`echo "$OCF_RESKEY_DBTYPE" | tr '[:lower:]' '[:upper:]'`
if saphostctrl_installed; then
. ${OCF_FUNCTIONS_DIR}/sapdb.sh
else
+ if [ -n "${OCF_RESKEY_DBOSUSER}" ]; then
+ ocf_exit_reason "Usage of parameter OCF_RESKEY_DBOSUSER is not possible without having SAP Host-Agent installed"
+ exit $OCF_ERR_ARGS
+ fi
. ${OCF_FUNCTIONS_DIR}/sapdb-nosha.sh
fi
sapdatabase_init
diff --git a/heartbeat/sapdb.sh b/heartbeat/sapdb.sh
index 7edb4b8..33d2033 100755
--- a/heartbeat/sapdb.sh
+++ b/heartbeat/sapdb.sh
@@ -210,7 +210,11 @@ sapdatabase_monitor() {
then
DBINST="-dbinstance $OCF_RESKEY_DBINSTANCE "
fi
- output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST`
+ if [ -n "$OCF_RESKEY_DBOSUSER" ]
+ then
+ DBOSUSER="-dbuser $OCF_RESKEY_DBOSUSER "
+ fi
+ output=`$SAPHOSTCTRL -function GetDatabaseStatus -dbname $SID -dbtype $DBTYPE $DBINST $DBOSUSER`
# we have to parse the output, because the returncode doesn't tell anything about the instance status
for SERVICE in `echo "$output" | grep -i 'Component[ ]*Name *[:=] [A-Za-z][A-Za-z0-9_]* (' | sed 's/^.*Component[ ]*Name *[:=] *\([A-Za-z][A-Za-z0-9_]*\).*$/\1/i'`
@@ -255,30 +259,43 @@ sapdatabase_monitor() {
# sapdatabase_status: Are there any database processes on this host ?
#
sapdatabase_status() {
+ sid=`echo $SID | tr '[:upper:]' '[:lower:]'`
+
+ SUSER=${OCF_RESKEY_DBOSUSER:-""}
+
case $DBTYPE in
ADA) SEARCH="$SID/db/pgm/kernel"
- SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
+ [ -z "$SUSER" ] && SUSER=`grep "^SdbOwner" /etc/opt/sdb | awk -F'=' '{print $2}'`
SNUM=2
;;
- ORA) SEARCH="ora_[a-z][a-z][a-z][a-z]_"
- SUSER="ora`echo $SID | tr '[:upper:]' '[:lower:]'`"
- SNUM=4
+ ORA) DBINST=${OCF_RESKEY_DBINSTANCE}
+ DBINST=${OCF_RESKEY_DBINSTANCE:-${SID}}
+ SEARCH="ora_[a-z][a-z][a-z][a-z]_$DBINST"
+
+ if [ -z "$SUSER" ]; then
+ id "oracle" > /dev/null 2> /dev/null && SUSER="oracle"
+ id "ora${sid}" > /dev/null 2> /dev/null && SUSER="${SUSER:+${SUSER},}ora${sid}"
+ fi
+
+ SNUM=4
;;
DB6) SEARCH="db2[a-z][a-z][a-z]"
- SUSER="db2`echo $SID | tr '[:upper:]' '[:lower:]'`"
+ [ -z "$SUSER" ] && SUSER="db2${sid}"
SNUM=2
;;
SYB) SEARCH="dataserver"
- SUSER="syb`echo $SID | tr '[:upper:]' '[:lower:]'`"
+ [ -z "$SUSER" ] && SUSER="syb${sid}"
SNUM=1
;;
HDB) SEARCH="hdb[a-z]*server"
- SUSER="`echo $SID | tr '[:upper:]' '[:lower:]'`adm"
+ [ -z "$SUSER" ] && SUSER="${sid}adm"
SNUM=1
;;
esac
- cnt=`ps -u $SUSER -o args 2> /dev/null | grep -c $SEARCH`
+ [ -z "$SUSER" ] && return $OCF_ERR_INSTALLED
+
+ cnt=`ps -u $SUSER -o args 2> /dev/null | grep -v grep | grep -c $SEARCH`
[ $cnt -ge $SNUM ] && return $OCF_SUCCESS
return $OCF_NOT_RUNNING
}

24
SOURCES/bz1260713-2-sapdatabase-process-count-suser.patch

@ -0,0 +1,24 @@ @@ -0,0 +1,24 @@
From af5863ecd255d2d514113d39bbf03ab95b5ccca2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Kristoffer=20Gr=C3=B6nlund?= <krig@koru.se>
Date: Mon, 16 Nov 2015 17:14:43 +0100
Subject: [PATCH] SAPDatabase: Add Oracle 12 to list of supported databases
(bsc#953991)

This agent has been tested to work with Oracle database version 12.
---
heartbeat/SAPDatabase | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/SAPDatabase b/heartbeat/SAPDatabase
index 3b77206..de7959f 100755
--- a/heartbeat/SAPDatabase
+++ b/heartbeat/SAPDatabase
@@ -78,7 +78,7 @@ Resource script for SAP databases. It manages a SAP database of any type as an H
The purpose of the resource agent is to start, stop and monitor the database instance of a SAP system. Together with the RDBMS system it will also control the related network service for the database. Like the Oracle Listener and the xserver of MaxDB.
The resource agent expects a standard SAP installation of the database and therefore needs less parameters to configure.
The resource agent supports the following databases:
-- Oracle 10.2 and 11.2
+- Oracle 10.2, 11.2 and 12
- DB/2 UDB for Windows and Unix 9.x
- SAP-DB / MaxDB 7.x
- Sybase ASE 15.7

11
SOURCES/bz1263348-mysql-tmpfile-leak.patch

@ -0,0 +1,11 @@ @@ -0,0 +1,11 @@
diff -uNr a/heartbeat/mysql b/heartbeat/mysql
--- a/heartbeat/mysql 2016-02-29 10:54:21.896786740 +0100
+++ b/heartbeat/mysql 2016-02-29 10:59:13.377446910 +0100
@@ -344,6 +344,7 @@
get_slave_info
rc=$?
+ rm -f $tmpfile
if [ $rc -eq 0 ]; then
# show slave status is not empty

37
SOURCES/bz1265527-sap_redhat_cluster_connector-hostnames-with-dash.patch

@ -0,0 +1,37 @@ @@ -0,0 +1,37 @@
diff -uNr a/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector b/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector
--- a/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector 2013-07-18 21:17:48.000000000 +0200
+++ b/sap_redhat_cluster_connector-6353d27/sap_redhat_cluster_connector 2016-02-29 11:04:48.714352114 +0100
@@ -251,13 +251,13 @@
open CRMOUT, "$cmd_cibadmin --local -Q --xpath '//primitive[\@type=\"$sra\"]' --node-path 2>/dev/null |" || die "could not open cibadmin output";
while (<CRMOUT>) {
my $line = $_;
- if ($line =~ /primitive..id='([a-zA-Z0-9_]+)'/) {
+ if ($line =~ /primitive..id='([a-zA-Z0-9_-]+)'/) {
($fname) = ($1);
} else {
next;
}
- if ( $line =~ /[group|master|clone]..id='([a-zA-Z0-9_]+)'/) {
+ if ( $line =~ /[group|master|clone]..id='([a-zA-Z0-9_-]+)'/) {
($fgname) = ($1);
}
@@ -265,7 +265,7 @@
open RESOURCE1_OUT, "$cmd_cibadmin -Q --xpath \"//primitive[\@id='$fname']//nvpair[\@name='$sparam']\" 2>/dev/null |" || die "could not open cibadmin output";
while (<RESOURCE1_OUT>) {
my $result = $_;
- if ($result =~ /value="([a-zA-Z0-9_]+)"/) {
+ if ($result =~ /value="([a-zA-Z0-9_-]+)"/) {
my $finstance=$1;
if ( $1 =~ /^${sid}_[a-zA-Z0-9]+${ino}_[a-zA-Z0-9_-]+$/ ) {
$foundRes=1;
@@ -279,7 +279,7 @@
open RESOURCE2_OUT, "$cmd_cibadmin -Q --xpath \"//primitive[\@id='$fname']//nvpair[\@name='$sparam2']\" 2>/dev/null |" || die "could not open cibadmin output";
while (<RESOURCE2_OUT>) {
my $result = $_;
- if ($result =~ /value="([a-zA-Z0-9_]+)"/) {
+ if ($result =~ /value="([a-zA-Z0-9_-]+)"/) {
my $finstance=$1;
if ( $1 =~ /^${sid}_[a-zA-Z0-9]+${ino}_[a-zA-Z0-9_-]+$/ ) {
$foundRes=1;

60
SOURCES/bz1276699-ipaddr2-use-ipv6-dad-for-collision-detection.patch

@ -0,0 +1,60 @@ @@ -0,0 +1,60 @@
diff -uNr a/heartbeat/IPaddr2 b/heartbeat/IPaddr2
--- a/heartbeat/IPaddr2 2016-02-29 10:54:21.909786575 +0100
+++ b/heartbeat/IPaddr2 2016-02-29 14:38:48.502852067 +0100
@@ -673,19 +673,35 @@
#
run_send_ua() {
local i
- # Wait until the allocated IPv6 address gets ready by checking
- # "tentative" flag is disappeared, otherwise send_ua can not
- # send the unsolicited advertisement requests.
- for i in 1 2 3 4 5; do
- $IP2UTIL -o -f $FAMILY addr show dev $NIC \
- | grep -q -e "$OCF_RESKEY_ip/$NETMASK .* tentative"
- [ $? -ne 0 ] && break
- if [ $i -eq 5 ]; then
- ocf_log warn "$OCF_RESKEY_ip still has 'tentative' status. (ignored)"
+
+ # Duplicate Address Detection [DAD]
+ # The kernel flags the IP as 'tentative' until it has ensured that
+ # there are no duplicates.
+ # If there is one, it flags the address as 'dadfailed'.
+ for i in $(seq 1 10); do
+ ipstatus=$($IP2UTIL -o -f $FAMILY addr show dev $NIC to $OCF_RESKEY_ip/$NETMASK)
+ case "$ipstatus" in
+ *dadfailed*)
+ ocf_log err "IPv6 address collision $OCF_RESKEY_ip [DAD]"
+ $IP2UTIL -f $FAMILY addr del dev $NIC $OCF_RESKEY_ip/$NETMASK
+ if [ $? -ne 0 ]; then
+ ocf_log err "Could not delete IPv6 address"
+ fi
+ return $OCF_ERR_GENERIC
+ ;;
+ *tentative*)
+ if [ $i -eq 10 ]; then
+ ocf_log warn "IPv6 address : DAD is still in tentative"
+ fi
+ ;;
+ *)
break
- fi
+ ;;
+ esac
sleep 1
done
+ # Now the address should be usable
+
ARGS="-i $OCF_RESKEY_arp_interval -c $OCF_RESKEY_arp_count $OCF_RESKEY_ip $NETMASK $NIC"
ocf_log info "$SENDUA $ARGS"
$SENDUA $ARGS || ocf_log err "Could not send ICMPv6 Unsolicited Neighbor Advertisements."
@@ -838,6 +854,10 @@
else
if [ -x $SENDUA ]; then
run_send_ua
+ if [ $? -ne 0 ]; then
+ ocf_exit_reason "run_send_ua failed."
+ exit $OCF_ERR_GENERIC
+ fi
fi
fi
;;

728
SOURCES/bz1282723-novacompute-novaevacuate-fix-evacute-typo.patch

@ -0,0 +1,728 @@ @@ -0,0 +1,728 @@
diff -uNr a/doc/man/Makefile.am b/doc/man/Makefile.am
--- a/doc/man/Makefile.am 2016-02-02 14:49:34.546698286 +0100
+++ b/doc/man/Makefile.am 2016-02-02 14:50:29.893979453 +0100
@@ -73,7 +73,7 @@
ocf_heartbeat_MailTo.7 \
ocf_heartbeat_ManageRAID.7 \
ocf_heartbeat_ManageVE.7 \
- ocf_heartbeat_NovaCompute.7 \
+ ocf_heartbeat_nova-compute-wait.7 \
ocf_heartbeat_NovaEvacuate.7 \
ocf_heartbeat_Pure-FTPd.7 \
ocf_heartbeat_Raid1.7 \
diff -uNr a/heartbeat/Makefile.am b/heartbeat/Makefile.am
--- a/heartbeat/Makefile.am 2016-02-02 14:49:34.546698286 +0100
+++ b/heartbeat/Makefile.am 2016-02-02 14:50:29.894979440 +0100
@@ -52,7 +52,7 @@
IPv6addr_LDADD = -lplumb $(LIBNETLIBS)
send_ua_LDADD = $(LIBNETLIBS)
-osp_SCRIPTS = NovaCompute \
+osp_SCRIPTS = nova-compute-wait \
NovaEvacuate
ocf_SCRIPTS = ClusterMon \
diff -uNr a/heartbeat/NovaCompute b/heartbeat/NovaCompute
--- a/heartbeat/NovaCompute 2016-02-02 14:49:34.541698351 +0100
+++ b/heartbeat/NovaCompute 1970-01-01 01:00:00.000000000 +0100
@@ -1,366 +0,0 @@
-#!/bin/sh
-#
-#
-# NovaCompute agent manages compute daemons.
-#
-# Copyright (c) 2015
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like. Any license provided herein, whether implied or
-# otherwise, applies only to this software file. Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
-#
-
-#######################################################################
-# Initialization:
-
-###
-: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
-. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
-###
-
-: ${__OCF_ACTION=$1}
-
-#######################################################################
-
-meta_data() {
- cat <<END
-<?xml version="1.0"?>
-<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
-<resource-agent name="NovaCompute" version="1.0">
-<version>1.0</version>
-
-<longdesc lang="en">
-OpenStack Nova Compute Server.
-</longdesc>
-<shortdesc lang="en">OpenStack Nova Compute Server</shortdesc>
-
-<parameters>
-
-<parameter name="auth_url" unique="0" required="1">
-<longdesc lang="en">
-Authorization URL for connecting to keystone in admin context
-</longdesc>
-<shortdesc lang="en">Authorization URL</shortdesc>
-<content type="string" default="" />
-</parameter>
-
-<parameter name="username" unique="0" required="1">
-<longdesc lang="en">
-Username for connecting to keystone in admin context
-</longdesc>
-<shortdesc lang="en">Username</shortdesc>
-</parameter>
-
-<parameter name="password" unique="0" required="1">
-<longdesc lang="en">
-Password for connecting to keystone in admin context
-</longdesc>
-<shortdesc lang="en">Password</shortdesc>
-<content type="string" default="" />
-</parameter>
-
-<parameter name="tenant_name" unique="0" required="1">
-<longdesc lang="en">
-Tenant name for connecting to keystone in admin context.
-Note that with Keystone V3 tenant names are only unique within a domain.
-</longdesc>
-<shortdesc lang="en">Tenant name</shortdesc>
-<content type="string" default="" />
-</parameter>
-
-<parameter name="domain" unique="0" required="0">
-<longdesc lang="en">
-DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
-</longdesc>
-<shortdesc lang="en">DNS domain</shortdesc>
-<content type="string" default="" />
-</parameter>
-
-<parameter name="endpoint_type" unique="0" required="0">
-<longdesc lang="en">
-Nova API location (internal, public or admin URL)
-</longdesc>
-<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
-<content type="string" default="" />
-</parameter>
-
-<parameter name="no_shared_storage" unique="0" required="0">
-<longdesc lang="en">
-Disable shared storage recovery for instances. Use at your own risk!
-</longdesc>
-<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
-<content type="boolean" default="0" />
-</parameter>
-
-<parameter name="evacuation_delay" unique="0" required="0">
-<longdesc lang="en">
-How long to wait for nova to finish evacuating instances elsewhere
-before starting nova-compute. Only used when the agent detects
-evacuations might be in progress.
-
-You may need to increase the start timeout when increasing this value.
-</longdesc>
-<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
-<content type="integer" default="120" />
-</parameter>
-
-</parameters>
-
-<actions>
-<action name="start" timeout="600" />
-<action name="stop" timeout="300" />
-<action name="monitor" timeout="20" interval="10" depth="0"/>
-<action name="validate-all" timeout="20" />
-<action name="meta-data" timeout="5" />
-</actions>
-</resource-agent>
-END
-}
-
-#######################################################################
-
-# don't exit on TERM, to test that lrmd makes sure that we do exit
-trap sigterm_handler TERM
-sigterm_handler() {
- ocf_log info "They use TERM to bring us down. No such luck."
- return
-}
-
-nova_usage() {
- cat <<END
-usage: $0 {start|stop|monitor|validate-all|meta-data}
-
-Expects to have a fully populated OCF RA-compliant environment set.
-END
-}
-
-nova_pid() {
- ps axf | grep python.*nova-compute | grep -v grep | awk '{print $1}'
-}
-
-nova_start() {
- nova_monitor
- if [ $? = $OCF_SUCCESS ]; then
- return $OCF_SUCCESS
- fi
-
- state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
- if [ "x$state" = x ]; then
- : never been fenced
-
- elif [ "x$state" = xno ]; then
- : has been evacuated, however it could have been 1s ago
- ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
- sleep ${OCF_RESKEY_evacuation_delay}
-
- else
- ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
- while [ "x$state" != "xno" ]; do
- state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
- sleep 5
- done
-
- ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
- sleep ${OCF_RESKEY_evacuation_delay}
- fi
-
- export LIBGUESTFS_ATTACH_METHOD=appliance
- su nova -s /bin/sh -c /usr/bin/nova-compute &
-
- rc=$OCF_NOT_RUNNING
- ocf_log info "Waiting for nova to start"
- while [ $rc != $OCF_SUCCESS ]; do
- nova_monitor
- rc=$?
- done
-
-## TEMPORARY disable call to "service enable" that seems to create
-## issues and it is unnecessary since fence_compute doesn't disable
-## the service
-
-# if [ "x${OCF_RESKEY_domain}" != x ]; then
-# export service_host="${NOVA_HOST}.${OCF_RESKEY_domain}"
-# else
-# export service_host="${NOVA_HOST}"
-# fi
-
-# python -c "import os; from novaclient import client as nova_client; nova = nova_client.Client('2', os.environ.get('OCF_RESKEY_username'), os.environ.get('OCF_RESKEY_password'), os.environ.get('OCF_RESKEY_tenant_name'), os.environ.get('OCF_RESKEY_auth_url')); nova.services.enable(os.environ.get('service_host'), 'nova-compute');"
-
-# rc=$?
-# if [ $rc != 0 ]; then
-# ocf_exit_reason "nova.services.enable failed $rc"
-# exit $OCF_NOT_RUNNING
-# fi
-
- return $OCF_SUCCESS
-}
-
-nova_stop() {
- pid=$(nova_pid)
- if [ "x$pid" != x ]; then
- su nova -c "kill -TERM $pid" -s /bin/bash
- fi
-
- while [ "x$pid" != x ]; do
- sleep 1
- pid=$(nova_pid)
- done
-
- return $OCF_SUCCESS
-}
-
-nova_monitor() {
- pid=$(nova_pid)
- if [ "x$pid" != x ]; then
- ## TEMPORARY disable call to fence_compute to avoid noise on first
- ## first startup due to nova-compute not being fast enough to populate
- ## the db and fence_compute checking if node exists and it's enabled
- #state=$(fence_compute ${fence_options} -o status -n $NOVA_HOST | grep Status)
- #if [ "x$state" = "xStatus: ON" ]; then
- return $OCF_SUCCESS
- #else
- # ocf_exit_reason "Nova status: $state"
- # return $OCF_ERR_GENERIC
- #fi
- fi
-
- return $OCF_NOT_RUNNING
-}
-
-nova_notify() {
- return $OCF_SUCCESS
-}
-
-nova_validate() {
- rc=$OCF_SUCCESS
- fence_options=""
-
- check_binary openstack-config
- check_binary nova-compute
-
- if [ ! -f /etc/nova/nova.conf ]; then
- ocf_exit_reason "/etc/nova/nova.conf not found"
- exit $OCF_ERR_CONFIGURED
- fi
-
- if [ -z "${OCF_RESKEY_auth_url}" ]; then
- ocf_exit_reason "auth_url not configured"
- exit $OCF_ERR_CONFIGURED
- fi
-
- fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
-
- if [ -z "${OCF_RESKEY_username}" ]; then
- ocf_exit_reason "username not configured"
- exit $OCF_ERR_CONFIGURED
- fi
-
- fence_options="${fence_options} -l ${OCF_RESKEY_username}"
-
- if [ -z "${OCF_RESKEY_password}" ]; then
- ocf_exit_reason "password not configured"
- exit $OCF_ERR_CONFIGURED
- fi
-
- fence_options="${fence_options} -p ${OCF_RESKEY_password}"
-
- if [ -z "${OCF_RESKEY_tenant_name}" ]; then
- ocf_exit_reason "tenant_name not configured"
- exit $OCF_ERR_CONFIGURED
- fi
-
- fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
-
- if [ -n "${OCF_RESKEY_domain}" ]; then
- fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
- fi
-
- if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
- if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
- fence_options="${fence_options} --no-shared-storage"
- fi
- fi
-
- if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
- case ${OCF_RESKEY_endpoint_type} in
- adminURL|publicURL|internalURL) ;;
- *)
- ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
- exit $OCF_ERR_CONFIGURED
- ;;
- esac
- fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
- fi
-
- # we take a chance here and hope that host is either not configured
- # or configured in nova.conf
-
- NOVA_HOST=$(openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
- if [ $? = 1 ]; then
- if [ "x${OCF_RESKEY_domain}" != x ]; then
- NOVA_HOST=$(uname -n | awk -F. '{print $1}')
- else
- NOVA_HOST=$(uname -n)
- fi
- fi
-
- # We only need to check a configured value, calculated ones are fine
- openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null
- if [ $? = 0 ]; then
- if [ "x${OCF_RESKEY_domain}" != x ]; then
- short_host=$(uname -n | awk -F. '{print $1}')
- if [ "x$NOVA_HOST" != "x${short_host}" ]; then
- ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function"
- rc=$OCF_ERR_CONFIGURED
- fi
-
- elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then
- ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function"
- rc=$OCF_ERR_CONFIGURED
- fi
- fi
-
- if [ $rc != $OCF_SUCCESS ]; then
- exit $rc
- fi
- return $rc
-}
-
-: ${OCF_RESKEY_evacuation_delay=120}
-case $__OCF_ACTION in
-meta-data) meta_data
- exit $OCF_SUCCESS
- ;;
-usage|help) nova_usage
- exit $OCF_SUCCESS
- ;;
-esac
-
-case $__OCF_ACTION in
-start) nova_validate; nova_start;;
-stop) nova_stop;;
-monitor) nova_validate; nova_monitor;;
-notify) nova_notify;;
-validate-all) exit $OCF_SUCCESS;;
-*) nova_usage
- exit $OCF_ERR_UNIMPLEMENTED
- ;;
-esac
-rc=$?
-ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
-exit $rc
diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait
--- a/heartbeat/nova-compute-wait 1970-01-01 01:00:00.000000000 +0100
+++ b/heartbeat/nova-compute-wait 2016-02-02 14:50:29.894979440 +0100
@@ -0,0 +1,304 @@
+#!/bin/sh
+#
+#
+# nova-compute-wait agent manages compute daemons.
+#
+# Copyright (c) 2015
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+
+#######################################################################
+# Initialization:
+
+###
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+###
+
+: ${__OCF_ACTION=$1}
+
+#######################################################################
+
+meta_data() { # print the OCF resource-agent metadata (XML) on stdout
+    cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="nova-compute-wait" version="1.0">
+<version>1.0</version>
+<longdesc lang="en">
+OpenStack Nova Compute Server.
+</longdesc>
+<shortdesc lang="en">OpenStack Nova Compute Server</shortdesc>
+
+<parameters>
+
+<parameter name="auth_url" unique="0" required="1">
+<longdesc lang="en">
+Authorization URL for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Authorization URL</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="username" unique="0" required="1">
+<longdesc lang="en">
+Username for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Username</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="password" unique="0" required="1">
+<longdesc lang="en">
+Password for connecting to keystone in admin context
+</longdesc>
+<shortdesc lang="en">Password</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="tenant_name" unique="0" required="1">
+<longdesc lang="en">
+Tenant name for connecting to keystone in admin context.
+Note that with Keystone V3 tenant names are only unique within a domain.
+</longdesc>
+<shortdesc lang="en">Tenant name</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="domain" unique="0" required="0">
+<longdesc lang="en">
+DNS domain in which hosts live, useful when the cluster uses short names and nova uses FQDN
+</longdesc>
+<shortdesc lang="en">DNS domain</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="endpoint_type" unique="0" required="0">
+<longdesc lang="en">
+Nova API location (internal, public or admin URL)
+</longdesc>
+<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
+<content type="string" default="" />
+</parameter>
+
+<parameter name="no_shared_storage" unique="0" required="0">
+<longdesc lang="en">
+Disable shared storage recovery for instances. Use at your own risk!
+</longdesc>
+<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
+<content type="boolean" default="0" />
+</parameter>
+
+<parameter name="evacuation_delay" unique="0" required="0">
+<longdesc lang="en">
+How long to wait for nova to finish evacuating instances elsewhere
+before starting nova-compute. Only used when the agent detects
+evacuations might be in progress.
+
+You may need to increase the start timeout when increasing this value.
+</longdesc>
+<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
+<content type="integer" default="120" />
+</parameter>
+
+</parameters>
+
+<actions>
+<action name="start" timeout="600" />
+<action name="stop" timeout="300" />
+<action name="monitor" timeout="20" interval="10" depth="0"/>
+<action name="validate-all" timeout="20" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+END
+}
+
+#######################################################################
+
+# TERM is only logged, never obeyed, so that lrmd has to enforce the exit
+sigterm_handler() {
+    ocf_log info "They use TERM to bring us down. No such luck."
+    return
+}
+trap sigterm_handler TERM
+
+# Print the command-line synopsis of this agent on stdout.
+nova_usage() {
+    printf '%s\n' \
+        "usage: $0 {start|stop|monitor|validate-all|meta-data}" \
+        "" \
+        "Expects to have a fully populated OCF RA-compliant environment set."
+}
+
+# Delay the start while evacuations of this host may still be running.
+nova_start() {
+    state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+    case "x$state" in
+    x)
+        # never been fenced
+        return $OCF_SUCCESS ;;
+    xno)
+        # has been evacuated, however it could have been 1s ago
+        ;;
+    *)
+        ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
+        until [ "x$state" = "xno" ]; do
+            state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
+            sleep 5
+        done
+        ;;
+    esac
+    ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
+    sleep ${OCF_RESKEY_evacuation_delay}
+    return $OCF_SUCCESS
+}
+
+nova_stop() { # stop action: performs no work and always reports success
+ return $OCF_SUCCESS
+}
+
+nova_monitor() { # monitor action: performs no check of its own, always reports success
+ return $OCF_SUCCESS
+}
+
+nova_notify() { # notify action: ignores the notification and reports success
+ return $OCF_SUCCESS
+}
+
+nova_validate() { # verify tools and settings, derive NOVA_HOST; exits the agent on any error
+    rc=$OCF_SUCCESS
+    fence_options=""
+    # NOTE(review): fence_options is assembled below but never read in this script
+    check_binary openstack-config
+    check_binary nova-compute
+
+    if [ ! -f /etc/nova/nova.conf ]; then
+        ocf_exit_reason "/etc/nova/nova.conf not found"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    if [ -z "${OCF_RESKEY_auth_url}" ]; then
+        ocf_exit_reason "auth_url not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
+
+    if [ -z "${OCF_RESKEY_username}" ]; then
+        ocf_exit_reason "username not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -l ${OCF_RESKEY_username}"
+
+    if [ -z "${OCF_RESKEY_password}" ]; then
+        ocf_exit_reason "password not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -p ${OCF_RESKEY_password}"
+
+    if [ -z "${OCF_RESKEY_tenant_name}" ]; then
+        ocf_exit_reason "tenant_name not configured"
+        exit $OCF_ERR_CONFIGURED
+    fi
+
+    fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+
+    if [ -n "${OCF_RESKEY_domain}" ]; then
+        fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
+    fi
+
+    if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
+        if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
+            fence_options="${fence_options} --no-shared-storage"
+        fi
+    fi
+
+    if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
+        case ${OCF_RESKEY_endpoint_type} in
+            adminURL|publicURL|internalURL) ;;
+            *)
+                ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
+                exit $OCF_ERR_CONFIGURED
+                ;;
+        esac
+        fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
+    fi
+
+    # we take a chance here and hope that host is either not configured
+    # or configured in nova.conf
+
+    NOVA_HOST=$(openstack-config --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
+    if [ $? = 1 ]; then
+        if [ "x${OCF_RESKEY_domain}" != x ]; then
+            NOVA_HOST=$(uname -n | awk -F. '{print $1}')
+        else
+            NOVA_HOST=$(uname -n)
+        fi
+    fi
+    # We only need to check a configured value, calculated ones are fine.
+    # Only the exit status matters here, so stdout is silenced along with stderr.
+    openstack-config --get /etc/nova/nova.conf DEFAULT host >/dev/null 2>&1
+    if [ $? = 0 ]; then
+        if [ "x${OCF_RESKEY_domain}" != x ]; then
+            short_host=$(uname -n | awk -F. '{print $1}')
+            if [ "x$NOVA_HOST" != "x${short_host}" ]; then
+                ocf_exit_reason "Invalid Nova host name, must be ${short_host} in order for instance recovery to function"
+                rc=$OCF_ERR_CONFIGURED
+            fi
+
+        elif [ "x$NOVA_HOST" != "x$(uname -n)" ]; then
+            ocf_exit_reason "Invalid Nova host name, must be $(uname -n) in order for instance recovery to function"
+            rc=$OCF_ERR_CONFIGURED
+        fi
+    fi
+
+    if [ $rc != $OCF_SUCCESS ]; then
+        exit $rc
+    fi
+    return $rc
+}
+
+# Entry point: apply the default delay, then dispatch on the requested action.
+if [ -z "${OCF_RESKEY_evacuation_delay+set}" ]; then
+    OCF_RESKEY_evacuation_delay=120
+fi
+
+# Informational actions and validate-all exit right away; start and monitor
+# validate the configuration first; the remaining actions run directly.
+case "$__OCF_ACTION" in
+    meta-data)    meta_data;  exit $OCF_SUCCESS ;;
+    usage|help)   nova_usage; exit $OCF_SUCCESS ;;
+    validate-all) exit $OCF_SUCCESS ;;
+    start)        nova_validate; nova_start ;;
+    monitor)      nova_validate; nova_monitor ;;
+    stop)         nova_stop ;;
+    notify)       nova_notify ;;
+    *)
+        nova_usage
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
+rc=$?
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
+exit $rc
diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
--- a/heartbeat/NovaEvacuate 2016-02-02 14:49:34.541698351 +0100
+++ b/heartbeat/NovaEvacuate 2016-02-02 14:50:22.768072003 +0100
@@ -141,7 +141,7 @@
}
update_evacuation() {
- attrd_updater -p -n evacute -Q -N ${1} -v ${2}
+ attrd_updater -p -n evacuate -Q -N ${1} -v ${2}
arc=$?
if [ ${arc} != 0 ]; then
ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}"
@@ -219,7 +219,12 @@
return $OCF_NOT_RUNNING
fi
- handle_evacuations $(attrd_updater -n evacute -A | tr '="' ' ' | awk '{print $4" "$6}')
+ handle_evacuations $(
+ attrd_updater -n evacuate -A |
+ sed 's/ value=""/ value="no"/' |
+ tr '="' ' ' |
+ awk '{print $4" "$6}'
+ )
return $OCF_SUCCESS
}

131
SOURCES/bz1284526-galera-crash-recovery.patch

@ -0,0 +1,131 @@ @@ -0,0 +1,131 @@
From d9833b68498e306d181be11adf9eee14b646a899 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Tue, 2 Feb 2016 14:34:36 +0100
Subject: [PATCH] galera: force crash recovery if needed during last commit
detection

---
heartbeat/galera | 90 +++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 60 insertions(+), 30 deletions(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index 7be2b00..ca94c21 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -525,6 +525,58 @@ detect_first_master()
set_bootstrap_node $best_node
}
+# Determine this node's last committed sequence number and publish it.
+# The seqno is read from grastate.dat first; if it is empty or -1, the
+# server binary is run with --wsrep-recover and the recovered position is
+# parsed from its standard output. If that yields nothing and the error
+# output reports prepared transactions, recovery is retried with
+# --tc-heuristic-recover=rollback.
+# Returns OCF_SUCCESS after calling set_last_commit; otherwise sets an exit
+# reason, calls clear_last_commit and returns OCF_ERR_GENERIC.
+# NOTE(review): $recovered_position_regex is expanded unquoted in both sed
+# calls - confirm that word splitting / globbing cannot affect it.
+detect_last_commit()
+{
+ local last_commit
+ local recover_args="--defaults-file=$OCF_RESKEY_config \
+ --pid-file=$OCF_RESKEY_pid \
+ --socket=$OCF_RESKEY_socket \
+ --datadir=$OCF_RESKEY_datadir \
+ --user=$OCF_RESKEY_user"
+ local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
+
+ ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
+ last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
+ if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
+ local tmp=$(mktemp)
+ local tmperr=$(mktemp)
+
+ ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
+
+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr
+
+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
+ if [ -z "$last_commit" ]; then
+ # Galera uses InnoDB's 2pc transactions internally. If
+ # server was stopped in the middle of a replication, the
+ # recovery may find a "prepared" XA transaction in the
+ # redo log, and mysql won't recover automatically
+
+ cat $tmperr | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null
+ if [ $? -eq 0 ]; then
+ # we can only rollback the transaction, but that's OK
+ # since the DB will get resynchronized anyway
+ ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
+ ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
+ --tc-heuristic-recover=rollback > $tmp 2>/dev/null
+
+ last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
+ fi
+ fi
+ rm -f $tmp $tmperr
+ fi
+
+ if [ ! -z "$last_commit" ]; then
+ ocf_log info "Last commit version found: $last_commit"
+ set_last_commit $last_commit
+ return $OCF_SUCCESS
+ else
+ ocf_exit_reason "Unable to detect last known write sequence number"
+ clear_last_commit
+ return $OCF_ERR_GENERIC
+ fi
+}
+
# For galera, promote is really start
galera_promote()
{
@@ -569,13 +620,15 @@ galera_demote()
clear_bootstrap_node
clear_last_commit
- # record last commit by "starting" galera. start is just detection of the last sequence number
- galera_start
+ # record last commit for next promotion
+ detect_last_commit
+ rc=$?
+ return $rc
}
galera_start()
{
- local last_commit
+ local rc
echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
if [ $? -ne 0 ]; then
@@ -591,34 +644,11 @@ galera_start()
mysql_common_prepare_dirs
- ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
- last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
- if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
- ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
- local tmp=$(mktemp)
- ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
- --pid-file=$OCF_RESKEY_pid \
- --socket=$OCF_RESKEY_socket \
- --datadir=$OCF_RESKEY_datadir \
- --user=$OCF_RESKEY_user \
- --wsrep-recover > $tmp 2>&1
-
- last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
- rm -f $tmp
-
- if [ "$last_commit" = "-1" ]; then
- last_commit="0"
- fi
- fi
-
- if [ -z "$last_commit" ]; then
- ocf_exit_reason "Unable to detect last known write sequence number"
- clear_last_commit
- return $OCF_ERR_GENERIC
+ detect_last_commit
+ rc=$?
+ if [ $rc -ne $OCF_SUCCESS ]; then
+ return $rc
fi
- ocf_log info "Last commit version found: $last_commit"
-
- set_last_commit $last_commit
master_exists
if [ $? -eq 0 ]; then

89
SOURCES/bz1284526-galera-heuristic-recovered.patch

@ -0,0 +1,89 @@ @@ -0,0 +1,89 @@
From 4d98bbcdadda60166faf7ccc512b9095b439e2bd Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Tue, 2 Feb 2016 16:29:10 +0100
Subject: [PATCH] galera: prevent recovered nodes from bootstrapping cluster
when possible

---
heartbeat/README.galera | 19 ++++++++++++++++++-
heartbeat/galera | 41 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index ca94c21..84c92fd 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -276,6 +276,22 @@ is_bootstrap()
}
+set_heuristic_recovered()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true"
+}
+
+clear_heuristic_recovered()
+{
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D
+}
+
+is_heuristic_recovered()
+{
+ local node=$1
+ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null
+}
+
clear_last_commit()
{
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D
@@ -398,8 +414,19 @@ detect_first_master()
local best_node="$NODENAME"
local last_commit=0
local missing_nodes=0
+ local nodes=""
+ local nodes_recovered=""
+ # avoid selecting a recovered node as bootstrap if possible
for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
+ if is_heuristic_recovered $node; then
+ nodes_recovered="$nodes_recovered $node"
+ else
+ nodes="$nodes $node"
+ fi
+ done
+
+ for node in $nodes_recovered $nodes; do
last_commit=$(get_last_commit $node)
if [ -z "$last_commit" ]; then
@@ -466,6 +493,12 @@ detect_last_commit()
--tc-heuristic-recover=rollback > $tmp 2>/dev/null
last_commit="$(cat $tmp | sed -n $recovered_position_regex)"
+ if [ ! -z "$last_commit" ]; then
+ ocf_log warn "State recovered. force SST at next restart for full resynchronization"
+ rm -f ${OCF_RESKEY_datadir}/grastate.dat
+ # try not to use this node if bootstrap is needed
+ set_heuristic_recovered
+ fi
fi
fi
rm -f $tmp $tmperr
@@ -549,11 +582,17 @@ galera_promote()
if ocf_is_true $bootstrap; then
promote_everyone
clear_bootstrap_node
+ # clear attribute heuristic-recovered. if last shutdown was
+ # not clean, we cannot be extra-cautious by requesting a SST
+ # since this is the bootstrap node
+ clear_heuristic_recovered
ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
else
# if this is not the bootstrap node, make sure this instance
# syncs with the rest of the cluster before promotion returns.
wait_for_sync
+ # sync is done, clear info about last recovery
+ clear_heuristic_recovered
fi
ocf_log info "Galera started"

113
SOURCES/bz1284526-galera-no-grastate.patch

@ -0,0 +1,113 @@ @@ -0,0 +1,113 @@
From 422ef6a2018ebf9d6765e1f2965778f42c6a9d9c Mon Sep 17 00:00:00 2001
From: Damien Ciabrini <dciabrin@redhat.com>
Date: Tue, 15 Mar 2016 18:45:13 +0100
Subject: [PATCH] galera: don't bootstrap from a node with no grastate.dat when
possible

---
heartbeat/README.galera | 9 ++++-----
heartbeat/galera | 36 ++++++++++++++++++++++--------------
2 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index 72add3c..e4495be 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -276,20 +276,20 @@ is_bootstrap()
}
-set_heuristic_recovered()
+set_no_grastate()
{
- ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true"
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -v "true"
}
-clear_heuristic_recovered()
+clear_no_grastate()
{
- ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -D
}
-is_heuristic_recovered()
+is_no_grastate()
{
local node=$1
- ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null
+ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-no-grastate" -Q 2>/dev/null
}
clear_last_commit()
@@ -419,7 +419,7 @@ detect_first_master()
# avoid selecting a recovered node as bootstrap if possible
for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do
- if is_heuristic_recovered $node; then
+ if is_no_grastate $node; then
nodes_recovered="$nodes_recovered $node"
else
nodes="$nodes $node"
@@ -473,6 +473,12 @@ detect_last_commit()
local tmp=$(mktemp)
local tmperr=$(mktemp)
+ # if we pass here because grastate.dat doesn't exist,
+ # try not to bootstrap from this node if possible
+ if [ ! -f ${OCF_RESKEY_datadir}/grastate.dat ]; then
+ set_no_grastate
+ fi
+
ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
${OCF_RESKEY_binary} $recover_args --wsrep-recover > $tmp 2> $tmperr
@@ -496,8 +502,8 @@ detect_last_commit()
if [ ! -z "$last_commit" ]; then
ocf_log warn "State recovered. force SST at next restart for full resynchronization"
rm -f ${OCF_RESKEY_datadir}/grastate.dat
- # try not to use this node if bootstrap is needed
- set_heuristic_recovered
+ # try not to bootstrap from this node if possible
+ set_no_grastate
fi
fi
fi
@@ -582,17 +588,17 @@ galera_promote()
if ocf_is_true $bootstrap; then
promote_everyone
clear_bootstrap_node
- # clear attribute heuristic-recovered. if last shutdown was
+ # clear attribute no-grastate. if last shutdown was
# not clean, we cannot be extra-cautious by requesting a SST
# since this is the bootstrap node
- clear_heuristic_recovered
+ clear_no_grastate
ocf_log info "Bootstrap complete, promoting the rest of the galera instances."
else
# if this is not the bootstrap node, make sure this instance
# syncs with the rest of the cluster before promotion returns.
wait_for_sync
- # sync is done, clear info about last recovery
- clear_heuristic_recovered
+ # sync is done, clear info about last startup
+ clear_no_grastate
fi
ocf_log info "Galera started"
@@ -611,6 +617,7 @@ galera_demote()
# if this node was previously a bootstrap node, that is no longer the case.
clear_bootstrap_node
clear_last_commit
+ clear_no_grastate
# record last commit for next promotion
detect_last_commit
@@ -722,6 +729,7 @@ galera_stop()
clear_last_commit
clear_master_score
clear_bootstrap_node
+ clear_no_grastate
return $rc
}

31
SOURCES/bz1287303-novaevacuate-invoke-off-action.patch

@ -0,0 +1,31 @@ @@ -0,0 +1,31 @@
From 5e9310bbbcd5086ea9a3edf85d523c4c2a57f1c3 Mon Sep 17 00:00:00 2001
From: Andrew Beekhof <andrew@beekhof.net>
Date: Tue, 8 Dec 2015 13:54:12 +1100
Subject: [PATCH] NovaEvacuate should invoke fence_compute with action 'off'

Conceptually we are resurrecting in one direction only (off) and not
bringing it back to the current host afterwards (on)

Also it will overwrite the attrd variable too soon.

Change-Id: I9694945ca7eedae4f5cb6758fe1e8ce7f72ae808
---
ocf/NovaEvacuate | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
index a17a159..0e22d7e 100644
--- a/heartbeat/NovaEvacuate
+++ b/heartbeat/NovaEvacuate
@@ -198,7 +198,7 @@ handle_evacuations() {
return $OCF_SUCCESS
fi
- fence_compute ${fence_options} -o reboot -n $node
+ fence_compute ${fence_options} -o off -n $node
rc=$?
if [ $rc = 0 ]; then
--
1.9.1

23
SOURCES/bz1287314-novaevacuate-simplify-nova-check.patch

@ -0,0 +1,23 @@ @@ -0,0 +1,23 @@
diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
--- a/heartbeat/NovaEvacuate 2016-02-29 10:54:21.933786269 +0100
+++ b/heartbeat/NovaEvacuate 2016-02-29 13:29:27.000139496 +0100
@@ -177,17 +177,10 @@
esac
if [ $need_evacuate = 1 ]; then
- found=0
ocf_log notice "Initiating evacuation of $node"
- for known in $(fence_compute ${fence_options} -o list | tr -d ','); do
- if [ ${known} = ${node} ]; then
- found=1
- break
- fi
- done
-
- if [ $found = 0 ]; then
+ fence_compute ${fence_options} -o status -n ${node}
+ if [ $? != 0 ]; then
ocf_log info "Nova does not know about ${node}"
# Dont mark as no because perhaps nova is unavailable right now
continue

1778
SOURCES/bz1289107-saphana-mcos-support.patch

File diff suppressed because it is too large Load Diff

101
SOURCES/bz1296406-virtualdomain-migration_speed-migration_downtime.patch

@ -0,0 +1,101 @@ @@ -0,0 +1,101 @@
diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
--- a/heartbeat/VirtualDomain 2016-03-04 14:41:22.001333979 +0100
+++ b/heartbeat/VirtualDomain 2016-03-04 14:42:34.516395470 +0100
@@ -17,12 +17,16 @@
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
# Defaults
+OCF_RESKEY_migration_downtime_default=0
+OCF_RESKEY_migration_speed_default=0
OCF_RESKEY_force_stop_default=0
OCF_RESKEY_autoset_utilization_cpu_default="true"
OCF_RESKEY_autoset_utilization_hv_memory_default="true"
OCF_RESKEY_migrateport_default=$(( 49152 + $(ocf_maybe_random) % 64 ))
OCF_RESKEY_CRM_meta_timeout_default=90000
+: ${OCF_RESKEY_migration_downtime=${OCF_RESKEY_migration_downtime_default}}
+: ${OCF_RESKEY_migration_speed=${OCF_RESKEY_migration_speed_default}}
: ${OCF_RESKEY_force_stop=${OCF_RESKEY_force_stop_default}}
: ${OCF_RESKEY_autoset_utilization_cpu=${OCF_RESKEY_autoset_utilization_cpu_default}}
: ${OCF_RESKEY_autoset_utilization_hv_memory=${OCF_RESKEY_autoset_utilization_hv_memory_default}}
@@ -96,6 +100,22 @@
<content type="string" default="" />
</parameter>
+<parameter name="migration_downtime" unique="0" required="0">
+<longdesc lang="en">
+Define max downtime during live migration in milliseconds
+</longdesc>
+<shortdesc lang="en">Live migration downtime</shortdesc>
+<content type="integer" default="${OCF_RESKEY_migration_downtime_default}" />
+</parameter>
+
+<parameter name="migration_speed" unique="0" required="0">
+<longdesc lang="en">
+Define live migration speed per resource in MiB/s
+</longdesc>
+<shortdesc lang="en">Live migration speed</shortdesc>
+<content type="integer" default="${OCF_RESKEY_migration_speed_default}" />
+</parameter>
+
<parameter name="migration_network_suffix" unique="0" required="0">
<longdesc lang="en">
Use a dedicated migration network. The migration URI is composed by
@@ -562,6 +582,7 @@
local transport_suffix
local migrateuri
local migrate_opts
+ local migrate_pid
target_node="$OCF_RESKEY_CRM_meta_migrate_target"
@@ -586,9 +607,28 @@
# Scared of that sed expression? So am I. :-)
remoteuri=$(echo ${OCF_RESKEY_hypervisor} | sed -e "s,\(.*\)://[^/:]*\(:\?[0-9]*\)/\(.*\),\1${transport_suffix}://${target_node}\2/\3,")
+ # Live migration speed limit
+ if [ ${OCF_RESKEY_migration_speed} -ne 0 ]; then
+ ocf_log info "$DOMAIN_NAME: Setting live migration speed limit for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed})."
+ virsh ${VIRSH_OPTIONS} migrate-setspeed $DOMAIN_NAME ${OCF_RESKEY_migration_speed}
+ fi
+
# OK, we know where to connect to. Now do the actual migration.
- ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
- virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri
+ ocf_log info "$DOMAIN_NAME: Starting live migration to ${target_node} (using: virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri)."
+ virsh ${VIRSH_OPTIONS} migrate --live $migrate_opts $DOMAIN_NAME $remoteuri $migrateuri &
+
+ migrate_pid=${!}
+
+ # Live migration downtime interval
+ # Note: You can set downtime only while live migration is in progress
+ if [ ${OCF_RESKEY_migration_downtime} -ne 0 ]; then
+ sleep 2
+ ocf_log info "$DOMAIN_NAME: Setting live migration downtime for $DOMAIN_NAME (using: virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime})."
+ virsh ${VIRSH_OPTIONS} migrate-setmaxdowntime $DOMAIN_NAME ${OCF_RESKEY_migration_downtime}
+ fi
+
+ wait ${migrate_pid}
+
rc=$?
if [ $rc -ne 0 ]; then
ocf_exit_reason "$DOMAIN_NAME: live migration to ${target_node} failed: $rc"
@@ -671,6 +711,18 @@
return $OCF_ERR_INSTALLED
fi
fi
+
+ # Check if migration_speed is a decimal value
+ if ! ocf_is_decimal ${OCF_RESKEY_migration_speed}; then
+ ocf_exit_reason "migration_speed has to be a decimal value"
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Check if migration_downtime is a decimal value
+ if ! ocf_is_decimal ${OCF_RESKEY_migration_downtime}; then
+ ocf_exit_reason "migration_downtime has to be a decimal value"
+ return $OCF_ERR_CONFIGURED
+ fi
}
if [ $# -ne 1 ]; then

33
SOURCES/bz1299404-galera-custom-host-port.patch

@ -0,0 +1,33 @@ @@ -0,0 +1,33 @@
From cbccff5ed9b1fc5641063f05ad531f897d366fa4 Mon Sep 17 00:00:00 2001
From: Mike Bayer <mike_mp@zzzcomputing.com>
Date: Tue, 15 Sep 2015 14:54:05 -0400
Subject: [PATCH] galera: add support for MYSQL_HOST and MYSQL_PORT from
clustercheck

---
heartbeat/galera | 12 ++++++++++++
1 file changed, 12 insertions(+)

diff --git a/heartbeat/galera b/heartbeat/galera
index 920507b..1a1a4ce 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -704,6 +704,18 @@ if [ -n "${OCF_RESKEY_check_passwd}" ]; then
MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK --password=${OCF_RESKEY_check_passwd}"
fi
+# This value is automatically sourced from /etc/sysconfig/clustercheck if available
+if [ -n "${MYSQL_HOST}" ]; then
+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -h ${MYSQL_HOST}"
+fi
+
+# This value is automatically sourced from /etc/sysconfig/clustercheck if available
+if [ -n "${MYSQL_PORT}" ]; then
+ MYSQL_OPTIONS_CHECK="$MYSQL_OPTIONS_CHECK -P ${MYSQL_PORT}"
+fi
+
+
+
# What kind of method was invoked?
case "$1" in
start) galera_start;;

35
SOURCES/bz1301189-virtualdomain-fix-locale.patch

@ -0,0 +1,35 @@ @@ -0,0 +1,35 @@
diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
--- a/heartbeat/VirtualDomain 2016-01-25 12:05:30.437008638 +0100
+++ b/heartbeat/VirtualDomain 2016-01-25 12:25:06.850256377 +0100
@@ -282,12 +282,13 @@
status="no state"
while [ "$status" = "no state" ]; do
try=$(($try + 1 ))
- status=$(virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
+ status=$(LANG=C virsh $VIRSH_OPTIONS domstate $DOMAIN_NAME 2>&1 | tr 'A-Z' 'a-z')
case "$status" in
- *"error:"*"domain not found"*|"shut off")
+ *"error:"*"domain not found"|*"error:"*"failed to get domain"*|"shut off")
# shut off: domain is defined, but not started, will not happen if
# domain is created but not defined
- # Domain not found: domain is not defined and thus not started
+ # "Domain not found" or "failed to get domain": domain is not defined
+ # and thus not started
ocf_log debug "Virtual domain $DOMAIN_NAME is not running: $(echo $status | sed s/error://g)"
rc=$OCF_NOT_RUNNING
;;
@@ -415,11 +416,12 @@
local status=0
ocf_log info "Issuing forced shutdown (destroy) request for domain ${DOMAIN_NAME}."
- out=$(virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z')
+ out=$(LANG=C virsh $VIRSH_OPTIONS destroy ${DOMAIN_NAME} 2>&1|tr 'A-Z' 'a-z')
ex=$?
echo >&2 "$out"
case $ex$out in
- *"error:"*"domain is not running"*|*"error:"*"domain not found"*)
+ *"error:"*"domain is not running"*|*"error:"*"domain not found"*|\
+ *"error:"*"failed to get domain"*)
: ;; # unexpected path to the intended outcome, all is well
[!0]*)
ocf_exit_reason "forced stop failed"

207
SOURCES/bz1303037-1-portblock.patch

@ -0,0 +1,207 @@ @@ -0,0 +1,207 @@
diff -uNr a/heartbeat/portblock b/heartbeat/portblock
--- a/heartbeat/portblock 2013-06-18 15:22:27.000000000 +0200
+++ b/heartbeat/portblock 2016-02-29 13:51:22.205860012 +0100
@@ -24,8 +24,10 @@
# Defaults
OCF_RESKEY_ip_default="0.0.0.0/0"
+OCF_RESKEY_reset_local_on_unblock_stop_default="false"
: ${OCF_RESKEY_ip=${OCF_RESKEY_ip_default}}
+: ${OCF_RESKEY_reset_local_on_unblock_stop=${OCF_RESKEY_reset_local_on_unblock_stop_default}}
#######################################################################
CMD=`basename $0`
TICKLETCP=$HA_BIN/tickle_tcp
@@ -37,16 +39,22 @@
$CMD is used to temporarily block ports using iptables.
- It can be used to turn off a port before bringing
+ It can be used to blackhole a port before bringing
up an IP address, and enable it after a service is started.
- To do that for samba, the following resource line can be used:
+ To do that for samba, the following can be used:
- $CMD::tcp::137,138::block \\
- 10.10.10.20 \\
- nmbd smbd \\
- $CMD::tcp::137,138::unblock
+ crm configure <<EOF
+ primitive portblock-samba ocf:heartbeat:portblock \\
+ params protocol=tcp portno=137,138 action=block
+ primitive portunblock-samba ocf:heartbeat:portblock \\
+ params protocol=tcp portno=137,138 action=unblock
+ primitive samba-vip ocf:heartbeat:IPaddr2 \\
+ params ip=10.10.10.20
+ group g-samba \\
+ portblock-samba samba-vip nmbd smbd portunblock-samba
+ EOF
- This will do the follwing things:
+ This will do the following things:
- DROP all incoming packets for TCP ports 137 and 138
- Bring up the IP alias 10.10.10.20
@@ -54,13 +62,16 @@
- Re-enable TCP ports 137 and 138
(enable normal firewall rules on those ports)
- This prevents clients from getting ICMP port unreachable
- if they try to reconnect to the service after the alias is
- enabled but before nmbd and smbd are running. These packets
- will cause some clients to give up attempting to reconnect to
- the server.
+ This prevents clients from getting TCP RST if they try to reconnect
+ to the service after the alias is enabled but before nmbd and smbd
+ are running. These packets will cause some clients to give up
+ attempting to reconnect to the server.
+
+ Attempts to connect to UDP and other non-TCP ports which have nothing
+ listening can result in ICMP port unreachable responses, which can
+ have the same undesirable affect on some clients.
- NOTE: iptables is linux-specific...
+ NOTE: iptables is Linux-specific.
An additional feature in the portblock RA is the tickle ACK function
enabled by specifying the tickle_dir parameter. The tickle ACK
@@ -138,7 +149,7 @@
The port number used to be blocked/unblocked.
</longdesc>
<shortdesc lang="en">portno</shortdesc>
-<content type="integer" default="" />
+<content type="string" default="" />
</parameter>
<parameter name="action" unique="0" required="1">
@@ -149,6 +160,26 @@
<content type="string" default="" />
</parameter>
+<parameter name="reset_local_on_unblock_stop" unique="0" required="0">
+<content type="boolean" default="${OCF_RESKEY_reset_local_on_unblock_stop_default}" />
+<shortdesc lang="en">(try to) reset server TCP sessions when unblock stops</shortdesc>
+<longdesc>
+If for some reason the long lived server side TCP sessions won't be cleaned up
+by a reconfiguration/flush/stop of whatever services this portblock protects,
+they would linger in the connection table, even after the IP is gone
+and services have been switched over to another node.
+
+An example would be the default NFS kernel server.
+
+These "known" connections may seriously confuse and delay a later switchback.
+
+Enabling this option will cause this agent to try to get rid of these connections
+by injecting a temporary iptables rule to TCP-reset outgoing packets from the
+blocked ports, and additionally tickle them locally,
+just before it starts to DROP incoming packets on "unblock stop".
+</longdesc>
+</parameter>
+
<parameter name="ip" unique="0" required="0">
<longdesc lang="en">
The IP address used to be blocked/unblocked.
@@ -233,12 +264,34 @@
fi
}
-run_tickle_tcp()
+tickle_remote()
{
[ -z "$OCF_RESKEY_tickle_dir" ] && return
echo 1 > /proc/sys/net/ipv4/tcp_tw_recycle
f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip
- [ -f $f ] && cat $f | $TICKLETCP -n 3
+ [ -r $f ] || return
+ $TICKLETCP -n 3 < $f
+}
+
+tickle_local()
+{
+ [ -z "$OCF_RESKEY_tickle_dir" ] && return
+ f=$OCF_RESKEY_tickle_dir/$OCF_RESKEY_ip
+ [ -r $f ] || return
+ # swap "local" and "remote" address,
+ # so we tickle ourselves.
+ # We set up a REJECT with tcp-reset before we do so, so we get rid of
+ # the no longer wanted potentially long lived "ESTABLISHED" connection
+ # entries on the IP we are going to delete in a sec. These would get in
+ # the way if we switch-over and then switch-back in quick succession.
+ local i
+ awk '{ print $2, $1; }' $f | $TICKLETCP
+ netstat -tn | grep -Fw $OCF_RESKEY_ip || return
+ for i in 0.1 0.5 1 2 4 ; do
+ sleep $i
+ awk '{ print $2, $1; }' $f | $TICKLETCP
+ netstat -tn | grep -Fw $OCF_RESKEY_ip || break
+ done
}
SayActive()
@@ -304,15 +357,30 @@
#IptablesBLOCK {udp|tcp} portno,portno ip
IptablesBLOCK()
{
+ local rc=0
+ local try_reset=false
+ if [ "$1/$4/$__OCF_ACTION" = tcp/unblock/stop ] &&
+ ocf_is_true $reset_local_on_unblock_stop
+ then
+ try_reset=true
+ fi
if
chain_isactive "$1" "$2" "$3"
then
: OK -- chain already active
else
+ if $try_reset ; then
+ $IPTABLES -I OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset
+ tickle_local
+ fi
$IPTABLES -I INPUT -p "$1" -d "$3" -m multiport --dports "$2" -j DROP
+ rc=$?
+ if $try_reset ; then
+ $IPTABLES -D OUTPUT -p "$1" -s "$3" -m multiport --sports "$2" -j REJECT --reject-with tcp-reset
+ fi
fi
- return $?
+ return $rc
}
#IptablesUNBLOCK {udp|tcp} portno,portno ip
@@ -338,7 +406,7 @@
unblock)
IptablesUNBLOCK "$@"
rc=$?
- run_tickle_tcp
+ tickle_remote
#ignore run_tickle_tcp exit code!
return $rc
;;
@@ -411,6 +479,17 @@
exit $OCF_ERR_CONFIGURED
;;
esac
+
+ if ocf_is_true $reset_local_on_unblock_stop; then
+ if [ $action != unblock ] ; then
+ ocf_log err "reset_local_on_unblock_stop is only relevant with action=unblock"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ if [ -z $OCF_RESKEY_tickle_dir ] ; then
+ ocf_log warn "reset_local_on_unblock_stop works best with tickle_dir enabled as well"
+ fi
+ fi
+
return $OCF_SUCCESS
}
@@ -451,6 +530,7 @@
portno=$OCF_RESKEY_portno
action=$OCF_RESKEY_action
ip=$OCF_RESKEY_ip
+reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop
case $1 in
start)

31
SOURCES/bz1303037-2-portblock.patch

@ -0,0 +1,31 @@ @@ -0,0 +1,31 @@
From 8ac05986ac7ef354456253edbd22cbb4a2d96e90 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 16 Sep 2016 10:19:38 +0200
Subject: [PATCH] portblock: create tickle_dir if it doesnt exist

---
heartbeat/portblock | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/heartbeat/portblock b/heartbeat/portblock
index c480954..c97488b 100755
--- a/heartbeat/portblock
+++ b/heartbeat/portblock
@@ -466,8 +466,7 @@ IptablesValidateAll()
exit $OCF_ERR_CONFIGURED
fi
if [ ! -d "$OCF_RESKEY_tickle_dir" ]; then
- ocf_log err "The tickle dir doesn't exist!"
- exit $OCF_ERR_INSTALLED
+ mkdir -p $OCF_RESKEY_tickle_dir
fi
fi
@@ -534,6 +533,7 @@ reset_local_on_unblock_stop=$OCF_RESKEY_reset_local_on_unblock_stop
case $1 in
start)
+ IptablesValidateAll
IptablesStart $protocol $portno $ip $action
;;

45
SOURCES/bz1303803-Backup-and-restore-rabbitmq-users-during-resource-re.patch

@ -0,0 +1,45 @@ @@ -0,0 +1,45 @@
From: Peter Lemenkov <lemenkov@redhat.com>
Date: Mon, 29 Feb 2016 12:46:50 +0100
Subject: [PATCH] Backup and restore rabbitmq users during resource restart

Signed-off-by: Peter Lemenkov <lemenkov@redhat.com>

diff --git a/heartbeat/rabbitmq-cluster b/heartbeat/rabbitmq-cluster
index cc45f09..4545495 100755
--- a/heartbeat/rabbitmq-cluster
+++ b/heartbeat/rabbitmq-cluster
@@ -289,7 +289,19 @@ rmq_start() {
rmq_stop
rmq_wipe_data
rmq_join_existing "$join_list"
- if [ $? -ne 0 ]; then
+ rc=$?
+
+ # Restore users (if any)
+ BaseDataDir=`dirname $RMQ_DATA_DIR`
+ if [ -f $BaseDataDir/users.erl ] ; then
+ rabbitmqctl eval "
+ {ok, [Users]} = file:consult(\"$BaseDataDir/users.erl\"),
+ lists:foreach(fun(X) -> mnesia:dirty_write(rabbit_user, X) end, Users).
+ "
+ rm -f $BaseDataDir/users.erl
+ fi
+
+ if [ $rc -ne 0 ]; then
ocf_log info "node failed to join even after reseting local data. Check SELINUX policy"
return $OCF_ERR_GENERIC
fi
@@ -299,6 +311,13 @@ rmq_start() {
}
rmq_stop() {
+ # Backup users
+ BaseDataDir=`dirname $RMQ_DATA_DIR`
+ rabbitmqctl eval "
+ Users = mnesia:dirty_select(rabbit_user, [{ {internal_user, '\\\$1', '_', '_'}, [{'/=', '\\\$1', <<\"guest\">>}], ['\\\$_'] } ]),
+ file:write_file(\"$BaseDataDir/users.erl\", io_lib:fwrite(\"~p.~n\", [Users])).
+ "
+
rmq_monitor
if [ $? -eq $OCF_NOT_RUNNING ]; then
return $OCF_SUCCESS

259
SOURCES/bz1305549-nova-compute-wait-nova-compute-unfence.patch

@ -0,0 +1,259 @@ @@ -0,0 +1,259 @@
diff -uNr a/heartbeat/nova-compute-wait b/heartbeat/nova-compute-wait
--- a/heartbeat/nova-compute-wait 2017-02-02 11:23:38.263510362 +0100
+++ b/heartbeat/nova-compute-wait 2017-02-02 11:28:27.181650906 +0100
@@ -1,30 +1,15 @@
#!/bin/sh
+# Copyright 2015 Red Hat, Inc.
#
+# Description: Manages compute daemons
#
-# nova-compute-wait agent manages compute daemons.
+# Authors: Andrew Beekhof
#
-# Copyright (c) 2015
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like. Any license provided herein, whether implied or
-# otherwise, applies only to this software file. Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# Support: openstack@lists.openstack.org
+# License: Apache Software License (ASL) 2.0
#
+
#######################################################################
# Initialization:
@@ -137,6 +122,8 @@
}
nova_start() {
+ build_unfence_overlay
+
state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
if [ "x$state" = x ]; then
: never been fenced
@@ -147,8 +134,8 @@
sleep ${OCF_RESKEY_evacuation_delay}
else
- ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
while [ "x$state" != "xno" ]; do
+ ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
state=$(attrd_updater -p -n evacuate -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
sleep 5
done
@@ -156,14 +143,22 @@
ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
sleep ${OCF_RESKEY_evacuation_delay}
fi
+
+ touch "$statefile"
+
return $OCF_SUCCESS
}
nova_stop() {
+ rm -f "$statefile"
return $OCF_SUCCESS
}
nova_monitor() {
+ if [ ! -f "$statefile" ]; then
+ return $OCF_NOT_RUNNING
+ fi
+
return $OCF_SUCCESS
}
@@ -171,17 +166,113 @@
return $OCF_SUCCESS
}
+# Assemble the fence_compute command-line options from the OCF_RESKEY_*
+# parameters and write a runtime systemd drop-in so that
+# openstack-nova-compute.service runs "fence_compute -o on" for ${NOVA_HOST}
+# as an ExecStartPost step. Exits with OCF_ERR_CONFIGURED on missing settings.
+build_unfence_overlay() {
+ fence_options=""
+ # No auth_url configured: borrow credentials from the first fence_compute stonith device listed for this host.
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
+ candidates=$(/usr/sbin/stonith_admin -l ${NOVA_HOST})
+ for candidate in ${candidates}; do
+ pcs stonith show $candidate | grep -q fence_compute
+ if [ $? = 0 ]; then
+ ocf_log info "Unfencing nova based on: $candidate"
+ fence_auth=$(pcs stonith show $candidate | grep Attributes: | sed -e s/Attributes:// -e s/-/_/g -e 's/[^ ]\+=/OCF_RESKEY_\0/g' -e s/passwd/password/g)
+ eval "export $fence_auth"
+ break
+ fi
+ done
+ fi
+
+ # Copied from NovaEvacuate
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
+ ocf_exit_reason "auth_url not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
+
+ if [ -z "${OCF_RESKEY_username}" ]; then
+ ocf_exit_reason "username not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fence_options="${fence_options} -l ${OCF_RESKEY_username}"
+
+ if [ -z "${OCF_RESKEY_password}" ]; then
+ ocf_exit_reason "password not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fence_options="${fence_options} -p ${OCF_RESKEY_password}"
+
+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then
+ ocf_exit_reason "tenant_name not configured"
+ exit $OCF_ERR_CONFIGURED
+ fi
+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
+
+ if [ -n "${OCF_RESKEY_domain}" ]; then
+ fence_options="${fence_options} -d ${OCF_RESKEY_domain}"
+ fi
+
+ if [ -n "${OCF_RESKEY_region_name}" ]; then
+ fence_options="${fence_options} \
+ --region-name ${OCF_RESKEY_region_name}"
+ fi
+
+ if [ -n "${OCF_RESKEY_insecure}" ]; then
+ if ocf_is_true "${OCF_RESKEY_insecure}"; then
+ fence_options="${fence_options} --insecure"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
+ fence_options="${fence_options} --no-shared-storage"
+ fi
+ fi
+
+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
+ case ${OCF_RESKEY_endpoint_type} in
+ adminURL|publicURL|internalURL)
+ ;;
+ *)
+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type}" \
+ "not valid. Use adminURL or publicURL or internalURL"
+ exit $OCF_ERR_CONFIGURED
+ ;;
+ esac
+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
+ fi
+ # The drop-in lives under /run, so it has to be regenerated on every start.
+ mkdir -p /run/systemd/system/openstack-nova-compute.service.d
+ cat<<EOF>/run/systemd/system/openstack-nova-compute.service.d/unfence-20.conf
+[Service]
+ExecStartPost=/sbin/fence_compute ${fence_options} -o on -n ${NOVA_HOST}
+EOF
+}
+
nova_validate() {
rc=$OCF_SUCCESS
check_binary crudini
check_binary nova-compute
+ check_binary fence_compute
if [ ! -f /etc/nova/nova.conf ]; then
ocf_exit_reason "/etc/nova/nova.conf not found"
exit $OCF_ERR_CONFIGURED
fi
+ # Is the state directory writable?
+ state_dir=$(dirname $statefile)
+ touch "$state_dir/$$"
+ if [ $? != 0 ]; then
+ ocf_exit_reason "Invalid state directory: $state_dir"
+ return $OCF_ERR_ARGS
+ fi
+ rm -f "$state_dir/$$"
+
NOVA_HOST=$(crudini --get /etc/nova/nova.conf DEFAULT host 2>/dev/null)
if [ $? = 1 ]; then
short_host=$(uname -n | awk -F. '{print $1}')
@@ -198,6 +289,8 @@
return $rc
}
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
+
: ${OCF_RESKEY_evacuation_delay=120}
case $__OCF_ACTION in
meta-data) meta_data
@@ -221,3 +314,4 @@
rc=$?
ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
exit $rc
+
diff -uNr a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
--- a/heartbeat/NovaEvacuate 2017-02-02 11:23:38.253510461 +0100
+++ b/heartbeat/NovaEvacuate 2017-02-02 11:28:49.262432371 +0100
@@ -1,30 +1,16 @@
#!/bin/sh
#
+# Copyright 2015 Red Hat, Inc.
#
-# NovaCompute agent manages compute daemons.
+# Description: Manages evacuation of nodes running nova-compute
#
-# Copyright (c) 2015
+# Authors: Andrew Beekhof
#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of version 2 of the GNU General Public License as
-# published by the Free Software Foundation.
-#
-# This program is distributed in the hope that it would be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
-#
-# Further, this software is distributed without any warranty that it is
-# free of the rightful claim of any third person regarding infringement
-# or the like. Any license provided herein, whether implied or
-# otherwise, applies only to this software file. Patent licenses, if
-# any, provided herein do not apply to combinations of this program with
-# other software, or any other product whatsoever.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write the Free Software Foundation,
-# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+# Support: openstack@lists.openstack.org
+# License: Apache Software License (ASL) 2.0
#
+
#######################################################################
# Initialization:
@@ -180,7 +166,7 @@
ocf_log notice "Initiating evacuation of $node"
fence_compute ${fence_options} -o status -n ${node}
- if [ $? != 0 ]; then
+ if [ $? = 1 ]; then
ocf_log info "Nova does not know about ${node}"
# Dont mark as no because perhaps nova is unavailable right now
continue

42
SOURCES/bz1305549-redis-notify-clients-of-master-being-demoted.patch

@ -0,0 +1,42 @@ @@ -0,0 +1,42 @@
From f1c2249ef5e8524ddb986f0df879d5f18e935da3 Mon Sep 17 00:00:00 2001
From: Oyvind Albrigtsen <oalbrigt@redhat.com>
Date: Fri, 20 Jan 2017 09:17:15 +0100
Subject: [PATCH] redis: use "CLIENT KILL type normal" to notify clients of
master being demoted

---
heartbeat/redis | 11 +++++++++++
1 file changed, 11 insertions(+)

diff --git a/heartbeat/redis b/heartbeat/redis
index 1ea0025..d08e57a 100755
--- a/heartbeat/redis
+++ b/heartbeat/redis
@@ -436,6 +436,11 @@ function demote() {
local master_host
local master_port
+ # client kill is only supported in Redis 2.8.12 or greater
+ version=$(redis_client -v | awk '{print $NF}')
+ ocf_version_cmp "$version" "2.8.11"
+ client_kill=$?
+
CHECK_SLAVE_STATE=1
monitor
status=$?
@@ -478,9 +483,15 @@ function demote() {
while true; do
# Wait infinite if replication is syncing
# Then start/demote operation timeout determines timeout
+ if [ "$client_kill" -eq 2 ]; then
+ redis_client CLIENT PAUSE 2000
+ fi
monitor
status=$?
if (( status == OCF_SUCCESS )); then
+ if [ "$client_kill" -eq 2 ]; then
+ redis_client CLIENT KILL type normal
+ fi
return $OCF_SUCCESS
fi

20
SOURCES/bz1307160-virtualdomain-fix-unnecessary-error-when-probing-nonexistent-domain.patch

@ -0,0 +1,20 @@ @@ -0,0 +1,20 @@
diff -uNr a/heartbeat/VirtualDomain b/heartbeat/VirtualDomain
--- a/heartbeat/VirtualDomain 2016-04-26 12:22:22.345053246 +0200
+++ b/heartbeat/VirtualDomain 2016-04-26 12:24:27.479535075 +0200
@@ -263,8 +263,6 @@
if [ -n "$emulator" ]; then
basename $emulator
- else
- ocf_log error "Unable to determine emulator for $DOMAIN_NAME"
fi
}
@@ -301,6 +299,7 @@
;;
# This can be expanded to check for additional emulators
*)
+ ocf_log error "Unable to determine emulator for $DOMAIN_NAME"
;;
esac

136
SOURCES/bz1316130-systemd-drop-in-clvmd-LVM.patch

@ -0,0 +1,136 @@ @@ -0,0 +1,136 @@
diff -uNr a/configure.ac b/configure.ac
--- a/configure.ac 2017-05-03 10:00:54.396040173 +0200
+++ b/configure.ac 2017-05-03 10:07:28.969236697 +0200
@@ -65,6 +65,21 @@
AM_CONDITIONAL(OCFT_FEDORA_CASES, test "x$OCFT_TEST_CASES" = "xfedora" )
AM_CONDITIONAL(OCFT_DEFAULT_CASES, test "x$OCFT_TEST_CASES" = "xdefault" )
+AC_ARG_WITH([systemdsystemunitdir],
+ [AS_HELP_STRING([--with-systemdsystemunitdir=DIR], [Directory for systemd service files])],,
+ [with_systemdsystemunitdir=auto])
+AS_IF([test "x$with_systemdsystemunitdir" = "xyes" -o "x$with_systemdsystemunitdir" = "xauto"], [
+ def_systemdsystemunitdir=$($PKGCONFIG --variable=systemdsystemunitdir systemd)
+
+ AS_IF([test "x$def_systemdsystemunitdir" = "x"],
+ [AS_IF([test "x$with_systemdsystemunitdir" = "xyes"],
+ [AC_MSG_ERROR([systemd support requested but pkg-config unable to query systemd package])])
+ with_systemdsystemunitdir=no],
+ [with_systemdsystemunitdir="$def_systemdsystemunitdir"])])
+AS_IF([test "x$with_systemdsystemunitdir" != "xno"],
+ [AC_SUBST([systemdsystemunitdir], [$with_systemdsystemunitdir])])
+AM_CONDITIONAL([HAVE_SYSTEMD], [test "x$with_systemdsystemunitdir" != "xno"])
+
dnl
dnl AM_INIT_AUTOMAKE([1.11.1 foreign dist-bzip2 dist-xz])
dnl
@@ -857,6 +872,7 @@
heartbeat/ocf-directories \
heartbeat/ocf-shellfuncs \
heartbeat/shellfuncs \
+systemd/Makefile \
tools/Makefile \
tools/ocf-tester \
tools/ocft/Makefile \
diff -uNr a/heartbeat/clvm b/heartbeat/clvm
--- a/heartbeat/clvm 2017-05-03 10:00:54.560038569 +0200
+++ b/heartbeat/clvm 2017-05-03 10:01:13.309855171 +0200
@@ -353,6 +353,18 @@
return $?
fi
+ # systemd drop-in to stop process before storage services during
+ # shutdown/reboot
+ if ps -p 1 | grep -q systemd ; then
+ systemdrundir="/run/systemd/system/resource-agents-deps.target.d"
+ mkdir "$systemdrundir"
+ cat > "$systemdrundir/99-clvmd.conf" <<EOF
+[Unit]
+After=blk-availability.service
+EOF
+ systemctl daemon-reload
+ fi
+
clvmd_status
if [ $? -eq $OCF_SUCCESS ]; then
ocf_log debug "$DAEMON already started"
diff -uNr a/heartbeat/LVM b/heartbeat/LVM
--- a/heartbeat/LVM 2017-05-03 10:00:54.558038589 +0200
+++ b/heartbeat/LVM 2017-05-03 10:01:13.309855171 +0200
@@ -452,6 +452,18 @@
local vg=$1
local clvmd=0
+ # systemd drop-in to stop process before storage services during
+ # shutdown/reboot
+ if ps -p 1 | grep -q systemd ; then
+ systemdrundir="/run/systemd/system/resource-agents-deps.target.d"
+ mkdir "$systemdrundir"
+ cat > "$systemdrundir/99-LVM.conf" <<EOF
+[Unit]
+After=blk-availability.service
+EOF
+ systemctl daemon-reload
+ fi
+
# TODO: This MUST run vgimport as well
ocf_log info "Activating volume group $vg"
if [ "$LVM_MAJOR" -eq "1" ]; then
diff -uNr a/Makefile.am b/Makefile.am
--- a/Makefile.am 2013-06-18 15:22:27.000000000 +0200
+++ b/Makefile.am 2017-05-03 10:01:13.308855181 +0200
@@ -37,7 +37,7 @@
endif
if BUILD_LINUX_HA
-SUBDIRS += include heartbeat tools ldirectord doc
+SUBDIRS += include heartbeat tools ldirectord doc systemd
LINUX_HA = without
else
LINUX_HA = with
diff -uNr a/resource-agents.spec.in b/resource-agents.spec.in
--- a/resource-agents.spec.in 2017-05-03 10:00:54.384040291 +0200
+++ b/resource-agents.spec.in 2017-05-03 10:01:13.309855171 +0200
@@ -231,6 +231,10 @@
/usr/lib/ocf/resource.d/redhat
%endif
+%if %{defined _unitdir}
+%{_unitdir}/resource-agents-deps.target
+%endif
+
%dir %{_datadir}/%{name}
%dir %{_datadir}/%{name}/ocft
%{_datadir}/%{name}/ocft/configs
diff -uNr a/systemd/Makefile.am b/systemd/Makefile.am
--- a/systemd/Makefile.am 1970-01-01 01:00:00.000000000 +0100
+++ b/systemd/Makefile.am 2017-05-03 10:01:13.311855152 +0200
@@ -0,0 +1,23 @@
+#
+# Copyright (C) 2017 Oyvind Albrigtsen
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+
+MAINTAINERCLEANFILES = Makefile.in
+
+if HAVE_SYSTEMD
+dist_systemdsystemunit_DATA = resource-agents-deps.target
+endif
diff -uNr a/systemd/resource-agents-deps.target b/systemd/resource-agents-deps.target
--- a/systemd/resource-agents-deps.target 1970-01-01 01:00:00.000000000 +0100
+++ b/systemd/resource-agents-deps.target 2017-05-03 10:01:13.311855152 +0200
@@ -0,0 +1,2 @@
+[Unit]
+Description=resource-agents dependencies

Some files were not shown because too many files have changed in this diff Show More

Loading…
Cancel
Save