|
|
|
|
From feffc766c48a1010c1bf4f8b1db74795d06dbd50 Mon Sep 17 00:00:00 2001
|
|
|
|
|
From: David Vossel <dvossel@redhat.com>
|
|
|
|
|
Date: Mon, 25 Aug 2014 14:57:09 -0500
|
|
|
|
|
Subject: [PATCH 2/4] ethmonitor updates
|
|
|
|
|
|
|
|
|
|
---
|
|
|
|
|
heartbeat/ethmonitor | 290 +++++++++++++++++++++++++++++++++------------------
|
|
|
|
|
1 file changed, 187 insertions(+), 103 deletions(-)
|
|
|
|
|
|
|
|
|
|
diff --git a/heartbeat/ethmonitor b/heartbeat/ethmonitor
|
|
|
|
|
index b85d7fc..a447391 100755
|
|
|
|
|
--- a/heartbeat/ethmonitor
|
|
|
|
|
+++ b/heartbeat/ethmonitor
|
|
|
|
|
@@ -1,14 +1,14 @@
|
|
|
|
|
#!/bin/sh
|
|
|
|
|
#
|
|
|
|
|
-# OCF Resource Agent compliant script.
|
|
|
|
|
-# Monitor the vitality of a local network interface.
|
|
|
|
|
+# OCF Resource Agent compliant script.
|
|
|
|
|
+# Monitor the vitality of a local network interface.
|
|
|
|
|
#
|
|
|
|
|
# Based on the work by Robert Euhus and Lars Marowsky-Brée.
|
|
|
|
|
#
|
|
|
|
|
# Transfered from Ipaddr2 into ethmonitor by Alexander Krauth
|
|
|
|
|
#
|
|
|
|
|
# Copyright (c) 2011 Robert Euhus, Alexander Krauth, Lars Marowsky-Brée
|
|
|
|
|
-# All Rights Reserved.
|
|
|
|
|
+# All Rights Reserved.
|
|
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of version 2 of the GNU General Public License as
|
|
|
|
|
@@ -29,12 +29,12 @@
|
|
|
|
|
# along with this program; if not, write the Free Software Foundation,
|
|
|
|
|
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
|
|
|
|
|
#
|
|
|
|
|
-# OCF parameters are as below
|
|
|
|
|
+# OCF parameters are as below
|
|
|
|
|
#
|
|
|
|
|
# OCF_RESKEY_interface
|
|
|
|
|
# OCF_RESKEY_multiplicator
|
|
|
|
|
# OCF_RESKEY_name
|
|
|
|
|
-# OCF_RESKEY_repeat_count
|
|
|
|
|
+# OCF_RESKEY_repeat_count
|
|
|
|
|
# OCF_RESKEY_repeat_interval
|
|
|
|
|
# OCF_RESKEY_pktcnt_timeout
|
|
|
|
|
# OCF_RESKEY_arping_count
|
|
|
|
|
@@ -70,10 +70,13 @@ The resource configuration requires a monitor operation, because the monitor doe
|
|
|
|
|
In addition to the resource configuration, you need to configure some location constraints, based on a CIB attribute value.
|
|
|
|
|
The name of the attribute value is configured in the 'name' option of this RA.
|
|
|
|
|
|
|
|
|
|
-Example constraint configuration:
|
|
|
|
|
+Example constraint configuration using crmsh
|
|
|
|
|
location loc_connected_node my_resource_grp \
|
|
|
|
|
rule $id="rule_loc_connected_node" -INF: ethmonitor eq 0
|
|
|
|
|
|
|
|
|
|
+Example constraint configuration using pcs. Only allow 'my_resource' to run on nodes where eth0 ethernet device is available.
|
|
|
|
|
+pcs constraint location my_resource rule score=-INFINITY ethmonitor-eth0 ne 1
|
|
|
|
|
+
|
|
|
|
|
The ethmonitor works in 3 different modes to test the interface vitality.
|
|
|
|
|
1. call ip to see if the link status is up (if link is down -> error)
|
|
|
|
|
2. call ip and watch the RX counter (if packages come around in a certain time -> success)
|
|
|
|
|
@@ -157,14 +160,30 @@ Maximum number of IPs from ARP cache list to check for ARP REQUEST (arping) answ
|
|
|
|
|
<content type="integer" default="5"/>
|
|
|
|
|
</parameter>
|
|
|
|
|
|
|
|
|
|
+<parameter name="infiniband_device">
|
|
|
|
|
+<longdesc lang="en">
|
|
|
|
|
+For interfaces that are infiniband devices.
|
|
|
|
|
+</longdesc>
|
|
|
|
|
+<shortdesc lang="en">infiniband device</shortdesc>
|
|
|
|
|
+<content type="string" />
|
|
|
|
|
+</parameter>
|
|
|
|
|
+
|
|
|
|
|
+<parameter name="infiniband_port">
|
|
|
|
|
+<longdesc lang="en">
|
|
|
|
|
+For infiniband devices, this is the port to monitor.
|
|
|
|
|
+</longdesc>
|
|
|
|
|
+<shortdesc lang="en">infiniband port</shortdesc>
|
|
|
|
|
+<content type="integer" />
|
|
|
|
|
+</parameter>
|
|
|
|
|
+
|
|
|
|
|
</parameters>
|
|
|
|
|
<actions>
|
|
|
|
|
-<action name="start" timeout="20s" />
|
|
|
|
|
-<action name="stop" timeout="20s" />
|
|
|
|
|
-<action name="status" depth="0" timeout="20s" interval="10s" />
|
|
|
|
|
-<action name="monitor" depth="0" timeout="20s" interval="10s" />
|
|
|
|
|
-<action name="meta-data" timeout="5s" />
|
|
|
|
|
-<action name="validate-all" timeout="20s" />
|
|
|
|
|
+<action name="start" timeout="60s" />
|
|
|
|
|
+<action name="stop" timeout="20s" />
|
|
|
|
|
+<action name="status" depth="0" timeout="60s" interval="10s" />
|
|
|
|
|
+<action name="monitor" depth="0" timeout="60s" interval="10s" />
|
|
|
|
|
+<action name="meta-data" timeout="5s" />
|
|
|
|
|
+<action name="validate-all" timeout="20s" />
|
|
|
|
|
</actions>
|
|
|
|
|
</resource-agent>
|
|
|
|
|
END
|
|
|
|
|
@@ -173,7 +192,7 @@ END
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
-# Return true, if the interface exists
|
|
|
|
|
+# Return true, if the interface exists
|
|
|
|
|
#
|
|
|
|
|
is_interface() {
|
|
|
|
|
#
|
|
|
|
|
@@ -181,14 +200,25 @@ is_interface() {
|
|
|
|
|
#
|
|
|
|
|
local iface=`$IP2UTIL -o -f inet addr show | grep " $1 " \
|
|
|
|
|
| cut -d ' ' -f2 | sort -u | grep -v '^ipsec[0-9][0-9]*$'`
|
|
|
|
|
- [ "$iface" != "" ]
|
|
|
|
|
+ [ "$iface" != "" ]
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+infiniband_status()
+{
+	# Query ibstatus for the configured device, narrowed to "device:port" when a port is configured.
+	local device="$OCF_RESKEY_infiniband_device"
+	if [ -n "$OCF_RESKEY_infiniband_port" ]; then
+		device="${OCF_RESKEY_infiniband_device}:${OCF_RESKEY_infiniband_port}"
+	fi
+	# Return status is grep's: 0 only if the output contains ACTIVE. Quoted so the name stays one argument.
+	ibstatus "${device}" | grep -q ACTIVE
}
|
|
|
|
|
|
|
|
|
|
if_init() {
|
|
|
|
|
local rc
|
|
|
|
|
|
|
|
|
|
if [ X"$OCF_RESKEY_interface" = "X" ]; then
|
|
|
|
|
- ocf_log err "Interface name (the interface parameter) is mandatory"
|
|
|
|
|
+ ocf_exit_reason "Interface name (the interface parameter) is mandatory"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
@@ -196,60 +226,67 @@ if_init() {
|
|
|
|
|
|
|
|
|
|
if is_interface $NIC
|
|
|
|
|
then
|
|
|
|
|
- case "$NIC" in
|
|
|
|
|
- *:*) ocf_log err "Do not specify a virtual interface : $OCF_RESKEY_interface"
|
|
|
|
|
- exit $OCF_ERR_CONFIGURED;;
|
|
|
|
|
- *) ;;
|
|
|
|
|
- esac
|
|
|
|
|
+ case "$NIC" in
|
|
|
|
|
+ *:*) ocf_exit_reason "Do not specify a virtual interface : $OCF_RESKEY_interface"
|
|
|
|
|
+ exit $OCF_ERR_CONFIGURED;;
|
|
|
|
|
+ *) ;;
|
|
|
|
|
+ esac
|
|
|
|
|
else
|
|
|
|
|
- case $__OCF_ACTION in
|
|
|
|
|
- validate-all) ocf_log err "Interface $NIC does not exist"
|
|
|
|
|
- exit $OCF_ERR_CONFIGURED;;
|
|
|
|
|
- *) ocf_log warn "Interface $NIC does not exist"
|
|
|
|
|
- ## It might be a bond interface which is temporarily not available, therefore we want to continue here
|
|
|
|
|
- ;;
|
|
|
|
|
- esac
|
|
|
|
|
+ case $__OCF_ACTION in
|
|
|
|
|
+ validate-all)
|
|
|
|
|
+ ocf_exit_reason "Interface $NIC does not exist"
|
|
|
|
|
+ exit $OCF_ERR_CONFIGURED;;
|
|
|
|
|
+ *)
|
|
|
|
|
+ ## It might be a bond interface which is temporarily not available, therefore we want to continue here
|
|
|
|
|
+ ocf_log warn "Interface $NIC does not exist"
|
|
|
|
|
+ ;;
|
|
|
|
|
+ esac
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
: ${OCF_RESKEY_multiplier:="1"}
|
|
|
|
|
if ! ocf_is_decimal "$OCF_RESKEY_multiplier"; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]"
|
|
|
|
|
+ ocf_exit_reason "Invalid OCF_RESKEY_multiplier [$OCF_RESKEY_multiplier]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
|
|
|
|
|
ATTRNAME=${OCF_RESKEY_name:-"ethmonitor-$NIC"}
|
|
|
|
|
|
|
|
|
|
- REP_COUNT=${OCF_RESKEY_repeat_count:-5}
|
|
|
|
|
+ REP_COUNT=${OCF_RESKEY_repeat_count:-5}
|
|
|
|
|
if ! ocf_is_decimal "$REP_COUNT" -o [ $REP_COUNT -lt 1 ]; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]"
|
|
|
|
|
+ ocf_exit_reason "Invalid OCF_RESKEY_repeat_count [$REP_COUNT]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
- fi
|
|
|
|
|
+ fi
|
|
|
|
|
REP_INTERVAL_S=${OCF_RESKEY_repeat_interval:-10}
|
|
|
|
|
if ! ocf_is_decimal "$REP_INTERVAL_S"; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]"
|
|
|
|
|
+ ocf_exit_reason "Invalid OCF_RESKEY_repeat_interval [$REP_INTERVAL_S]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
: ${OCF_RESKEY_pktcnt_timeout:="5"}
|
|
|
|
|
if ! ocf_is_decimal "$OCF_RESKEY_pktcnt_timeout"; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]"
|
|
|
|
|
+ ocf_exit_reason "Invalid OCF_RESKEY_pktcnt_timeout [$OCF_RESKEY_pktcnt_timeout]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
: ${OCF_RESKEY_arping_count:="1"}
|
|
|
|
|
if ! ocf_is_decimal "$OCF_RESKEY_arping_count"; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]"
|
|
|
|
|
+ ocf_exit_reason "Invalid OCF_RESKEY_arping_count [$OCF_RESKEY_arping_count]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
: ${OCF_RESKEY_arping_timeout:="1"}
|
|
|
|
|
if ! ocf_is_decimal "$OCF_RESKEY_arping_timeout"; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_count]"
|
|
|
|
|
+	ocf_exit_reason "Invalid OCF_RESKEY_arping_timeout [$OCF_RESKEY_arping_timeout]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
: ${OCF_RESKEY_arping_cache_entries:="5"}
|
|
|
|
|
if ! ocf_is_decimal "$OCF_RESKEY_arping_cache_entries"; then
|
|
|
|
|
- ocf_log err "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]"
|
|
|
|
|
+ ocf_exit_reason "Invalid OCF_RESKEY_arping_cache_entries [$OCF_RESKEY_arping_cache_entries]"
|
|
|
|
|
exit $OCF_ERR_CONFIGURED
|
|
|
|
|
fi
|
|
|
|
|
- return $OCF_SUCCESS
|
|
|
|
|
+
|
|
|
|
|
+ if [ -n "$OCF_RESKEY_infiniband_device" ]; then
|
|
|
|
|
+ #ibstatus is required if an infiniband_device is provided
|
|
|
|
|
+ check_binary ibstatus
|
|
|
|
|
+ fi
|
|
|
|
|
+ return $OCF_SUCCESS
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
# get the link status on $NIC
|
|
|
|
|
@@ -277,7 +314,7 @@ watch_pkt_counter () {
|
|
|
|
|
for n in `seq $(( $OCF_RESKEY_pktcnt_timeout * 10 ))`; do
|
|
|
|
|
sleep 0.1
|
|
|
|
|
RX_PACKETS_NEW="`get_rx_packets`"
|
|
|
|
|
- ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW"
|
|
|
|
|
+ ocf_log debug "RX_PACKETS_OLD: $RX_PACKETS_OLD RX_PACKETS_NEW: $RX_PACKETS_NEW"
|
|
|
|
|
if [ "$RX_PACKETS_OLD" -ne "$RX_PACKETS_NEW" ]; then
|
|
|
|
|
ocf_log debug "we received some packets."
|
|
|
|
|
return 0
|
|
|
|
|
@@ -308,7 +345,7 @@ do_arping () {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#
|
|
|
|
|
-# Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level
|
|
|
|
|
+# Check the interface depending on the level given as parameter: $OCF_RESKEY_check_level
|
|
|
|
|
#
|
|
|
|
|
# 09: check for nonempty ARP cache
|
|
|
|
|
# 10: watch for packet counter changes
|
|
|
|
|
@@ -322,21 +359,47 @@ do_arping () {
|
|
|
|
|
# the tests for higher check levels are run.
|
|
|
|
|
#
|
|
|
|
|
if_check () {
|
|
|
|
|
+ local arp_list
|
|
|
|
|
# always check link status first
|
|
|
|
|
link_status="`get_link_status`"
|
|
|
|
|
ocf_log debug "link_status: $link_status (1=up, 0=down)"
|
|
|
|
|
- [ $link_status -eq 0 ] && return $OCF_NOT_RUNNING
|
|
|
|
|
+
|
|
|
|
|
+ if [ $link_status -eq 0 ]; then
|
|
|
|
|
+ ocf_log notice "link_status: DOWN"
|
|
|
|
|
+ return $OCF_NOT_RUNNING
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ # if this is an infiniband device, try ibstatus script
|
|
|
|
|
+ if [ -n "$OCF_RESKEY_infiniband_device" ]; then
|
|
|
|
|
+ if infiniband_status; then
|
|
|
|
|
+ return $OCF_SUCCESS
|
|
|
|
|
+ fi
|
|
|
|
|
+ ocf_log info "Infiniband device $OCF_RESKEY_infiniband_device is not available, check ibstatus for more information"
|
|
|
|
|
+ return $OCF_NOT_RUNNING
|
|
|
|
|
+ fi
|
|
|
|
|
|
|
|
|
|
# watch for packet counter changes
|
|
|
|
|
- ocf_log debug "watch for packet counter changes"
|
|
|
|
|
- watch_pkt_counter && return $OCF_SUCCESS
|
|
|
|
|
+ ocf_log debug "watch for packet counter changes"
|
|
|
|
|
+ watch_pkt_counter
|
|
|
|
|
+ if [ $? -eq 0 ]; then
|
|
|
|
|
+ return $OCF_SUCCESS
|
|
|
|
|
+ else
|
|
|
|
|
+ ocf_log debug "No packets received during packet watch timeout"
|
|
|
|
|
+ fi
|
|
|
|
|
|
|
|
|
|
# check arping ARP cache entries
|
|
|
|
|
- ocf_log debug "check arping ARP cache entries"
|
|
|
|
|
- for ip in `get_arp_list`; do
|
|
|
|
|
+ ocf_log debug "check arping ARP cache entries"
|
|
|
|
|
+ arp_list=`get_arp_list`
|
|
|
|
|
+ for ip in `echo $arp_list`; do
|
|
|
|
|
do_arping $ip && return $OCF_SUCCESS
|
|
|
|
|
done
|
|
|
|
|
|
|
|
|
|
+ # if we get here, the ethernet device is considered not running.
|
|
|
|
|
+ # provide some logging information
|
|
|
|
|
+ if [ -z "$arp_list" ]; then
|
|
|
|
|
+ ocf_log info "No ARP cache entries found to arping"
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
# watch for packet counter changes in promiscios mode
|
|
|
|
|
# ocf_log debug "watch for packet counter changes in promiscios mode"
|
|
|
|
|
# be sure switch off promiscios mode in any case
|
|
|
|
|
@@ -362,67 +425,89 @@ END
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
set_cib_value() {
|
|
|
|
|
- local score=`expr $1 \* $OCF_RESKEY_multiplier`
|
|
|
|
|
- attrd_updater -n $ATTRNAME -v $score -q
|
|
|
|
|
- local rc=$?
|
|
|
|
|
- case $rc in
|
|
|
|
|
- 0) ocf_log debug "attrd_updater: Updated $ATTRNAME = $score" ;;
|
|
|
|
|
- *) ocf_log warn "attrd_updater: Could not update $ATTRNAME = $score: rc=$rc";;
|
|
|
|
|
- esac
|
|
|
|
|
- return $rc
|
|
|
|
|
+ local score=`expr $1 \* $OCF_RESKEY_multiplier`
|
|
|
|
|
+ attrd_updater -n $ATTRNAME -v $score -q
|
|
|
|
|
+ local rc=$?
|
|
|
|
|
+ case $rc in
|
|
|
|
|
+ 0) ocf_log debug "attrd_updater: Updated $ATTRNAME = $score" ;;
|
|
|
|
|
+ *) ocf_log warn "attrd_updater: Could not update $ATTRNAME = $score: rc=$rc";;
|
|
|
|
|
+ esac
|
|
|
|
|
+ return $rc
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if_monitor() {
|
|
|
|
|
- ha_pseudo_resource $OCF_RESOURCE_INSTANCE monitor
|
|
|
|
|
- local pseudo_status=$?
|
|
|
|
|
- if [ $pseudo_status -ne $OCF_SUCCESS ]; then
|
|
|
|
|
- exit $pseudo_status
|
|
|
|
|
- fi
|
|
|
|
|
-
|
|
|
|
|
- local mon_rc=$OCF_NOT_RUNNING
|
|
|
|
|
- local attr_rc=$OCF_NOT_RUNNING
|
|
|
|
|
- local runs=0
|
|
|
|
|
- local start_time
|
|
|
|
|
- local end_time
|
|
|
|
|
- local sleep_time
|
|
|
|
|
- while [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]
|
|
|
|
|
- do
|
|
|
|
|
- start_time=`date +%s%N`
|
|
|
|
|
- if_check
|
|
|
|
|
- mon_rc=$?
|
|
|
|
|
- REP_COUNT=$(( $REP_COUNT - 1 ))
|
|
|
|
|
- if [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]; then
|
|
|
|
|
- ocf_log warn "Monitoring of $OCF_RESOURCE_INSTANCE failed, $REP_COUNT retries left."
|
|
|
|
|
- end_time=`date +%s%N`
|
|
|
|
|
- sleep_time=`echo "scale=9; ( $start_time + ( $REP_INTERVAL_S * 1000000000 ) - $end_time ) / 1000000000" | bc -q 2> /dev/null`
|
|
|
|
|
- sleep $sleep_time 2> /dev/null
|
|
|
|
|
- runs=$(($runs + 1))
|
|
|
|
|
- fi
|
|
|
|
|
-
|
|
|
|
|
- if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then
|
|
|
|
|
- ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error"
|
|
|
|
|
- fi
|
|
|
|
|
- done
|
|
|
|
|
-
|
|
|
|
|
- ocf_log debug "Monitoring return code: $mon_rc"
|
|
|
|
|
- if [ $mon_rc -eq $OCF_SUCCESS ]; then
|
|
|
|
|
- set_cib_value 1
|
|
|
|
|
- attr_rc=$?
|
|
|
|
|
- else
|
|
|
|
|
- ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed."
|
|
|
|
|
- set_cib_value 0
|
|
|
|
|
- attr_rc=$?
|
|
|
|
|
- fi
|
|
|
|
|
-
|
|
|
|
|
- ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors.
|
|
|
|
|
- ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself.
|
|
|
|
|
- exit $attr_rc
|
|
|
|
|
+ ha_pseudo_resource $OCF_RESOURCE_INSTANCE monitor
|
|
|
|
|
+ local pseudo_status=$?
|
|
|
|
|
+ if [ $pseudo_status -ne $OCF_SUCCESS ]; then
|
|
|
|
|
+ exit $pseudo_status
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ local mon_rc=$OCF_NOT_RUNNING
|
|
|
|
|
+ local attr_rc=$OCF_NOT_RUNNING
|
|
|
|
|
+ local runs=0
|
|
|
|
|
+ local start_time
|
|
|
|
|
+ local end_time
|
|
|
|
|
+ local sleep_time
|
|
|
|
|
+ while [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]
|
|
|
|
|
+ do
|
|
|
|
|
+ start_time=`date +%s%N`
|
|
|
|
|
+ if_check
|
|
|
|
|
+ mon_rc=$?
|
|
|
|
|
+ REP_COUNT=$(( $REP_COUNT - 1 ))
|
|
|
|
|
+ if [ $mon_rc -ne $OCF_SUCCESS -a $REP_COUNT -gt 0 ]; then
|
|
|
|
|
+ ocf_log warn "Monitoring of $OCF_RESOURCE_INSTANCE failed, $REP_COUNT retries left."
|
|
|
|
|
+ end_time=`date +%s%N`
|
|
|
|
|
+ sleep_time=`echo "scale=9; ( $start_time + ( $REP_INTERVAL_S * 1000000000 ) - $end_time ) / 1000000000" | bc -q 2> /dev/null`
|
|
|
|
|
+ sleep $sleep_time 2> /dev/null
|
|
|
|
|
+ runs=$(($runs + 1))
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ if [ $mon_rc -eq $OCF_SUCCESS -a $runs -ne 0 ]; then
|
|
|
|
|
+ ocf_log info "Monitoring of $OCF_RESOURCE_INSTANCE recovered from error"
|
|
|
|
|
+ fi
|
|
|
|
|
+ done
|
|
|
|
|
+
|
|
|
|
|
+ ocf_log debug "Monitoring return code: $mon_rc"
|
|
|
|
|
+ if [ $mon_rc -eq $OCF_SUCCESS ]; then
|
|
|
|
|
+ set_cib_value 1
|
|
|
|
|
+ attr_rc=$?
|
|
|
|
|
+ else
|
|
|
|
|
+ ocf_log err "Monitoring of $OCF_RESOURCE_INSTANCE failed."
|
|
|
|
|
+ set_cib_value 0
|
|
|
|
|
+ attr_rc=$?
|
|
|
|
|
+ fi
|
|
|
|
|
+
|
|
|
|
|
+ ## The resource should not fail, if the interface is down. It should fail, if the update of the CIB variable has errors.
|
|
|
|
|
+ ## To react on the interface failure you must use constraints based on the CIB variable value, not on the resource itself.
|
|
|
|
|
+ exit $attr_rc
|
|
|
|
|
+}
|
|
|
|
|
+
|
|
|
|
|
+if_stop()
+{
+	attrd_updater -D -n "$ATTRNAME"	# delete the node attribute; this command's status is not checked
+	ha_pseudo_resource "$OCF_RESOURCE_INSTANCE" stop	# last command, so its status is what if_stop returns
}
|
|
|
|
|
|
|
|
|
|
+if_start()
+{
+	# Start action: mark the pseudo resource as started, then run the first monitor pass.
+	local rc
+	ha_pseudo_resource "$OCF_RESOURCE_INSTANCE" start
+	rc=$?
+	if [ "$rc" -ne "$OCF_SUCCESS" ]; then
+		ocf_exit_reason "Failure to create ethmonitor state file"
+		return "$rc"
+	fi
+	# perform the first monitor during the start operation; if_monitor ends with exit on every path, so the return below is only a safety net
+	if_monitor
+	return $?
+}
|
|
|
|
|
+
|
|
|
|
|
+
|
|
|
|
|
if_validate() {
|
|
|
|
|
- check_binary $IP2UTIL
|
|
|
|
|
- check_binary arping
|
|
|
|
|
- if_init
|
|
|
|
|
+ check_binary $IP2UTIL
|
|
|
|
|
+ check_binary arping
|
|
|
|
|
+ if_init
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
case $__OCF_ACTION in
|
|
|
|
|
@@ -436,18 +521,17 @@ esac
|
|
|
|
|
if_validate
|
|
|
|
|
|
|
|
|
|
case $__OCF_ACTION in
|
|
|
|
|
-start) ha_pseudo_resource $OCF_RESOURCE_INSTANCE start
|
|
|
|
|
+start) if_start
|
|
|
|
|
exit $?
|
|
|
|
|
;;
|
|
|
|
|
-stop) attrd_updater -D -n $ATTRNAME
|
|
|
|
|
- ha_pseudo_resource $OCF_RESOURCE_INSTANCE stop
|
|
|
|
|
+stop) if_stop
|
|
|
|
|
exit $?
|
|
|
|
|
;;
|
|
|
|
|
monitor|status) if_monitor
|
|
|
|
|
exit $?
|
|
|
|
|
;;
|
|
|
|
|
validate-all) exit $?
|
|
|
|
|
- ;;
|
|
|
|
|
+ ;;
|
|
|
|
|
*) if_usage
|
|
|
|
|
exit $OCF_ERR_UNIMPLEMENTED
|
|
|
|
|
;;
|
|
|
|
|
--
|
|
|
|
|
1.8.4.2
|
|
|
|
|
|