From ac89df16142faf972cdb88bd8599329fbc15ffb1 Mon Sep 17 00:00:00 2001 From: Andrea Ieri Date: Fri, 28 Oct 2016 13:37:45 +0200 Subject: [PATCH 1/2] Medium: IPaddr2: add option to enable sending refresh arp packets in monitor This commit introduces a new parameter: OCF_RESKEY_arp_count_refresh (default: 0) This parameter specifies how many gratuitous ARP packets to send during the monitoring of the resource; a value of 0 disables them. This is to alleviate issues with potentially stuck switch ARP caches, which can arise in split-brain situations. Example: - a two-node cluster is interconnected directly as well as through a switch. Node A is master, node B is slave - communication between master and slave is severed (split brain) - node B initializes the virtual IPs and sends gratuitous ARP packets - the switch updates its ARP table to point to B - node B dies (without node A noticing) - node A never notices anything wrong - the switch fails to notice that the virtual IP belongs to A --- heartbeat/IPaddr2 | 52 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index b224ca5..c49d638 100755 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -76,6 +76,7 @@ OCF_RESKEY_clusterip_hash_default="sourceip-sourceport" OCF_RESKEY_unique_clone_address_default=false OCF_RESKEY_arp_interval_default=200 OCF_RESKEY_arp_count_default=5 +OCF_RESKEY_arp_count_refresh_default=0 OCF_RESKEY_arp_bg_default=true OCF_RESKEY_arp_mac_default="ffffffffffff" @@ -86,6 +87,7 @@ OCF_RESKEY_arp_mac_default="ffffffffffff" : ${OCF_RESKEY_unique_clone_address=${OCF_RESKEY_unique_clone_address_default}} : ${OCF_RESKEY_arp_interval=${OCF_RESKEY_arp_interval_default}} : ${OCF_RESKEY_arp_count=${OCF_RESKEY_arp_count_default}} +: ${OCF_RESKEY_arp_count_refresh=${OCF_RESKEY_arp_count_refresh_default}} : ${OCF_RESKEY_arp_bg=${OCF_RESKEY_arp_bg_default}} : 
${OCF_RESKEY_arp_mac=${OCF_RESKEY_arp_mac_default}} ####################################################################### @@ -274,12 +276,22 @@ Specify the interval between unsolicited ARP packets in milliseconds. -Number of unsolicited ARP packets to send. +Number of unsolicited ARP packets to send at resource initialization. -ARP packet count +ARP packet count sent during initialization + + +Number of unsolicited ARP packets to send during resource monitoring. Doing +so helps mitigate issues of stuck ARP caches resulting from split-brain +situations. + +ARP packet count sent during monitoring + + + Whether or not to send the ARP packets in the background. @@ -660,20 +672,25 @@ is_infiniband() { # Run send_arp to note peers about new mac address # run_send_arp() { - ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip auto not_used not_used" if [ "x$IP_CIP" = "xyes" ] ; then if [ x = "x$IF_MAC" ] ; then MY_MAC=auto else MY_MAC=`echo ${IF_MAC} | sed -e 's/://g'` fi - ARGS="-i $OCF_RESKEY_arp_interval -r $OCF_RESKEY_arp_count -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" - fi - ocf_log info "$SENDARP $ARGS" - if ocf_is_true $OCF_RESKEY_arp_bg; then - ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 else - $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" + MY_MAC=auto + fi + [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ + || ARP_COUNT=$OCF_RESKEY_arp_count + if [ $ARP_COUNT -ne 0 ] ; then + ARGS="-i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" + ocf_log info "$SENDARP $ARGS" + if ocf_is_true $OCF_RESKEY_arp_bg; then + ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 + else + $SENDARP $ARGS || ocf_log err "Could not send gratuitous arps" + fi fi } @@ -720,12 +737,16 @@ run_send_ua() { # Run ipoibarping to note peers about new Infiniband address # run_send_ib_arp() { 
- ARGS="-q -c $OCF_RESKEY_arp_count -U -I $NIC $OCF_RESKEY_ip" - ocf_log info "ipoibarping $ARGS" - if ocf_is_true $OCF_RESKEY_arp_bg; then - (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 - else - ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" + [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ + || ARP_COUNT=$OCF_RESKEY_arp_count + if [ $ARP_COUNT -ne 0 ] ; then + ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" + ocf_log info "ipoibarping $ARGS" + if ocf_is_true $OCF_RESKEY_arp_bg; then + (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 + else + ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps" + fi fi } @@ -946,6 +967,7 @@ ip_monitor() { local ip_status=`ip_served` case $ip_status in ok) + $ARP_SEND_FUN refresh return $OCF_SUCCESS ;; partial|no|partial2) From aa1db299f0b684fb814e6c31e96890868fa90e04 Mon Sep 17 00:00:00 2001 From: Andrea Ieri Date: Fri, 4 Nov 2016 15:37:15 +0100 Subject: [PATCH 2/2] Low: IPaddr2: Log refresh arp packets at debug level instead of info --- heartbeat/IPaddr2 | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/heartbeat/IPaddr2 b/heartbeat/IPaddr2 index c49d638..c9acf59 100755 --- a/heartbeat/IPaddr2 +++ b/heartbeat/IPaddr2 @@ -681,11 +681,16 @@ run_send_arp() { else MY_MAC=auto fi - [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ - || ARP_COUNT=$OCF_RESKEY_arp_count + if [ "x$1" = "xrefresh" ] ; then + ARP_COUNT=$OCF_RESKEY_arp_count_refresh + LOGLEVEL=debug + else + ARP_COUNT=$OCF_RESKEY_arp_count + LOGLEVEL=info + fi if [ $ARP_COUNT -ne 0 ] ; then ARGS="-i $OCF_RESKEY_arp_interval -r $ARP_COUNT -p $SENDARPPIDFILE $NIC $OCF_RESKEY_ip $MY_MAC not_used not_used" - ocf_log info "$SENDARP $ARGS" + ocf_log $LOGLEVEL "$SENDARP $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then ($SENDARP $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 else @@ -737,11 +742,16 @@ 
run_send_ua() { # Run ipoibarping to note peers about new Infiniband address # run_send_ib_arp() { - [ "x$1" = "xrefresh" ] && ARP_COUNT=$OCF_RESKEY_arp_count_refresh \ - || ARP_COUNT=$OCF_RESKEY_arp_count + if [ "x$1" = "xrefresh" ] ; then + ARP_COUNT=$OCF_RESKEY_arp_count_refresh + LOGLEVEL=debug + else + ARP_COUNT=$OCF_RESKEY_arp_count + LOGLEVEL=info + fi if [ $ARP_COUNT -ne 0 ] ; then ARGS="-q -c $ARP_COUNT -U -I $NIC $OCF_RESKEY_ip" - ocf_log info "ipoibarping $ARGS" + ocf_log $LOGLEVEL "ipoibarping $ARGS" if ocf_is_true $OCF_RESKEY_arp_bg; then (ipoibarping $ARGS || ocf_log err "Could not send gratuitous arps")& >&2 else