diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana
--- a/heartbeat/SAPHana 2016-04-26 12:01:55.620889964 +0200
+++ b/heartbeat/SAPHana 2016-04-26 12:03:17.240897137 +0200
@@ -2,9 +2,9 @@
#
# SAPHana
#
-# Description: Manages two single SAP HANA Instance in System Replication
+# Description: Manages two single SAP HANA Instance in System Replication
# Planned: do also manage scale-up scenarios
-# currently the SAPHana is dependent of the analysis of
+# currently the SAPHana is dependent of the analysis of
# SAPHanaTopology
# For supported scenarios please read the README file provided
# in the same software package (rpm)
@@ -16,16 +16,17 @@
# Support: linux@sap.com
# License: GNU General Public License (GPL)
# Copyright: (c) 2013,2014 SUSE Linux Products GmbH
+# Copyright: (c) 2015 SUSE Linux GmbH
#
-# An example usage:
+# An example usage:
# See usage() function below for more details...
#
# OCF instance parameters:
-# OCF_RESKEY_SID
-# OCF_RESKEY_InstanceNumber
-# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default)
-# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default)
-# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default)
+# OCF_RESKEY_SID
+# OCF_RESKEY_InstanceNumber
+# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default)
+# OCF_RESKEY_DIR_PROFILE (optional, well known directories will be searched by default)
+# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default)
# OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no)
# OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt))
# OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes)
@@ -71,7 +72,7 @@
info )
case "$shf" in
all) skip=0
- ;;
+ ;;
none )
skip=1
;;
@@ -80,13 +81,13 @@
mtype=${mtype#fh}
echo "$shf"| grep -iq ${mtype}; search=$?
if [ $search -eq 0 ]; then
- skip=0
+ skip=0
else
skip=1
fi
;;
esac
- ;;
+ ;;
esac
if [ $skip -eq 0 ]; then
ocf_log "$level" "$message"
@@ -103,8 +104,8 @@
local rc=0
methods=$(saphana_methods)
methods=$(echo $methods | tr ' ' '|')
- cat <<-!
- usage: $0 ($methods)
+ cat <<-EOF
+ usage: $0 ($methods)
$0 manages a SAP HANA Instance as an HA resource.
@@ -118,8 +119,17 @@
The 'validate-all' operation reports whether the parameters are valid
The 'methods' operation reports on the methods $0 supports
- !
- return $rc
+EOF
+ return $rc
+}
+
+function backup_global_and_nameserver() {
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
+ cp /hana/shared/LNX/global/hdb/custom/config/global.ini /hana/shared/LNX/global/hdb/custom/config/global.ini.$(date +"%s")
+ cp /hana/shared/LNX/global/hdb/custom/config/nameserver.ini /hana/shared/LNX/global/hdb/custom/config/nameserver.ini.$(date +"%s")
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ return $rc
}
#
@@ -130,11 +140,12 @@
function saphana_meta_data() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=0
- cat <<END
+#
+ cat <<END
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPHana">
-<version>0.149.7</version>
+<version>0.151.1</version>
<shortdesc lang="en">Manages two SAP HANA instances in system replication (SR).</shortdesc>
<longdesc lang="en">
@@ -157,7 +168,7 @@
2. landscapeHostConfiguration
The interface is used to monitor a HANA system. The python script is named landscapeHostConfiguration.py.
landscapeHostConfiguration.py has some detailed output about HANA system status
- and node roles. For our monitor the overall status is relevant. This overall
+ and node roles. For our monitor the overall status is relevant. This overall
status is reported by the returncode of the script:
0: Internal Fatal, 1: ERROR, 2: WARNING, 3: INFO, 4: OK
The SAPHana resource agent will interpret returncodes 0 as FATAL, 1 as not-running or ERROR and and returncodes 2+3+4 as RUNNING.
@@ -168,14 +179,14 @@
system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register).
4. hdbsql / systemReplicationStatus
- Interface is SQL query into HANA (system replication table). The hdbsql query will be replaced by a python script
+ Interface is SQL query into HANA (system replication table). The hdbsql query will be replaced by a python script
"systemReplicationStatus.py" in SAP HANA SPS8 or 9.
As long as we need to use hdbsql you need to setup secure store users for linux user root to be able to
access the SAP HANA database. You need to configure a secure store user key "SAPHANA${SID}SR" which can connect the SAP
- HANA database:
+ HANA database:
5. saphostctrl
- The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the
+ The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the
SAP HANA instance. This is the hostname used during the HANA installation.
</longdesc>
@@ -207,7 +218,7 @@
</parameter>
<parameter name="DUPLICATE_PRIMARY_TIMEOUT" unique="0" required="0">
<shortdesc lang="en">Time difference needed between to primary time stamps, if a dual-primary situation occurs</shortdesc>
- <longdesc lang="en">Time difference needed between to primary time stamps,
+ <longdesc lang="en">Time difference needed between to primary time stamps,
if a dual-primary situation occurs. If the time difference is
less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin
a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After
@@ -231,12 +242,8 @@
<content type="string" default="" />
</parameter>
<parameter name="SAPHanaFilter" unique="0" required="0">
- <shortdesc lang="en">Define SAPHana resource agent messages to be printed</shortdesc>
- <longdesc lang="en">Define SAPHana resource agent messages to be printed.
- This parameter should only be set if requested by support. The default is sufficient for normal operation.
- Values: ra-act-lpa-dec-flow
- You could specify any combination of the above values like "ra-act-flow"
- </longdesc>
+ <shortdesc lang="en">OUTDATED PARAMETER</shortdesc>
+ <longdesc lang="en">OUTDATED PARAMETER</longdesc>
<content type="string" default="" />
</parameter>
</parameters>
@@ -271,7 +278,7 @@
for m in start stop status monitor promote demote notify validate-all methods meta-data usage; do
echo "$m"
done
- return $rc
+ return $rc
}
#
@@ -298,7 +305,7 @@
local remoteNode=""
local rc=1
for cl in ${otherNodes[@]}; do
- vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]})
+ vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]} "$cl")
if [ "$vHost" = "$remoteHost" ]; then # we found the correct node
remoteNode=$cl
rc=0
@@ -347,9 +354,10 @@
}
#
-# function: get_hana_attribute
+# function: get_hana_attribute
# params: NODE ATTR [STORE]
# globals: -
+# output: attribute value
#
function get_hana_attribute()
{
@@ -358,14 +366,20 @@
local attr_node=$1
local attr_name=$2
local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter
- local attr_default=${5:-}
+ local attr_default=${4:-}
+ local dstr
local attr_val=""
- attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default"); rc=$?
- if [ $debug_attributes -eq 1 ]; then
- dstr=$(date)
- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE
- fi
- echo "$attr_val"
+ dstr=$(date)
+ case "$attr_store" in
+ reboot | forever )
+ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE
+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ props )
+ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE
+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ esac
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -388,11 +402,17 @@
attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$?
if [ "$attr_old" != "$attr_value" ]; then
super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc "
- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store; rc=$?
- if [ $debug_attributes -eq 1 ]; then
- dstr=$(date)
- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE
- fi
+ dstr=$(date)
+ case "$attr_store" in
+ reboot | forever )
+ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE
+ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ props )
+ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE
+ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ esac
else
super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}"
rc=0
@@ -408,7 +428,8 @@
#
function assert() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
- local err_msg=$1 local default_rc=$OCF_NOT_RUNNING
+ local err_msg=$1
+ local default_rc=$OCF_NOT_RUNNING
# DONE: Check, if we need to destinguish between probe and others
if ocf_is_probe; then
default_exit=$OCF_NOT_RUNNING
@@ -435,7 +456,7 @@
local score=0
if [ -n "$1" ]; then
score=$1
- fi
+ fi
# DONE: PRIO2: Only adjust master if value is really different (try to check that)
oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot)
if [ "$oldscore" != "$score" ]; then
@@ -452,7 +473,7 @@
#
# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER)
# params: NODE_ROLES NODE_SYNC_STATUS
-# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@],
+# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@],
#
scoring_crm_master()
{
@@ -467,7 +488,7 @@
if grep "$rolePatt" <<< "$roles"; then
if grep "$syncPatt" <<< "$sync"; then
skip=1
- myScore=$score
+ myScore=$score
fi
fi
fi
@@ -496,7 +517,7 @@
# function: saphana_init - initialize variables for the resource agent
# params: InstanceName
# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w),
-# globals: sr_name(w), remoteHost(w), otherNodes(w)
+# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w)
# globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w)
# globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w)
# globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w)
@@ -506,6 +527,8 @@
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=$OCF_SUCCESS
local vName
+ local clN
+ # local site
# two parameter models (for transition only)
# OLD: InstanceName
# NEW: SID InstanceNumber
@@ -528,11 +551,10 @@
#
# if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology
#
- vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]});
+ vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME");
fi
SAPVIRHOST=${vName}
PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER"
- SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}"
AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}"
LPA_DIRECTORY=/var/lib/SAPHanaRA
LPA_ATTR=("lpa_${sid}_lpt" "forever")
@@ -591,6 +613,8 @@
*openais* ) otherNodes=($(crm_node -l | awk '$3 == "member" { if ($2 != me) { print $2 }}' me=${NODENAME}));;
*cman* ) otherNodes=($(crm_node -l | awk '{for (i=1; i<=NF; i++) { if ($i != me) { print $i }}}' me=${NODENAME}));;
esac
+ #
+ #
remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]});
if [ -z "$remoteHost" ]; then
@@ -611,9 +635,13 @@
# ATTR_NAME_HANA_SITE
sr_name=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SITE[@]});
sr_mode=$(get_hana_attribute "${NODENAME}" ${ATTR_NAME_HANA_SRMODE[@]})
+
if [ -z "$sr_mode" ]; then
sr_mode="sync"
fi
+ if [ -n "$remoteNode" ]; then
+ rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]});
+ fi
super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode"
# optional OCF parameters, we try to guess which directories are correct
if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ]
@@ -706,7 +734,7 @@
then
runninginst=$(echo "$output" | grep '^0 : ' | cut -d' ' -f3)
if [ "$runninginst" != "$InstanceName" ]
- then
+ then
super_ocf_log warn "ACT: sapstartsrv is running for instance $runninginst, that service will be killed"
restart=1
else
@@ -784,38 +812,113 @@
node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
+ # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files?
+ # This might also solve some problems when we could not figure-out the ilocal or remote site name
for i in 1 2 3 4 5 6 7 8 9; do
case "$node_status" in
- primary )
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
- return $HANA_STATE_PRIMARY;;
+ primary )
+ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY"
+ return $HANA_STATE_PRIMARY;;
syncmem | sync | async )
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
- return $HANA_STATE_SECONDARY;;
- none ) # have seen that mode on second side BEFEORE we registered it as replica
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
- return $HANA_STATE_STANDALONE;;
+ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY"
+ return $HANA_STATE_SECONDARY;;
+ none ) # have seen that mode on second side BEFEORE we registered it as replica
+ super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE"
+ return $HANA_STATE_STANDALONE;;
* )
- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>"
- dump=$( echo $node_status | hexdump -C );
- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>"
- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
- super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status"
- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>"
+ dump=$( echo $node_status | hexdump -C );
+ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>"
+ node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
+ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
+ super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status"
+ # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
esac;
done
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
+# function: analyze_hana_sync_statusSRS
+# params: -
+# globals: DIR_EXECUTABLE(r), FULL_SR_STATUS(w), remoteNode
+#
+# systemReplicationStatus.py return-codes:
+# NoHSR = 10
+# Error = 11
+# Unkown = 12
+# Initializing = 13
+# Syncing = 14
+# Active = 15
+function analyze_hana_sync_statusSRS()
+{
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam=""
+ if [ -n "$rem_SR_name" ]; then
+ siteParam="--site=$rem_SR_name"
+ fi
+ FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$?
+ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc"
+ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc"
+ #
+ # TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported)
+ # And is the check for return code capable for chains?
+ #
+ if [ $srRc -eq 15 ]; then
+ # Fix for a HANA BUG, where a non-working SR resulted in RC 15:
+ if grep -q "ACTIVE" <<< "$FULL_SR_STATUS"; then
+ super_ocf_log info "FLOW $FUNCNAME SOK"
+ set_hana_attribute "$remoteNode" "SOK" ${ATTR_NAME_HANA_SYNC_STATUS[@]}
+ super_ocf_log info "ACT site=$sr_name, seting SOK for secondary (1)"
+ lpa_set_lpt 30 "$remoteNode"
+ rc=0;
+ else
+ # ok we should be careful and set secondary to SFAIL
+ super_ocf_log info "FLOW $FUNCNAME SFAIL"
+ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]}
+ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (6) - srRc=$srRc lss=$lss No ACTIVES found in cmd output"
+ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary
+ lpa_set_lpt 10 "$remoteNode"
+ fi
+ elif [ $srRc -le 11 ]; then # 11 and 10
+ # if systemReplicationStatus is ERROR and landscapeHostConfiguration is down than do NOT set SFAIL
+ get_hana_landscape_status; lss=$?
+ if [ $lss -lt 2 ]; then
+ # keep everithing like it was
+ rc=2
+ else
+ # ok we should be careful and set secondary to SFAIL
+ super_ocf_log info "FLOW $FUNCNAME SFAIL"
+ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]}
+ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss"
+ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary
+ lpa_set_lpt 10 "$remoteNode"
+ rc=1
+ fi
+ else
+ super_ocf_log info "FLOW $FUNCNAME SFAIL"
+ set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]}
+ super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc"
+ # TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary
+ lpa_set_lpt 10 "$remoteNode"
+ rc=1;
+ fi
+ super_ocf_log info "FLOW $FUNCNAME PRIM+LPA"
+ super_ocf_log info "DBG PRIM"
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ return $rc
+}
+
#
-# function: analyze_hana_sync_status - query and check hana system replication status
+####
+#### OLD HDBSQL STUFF FOR SPS6,7,8 AND SCALE-UP ONLY
+####
+# function: analyze_hana_sync_statusSQL - query and check hana system replication status
# params: -
# globals: DIR_EXECUTABLE(r), remoteHost(r)
# get the HANA sync status
-#
-function analyze_hana_sync_status()
+#
+function analyze_hana_sync_statusSQL()
{
super_ocf_log info "FLOW $FUNCNAME ($*)"
local -a clusterNodes=()
@@ -863,35 +966,9 @@
# TODO PRIO1: REMOVE remoteNode dependency - set SFAIL
set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]}
fi
- # first get a list of all secondary hosts, than a list of all secondary hosts, if the is ANY failure at this site
- # TODO: PRIO9: for first we assume there is only ONE secondary site (like ROT)
- # TODO: PRIO3: should we loop over all cluster nodes fetching their roles-attribute? To minimize sql-queries?
- #
- all_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_secondaries ); sqlrc=$?
- all_secondary_hosts=$(echo $all_secondary_hosts | dequote);
- if [ "$sqlrc" -eq 0 ]; then
- all_broken_secondary_hosts=$(timeout $hdbSrQueryTimeout hdbsql -a -x -U $secUser $query_failed_secondaries); sqlrc=$?
- all_broken_secondary_hosts=$(echo $all_broken_secondary_hosts | dequote);
- if [ "$sqlrc" -eq 0 ]; then
- if [ -n "$all_broken_secondary_hosts" ]; then
- #
- # we have a broken secondary site - set all hosts to "SFAIL"
- #
- # Note: since HANA hostname can be different from nodename we need to check all vhost attributes
- for n in $all_broken_secondary_hosts; do
- for cl in ${otherNodes[@]}; do
- vHost=$(get_hana_attribute $cl ${ATTR_NAME_HANA_VHOST[@]})
- if [ "$vHost" = "$n" ]; then # we found the correct node
- set_hana_attribute $cl "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]}
- fi
- done
- done
- fi
- fi
- fi
else
case "$sqlrc" in
- 19 )
+ 19 )
# return codes 19: license error -> set SFAIL!
# DONE: PRIO1: We should NOT set SFAIL, if HDB is exactly broken now
# When HDB breaks during monitor this could prevent a prositive remote failover
@@ -901,7 +978,7 @@
done
;;
esac
- fi
+ fi
return $rc
}
@@ -932,10 +1009,18 @@
local remoteInstance="";
remoteInstance=$InstanceNr
if ocf_is_true ${AUTOMATED_REGISTER}; then
+ #
+ #
+ #
+ #
+ #
super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"
+ #
+ #
su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$?
+ # backup_global_and_nameserver
else
- super_ocf_log info "ACT: IGNORE REGISTER because AUTOMATED_REGISTER is set to FALSE"
+ super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE"
rc=1
fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
@@ -945,7 +1030,7 @@
#
# function: saphana_status - pure status check
# params: -
-# globals: SIDInstanceName, OCF_*,
+# globals: SIDInstanceName, OCF_*,
function saphana_status() {
local binDeam="hdb.sap${SIDInstanceName}" rc=0
binDeam=${binDeam:0:15} # Process name is limited to the first 15 characters
@@ -956,13 +1041,13 @@
#
# function: saphana_start - start a hana instance
# params: -
-# globals: OCF_*, SAPCONTROL, InstanceNr, SID, InstanceName,
+# globals: OCF_*, SAPCONTROL, InstanceNr, SID, InstanceName,
#
function saphana_start() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=$OCF_NOT_RUNNING
local output=""
- local loopcount=0
+ local loopcount=0
check_sapstartsrv
rc=$?
#
@@ -1000,11 +1085,11 @@
# saphana_stop: Stop the SAP instance
#
function saphana_stop() {
- super_ocf_log info "FLOW $FUNCNAME ($*)"
- local output=""
- local rc=0
- check_sapstartsrv; rc=$?
- if [ $rc -eq $OCF_SUCCESS ]; then
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local output=""
+ local rc=0
+ check_sapstartsrv; rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]; then
output=$($SAPCONTROL -nr $InstanceNr -function Stop)
rc=$?
super_ocf_log info "ACT: Stopping SAP Instance $SID-$InstanceName: $output"
@@ -1032,7 +1117,7 @@
# function: saphana_validate - validation of (some) variables/parameters
# params: -
# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r), SAPVIRHOST(r)
-# saphana_validate: Check the symantic of the input parameters
+# saphana_validate: Check the symantic of the input parameters
#
function saphana_validate() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
@@ -1060,12 +1145,12 @@
#
# function: saphana_start_primary - handle startup of PRIMARY in M/S
# params:
-# globals: OCF_*(r), NODENAME, ATTR_NAME_*, HANA_STATE_*,
+# globals: OCF_*(r), NODENAME, ATTR_NAME_*, HANA_STATE_*,
#
function saphana_start_primary()
{
super_ocf_log info "FLOW $FUNCNAME ($*)"
- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING
+ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING
local lss sqlrc;
local rc=0
local lpa_dec=4
@@ -1074,7 +1159,7 @@
# we will be a master (PRIMARY) so checking, if the is an OTHER master
#
super_ocf_log debug "DBG: saphana_primary - check_for_primary reports HANA_STATE_PRIMARY"
- #
+ #
lpa_init_lpt $HANA_STATE_PRIMARY
lpa_check_lpt_status; lpa_dec=$?
get_hana_landscape_status; lss=$?
@@ -1139,7 +1224,7 @@
1 ) # landcape says we are down, lets start and adjust scores and return code
super_ocf_log info "LPA: landcape: DOWN, LPA: start ==> start instance"
saphana_start
- rc=$?
+ rc=$?
LPTloc=$(date '+%s')
lpa_set_lpt $LPTloc
;;
@@ -1152,7 +1237,7 @@
# DONE: PRIO3: check if this reaction is correct - tell cluster about failed start
super_ocf_log info "LPA: landcape: UP, LPA: register ==> take down"
set_crm_master -inf
- rc=$OCF_NOT_RUNNING
+ rc=$OCF_NOT_RUNNING
;;
1 ) # lets try to register
# DONE: PRIO2: Like Action in start_secondary
@@ -1160,7 +1245,7 @@
super_ocf_log info "DEC: AN OTHER HANA IS AVAILABLE ==> LETS REGISTER"
set_crm_master 0
if wait_for_primary_master 1; then
- register_hana_secondary
+ register_hana_secondary
check_for_primary; primary_status=$?
if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
super_ocf_log info "ACT: Register successful"
@@ -1169,11 +1254,11 @@
set_crm_master 0
saphana_start_secondary
rc=$?
- lpa_set_lpt 30
+ lpa_set_lpt 10
else
super_ocf_log err "ACT: Register failed"
rc=$OCF_NOT_RUNNING
- fi
+ fi
else
# lets check next monitor, if we can register
rc=$OCF_SUCCESS
@@ -1185,6 +1270,9 @@
case "$lss" in
2 | 3 | 4 ) # as we ARE up we just keep it up
# TODO: PRIO3: I now change from "just keep it up to take that down"
+# TODO: PRIO1 differ lpt_advice!!
+# 2 => DOWN
+# 3 => KEEP
# TODO: PRIO3: OCF_SUCCESS, OCF_NOT_RUNNING or OCF_ERR_xxxx ?
set_crm_master -9000
#scoring_crm_master "$my_role" "$my_sync"
@@ -1193,7 +1281,7 @@
1 ) # we are down, so we should wait --> followup in next monitor
super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting"
# TODO: PRIO3: Check, if WAITING is correct here
- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]}
+ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]}
set_crm_master -9000
rc=$OCF_SUCCESS
;;
@@ -1202,7 +1290,7 @@
fail ) # process a lpa FAIL
super_ocf_log info "LPA: LPA reports FAIL"
set_crm_master -inf
- rc=$OCF_NOT_RUNNING
+ rc=$OCF_NOT_RUNNING
;;
esac
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
@@ -1278,12 +1366,12 @@
#
# function: saphana_start_secondary - handle startup of PRIMARY in M/S
# params:
-# globals: OCF_*(r), NODENAME, ATTR_NAME_*,
+# globals: OCF_*(r), NODENAME, ATTR_NAME_*,
#
function saphana_start_secondary()
{
super_ocf_log info "FLOW $FUNCNAME ($*)"
- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING
+ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING
local sqlrc;
set_crm_master 0
#
@@ -1291,9 +1379,9 @@
#
lpa_push_lpt 10
lpa_set_lpt 10
- #
+ #
####### LPA - end
- #
+ #
#
# we would be slave (secondary)
# we first need to check, if there are Master Nodes, because the Scecondary only starts
@@ -1311,16 +1399,16 @@
# It seams the stating secondary could not start because of stopping primary
# so this is a WAITING situation
super_ocf_log info "ACT: PRIMARY seams to be down now ==> WAITING"
- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]}
+ set_hana_attribute ${NODENAME} "WAITING4PRIM" ${ATTR_NAME_HANA_CLONE_STATE[@]}
set_crm_master -INFINITY
rc=$OCF_SUCCESS
fi
else
- lpa_set_lpt 30
+ lpa_set_lpt 10
fi
else
super_ocf_log info "ACT: wait_for_primary_master ==> WAITING"
- set_hana_attribute ${NODENAME} "WAITING" ${ATTR_NAME_HANA_CLONE_STATE[@]}
+ set_hana_attribute ${NODENAME} "WAITING4PRIM" ${ATTR_NAME_HANA_CLONE_STATE[@]}
set_crm_master -INFINITY
rc=$OCF_SUCCESS
fi
@@ -1329,11 +1417,71 @@
}
#
+# function: saphana_check_local_instance
+# params:
+# output:
+# rc: rc=0 (UP) rc=1 (DOWN)
+# globals:
+#
+function saphana_check_local_instance()
+{
+ local rc=1
+ local count=0
+ local SERVNO
+ local output
+ local MONITOR_SERVICES="hdbnameserver|hdbdaemon" # TODO: PRIO1: exact list of Services
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ check_sapstartsrv
+ rc=$?
+ if [ $rc -eq $OCF_SUCCESS ]
+ then
+ output=$($SAPCONTROL -nr $InstanceNr -function GetProcessList -format script)
+ # we have to parse the output, because the returncode doesn't tell anything about the instance status
+ for SERVNO in `echo "$output" | grep '^[0-9] ' | cut -d' ' -f1 | sort -u`
+ do
+ local COLOR=`echo "$output" | grep "^$SERVNO dispstatus: " | cut -d' ' -f3`
+ local SERVICE=`echo "$output" | grep "^$SERVNO name: " | cut -d' ' -f3`
+ local STATE=0
+ local SEARCH
+
+ case $COLOR in
+ GREEN|YELLOW) STATE=$OCF_SUCCESS;;
+ *) STATE=$OCF_NOT_RUNNING;;
+ esac
+
+ SEARCH=`echo "$MONITOR_SERVICES" | sed 's/\+/\\\+/g' | sed 's/\./\\\./g'`
+ if [ `echo "$SERVICE" | egrep -c "$SEARCH"` -eq 1 ]
+ then
+ if [ $STATE -eq $OCF_NOT_RUNNING ]
+ then
+ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !"
+ rc=$STATE
+ fi
+ count=1
+ fi
+ done
+
+ if [ $count -eq 0 -a $rc -eq $OCF_SUCCESS ]
+ then
+ if ocf_is_probe
+ then
+ rc=1
+ else
+ [ "$MONLOG" != "NOLOG" ] && ocf_log err "The SAP instance does not run any services which this RA could monitor!"
+ rc=1
+ fi
+ fi
+ fi
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ return $rc
+}
+
+#
# function: lpa_get_lpt - get lpt from cluster
# params: NODE
# output: LPT
# rc: rc=0: OK, rc=1: InternalERROR, rc=2: ERROR
-# globals: LPA_ATTR_*,
+# globals: LPA_ATTR_*,
#
function lpa_get_lpt() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
@@ -1348,7 +1496,7 @@
rc=2
fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ return $rc
}
#
@@ -1372,7 +1520,7 @@
rc=0
fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ return $rc
}
#
@@ -1398,7 +1546,7 @@
rc=2
fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ return $rc
}
#
@@ -1422,15 +1570,15 @@
rc=2
else
rc=0
- fi
+ fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ return $rc
}
#
# function: lpa_init_lpt - initialize local lpt, if needed
# params: HANA_STATE
-# globals: HANA_STATE_*(r), LPA_DIRECTORY(r), sid(r), NODENAME(r),
+# globals: HANA_STATE_*(r), LPA_DIRECTORY(r), sid(r), NODENAME(r),
# lpa_init_lpt
#
# Returncodes:
@@ -1439,7 +1587,7 @@
# Initializing (if NO local LPT-file):
# SECONDARY sets to 0
# PRIMARY sets to 1
-#
+#
function lpa_init_lpt() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=1
@@ -1458,11 +1606,11 @@
LPTloc=10
lpa_push_lpt "10"; rc=$?
else
- rc=2
+ rc=2
fi
lpa_set_lpt $LPTloc
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ return $rc
}
#
@@ -1472,6 +1620,10 @@
# lpa_check_lpt_status
#
# Returncodes:
+# 0: start
+# 1: register than start
+# 2: wait4gab
+# 3: wait4other
#
# Initializing (if NO local LPT-file):
# SECONDARY sets to 10
@@ -1480,20 +1632,20 @@
# LPRlocal OR LPTremore ARE real lpt (>1000)
# THEN:
# Bigger LPR wins, if delta-gab is OK
-# LPTlocal >> LPTremore ===> rc=0 (start)
+# LPTlocal >> LPTremore ===> rc=0 (start)
# LPTRemote >> LPTlocal ===> rc=1 (register)
-# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait)
+# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait4gab)
# LPRlocal AND LPTremore ARE NOT real lpt (<=1000)
# THEN:
# Bigger LPT wins
-# LPTlocal > LPTremore ===> rc=0 (start)
+# LPTlocal > LPTremore ===> rc=0 (start)
# LPTRemote > LPTlocal ===> rc=1 (register)
-# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait)
+# Stalemate in all other cases ==> STALEMATE-HANDLING ===> rc=2 (wait4gab)
# LPTRemote is not initialized or node not kown in cluster (crm_mon -l) (0)
# TODO: PRIO1: Need to introduce a return-code 3 for remote sides lpa not ready
# THEN:
# WAIT ==> like STALEMATE-HANDLING ===> rc=2 (wait)
-#
+#
function lpa_check_lpt_status() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=0
@@ -1501,6 +1653,8 @@
local LPTrem=-1
local LPTMark=1000
local delta=0
+ local remSn_name=""
+ local remHost=""
#
# First GET LPT from ATTR-FILE-DEFAULT
#
@@ -1550,7 +1704,20 @@
fi
fi
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ return $rc
+}
+
+# function: is_the_master_nameserver
+# params: -
+# rc: 0: yes, local node is THE master nameserver
+# 1: else
+# globals:
+function is_the_master_nameserver()
+{
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ return $rc
}
#
@@ -1574,11 +1741,12 @@
check_for_primary; primary_status=$?
if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then
saphana_start_primary; rc=$?
- else
+ else
+ lpa_set_lpt 10
saphana_start_secondary; rc=$?
- lpa_set_lpt 30
- fi
+ fi
fi
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -1596,7 +1764,7 @@
check_for_primary; primary_status=$?
if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then
lpa_set_lpt 10
- fi
+ fi
saphana_stop; rc=$?
return $rc
}
@@ -1637,7 +1805,7 @@
DEMOTED )
promoted=0;
;;
- WAITING )
+ WAITING* )
# DONE: lpa_check_lpt_status to come out of here :)
# DONE: PRIO2: CHECK IF THE FIX FOR COMING OUT OF WAITING IS CORRECT
get_hana_landscape_status; lss=$?
@@ -1648,7 +1816,8 @@
lpa_set_lpt $LPTloc
fi
lpa_check_lpt_status; lparc=$?
- if [ $lparc -ne 2 ]; then
+ # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes
+ if [ $lparc -lt 2 ]; then
# lpa - no need to wait any longer - lets try a new start
saphana_start_clone
rc=$?
@@ -1663,7 +1832,7 @@
super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING"
fi
return $OCF_SUCCESS
- fi
+ fi
promoted=0;
;;
UNDEFINED )
@@ -1682,13 +1851,13 @@
get_hana_landscape_status; lss=$?
super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss"
case "$lss" in
- 0 ) # FATAL or ERROR
+ 0 ) # FATAL or ERROR
rc=$OCF_ERR_GENERIC
;;
- 1 ) # DOWN or ERROR
+ 1 ) # DOWN or ERROR
# DONE: PRIO2: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error
if ocf_is_probe; then
- #
+ #
# leave master score untouched, only set return code
#
rc=$OCF_NOT_RUNNING
@@ -1699,7 +1868,7 @@
# For Migration it would be good to decrease master score
# For Reload locally we should NOT adjust the master score
# ===> Should we rely on the migration threshold?
- # set_crm_master
+ # set_crm_master
if ocf_is_true "${PreferSiteTakeover}" ; then
#
# DONE: PRIO1: first check, if remote site is already (and still) in sync
@@ -1708,7 +1877,7 @@
# TODO PRIO1: REMOVE remoteNode dependency - get_sync_status
remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]})
case "$remoteSync" in
- SOK )
+ SOK | PRIM )
super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)"
set_crm_master 5
if check_for_primary_master; then
@@ -1718,11 +1887,11 @@
SFAIL )
super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred"
;;
- * )
+ * )
super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred"
;;
- esac
- else
+ esac
+ else
# TODO: PRIO5: SCALE-OUT ONLY? Implement for local restart
# It maybe that for the local restart we only need to decrease the secondaries promotion score
#super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here"
@@ -1765,8 +1934,12 @@
case "$my_role" in
[12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status
;;
- [34]:P:*:master:* ) # primary is up and should now be able to anser hdbsql query
- analyze_hana_sync_status
+ [34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query
+ if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then
+ analyze_hana_sync_statusSRS
+ else
+ analyze_hana_sync_statusSQL
+ fi
;;
esac
rem_role=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_ROLES[@]})
@@ -1776,9 +1949,9 @@
[234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster
lpa_check_lpt_status; again_lpa_rc=$?
if [ $again_lpa_rc -eq 2 ]; then
- super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart"
- lpa_set_lpt 10
- lpa_push_lpt 10
+ super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart"
+ lpa_set_lpt 10
+ lpa_push_lpt 10
rc=$OCF_NOT_RUNNING
fi
;;
@@ -1812,13 +1985,13 @@
function saphana_monitor_secondary()
{
super_ocf_log info "FLOW $FUNCNAME ($*)"
- local rc=$OCF_ERR_GENERIC
- local promoted=0
+ local rc=$OCF_ERR_GENERIC
+ local promoted=0
local init_attribute=0
local lss
#
# OK, we are running as HANA SECONDARY
- #
+ #
if ! lpa_get_lpt ${NODENAME}; then
lpa_set_lpt 10
lpa_push_lpt 10
@@ -1863,7 +2036,7 @@
super_ocf_log debug "DBG: saphana_monitor_clone: HANA_STATE_SECONDARY"
#
# old method was: saphana_monitor - new method is get_hana_landscape_status
- get_hana_landscape_status; lss=$?
+ get_hana_landscape_status; lss=$?
super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss"
case "$lss" in
0 ) # FATAL
@@ -1919,11 +2092,11 @@
# a) returning 7 here and force cluster a restart of the slave
# b) starting the instance here inside the monitor -> may result in longer runtime, timeouts
#
- # first check with the status function (OS tools) if there could be something like a SAP instance running
- # as we do not know here, if we are in master or slave state we do not want to start our monitoring
- # agents (sapstartsrv) on the wrong host
- local rc=$OCF_ERR_GENERIC
- local promoted=0
+ # first check with the status function (OS tools) if there could be something like a SAP instance running
+ # as we do not know here, if we are in master or slave state we do not want to start our monitoring
+ # agents (sapstartsrv) on the wrong host
+ local rc=$OCF_ERR_GENERIC
+ local promoted=0
local init_attribute=0
local lpaRc=0
local mRc=0
@@ -1973,7 +2146,7 @@
# function: saphana_promote_clone - promote a hana clone
# params: -
# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r),
-# saphana_promote_clone:
+# saphana_promote_clone:
# In a Master/Slave configuration get Master being the primary OR by running hana takeover
#
function saphana_promote_clone() {
@@ -2017,7 +2190,7 @@
rc=$OCF_SUCCESS;
else
rc=$OCF_FAILED_MASTER
- fi
+ fi
;;
* )
super_ocf_log err "ACT: HANA SYNC STATUS IS NOT 'SOK' SO THIS HANA SITE COULD NOT BE PROMOTED"
@@ -2039,10 +2212,10 @@
#
# function: saphana_demote_clone - demote a hana clone instance
# params: -
-# globals: OCF_*(r), NODENAME(r),
+# globals: OCF_*(r), NODENAME(r),
# saphana_demote_clone
-# the HANA System Replication (SR) runs in a Master/Slave
-# While we could not change a HANA instance to be really demoted, we only mark the status for
+# the HANA System Replication (SR) runs in a Master/Slave
+# While we could not change a HANA instance to be really demoted, we only mark the status for
# correct monitor return codes
#
function saphana_demote_clone() {
@@ -2056,9 +2229,9 @@
}
#
-# function: main - main function to operate
+# function: main - main function to operate
# params: ACTION
-# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), SAPVIRHOST(w), DIR_EXECUTABLE(w),
+# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), SAPVIRHOST(w), DIR_EXECUTABLE(w),
# globals: SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r)
#
@@ -2073,7 +2246,7 @@
SAPCONTROL=""
DIR_PROFILE=""
SAPSTARTPROFILE=""
-SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}"
+SAPHanaFilter="ra-act-dec-lpa"
NODENAME=$(crm_node -n)
@@ -2100,7 +2273,7 @@
exit $OCF_SUCCESS;;
*);;
esac
-saphana_init
+saphana_init
if ! ocf_is_root
then
@@ -2141,7 +2314,7 @@
saphana_$ACTION$CLACT
ra_rc=$?
;;
- validate-all)
+ validate-all)
saphana_validate
ra_rc=$?
;;
@@ -2149,12 +2322,13 @@
lpa_check_lpt_status
ra_rc=$?
;;
- *) # seams to be a unknown request
- saphana_methods
+ *) # seams to be a unknown request
+ saphana_methods
ra_rc=$OCF_ERR_UNIMPLEMENTED
;;
esac
timeE=$(date '+%s')
(( timeR = timeE - timeB ))
+#super_ocf_log info "RA ==== SAPHanaFilter=$SAPHanaFilter"
super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($THE_VERSION) (${timeR}s)===="
exit ${ra_rc}
diff -uNr a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology
--- a/heartbeat/SAPHanaTopology 2016-04-26 12:01:55.620889964 +0200
+++ b/heartbeat/SAPHanaTopology 2016-04-26 12:03:18.033887556 +0200
@@ -16,7 +16,7 @@
# Copyright: (c) 2014 SUSE Linux Products GmbH
# (c) 2015 SUSE Linux GmbH
#
-# An example usage:
+# An example usage:
# See usage() function below for more details...
#
# OCF instance parameters:
@@ -41,7 +41,6 @@
HANA_STATE_DEFECT=3
debug_attributes=0
-
SH=/bin/sh
#
@@ -57,7 +56,7 @@
local shf="${SAPHanaFilter:-all}"
#ocf_log "info" "super_ocf_log: f:$shf l:$level m:$message"
# message levels: (dbg)|info|warn|err|error
- #
+ #
# message types: (ACT|RA|FLOW|DBG|LPA|DEC
case "$level" in
dbg | debug | warn | err | error ) skip=0
@@ -65,7 +64,7 @@
info )
case "$shf" in
all) skip=0
- ;;
+ ;;
none )
skip=1
;;
@@ -74,13 +73,13 @@
mtype=${mtype#fh}
echo "$shf"| grep -iq ${mtype}; search=$?
if [ $search -eq 0 ]; then
- skip=0
+ skip=0
else
skip=1
fi
;;
esac
- ;;
+ ;;
esac
if [ $skip -eq 0 ]; then
ocf_log "$level" "$message"
@@ -126,15 +125,15 @@
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="SAPHanaTopology">
- <version>0.149.6</version>
+ <version>0.151.1</version>
<shortdesc lang="en">Analyzes SAP HANA System Replication Topology.</shortdesc>
<longdesc lang="en">This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to
all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases.
In addition it starts and monitors the local saphostagent.
-1. Interface to monitor a HANA system: landscapeHostConfiguration.py
+1. Interface to monitor a HANA system: landscapeHostConfiguration.py
landscapeHostConfiguration.py has some detailed output about HANA system status
-and node roles. For our monitor the overall status is relevant. This overall
+and node roles. For our monitor the overall status is relevant. This overall
status is reported by the returncode of the script:
0: Internal Fatal
1: ERROR
@@ -150,7 +149,7 @@
system replication takeover (sr_takeover) or to register a former primary to a newer one (sr_register).
3. saphostctrl
- The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the
+ The interface saphostctrl uses the function ListInstances to figure out the virtual host name of the
SAP HANA instance. This is the hostname used during the HANA installation.
</longdesc>
<parameters>
@@ -172,13 +171,8 @@
<content type="string" default="" />
</parameter>
<parameter name="SAPHanaFilter" unique="0" required="0">
- <shortdesc lang="en">Define type of SAPHanaTopology RA messages to be printed</shortdesc>
- <longdesc lang="en">Define type of SAPHanaTopology RA messages to be printed.
-Define SAPHana resource agent messages to be printed.
- This parameter should only be set if requested by support. The default is sufficient for normal operation.
- Values: ra-act-lpa-dec-flow
- You could specify any combination of the above values like "ra-act-flow"
- </longdesc>
+ <shortdesc lang="en">OUTDATED</shortdesc>
+ <longdesc lang="en">OUTDATED</longdesc>
<content type="string" default="" />
</parameter>
</parameters>
@@ -197,7 +191,7 @@
}
#
-# function: get_hana_attribute
+# function: get_hana_attribute
# params: NODE ATTR [STORE]
# globals: -
#
@@ -208,16 +202,19 @@
local attr_node=$1
local attr_name=$2
local attr_store=${3:-reboot} # DONE: PRIO5 get this (optional) from parameter
- local attr_val=""
- attr_val=$(crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q); rc=$?
- if [ $debug_attributes -eq 1 ]; then
- dstr=$(date)
- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q --> $attr_val" >> /var/log/fhATTRIBUTE
- fi
- echo "$attr_val"
- if [ $rc -ne 0 ]; then
- super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -G -n "$attr_name" -l $attr_store -q"
- fi
+ local attr_default=${4:-}
+ local dstr
+ dstr=$(date)
+ case "$attr_store" in
+ reboot | forever )
+ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE
+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ props )
+ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE
+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ esac
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -234,19 +231,24 @@
local attr_value=$2
local attr_name=$3
local attr_store=${4:-reboot} # DONE: PRIO5 get this (optional) from parameter
+ local attr_default=${5:-}
local rc=1
- local attr_old
- attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store); get_rc=$?
+ local attr_old=""
+ local dstr
+ dstr=$(date)
+ attr_old=$(get_hana_attribute $attr_node $attr_name $attr_store $attr_default); get_rc=$?
if [ "$attr_old" != "$attr_value" ]; then
super_ocf_log debug "DBG: SET attribute $attr_name for node ${attr_node} to ${attr_value} former ($attr_old) get_rc=$get_rc "
- if [ $debug_attributes -eq 1 ]; then
- dstr=$(date)
- echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE
- fi
- crm_attribute -N $attr_node -v "$attr_value" -n "$attr_name" -l $attr_store; rc=$?
- if [ $rc -ne 0 ]; then
- super_ocf_log debug "DBG: ATTRIBUTE-FAILURE: crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store"
- fi
+ case "$attr_store" in
+ reboot | forever )
+ echo "$dstr: SAPHanaTopology: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE
+ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ props )
+ echo "$dstr: SAPHanaTopology: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE
+ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$?
+ ;;
+ esac
else
super_ocf_log debug "DBG: LET attribute $attr_name for node ${attr_node} still be ${attr_value}"
rc=0
@@ -299,7 +301,7 @@
#
# yes it is a clone config - check, if its configured well
#
- if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then
+ if [ "$OCF_RESKEY_CRM_meta_clone_node_max" -ne 1 ] ; then
super_ocf_log err "ACT: Clone options misconfigured. (expect: clone_node_max=1)"
exit $OCF_ERR_CONFIGURED
fi
@@ -314,8 +316,8 @@
#
# function: sht_init - initialize variables for the resource agent
# params: -
-# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w),
-# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w)
+# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w),
+# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w)
# globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w)
# globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w)
# sht_init : Define global variables with default values, if optional parameters are not set
@@ -327,6 +329,8 @@
local myInstanceName=""
local rc=$OCF_SUCCESS
local hdbANSWER=""
+ local siteID
+ local siteNAME
HOSTEXECNAME=saphostexec
USRSAP=/usr/sap
SAPSERVICE_PATH=${USRSAP}/sapservices
@@ -340,10 +344,9 @@
super_ocf_log debug "DBG2: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)"
sid=$(echo "$SID" | tr [:upper:] [:lower:])
sidadm="${sid}adm"
- SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}"
ocf_env=$(env | grep 'OCF_RESKEY_CRM')
super_ocf_log debug "DBG3: OCF: $ocf_env"
- ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot") # SOK, SFAIL, UNKNOWN?
+ ATTR_NAME_HANA_SYNC_STATUS=("hana_${sid}_sync_state" "reboot") # SOK, SFAIL, UNKNOWN?
ATTR_NAME_HANA_PRIMARY_AT=("hana_${sid}_primary_at" "reboot") # Not really used
ATTR_NAME_HANA_CLONE_STATE=("hana_${sid}_clone_state" "reboot") # UKNOWN?, DEMOTED, PROMOTED
ATTR_NAME_HANA_REMOTEHOST=("hana_${sid}_remoteHost" "forever")
@@ -352,8 +355,14 @@
ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever")
ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever")
ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot")
-
+ #
+ # new "central" attributes
+ #
+ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa")
# optional OCF parameters, we try to guess which directories are correct
+
+ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]})
+
if [ -z "$OCF_RESKEY_DIR_EXECUTABLE" ]
then
DIR_EXECUTABLE="/usr/sap/$SID/$InstanceName/exe"
@@ -387,19 +396,32 @@
# we need: mode=primary|sync|syncmem|...; site name=<site>; mapping/<me>=<site>/<node> (multiple lines)
case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in
*corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');;
- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');;
- *cman* ) nodelist=$(crm_node -l);;
+ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');;
+ *cman* ) nodelist=$(crm_node -l);;
esac
#### SAP-CALL
- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null)
- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)"
- site=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
+ # hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode
+ for i in 1 2 3 4 5 6 7 8 9; do
+ hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null)
+ super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)"
+ srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
+ case "$srmode" in
+ primary | syncmem | sync | async | none )
+ # we can leave the loop as we already got a result
+ break
+ ;;
+ * )
+ # lets pause a bit to give hdbnsutil a chance to answer next time
+ sleep 2
+ ;;
+ esac
+ done
+ # TODO PRIO3: Implement a file lookup, if we did not get a result
+ siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}')
+ siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}')
+ site=$siteNAME
srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}')
- if [ $debug_attributes -eq 1 ]; then
- dstr=$(date)
- echo "$dstr: SAPHanaTopology: srmode=$srmode" >> /var/log/fhATTRIBUTE
- fi
- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site)
+ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site)
super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING"
#
# filter all non-cluster mappings
@@ -413,12 +435,12 @@
echo $hanaVHost;
fi;
done;
- done )
+ done )
super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost"
super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS"
return $OCF_SUCCESS
-}
+}
#
# function: check_for_primary - check if local SAP HANA is configured as primary
@@ -428,32 +450,30 @@
function check_for_primary() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=0
- # DONE: Change stderr location!!
- #sidadm=lnxadm
- #node_status=$(check_for_primary_single)
- node_status=$srmode
- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status"
- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
- for i in 1 2 3 4 5 6 7 8 9; do
- case "$node_status" in
- primary )
+ node_status=$srmode
+ super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status"
+ super_ocf_log debug "DBG: check_for_primary: node_status=$node_status"
+ for i in 1 2 3 4 5 6 7 8 9; do
+ case "$node_status" in
+ primary )
super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY"
return $HANA_STATE_PRIMARY;;
syncmem | sync | async )
super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY"
return $HANA_STATE_SECONDARY;;
- none ) # have seen that mode on second side BEFEORE we registered it as replica
+ none ) # have seen that mode on second side BEFEORE we registered it as replica
super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE"
return $HANA_STATE_STANDALONE;;
* )
- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>"
- dump=$( echo $node_status | hexdump -C );
- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>"
- #### SAP-CALL
- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status"
- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+ # TODO: PRIO1: Should we set SFAIL?
+ # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
+ dump=$( echo $node_status | hexdump -C );
+ super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>"
+ #### SAP-CALL
+ node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null )
+ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}')
+ super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status"
+ # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes
esac;
done
super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT"
@@ -464,7 +484,7 @@
#
# function: start_saphostagent
# params: -
-# globals:
+# globals: HOSTEXEC_PATH(r), HOSTEXEC_PROFILE_PATH(r)
#
function start_saphostagent()
{
@@ -478,7 +498,7 @@
#
# function: stop_saphostagent
# params: -
-# globals:
+# globals: HOSTEXEC_PATH(r)
#
function stop_saphostagent()
{
@@ -496,6 +516,8 @@
function check_saphostagent()
{
local rc=1
+ # TODO: PRIO3: should the path been removed like "saphostexec" instead of "/usr/sap/hostctrl/exe/saphostexec"
+ # or should we use ${HOSTEXEC_PATH} instead?
pgrep -f /usr/sap/hostctrl/exe/saphostexec; rc=$?
return $rc
}
@@ -509,15 +531,16 @@
# sht_start : Start the SAP HANA instance
#
function sht_start() {
-
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=$OCF_NOT_RUNNING
local output=""
- local loopcount=0
+ local loopcount=0
- mkdir -p /var/lib/SAPHana
- touch /var/lib/SAPHana/SAPTopologyON
+ # TODO: PRIO3: move the string "$HA_RSCTMP/SAPHana/SAPTopologyON" to a variable
+ # TODO: PRIO3: move the file to the clusters tmp directory?
+ mkdir -p $HA_RSCTMP/SAPHana
+ touch $HA_RSCTMP/SAPHana/SAPTopologyON
if ! check_saphostagent; then
start_saphostagent
fi
@@ -532,16 +555,16 @@
# function: sht_stop - stop a hana instance
# params: -
# globals: OCF_*(r), SAPCONTROL(r), SID(r), InstanceName(r)
-# sht_stop: Stop the SAP instance
+# sht_stop: Stop the SAP HANA Topology Resource
#
function sht_stop() {
super_ocf_log info "FLOW $FUNCNAME ($*)"
local output=""
local rc=0
- rm /var/lib/SAPHana/SAPTopologyON
+ rm $HA_RSCTMP/SAPHana/SAPTopologyON
rc=$OCF_SUCCESS
-
+
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -557,13 +580,13 @@
super_ocf_log info "FLOW $FUNCNAME ($*)"
local rc=0
- if [ -f /var/lib/SAPHana/SAPTopologyON ]; then
+ if [ -f $HA_RSCTMP/SAPHana/SAPTopologyON ]; then
rc=$OCF_SUCCESS
else
rc=$OCF_NOT_RUNNING
fi
- super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -575,37 +598,37 @@
# sht_status: Lightweight check of SAP instance only with OS tools
#
function sht_status() {
- super_ocf_log info "FLOW $FUNCNAME ($*)"
- local rc=0
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=0
- sht_monitor; rc=$?
- return $rc
+ sht_monitor; rc=$?
+ return $rc
}
#
# function: sht_validate - validation of (some) variables/parameters
# params: -
-# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r),
-# sht_validate: Check the symantic of the input parameters
+# globals: OCF_*(r), SID(r), InstanceName(r), InstanceNr(r),
+# sht_validate: Check the symantic of the input parameters
#
function sht_validate() {
- super_ocf_log info "FLOW $FUNCNAME ($*)"
- local rc=$OCF_SUCCESS
- if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ]
- then
- super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!"
- rc=$OCF_ERR_ARGS
- fi
+ super_ocf_log info "FLOW $FUNCNAME ($*)"
+ local rc=$OCF_SUCCESS
+ if [ $(echo "$SID" | grep -c '^[A-Z][A-Z0-9][A-Z0-9]$') -ne 1 ]
+ then
+ super_ocf_log err "ACT: Parsing instance profile name: '$SID' is not a valid SID!"
+ rc=$OCF_ERR_ARGS
+ fi
- if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ]
- then
- super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
- rc=$OCF_ERR_ARGS
- fi
+ if [ $(echo "$InstanceNr" | grep -c '^[0-9][0-9]$') -ne 1 ]
+ then
+ super_ocf_log err "ACT: Parsing instance profile name: '$InstanceNr' is not a valid instance number!"
+ rc=$OCF_ERR_ARGS
+ fi
- super_ocf_log info "FLOW $FUNCNAME rc=$rc"
- return $rc
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc"
+ return $rc
}
#
@@ -661,15 +684,15 @@
if ocf_is_probe; then
super_ocf_log debug "DBG2: PROBE ONLY"
+ sht_monitor; rc=$?
else
super_ocf_log debug "DBG2: REGULAR MONITOR"
if ! check_saphostagent; then
start_saphostagent
fi
- fi
#
# First check, if we are PRIMARY or SECONDARY
- #
+ #
super_ocf_log debug "DBG2: HANA SID $SID"
super_ocf_log debug "DBG2: HANA InstanceName $InstanceName"
super_ocf_log debug "DBG2: HANA InstanceNr $InstanceNr"
@@ -721,8 +744,8 @@
set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]}
fi
case "$hanaPrim" in
- P ) ;;
- S ) # only secondary may propargate its sync status
+ P ) ;;
+ S ) # only secondary may propargate its sync status
case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in
*corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');;
*openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');;
@@ -732,8 +755,10 @@
for n in ${nodelist}; do
set_hana_attribute ${n} "$srmode" ${ATTR_NAME_HANA_SRMODE[@]}
done
- ;;
+ ;;
esac
+ #
+ fi # end ocf_is_NOT_probe
super_ocf_log info "FLOW $FUNCNAME rc=$rc"
return $rc
}
@@ -752,7 +777,7 @@
}
#
-# function: main - main function to operate
+# function: main - main function to operate
# params: ACTION
# globals: OCF_*(r), SID(w), sidadm(w), InstanceName(w), DIR_EXECUTABLE(w), ACTION(w), CLACT(w), ra_rc(rw), $0(r), %ENV(r)
#
@@ -763,7 +788,7 @@
InstanceName=""
InstanceNr=""
DIR_EXECUTABLE=""
-SAPHanaFilter="${OCF_RESKEY_SAPHanaFilter:-ra-act-dec-lpa}"
+SAPHanaFilter="ra-act-dec-lpa"
NODENAME=$(crm_node -n)
if [ $# -ne 1 ]
@@ -785,11 +810,11 @@
exit $OCF_SUCCESS;;
notify) sht_notify
exit $OCF_SUCCESS;;
- admin-setup) admin-setup
- exit $OCF_SUCCESS;;
+ admin-setup) admin-setup
+ exit $OCF_SUCCESS;;
*);;
esac
-sht_init
+sht_init
if ! ocf_is_root
then
@@ -810,7 +835,6 @@
exit $OCF_ERR_ARGS
fi
-
if is_clone
then
CLACT=_clone
@@ -830,12 +854,12 @@
sht_$ACTION$CLACT
ra_rc=$?
;;
- validate-all)
+ validate-all)
sht_validate
ra_rc=$?
;;
- *) # seams to be a unknown request
- sht_methods
+ *) # seams to be a unknown request
+ sht_methods
ra_rc=$OCF_ERR_UNIMPLEMENTED
;;
esac