You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1990 lines
85 KiB
1990 lines
85 KiB
diff -uNr a/heartbeat/SAPHana b/heartbeat/SAPHana |
|
--- a/heartbeat/SAPHana 2016-10-14 10:09:56.479051279 +0200 |
|
+++ b/heartbeat/SAPHana 2016-10-14 10:29:23.990066292 +0200 |
|
@@ -2,8 +2,8 @@ |
|
# |
|
# SAPHana |
|
# |
|
-# Description: Manages two single SAP HANA Instance in System Replication |
|
-# Planned: do also manage scale-up scenarios |
|
+# Description: Manages two SAP HANA Databases in System Replication |
|
+# Planned: do also manage scale-out scenarios |
|
# currently the SAPHana is dependent of the analysis of |
|
# SAPHanaTopology |
|
# For supported scenarios please read the README file provided |
|
@@ -16,7 +16,7 @@ |
|
# Support: linux@sap.com |
|
# License: GNU General Public License (GPL) |
|
# Copyright: (c) 2013,2014 SUSE Linux Products GmbH |
|
-# Copyright: (c) 2015 SUSE Linux GmbH |
|
+# (c) 2015-2016 SUSE Linux GmbH |
|
# |
|
# An example usage: |
|
# See usage() function below for more details... |
|
@@ -29,12 +29,13 @@ |
|
# OCF_RESKEY_INSTANCE_PROFILE (optional, well known directories will be searched by default) |
|
# OCF_RESKEY_PREFER_SITE_TAKEOVER (optional, default is no) |
|
# OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT (optional, time difference needed between two last-primary-tiemstampe (lpt)) |
|
-# OCF_RESKEY_SAPHanaFilter (optional, should only be set if been told by support or for debugging purposes) |
|
+# OCF_RESKEY_SAPHanaFilter (outdated, replaced by cluster property hana_${sid}_glob_filter) |
|
# |
|
# |
|
####################################################################### |
|
# |
|
# Initialization: |
|
+SAPHanaVersion="0.152.17" |
|
timeB=$(date '+%s') |
|
|
|
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} |
|
@@ -43,6 +44,12 @@ |
|
# |
|
####################################################################### |
|
# |
|
+log_attributes=false |
|
+if ocf_is_true "$log_attributes"; then |
|
+ log_attr_file="/var/log/fhATTRIBUTES" |
|
+else |
|
+ log_attr_file="/dev/null" |
|
+fi |
|
|
|
HANA_STATE_PRIMARY=0 |
|
HANA_STATE_SECONDARY=1 |
|
@@ -107,7 +114,7 @@ |
|
cat <<-EOF |
|
usage: $0 ($methods) |
|
|
|
- $0 manages a SAP HANA Instance as an HA resource. |
|
+ $0 manages two SAP HANA databases (scale-up) in system replication. |
|
|
|
The 'start' operation starts the HANA instance or bring the "clone instance" to a WAITING status |
|
The 'stop' operation stops the HANA instance |
|
@@ -145,15 +152,14 @@ |
|
<?xml version="1.0"?> |
|
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> |
|
<resource-agent name="SAPHana"> |
|
-<version>0.151.1</version> |
|
+<version>$SAPHanaVersion</version> |
|
|
|
-<shortdesc lang="en">Manages two SAP HANA instances in system replication (SR).</shortdesc> |
|
+<shortdesc lang="en">Manages two SAP HANA database systems in system replication (SR).</shortdesc> |
|
<longdesc lang="en"> |
|
-The SAPHanaSR resource agent manages two SAP Hana instances (databases) which are configured |
|
-in system replication. This first version is limited to the scale-up scenario. Scale-Out is |
|
-not supported in this version. |
|
+The SAPHanaSR resource agent manages two SAP HANA database systems which are configured |
|
+in system replication. SAPHana supports Scale-Up scenarios. |
|
|
|
-Managing the two SAP HANA instances means that the resource agent controls the start/stop of the |
|
+Managing the two SAP HANA database systems means that the resource agent controls the start/stop of the |
|
instances. In addition the resource agent is able to monitor the SAP HANA databases to check their |
|
availability on landscape host configuration level. For this monitoring the resource agent relies on interfaces |
|
provided by SAP. A third task of the resource agent is to also check the synchronisation status |
|
@@ -205,9 +211,10 @@ |
|
<longdesc lang="en">Should cluster/RA prefer to switchover to slave instance instead of restarting master locally? Default="yes" |
|
no: Do prefer restart locally |
|
yes: Do prefer takever to remote site |
|
+ never: Do never run a sr_takeover (promote) at the secondary side. THIS VALUE IS CURRENTLY NOT SUPPORTED. |
|
</longdesc> |
|
<shortdesc lang="en">Local or site recover preferred?</shortdesc> |
|
- <content type="boolean" default="yes" /> |
|
+ <content type="string" default="yes" /> |
|
</parameter> |
|
<parameter name="AUTOMATED_REGISTER" unique="0" required="0"> |
|
<shortdesc lang="en">Define, if a former primary should automatically be registered.</shortdesc> |
|
@@ -220,7 +227,7 @@ |
|
<shortdesc lang="en">Time difference needed between to primary time stamps, if a dual-primary situation occurs</shortdesc> |
|
<longdesc lang="en">Time difference needed between to primary time stamps, |
|
if a dual-primary situation occurs. If the time difference is |
|
- less than the time gap, then the cluster hold one or both instances in a "WAITING" status. This is to give an admin |
|
+ less than the time gap, then the cluster holds one or both instances in a "WAITING" status. This is to give an admin |
|
a chance to react on a failover. A failed former primary will be registered after the time difference is passed. After |
|
this registration to the new primary all data will be overwritten by the system replication. |
|
</longdesc> |
|
@@ -290,6 +297,45 @@ |
|
local rc=0; tr -d '"'; return $rc |
|
} |
|
|
|
+# function: version: compare two HANA version strings |
|
+function ver_lt() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 0 && return 0 || return 1 |
|
+} |
|
+ |
|
+function ver_le() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 |
|
+} |
|
+ |
|
+function ver_gt() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 2 && return 0 || return 1 |
|
+} |
|
+ |
|
+function ver_ge() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 2 -o $? -eq 1 && return 0 || return 1 |
|
+} |
|
+# |
|
+# function: version: compare two HANA version strings |
|
+# |
|
+function version() { |
|
+ if [ $# -eq 3 ]; then |
|
+ case "$2" in |
|
+ LE | le | "<=" ) ver_le $1 $3;; |
|
+ LT | lt | "<" ) ver_lt $1 $3;; |
|
+ GE | ge | ">=" ) ver_ge $1 $3;; |
|
+ GT | gt | ">" ) ver_gt $1 $3;; |
|
+ * ) return 1; |
|
+ esac |
|
+ elif [ $# -ge 5 ]; then |
|
+ version $1 $2 $3 && shift 2 && version $* |
|
+ else |
|
+ return 1; |
|
+ fi |
|
+} |
|
+ |
|
# |
|
# function: remoteHost2remoteNode - convert a SAP remoteHost to the cluster node name |
|
# params: remoteHost |
|
@@ -372,12 +418,16 @@ |
|
dstr=$(date) |
|
case "$attr_store" in |
|
reboot | forever ) |
|
- echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE |
|
- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? |
|
+ if ocf_is_true "$log_attributes"; then |
|
+ echo "$dstr: SAPHana: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file |
|
+ fi |
|
+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? |
|
;; |
|
props ) |
|
- echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE |
|
- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? |
|
+ if ocf_is_true "$log_attributes"; then |
|
+ echo "$dstr: SAPHana: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file |
|
+ fi |
|
+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? |
|
;; |
|
esac |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
@@ -405,12 +455,16 @@ |
|
dstr=$(date) |
|
case "$attr_store" in |
|
reboot | forever ) |
|
- echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> /var/log/fhATTRIBUTE |
|
- crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>/var/log/fhATTRIBUTE; rc=$? |
|
+ if ocf_is_true "$log_attributes"; then |
|
+ echo "$dstr: SAPHana: crm_attribute -N $attr_node -v $attr_value -n \"$attr_name\" -l $attr_store" >> $log_attr_file |
|
+ fi |
|
+ crm_attribute -N $attr_node -v $attr_value -n "$attr_name" -l $attr_store 2>>$log_attr_file; rc=$? |
|
;; |
|
props ) |
|
- echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> /var/log/fhATTRIBUTE |
|
- crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>/var/log/fhATTRIBUTE; rc=$? |
|
+ if ocf_is_true "$log_attributes"; then |
|
+ echo "$dstr: SAPHana: crm_attribute -v $attr_value -n \"$attr_name\" -t crm_config -s SAPHanaSR" >> $log_attr_file |
|
+ fi |
|
+ crm_attribute -v $attr_value -n "$attr_name" -t crm_config -s SAPHanaSR 2>>$log_attr_file; rc=$? |
|
;; |
|
esac |
|
else |
|
@@ -460,6 +514,10 @@ |
|
# DONE: PRIO2: Only adjust master if value is really different (try to check that) |
|
oldscore=$(${HA_SBIN_DIR}/crm_master -G -q -l reboot) |
|
if [ "$oldscore" != "$score" ]; then |
|
+ dstr=$(date) |
|
+ if ocf_is_true "$log_attributes"; then |
|
+ echo "$dstr: SAPHana: crm_master -v $score -l reboot " >> $log_attr_file |
|
+ fi |
|
super_ocf_log debug "DBG: SET crm master: $score (old: $oldscore)" |
|
${HA_SBIN_DIR}/crm_master -v $score -l reboot; rc=$? |
|
else |
|
@@ -471,9 +529,9 @@ |
|
} |
|
|
|
# |
|
-# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE_PREFERRED_SITE_TAKEOVER) |
|
+# function: scoring_crm_master - score instance due to role ans sync match (table SCORING_TABLE) |
|
# params: NODE_ROLES NODE_SYNC_STATUS |
|
-# globals: SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@], |
|
+# globals: SCORING_TABLE[@], |
|
# |
|
scoring_crm_master() |
|
{ |
|
@@ -482,7 +540,7 @@ |
|
local sync="$2" |
|
local skip=0 |
|
local myScore="" |
|
- for scan in "${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}"; do |
|
+ for scan in "${SCORING_TABLE[@]}"; do |
|
if [ $skip -eq 0 ]; then |
|
read rolePatt syncPatt score <<< $scan |
|
if grep "$rolePatt" <<< "$roles"; then |
|
@@ -494,7 +552,7 @@ |
|
fi |
|
done |
|
super_ocf_log debug "DBG: scoring_crm_master adjust score $myScore" |
|
- # TODO: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 |
|
+ # DONE: PRIO1: DO Not Score, If we did not found our role/sync at this moment - bsc#919925 |
|
if [ -n "$myScore" ]; then |
|
set_crm_master $myScore |
|
fi |
|
@@ -514,28 +572,91 @@ |
|
} |
|
|
|
# |
|
+# function: HANA_CALL |
|
+# params: timeout-in-seconds cmd-line |
|
+# globals: sid(r), SID(r), InstanceName(r) |
|
+# |
|
+function HANA_CALL() |
|
+{ |
|
+ # |
|
+ # TODO: PRIO 5: remove 'su - ${sidadm}' later, when SAP HANA resolved issue with |
|
+ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp) |
|
+ # TODO: PRIO 5: Maybe make "su" optional by a parameter |
|
+ local timeOut=0 |
|
+ local onTimeOut="" |
|
+ local rc=0 |
|
+ local use_su=1 # Default to be changed later (see TODO above) |
|
+ local pre_cmd="" |
|
+ local cmd="" |
|
+ local pre_script="" |
|
+ local output="" |
|
+ while [ $# -gt 0 ]; do |
|
+ case "$1" in |
|
+ --timeout ) timeOut=$2; shift;; |
|
+ --use-su ) use_su=1;; |
|
+ --on-timeout ) onTimeOut="$2"; shift;; |
|
+ --cmd ) shift; cmd="$*"; break;; |
|
+ esac |
|
+ shift |
|
+ done |
|
+ |
|
+ if [ $use_su -eq 1 ]; then |
|
+ pre_cmd="su - ${sid}adm -c" |
|
+ pre_script="true" |
|
+ else |
|
+ # as root user we need the library path to the SAP kernel to be able to call sapcontrol |
|
+ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH |
|
+ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] |
|
+ then |
|
+ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH |
|
+ fi |
|
+ pre_cmd="bash -c" |
|
+ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH" |
|
+ fi |
|
+ case $timeOut in |
|
+ 0 | inf ) |
|
+ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? |
|
+ ;; |
|
+ * ) |
|
+ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? |
|
+ # |
|
+ # on timeout ... |
|
+ # |
|
+ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then |
|
+ local second_output="" |
|
+ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); |
|
+ fi |
|
+ ;; |
|
+ esac |
|
+ echo "$output" |
|
+ return $rc; |
|
+} |
|
+ |
|
+# |
|
# function: saphana_init - initialize variables for the resource agent |
|
# params: InstanceName |
|
-# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), |
|
-# globals: sr_name(w), remoteHost(w), otherNodes(w), rem_SR_name(w) |
|
+# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), SAPVIRHOST(w), PreferSiteTakeover(w), |
|
+# globals: sr_name(w), remoteHost(w), otherNodes(w), remSR_name(w) |
|
# globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_CLONE_STATE(w) |
|
# globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w) |
|
# globals: LPA_DIRECTORY(w), SIDInstanceName(w), remoteNode(w), hdbSrQueryTimeout(w) |
|
+# globals: NODENAME(w), vNAME(w), hdbver(w), |
|
# saphana_init : Define global variables with default values, if optional parameters are not set |
|
# |
|
function saphana_init() { |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=$OCF_SUCCESS |
|
- local vName |
|
local clN |
|
# local site |
|
# two parameter models (for transition only) |
|
# OLD: InstanceName |
|
# NEW: SID InstanceNumber |
|
+ NODENAME=$(crm_node -n) |
|
SID=$OCF_RESKEY_SID |
|
InstanceNr=$OCF_RESKEY_InstanceNumber |
|
SIDInstanceName="${SID}_HDB${InstanceNr}" |
|
InstanceName="HDB${InstanceNr}" |
|
+ export SAPSYSTEMNAME=$SID |
|
super_ocf_log debug "DBG: Used new method to get SID ($SID) and InstanceNr ($InstanceNr)" |
|
sid=$(echo "$SID" | tr [:upper:] [:lower:]) |
|
sidadm="${sid}adm" |
|
@@ -544,15 +665,23 @@ |
|
# DONE: PRIO4: SAPVIRHOST might be different to NODENAME |
|
# DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? Answer: Yes |
|
# try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 |
|
- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8 |
|
- vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ |
|
- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr) |
|
+ # We rely on the following format: SID is word#4, SYSNR is work#6, vHost is word#8 |
|
+ if [ -e /usr/sap/hostctrl/exe/saphostctrl ]; then |
|
+ vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ |
|
+ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null ) |
|
+ super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)" |
|
+ else |
|
+ super_ocf_log error "ERR: SAPHOSTAGENT is not installed at /usr/sap/hostctrl/exe (saphostctrl missing)" |
|
+ fi |
|
if [ -z "$vName" ]; then |
|
# |
|
# if saphostctrl does not know the answer, try to fallback to attribute provided by SAPHanaTopology |
|
# |
|
vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]} "$NODENAME"); |
|
fi |
|
+ if [ -z "$vName" ]; then # last fallback if we are not able to figure out the virtual host name |
|
+ vName="$NODENAME" |
|
+ fi |
|
SAPVIRHOST=${vName} |
|
PreferSiteTakeover="$OCF_RESKEY_PREFER_SITE_TAKEOVER" |
|
AUTOMATED_REGISTER="${OCF_RESKEY_AUTOMATED_REGISTER:-false}" |
|
@@ -571,6 +700,12 @@ |
|
ATTR_NAME_HANA_SRMODE=("hana_${sid}_srmode" "forever") |
|
ATTR_NAME_HANA_VHOST=("hana_${sid}_vhost" "forever") |
|
ATTR_NAME_HANA_STATUS=("hana_${sid}_status" "reboot") |
|
+ ATTR_NAME_HANA_OPERATION_MODE=("hana_${sid}_op_mode" "forever") |
|
+ # |
|
+ # new "central" attributes |
|
+ # |
|
+ ATTR_NAME_HANA_FILTER=("hana_${sid}_glob_filter" "props" "ra-act-dec-lpa") |
|
+ SAPHanaFilter=$(get_hana_attribute "X" ${ATTR_NAME_HANA_FILTER[@]}) |
|
# |
|
# TODO: PRIO4: Table for non-preferred-site-takeover |
|
# |
|
@@ -591,9 +726,7 @@ |
|
) |
|
SCORING_TABLE_PREFERRED_LOCAL_RESTART=( |
|
"[0-9]*:P:[^:]*:master .* 150" |
|
- "[0-9]*:P:[^:]*:slave .* 140" |
|
- "[0-9]*:P:[^:]*:\? .* 0" |
|
- "[0-9]*:P:[^:]*:- .* 0" |
|
+ "[0-9]*:P:[^:]*:.* .* 140" |
|
"[0-9]*:S:[^:]*:master SOK 100" |
|
"[0-9]*:S:[^:]*:master SFAIL -INFINITY" |
|
"[0-9]*:S:[^:]*:slave SOK 10" |
|
@@ -602,6 +735,25 @@ |
|
"[0-9]*:S:[^:]*:- .* 0" |
|
".* .* -1" |
|
) |
|
+ SCORING_TABLE_PREFERRED_NEVER=( |
|
+ "[234]*:P:[^:]*:master .* 150" |
|
+ "[015-9]*:P:[^:]*:master .* 90" |
|
+ "[0-9]*:P:[^:]*:.* .* -INFINITY" |
|
+ "[0-9]*:S:[^:]*:.* .* -INFINITY" |
|
+ ".* .* -INFINITY" |
|
+ ) |
|
+ if ocf_is_true $PreferSiteTakeover; then |
|
+ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_SITE_TAKEOVER[@]}") |
|
+ else |
|
+ case "$PreferSiteTakeover" in |
|
+ never|NEVER|Never ) |
|
+ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_NEVER[@]}") |
|
+ ;; |
|
+ * ) |
|
+ SCORING_TABLE=("${SCORING_TABLE_PREFERRED_LOCAL_RESTART[@]}") |
|
+ ;; |
|
+ esac |
|
+ fi |
|
# |
|
DUPLICATE_PRIMARY_TIMEOUT="${OCF_RESKEY_DUPLICATE_PRIMARY_TIMEOUT:-7200}" |
|
super_ocf_log debug "DBG: DUPLICATE_PRIMARY_TIMEOUT=$DUPLICATE_PRIMARY_TIMEOUT" |
|
@@ -615,7 +767,7 @@ |
|
esac |
|
# |
|
# |
|
- |
|
+ # |
|
remoteHost=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_REMOTEHOST[@]}); |
|
if [ -z "$remoteHost" ]; then |
|
if [ ${#otherNodes[@]} -eq 1 ]; then # we are a 2 node cluster, lets assume the other is the remote-host |
|
@@ -640,7 +792,7 @@ |
|
sr_mode="sync" |
|
fi |
|
if [ -n "$remoteNode" ]; then |
|
- rem_SR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); |
|
+ remSR_name=$(get_hana_attribute ${remoteNode} ${ATTR_NAME_HANA_SITE[@]}); |
|
fi |
|
super_ocf_log debug "DBG: sr_name=$sr_name, remoteHost=$remoteHost, remoteNode=$remoteNode, sr_mode=$sr_mode" |
|
# optional OCF parameters, we try to guess which directories are correct |
|
@@ -671,26 +823,21 @@ |
|
# |
|
SAPSTARTPROFILE="$(ls -1 $DIR_PROFILE/${OCF_RESKEY_INSTANCE_PROFILE:-${SID}_${InstanceName}_*})" |
|
fi |
|
- # as root user we need the library path to the SAP kernel to be able to call sapcontrol |
|
- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH |
|
- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] |
|
- then |
|
- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH |
|
- export LD_LIBRARY_PATH |
|
- fi |
|
PATH=${PATH}:${DIR_EXECUTABLE}; export PATH |
|
+ local ges_ver |
|
+ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') |
|
+ hdbver=${ges_ver%.*.*} |
|
+ # |
|
+ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state |
|
+ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible |
|
+ # |
|
+ hdbState="hdbnsutil -sr_state" |
|
+ hdbMap="hdbnsutil -sr_state" |
|
+ if version "$hdbver" ">=" "1.00.111"; then |
|
+ hdbState="hdbnsutil -sr_stateConfiguration" |
|
+ hdbMap="hdbnsutil -sr_stateHostMapping" |
|
+ fi |
|
super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" |
|
- ############################# |
|
- # TODO: PRIO9: To be able to call landscapeHostConfig.py without su (so as root) |
|
- # TODO: PRIO9: Research for environment script .htacces or something like that |
|
- #export SAPSYSTEMNAME=ZLF |
|
- #export DIR_INSTANCE=/usr/sap/ZLF/HDB02 |
|
- #export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$DIR_INSTANCE/exe:$DIR_INSTANCE/exe/Python/lib |
|
- #export PYTHONPATH=$DIR_INSTANCE/$HOST:$DIR_INSTANCE/exe/python_support:$DIR_INSTANCE/exe |
|
- #export PYTHONHOME=$DIR_INSTANCE/exe/Python |
|
- #export SAP_RETRIEVAL_PATH=$DIR_INSTANCE/$HOST |
|
- #export DIR_EXECUTABLE=$DIR_INSTANCE/exe |
|
- ############################# |
|
return $OCF_SUCCESS |
|
} |
|
|
|
@@ -765,7 +912,11 @@ |
|
# or ownership - they will be recreated by sapstartsrv during next start |
|
rm -f /tmp/.sapstream5${InstanceNr}13 |
|
rm -f /tmp/.sapstream5${InstanceNr}14 |
|
- $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm |
|
+ ( |
|
+ export PATH="$DIR_EXECUTABLE${PATH:+:}$PATH" |
|
+ export LD_LIBRARY_PATH="$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH" |
|
+ $SAPSTARTSRV pf=$SAPSTARTPROFILE -D -u $sidadm |
|
+ ) |
|
# now make sure the daemon has been started and is able to respond |
|
local srvrc=1 |
|
while [ $srvrc -eq 1 -a $(pgrep -f "sapstartsrv.*$runninginst" | wc -l) -gt 0 ] |
|
@@ -809,31 +960,47 @@ |
|
function check_for_primary() { |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=$HANA_STATE_DEFECT |
|
- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) |
|
- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') |
|
- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" |
|
- # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsitil does not answer properly -> lookup in config files? |
|
+ # TODO: PRIO 3: Check beginning from which SPS does SAP support HDBSettings.sh? |
|
+ # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback |
|
+ # TODO: PRIO2: Maybe we need to use a fallback interface when hdbnsutil does not answer properly -> lookup in config files? |
|
# This might also solve some problems when we could not figure-out the ilocal or remote site name |
|
- for i in 1 2 3 4 5 6 7 8 9; do |
|
+ local chkMethod="" |
|
+ for chkMethod in hU hU hU gP; do |
|
+ case "$chkMethod" in |
|
+ gP ) |
|
+ local gpKeys="" |
|
+ gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id}) |
|
+ node_full_status=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }') |
|
+ node_status=$(echo "$node_full_status" | awk -F= '$1=="mode" {print $2}') |
|
+ super_ocf_log info "ACT: Using getParameter.py as fallback - node_status=$node_status" |
|
+ ;; |
|
+ hU | * ) |
|
+ # DONE: PRIO1: Beginning from SAP HANA rev 112.03 -sr_state is no longer supported |
|
+ node_full_status=$(HANA_CALL --timeout 60 --cmd "$hdbState" 2>/dev/null ) |
|
+ node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') |
|
+ super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" |
|
+ ;; |
|
+ esac |
|
case "$node_status" in |
|
primary ) |
|
- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_PRIMARY" |
|
- return $HANA_STATE_PRIMARY;; |
|
+ rc=$HANA_STATE_PRIMARY |
|
+ break;; |
|
syncmem | sync | async ) |
|
- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_SECONDARY" |
|
- return $HANA_STATE_SECONDARY;; |
|
+ rc=$HANA_STATE_SECONDARY |
|
+ break;; |
|
none ) # have seen that mode on second side BEFEORE we registered it as replica |
|
- super_ocf_log info "FLOW: $FUNCNAME rc=HANA_STATE_STANDALONE" |
|
- return $HANA_STATE_STANDALONE;; |
|
+ rc=$HANA_STATE_STANDALONE |
|
+ break;; |
|
* ) |
|
super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: <$node_status>" |
|
dump=$( echo $node_status | hexdump -C ); |
|
super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP <$dump>" |
|
- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) |
|
- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') |
|
+ # TODO: Limit the runtime of hdbnsutil and use getParameter.py as fallback |
|
+ # SAP_CALL |
|
super_ocf_log debug "DEC: check_for_primary: loop=$i: node_status=$node_status" |
|
# TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes |
|
esac; |
|
+ sleep 2 |
|
done |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
@@ -854,12 +1021,18 @@ |
|
{ |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=-1 srRc=0 all_nodes_other_side="" n="" siteParam="" |
|
- if [ -n "$rem_SR_name" ]; then |
|
- siteParam="--site=$rem_SR_name" |
|
+ if [ -n "$remSR_name" ]; then |
|
+ siteParam="--site=$remSR_name" |
|
fi |
|
- FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? |
|
- super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" |
|
- super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$rem_SR_name')-> $srRc" |
|
+ # TODO: Get rid of the su by using a new interface: |
|
+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh systemReplicationStatus.py $siteParam |
|
+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? |
|
+ # TODO: Limit the runtime of systemReplicationStatus.py |
|
+ # SAP_CALL |
|
+ # FULL_SR_STATUS=$(su - $sidadm -c "python $DIR_EXECUTABLE/python_support/systemReplicationStatus.py $siteParam" 2>/dev/null); srRc=$? |
|
+ FULL_SR_STATUS=$(HANA_CALL --timeout 60 --cmd "systemReplicationStatus.py" 2>/dev/null); srRc=$? |
|
+ super_ocf_log info "DEC $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" |
|
+ super_ocf_log info "FLOW $FUNCNAME systemReplicationStatus.py (to site '$remSR_name')-> $srRc" |
|
# |
|
# TODO: PRIO2: Here we might also need to filter additional sites (if multi tier should be supported) |
|
# And is the check for return code capable for chains? |
|
@@ -890,7 +1063,7 @@ |
|
# ok we should be careful and set secondary to SFAIL |
|
super_ocf_log info "FLOW $FUNCNAME SFAIL" |
|
set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} |
|
- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (5) - srRc=$srRc lss=$lss" |
|
+ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (5) - srRc=$srRc lss=$lss" |
|
# TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary |
|
lpa_set_lpt 10 "$remoteNode" |
|
rc=1 |
|
@@ -898,7 +1071,7 @@ |
|
else |
|
super_ocf_log info "FLOW $FUNCNAME SFAIL" |
|
set_hana_attribute "$remoteNode" "SFAIL" ${ATTR_NAME_HANA_SYNC_STATUS[@]} |
|
- super_ocf_log info "ACT site=$sr_name, seting SFAIL for secondary (2) - srRc=$srRc" |
|
+ super_ocf_log info "ACT site=$sr_name, setting SFAIL for secondary (2) - srRc=$srRc" |
|
# TODO: PRIO1 - P004: need to check LSS again to avoid dying primary to block (SFAIL) secondary |
|
lpa_set_lpt 10 "$remoteNode" |
|
rc=1; |
|
@@ -992,14 +1165,28 @@ |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=0 |
|
# |
|
- su - $sidadm -c "python $DIR_EXECUTABLE/python_support/landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? |
|
+ # TODO: Get rid of the su by using a new interface: |
|
+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh landscapeHostConfiguration.py |
|
+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? |
|
+ # DONE: Limit the runtime of landscapeHostConfiguration.py |
|
+ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? |
|
+ if [ $rc -eq 124 ]; then |
|
+ # TODO: PRIO 1: Check, if we should loop here like 'for i in 1 2 3 ...' ? |
|
+ # landscape timeout |
|
+ sleep 20 |
|
+ HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 1>/dev/null 2>/dev/null; rc=$? |
|
+ if [ $rc -eq 124 ]; then |
|
+ # TODO PRIO2: How to handle still hanging lss - current solution is to say "FATAL" |
|
+ rc=0 |
|
+ fi |
|
+ fi |
|
return $rc; |
|
} |
|
|
|
# |
|
# function: register_hana_secondary - register local hana as secondary to the other site |
|
# params: - |
|
-# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r) |
|
+# globals: sidadm(r), remoteHost(r), InstanceNr(r), sr_mode(r), sr_name(r), hdbver(r) |
|
# register_hana_secondary |
|
# |
|
function register_hana_secondary() |
|
@@ -1007,17 +1194,31 @@ |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=2; |
|
local remoteInstance=""; |
|
+ local newParameter=0 |
|
remoteInstance=$InstanceNr |
|
+ |
|
+ |
|
+ if version "$hdbver" ">=" "1.00.110"; then |
|
+ newParameter=1 |
|
+ fi |
|
+ |
|
if ocf_is_true ${AUTOMATED_REGISTER}; then |
|
- # |
|
- # |
|
- # |
|
- # |
|
- # |
|
- super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" |
|
- # |
|
- # |
|
- su - $sidadm -c "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? |
|
+ # TODO: Get rid of the su by using a new interface: |
|
+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_register ... |
|
+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? |
|
+ # TODO: Limit the runtime of hdbnsutil -sr_register ???? |
|
+ if [ $newParameter -eq 1 ]; then |
|
+ local hanaOM="" |
|
+ hanaOM=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_OPERATION_MODE[@]}) |
|
+ if [ -n "$hanaOM" ]; then |
|
+ hanaOM="--operationMode=$hanaOM" |
|
+ fi |
|
+ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name" |
|
+ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --replicationMode=$sr_mode $hanaOM --name=$sr_name"; rc=$? |
|
+ else |
|
+ super_ocf_log info "ACT: REGISTER: hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name" |
|
+ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_register --remoteHost=$remoteHost --remoteInstance=$remoteInstance --mode=$sr_mode --name=$sr_name"; rc=$? |
|
+ fi |
|
# backup_global_and_nameserver |
|
else |
|
super_ocf_log info "ACT: SAPHANA DROP REGISTER because AUTOMATED_REGISTER is set to FALSE" |
|
@@ -1051,7 +1252,7 @@ |
|
check_sapstartsrv |
|
rc=$? |
|
# |
|
- # TODO: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? |
|
+ # DONE: ASK: PRIO5: For SCALE-OUT - do we need to use an other call like StartSystem? Or better to use the HDB command? |
|
# |
|
if [ $rc -eq $OCF_SUCCESS ]; then |
|
output=$($SAPCONTROL -nr $InstanceNr -function Start) |
|
@@ -1169,7 +1370,7 @@ |
|
0 ) # LPA says start-up |
|
lpa_advice="start" |
|
# TODO: PRIO1: We need to do a special handling for remote being a 234-Secondary in SR Status SOK |
|
- # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) |
|
+ # if ( remote_role like [234]:S ) && ( remote_sync_status is SOK|PRIM ) && ( PreferSiteTakeover ) |
|
# then lpa_advice="wait" |
|
remoteRole=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_ROLES[@]}) |
|
remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) |
|
@@ -1193,17 +1394,20 @@ |
|
1) # LPA says register! |
|
lpa_advice="register" |
|
;; |
|
- 2) # LPA says wait for second LPT |
|
+ 2) # LPA says wait for older LPA to expire |
|
+ lpa_advice="wait" |
|
+ ;; |
|
+ 3) # LPA says to wait for remote LPA to be reported/announced |
|
lpa_advice="wait" |
|
;; |
|
- 3 | 4 ) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss |
|
+ 4) # LPA says something is completely wrong - FAIL resource # TODO: PRIO1: RC3 for waiting remote side to report lss |
|
lpa_advice="fail" |
|
;; |
|
- * ) # LPA failed with an unkonown status - FAIL resource |
|
+ *) # LPA failed with an unknown status - FAIL resource |
|
lpa_advice="fail" |
|
;; |
|
esac |
|
- |
|
+ |
|
# DONE: PRIO2: Do we need to differ 0 and 1 here? While 0 is a fatal SAP error, 1 for down/error |
|
if [ $lss -eq 0 ]; then |
|
super_ocf_log err "ACT: get_hana_landscape_status reports FATAL" |
|
@@ -1218,7 +1422,7 @@ |
|
2 | 3 | 4 ) # as landcape says we are up - just set the scores and return code |
|
super_ocf_log info "LPA: landcape: UP, LPA: start ==> keep running" |
|
LPTloc=$(date '+%s') |
|
- lpa_set_lpt $LPTloc |
|
+ lpa_set_lpt $LPTloc $NODENAME |
|
rc=$OCF_SUCCESS |
|
;; |
|
1 ) # landcape says we are down, lets start and adjust scores and return code |
|
@@ -1226,7 +1430,7 @@ |
|
saphana_start |
|
rc=$? |
|
LPTloc=$(date '+%s') |
|
- lpa_set_lpt $LPTloc |
|
+ lpa_set_lpt $LPTloc $NODENAME |
|
;; |
|
esac |
|
scoring_crm_master "$my_role" "$my_sync" |
|
@@ -1250,11 +1454,11 @@ |
|
if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then |
|
super_ocf_log info "ACT: Register successful" |
|
lpa_push_lpt 10 |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
set_crm_master 0 |
|
saphana_start_secondary |
|
rc=$? |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
else |
|
super_ocf_log err "ACT: Register failed" |
|
rc=$OCF_NOT_RUNNING |
|
@@ -1279,11 +1483,19 @@ |
|
rc=$OCF_ERR_GENERIC |
|
;; |
|
1 ) # we are down, so we should wait --> followup in next monitor |
|
- super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" |
|
- # TODO: PRIO3: Check, if WAITING is correct here |
|
- set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} |
|
- set_crm_master -9000 |
|
- rc=$OCF_SUCCESS |
|
+ # DONE: PRIO3: Check, if WAITING is correct here |
|
+ if ocf_is_true "$AUTOMATED_REGISTER" ; then |
|
+ super_ocf_log info "LPA: landcape: DOWN, LPA: wait ==> keep waiting" |
|
+ super_ocf_log info "RA: landcape: DOWN, LPA: wait ==> keep waiting" |
|
+ set_hana_attribute ${NODENAME} "WAITING4LPA" ${ATTR_NAME_HANA_CLONE_STATE[@]} |
|
+ set_crm_master -9000 |
|
+ rc=$OCF_SUCCESS |
|
+ else |
|
+ super_ocf_log warning "LPA: OLD primary needs manual registration (AUTOMATED_REGISTER='false')" |
|
+ set_hana_attribute ${NODENAME} "WAITING4REG" ${ATTR_NAME_HANA_CLONE_STATE[@]} |
|
+ set_crm_master -9000 |
|
+ rc=$OCF_NOT_RUNNING |
|
+ fi |
|
;; |
|
esac |
|
;; |
|
@@ -1309,22 +1521,24 @@ |
|
local ch ch_role |
|
# |
|
# get actual list of cluster members |
|
- # |
|
+ # |
|
if [ -n "$otherNodes" ]; then |
|
for ch in ${otherNodes[@]}; do |
|
if [ $rc -eq 1 ]; then |
|
ch_role=$(get_hana_attribute ${ch} ${ATTR_NAME_HANA_ROLES[@]}) |
|
-# TODO: PRIO3: check if [0-9], [234] or [34] is correct |
|
-# TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? |
|
-# grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 |
|
-# grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 |
|
-# Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master |
|
- awk -F: 'BEGIN { rc=1 } |
|
- $1 ~ "[34]" && $2 ="P" && $4="master" { rc=0 } |
|
- END { exit rc }' <<< $ch_role ; rc=$? |
|
+ # TODO: PRIO3: check if [0-9], [234] or [34] is correct |
|
+ # TODO: PRIO4: Do we need different checks like "any-primary-master" or "running-primary-master" ? |
|
+ # grep '[0-9]*:P:[^:]*:master:' <<< $ch_role && rc=0 |
|
+ # grep '[34]:P:[^:]*:master:' <<< $ch_role && rc=0 |
|
+ # Match "Running+Available Primary" Master -> Match field 1: 3/4, 2: P, 4: master |
|
+ super_ocf_log debug "DBG: check_for_primary_master (3) ch_role=$ch_role" |
|
+ awk -F: 'BEGIN { rc=1 } |
|
+ $1 ~ "[34]" && $2 == "P" && $4 == "master" { rc=0 } |
|
+ END { exit rc }' <<< $ch_role ; rc=$? |
|
+ super_ocf_log debug "DBG: check_for_primary_master (4) rc=$rc" |
|
fi |
|
done |
|
- fi |
|
+ fi |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
} |
|
@@ -1378,7 +1592,7 @@ |
|
####### LPA - begin |
|
# |
|
lpa_push_lpt 10 |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
# |
|
####### LPA - end |
|
# |
|
@@ -1404,7 +1618,7 @@ |
|
rc=$OCF_SUCCESS |
|
fi |
|
else |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
fi |
|
else |
|
super_ocf_log info "ACT: wait_for_primary_master ==> WAITING" |
|
@@ -1454,7 +1668,7 @@ |
|
then |
|
if [ $STATE -eq $OCF_NOT_RUNNING ] |
|
then |
|
- [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE is not running with status $COLOR !" |
|
+ [ "$MONLOG" != "NOLOG" ] && ocf_log err "SAP instance service $SERVICE status color is $COLOR !" |
|
rc=$STATE |
|
fi |
|
count=1 |
|
@@ -1511,13 +1725,17 @@ |
|
local crm_rc=1 |
|
local lpt=$1 |
|
local clpt=-1 |
|
- local node=${2:-${NODENAME}} |
|
+ local node=$2 |
|
set_hana_attribute ${node} "$lpt" ${LPA_ATTR[@]}; crm_rc=$? |
|
- clpt=$(lpa_get_lpt $NODENAME) |
|
- if [ "$lpt" != "$clpt" ]; then |
|
- rc=2 |
|
+ if [ -n "$node" ]; then |
|
+ clpt=$(lpa_get_lpt $NODENAME) |
|
+ if [ "$lpt" != "$clpt" ]; then |
|
+ rc=2 |
|
+ else |
|
+ rc=0 |
|
+ fi |
|
else |
|
- rc=0 |
|
+ super_ocf_log info "DEC: lpa_set_lpt ignore to change value for empty node name" |
|
fi |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
@@ -1608,7 +1826,7 @@ |
|
else |
|
rc=2 |
|
fi |
|
- lpa_set_lpt $LPTloc |
|
+ lpa_set_lpt $LPTloc $NODENAME |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
} |
|
@@ -1621,9 +1839,10 @@ |
|
# |
|
# Returncodes: |
|
# 0: start |
|
-# 1: register than start |
|
-# 2: wait4gab |
|
-# 3: wait4other |
|
+# 1: register (then start) |
|
+# 2: wait4gab (WAIT4LPA - Older LPA needs to expire) |
|
+# 3: wait4other (WAIT4LPA - Remote LPA needs to be announced) |
|
+# 4: lpa internal error |
|
# |
|
# Initializing (if NO local LPT-file): |
|
# SECONDARY sets to 10 |
|
@@ -1648,7 +1867,7 @@ |
|
# |
|
function lpa_check_lpt_status() { |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
- local rc=0 |
|
+ local rc=4 |
|
local LPTloc=-1 |
|
local LPTrem=-1 |
|
local LPTMark=1000 |
|
@@ -1666,16 +1885,16 @@ |
|
if [ -z "$LPTloc" -o "$LPTloc" -eq -1 -o "$lparc" -ne 0 ]; then |
|
# last option - try to initialize as PRIMARY |
|
lpa_push_lpt 20 |
|
- lpa_set_lpt 20 |
|
+ lpa_set_lpt 20 $NODENAME |
|
LPTloc=20 # DEFAULT |
|
fi |
|
fi |
|
- # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt |
|
+ # TODO PRIO1: REMOVE remoteNode dependency - lpa_get_lpt |
|
LPTrem=$(lpa_get_lpt $remoteNode); lparc=$? |
|
if [ $lparc -ne 0 ]; then |
|
# LPT of the other node could not be evaluated - LPA says WAIT |
|
super_ocf_log debug "DBG: LPA: LPTloc=$LPTloc, LPTrem undefined ==> WAIT" |
|
- rc=2 |
|
+ rc=3 |
|
else |
|
super_ocf_log debug "DBG: LPA: LPTloc ($LPTloc) LPTrem ($LPTrem) delta ($delta)" |
|
if [ $LPTloc -lt $LPTMark -a $LPTrem -lt $LPTMark ]; then |
|
@@ -1683,11 +1902,11 @@ |
|
else |
|
delta=$DUPLICATE_PRIMARY_TIMEOUT # at least one of the lpts is a real timestamp so include delta-gap |
|
fi |
|
- if (( delta < LPTloc - LPTrem )); then |
|
+ if (( delta < LPTloc - LPTrem )); then |
|
# We are the winner - LPA says STARTUP |
|
super_ocf_log debug "DBG: LPA: LPTloc wins $LPTloc > $LPTrem + $delta ==> START" |
|
rc=0 |
|
- elif (( delta < LPTrem - LPTloc )); then |
|
+ elif (( delta < LPTrem - LPTloc )); then |
|
if ocf_is_true "$AUTOMATED_REGISTER" ; then |
|
# The other one has won - LPA says REGISTER |
|
super_ocf_log debug "DBG: LPA: LPTrem wins $LPTrem > $LPTloc + $delta ==> REGISTER" |
|
@@ -1697,12 +1916,12 @@ |
|
rc=2 |
|
fi |
|
|
|
- else |
|
+ else |
|
super_ocf_log debug "DBG: LPA: Difference between LPTloc and LPTrem is less than delta ($delta) ==> WAIT" |
|
# TODO: PRIO3: ADD STALEMATE-HANDLING HERE; currently admin should set one of the lpa to 20 |
|
rc=2 |
|
- fi |
|
- fi |
|
+ fi |
|
+ fi |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
} |
|
@@ -1716,6 +1935,7 @@ |
|
{ |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=0 |
|
+ # always true for scale-up |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
} |
|
@@ -1728,23 +1948,15 @@ |
|
# |
|
function saphana_start_clone() { |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
- local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING |
|
+ local primary_status sync_attr score_master rc=$OCF_NOT_RUNNING |
|
local sqlrc; |
|
- local chkusr; |
|
- # TODO: PRIO4: remove check_secstore_users later |
|
- secUser=$(check_secstore_users SAPHANA${SID}SR SLEHALOC RHELHALOC) ; chkusr=$? |
|
- if [ $chkusr -ne 0 ]; then |
|
- super_ocf_log err "ACT: Secure store users are missing (see best practice manual how to setup the users)" |
|
- rc=$OCF_ERR_CONFIGURED |
|
+ set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} |
|
+ check_for_primary; primary_status=$? |
|
+ if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then |
|
+ saphana_start_primary; rc=$? |
|
else |
|
- set_hana_attribute ${NODENAME} "DEMOTED" ${ATTR_NAME_HANA_CLONE_STATE[@]} |
|
- check_for_primary; primary_status=$? |
|
- if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then |
|
- saphana_start_primary; rc=$? |
|
- else |
|
- lpa_set_lpt 10 |
|
- saphana_start_secondary; rc=$? |
|
- fi |
|
+ lpa_set_lpt 10 $NODENAME |
|
+ saphana_start_secondary; rc=$? |
|
fi |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
return $rc |
|
@@ -1761,9 +1973,10 @@ |
|
local rc=0 |
|
local primary_status="x" |
|
set_hana_attribute ${NODENAME} "UNDEFINED" ${ATTR_NAME_HANA_CLONE_STATE[@]} |
|
+ super_ocf_log debug "DBG: SET UNDEFINED" |
|
check_for_primary; primary_status=$? |
|
if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
fi |
|
saphana_stop; rc=$? |
|
return $rc |
|
@@ -1813,26 +2026,42 @@ |
|
# seems admin already decided that for us? -> we are running - set DEMOTED |
|
promoted=0; |
|
LPTloc=$(date '+%s') |
|
- lpa_set_lpt $LPTloc |
|
+ lpa_set_lpt $LPTloc $NODENAME |
|
fi |
|
lpa_check_lpt_status; lparc=$? |
|
- # TODO: PRIO1: Need to differ lpa_check_lpt_status return codes |
|
- if [ $lparc -lt 2 ]; then |
|
- # lpa - no need to wait any longer - lets try a new start |
|
- saphana_start_clone |
|
- rc=$? |
|
- super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
- return $rc |
|
- else |
|
- lpa_init_lpt $HANA_STATE_PRIMARY |
|
- # still waiting for second site to report lpa-lpt |
|
- if ocf_is_true "$AUTOMATED_REGISTER" ; then |
|
- super_ocf_log info "LPA: Still waiting for remote site to report LPA status" |
|
- else |
|
- super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" |
|
- fi |
|
- return $OCF_SUCCESS |
|
- fi |
|
+ # DONE: PRIO1: Need to differ lpa_check_lpt_status return codes |
|
+ case "$lparc" in |
|
+ 0 | 1 ) |
|
+ # lpa - no need to wait any longer - lets try a new start |
|
+ saphana_start_clone |
|
+ rc=$? |
|
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
+ return $rc |
|
+ ;; |
|
+ 2 ) |
|
+ lpa_init_lpt $HANA_STATE_PRIMARY |
|
+ # still waiting for second site to expire |
|
+ if ocf_is_true "$AUTOMATED_REGISTER" ; then |
|
+ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" |
|
+ else |
|
+ super_ocf_log info "LPA: Dual primary detected and AUTOMATED_REGISTER='false' ==> WAITING" |
|
+ super_ocf_log info "LPA: You need to manually sr_register the older primary" |
|
+ fi |
|
+ return $OCF_SUCCESS |
|
+ ;; |
|
+ 3 ) |
|
+ lpa_init_lpt $HANA_STATE_PRIMARY |
|
+ # still waiting for second site to report lpa-lpt |
|
+ super_ocf_log info "LPA: Still waiting for remote site to report LPA status" |
|
+ return $OCF_SUCCESS |
|
+ ;; |
|
+ 4 ) |
|
+ # lpa internal error |
|
+ # TODO PRIO3: Implement special handling for this issue - should we fail the resource? 
|
+ super_ocf_log info "LPA: LPA reports an internal error" |
|
+ return $OCF_SUCCESS |
|
+ ;; |
|
+ esac |
|
promoted=0; |
|
;; |
|
UNDEFINED ) |
|
@@ -1848,7 +2077,7 @@ |
|
;; |
|
esac |
|
fi |
|
- get_hana_landscape_status; lss=$? |
|
+ get_hana_landscape_status; lss=$? |
|
super_ocf_log debug "DBG: saphana_monitor_clone: get_hana_landscape_status=$lss" |
|
case "$lss" in |
|
0 ) # FATAL or ERROR |
|
@@ -1876,19 +2105,20 @@ |
|
# |
|
# TODO PRIO1: REMOVE remoteNode dependency - get_sync_status |
|
remoteSync=$(get_hana_attribute $remoteNode ${ATTR_NAME_HANA_SYNC_STATUS[@]}) |
|
+ # TODO HANDLING OF "NEVER" |
|
case "$remoteSync" in |
|
SOK | PRIM ) |
|
super_ocf_log info "DEC: PreferSiteTakeover selected so decrease promotion score here (and reset lpa)" |
|
set_crm_master 5 |
|
if check_for_primary_master; then |
|
- lpa_set_lpt 20 |
|
+ lpa_set_lpt 20 $NODENAME |
|
fi |
|
;; |
|
SFAIL ) |
|
- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" |
|
+ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync (SFAIL) ==> local restart preferred" |
|
;; |
|
* ) |
|
- super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" |
|
+ super_ocf_log info "DEC: PreferSiteTakeover selected BUT remoteHost is not in sync ($remoteSync) ==> local restart preferred" |
|
;; |
|
esac |
|
else |
|
@@ -1916,7 +2146,7 @@ |
|
rc=$OCF_SUCCESS |
|
else |
|
LPTloc=$(date '+%s') |
|
- lpa_set_lpt $LPTloc |
|
+ lpa_set_lpt $LPTloc $NODENAME |
|
lpa_push_lpt $LPTloc |
|
if [ "$promoted" -eq 1 ]; then |
|
set_hana_attribute "$NODENAME" "PRIM" ${ATTR_NAME_HANA_SYNC_STATUS[@]} |
|
@@ -1931,12 +2161,14 @@ |
|
fi |
|
my_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) |
|
my_role=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_ROLES[@]}) |
|
- case "$my_role" in |
|
+ case "$my_role" in |
|
[12]:P:*:master:* ) # primary is down or may not anser hdbsql query so drop analyze_hana_sync_status |
|
;; |
|
[34]:P:*:*:* ) # primary is up and should now be able to anser hdbsql query |
|
if [ -f $DIR_EXECUTABLE/python_support/systemReplicationStatus.py ]; then |
|
- analyze_hana_sync_statusSRS |
|
+ if [ "$promote_attr" = "PROMOTED" ]; then |
|
+ analyze_hana_sync_statusSRS |
|
+ fi |
|
else |
|
analyze_hana_sync_statusSQL |
|
fi |
|
@@ -1949,8 +2181,8 @@ |
|
[234]:P:* ) # dual primary, but other instance marked as PROMOTED by the cluster |
|
lpa_check_lpt_status; again_lpa_rc=$? |
|
if [ $again_lpa_rc -eq 2 ]; then |
|
- super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" |
|
- lpa_set_lpt 10 |
|
+ super_ocf_log info "DEC: Dual primary detected, other instance is PROMOTED and lpa stalemate ==> local restart" |
|
+ lpa_set_lpt 10 $NODENAME |
|
lpa_push_lpt 10 |
|
rc=$OCF_NOT_RUNNING |
|
fi |
|
@@ -1993,7 +2225,7 @@ |
|
# OK, we are running as HANA SECONDARY |
|
# |
|
if ! lpa_get_lpt ${NODENAME}; then |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
lpa_push_lpt 10 |
|
fi |
|
promote_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_CLONE_STATE[@]}) |
|
@@ -2042,17 +2274,25 @@ |
|
0 ) # FATAL |
|
# DONE: PRIO1: Maybe we need to differ between 0 and 1. While 0 is a fatal sap error, 1 is down/error |
|
# TODO: PRIO3: is OCF_ERR_GENERIC best option? |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
rc=$OCF_ERR_GENERIC |
|
;; |
|
1 ) # ERROR |
|
- lpa_set_lpt 10 |
|
+ lpa_set_lpt 10 $NODENAME |
|
rc=$OCF_NOT_RUNNING |
|
;; |
|
2 | 3 | 4 ) # WARN INFO OK |
|
rc=$OCF_SUCCESS |
|
- lpa_set_lpt 30 |
|
+ lpa_set_lpt 30 $NODENAME |
|
sync_attr=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) |
|
+ local hanaOM="" |
|
+ local hanaOut1="" |
|
+ # TODO: PRIO 3: check, if using getParameter.py is the best option to analyze the set operationMode |
|
+ # DONE: PRIO 3: Should we default to logreplay for SAP HANA >= SPS11 ? |
|
+ hanaOut1=$(HANA_CALL --timeout 10 --use-su --cmd "getParameter.py --key=global.ini/system_replication/operation_mode --sapcontrol=1") |
|
+ hanaFilter1=$(echo "$hanaOut1" | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }') |
|
+ hanaOM=$(echo "$hanaFilter1" | awk -F= '$1=="operation_mode" {print $2}') |
|
+ set_hana_attribute ${NODENAME} "$hanaOM" ${ATTR_NAME_HANA_OPERATION_MODE[@]} |
|
super_ocf_log debug "DBG: sync_attr=$sync_attr" |
|
case "$sync_attr" in |
|
"SOK" ) # This is a possible node to promote, when primary is missing |
|
@@ -2112,7 +2352,7 @@ |
|
fi |
|
# |
|
# First check, if we are PRIMARY or SECONDARY |
|
- # |
|
+ # |
|
check_for_primary; primary_status=$? |
|
if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then |
|
# FIX: bsc#919925 Leaving Node Maintenance stops HANA Resource Agent |
|
@@ -2145,7 +2385,7 @@ |
|
# |
|
# function: saphana_promote_clone - promote a hana clone |
|
# params: - |
|
-# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), |
|
+# globals: OCF_*(r), NODENAME(r), HANA_STATE_*, SID(r), InstanceName(r), |
|
# saphana_promote_clone: |
|
# In a Master/Slave configuration get Master being the primary OR by running hana takeover |
|
# |
|
@@ -2169,7 +2409,7 @@ |
|
else |
|
if [ $primary_status -eq $HANA_STATE_SECONDARY ]; then |
|
# |
|
- # we are SECONDARY/SLAVE and need to takepover ... |
|
+ # we are SECONDARY/SLAVE and need to takeover ... promote on the replica (secondary) side... |
|
# promote on the replica side... |
|
# |
|
hana_sync=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_SYNC_STATUS[@]}) |
|
@@ -2178,9 +2418,14 @@ |
|
super_ocf_log info "ACT: !!!!!!! Promote REPLICA $SID-$InstanceName to be primary. !!!!!!" |
|
LPTloc=$(date '+%s') |
|
# lpa_set_lpt 20 $remoteNode |
|
- lpa_set_lpt $LPTloc |
|
+ lpa_set_lpt $LPTloc $NODENAME |
|
lpa_push_lpt $LPTloc |
|
- su - $sidadm -c "hdbnsutil -sr_takeover" |
|
+ # TODO: Get rid of the su by using a new interface: |
|
+ # SAPSYSTEMNAME=SLE /usr/sap/SLE/HDB00/HDBSettings.sh hdbnsutil -sr_takeover ... |
|
+ # TODO: Check beginning from which SPS does SAP support HDBSettings.sh? |
|
+ # TODO: Limit the runtime of hdbnsutil -sr_takeover ???? |
|
+ # SAP_CALL |
|
+ HANA_CALL --timeout inf --use-su --cmd "hdbnsutil -sr_takeover" |
|
# |
|
# now gain check, if we are primary NOW |
|
# |
|
@@ -2248,7 +2493,6 @@ |
|
SAPSTARTPROFILE="" |
|
SAPHanaFilter="ra-act-dec-lpa" |
|
|
|
-NODENAME=$(crm_node -n) |
|
|
|
|
|
if [ $# -ne 1 ] |
|
@@ -2306,8 +2550,7 @@ |
|
fi |
|
|
|
# What kind of method was invoked? |
|
-THE_VERSION=$(saphana_meta_data | grep '<version') |
|
-super_ocf_log info "RA ==== begin action $ACTION$CLACT ($THE_VERSION) ====" |
|
+super_ocf_log info "RA ==== begin action $ACTION$CLACT ($SAPHanaVersion) ====" |
|
ra_rc=$OCF_ERR_UNIMPLEMENTED |
|
case "$ACTION" in |
|
start|stop|monitor|promote|demote) # Standard controling actions |
|
@@ -2329,6 +2572,6 @@ |
|
esac |
|
timeE=$(date '+%s') |
|
(( timeR = timeE - timeB )) |
|
-#super_ocf_log info "RA ==== SAPHanaFilter=$SAPHanaFilter" |
|
-super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($THE_VERSION) (${timeR}s)====" |
|
+super_ocf_log debug "DBG: ==== SAPHanaFilter=$SAPHanaFilter" |
|
+super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaVersion) (${timeR}s)====" |
|
exit ${ra_rc} |
|
diff -uNr a/heartbeat/SAPHanaTopology b/heartbeat/SAPHanaTopology |
|
--- a/heartbeat/SAPHanaTopology 2016-10-14 10:09:56.480051268 +0200 |
|
+++ b/heartbeat/SAPHanaTopology 2016-10-14 10:29:45.384831725 +0200 |
|
@@ -14,7 +14,7 @@ |
|
# Support: linux@sap.com |
|
# License: GNU General Public License (GPL) |
|
# Copyright: (c) 2014 SUSE Linux Products GmbH |
|
-# (c) 2015 SUSE Linux GmbH |
|
+# (c) 2015-2016 SUSE Linux GmbH |
|
# |
|
# An example usage: |
|
# See usage() function below for more details... |
|
@@ -23,17 +23,25 @@ |
|
# OCF_RESKEY_SID (LNX, NDB, SLE) |
|
# OCF_RESKEY_InstanceNumber (00..99) |
|
# OCF_RESKEY_DIR_EXECUTABLE (optional, well known directories will be searched by default) |
|
-# OCF_RESKEY_SAPHanaFilter |
|
+# OCF_RESKEY_SAPHanaFilter (outdated, replaced by cluster property hana_${sid}_glob_filter) |
|
# |
|
####################################################################### |
|
# |
|
# Initialization: |
|
+SAPHanaVersion="0.152.17" |
|
timeB=$(date '+%s') |
|
|
|
: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} |
|
. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs |
|
|
|
####################################################################### |
|
+# |
|
+log_attributes=false |
|
+if ocf_is_true "$log_attributes"; then |
|
+ log_attr_file="/var/log/fhATTRIBUTES" |
|
+else |
|
+ log_attr_file="/dev/null" |
|
+fi |
|
|
|
HANA_STATE_PRIMARY=0 |
|
HANA_STATE_SECONDARY=1 |
|
@@ -125,7 +133,7 @@ |
|
<?xml version="1.0"?> |
|
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> |
|
<resource-agent name="SAPHanaTopology"> |
|
- <version>0.151.1</version> |
|
+ <version>$SAPHanaVersion</version> |
|
<shortdesc lang="en">Analyzes SAP HANA System Replication Topology.</shortdesc> |
|
<longdesc lang="en">This RA analyzes the SAP HANA topology and "sends" all findings via the node status attributes to |
|
all nodes in the cluster. These attributes are taken by the SAPHana RA to control the SAP Hana Databases. |
|
@@ -207,12 +215,12 @@ |
|
dstr=$(date) |
|
case "$attr_store" in |
|
reboot | forever ) |
|
- echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> /var/log/fhATTRIBUTE |
|
- crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? |
|
+ echo "$dstr: SAPHanaTopology: crm_attribute -N ${attr_node} -G -n \"$attr_name\" -l $attr_store -q" >> $log_attr_file |
|
+ crm_attribute -N ${attr_node} -G -n "$attr_name" -l $attr_store -q -d "$attr_default" 2>>$log_attr_file; rc=$? |
|
;; |
|
props ) |
|
- echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> /var/log/fhATTRIBUTE |
|
- crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>/var/log/fhATTRIBUTE; rc=$? |
|
+ echo "$dstr: SAPHanaTopology: crm_attribute -G -n \"$attr_name\" -t crm_config -q" >> $log_attr_file |
|
+ crm_attribute -G -n "$attr_name" -t crm_config -q -d "$attr_default" 2>>$log_attr_file; rc=$? |
|
;; |
|
esac |
|
super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
@@ -282,6 +290,53 @@ |
|
} |
|
|
|
# |
|
+# function: dequote - filter: remove quotes (") from stdin |
|
+# params: - |
|
+# globals: - |
|
+function dequote() |
|
+{ |
|
+ local rc=0; tr -d '"'; return $rc |
|
+} |
|
+ |
|
+# function: version: compare two HANA version strings 
|
+function ver_lt() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 0 && return 0 || return 1 |
|
+} |
|
+ |
|
+function ver_le() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 0 -o $? -eq 1 && return 0 || return 1 |
|
+} |
|
+ |
|
+function ver_gt() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 2 && return 0 || return 1 |
|
+} |
|
+ |
|
+function ver_ge() { |
|
+ ocf_version_cmp $1 $2 |
|
+ test $? -eq 2 -o $? -eq 1 && return 0 || return 1 |
|
+} |
|
+# |
|
+# function: version: compare two HANA version strings 
|
+# |
|
+function version() { |
|
+ if [ $# -eq 3 ]; then |
|
+ case "$2" in |
|
+ LE | le | "<=" ) ver_le $1 $3;; |
|
+ LT | lt | "<" ) ver_lt $1 $3;; |
|
+ GE | ge | ">=" ) ver_ge $1 $3;; |
|
+ GT | gt | ">" ) ver_gt $1 $3;; |
|
+ * ) return 1; |
|
+ esac |
|
+ elif [ $# -ge 5 ]; then |
|
+ version $1 $2 $3 && shift 2 && version $* |
|
+ else |
|
+ return 1; |
|
+ fi |
|
+} |
|
+# |
|
# function: is_clone - report, if resource is configured as a clone (also master/slave) |
|
# params: - |
|
# globals: OCF_*(r) |
|
@@ -314,12 +369,74 @@ |
|
} |
|
|
|
# |
|
+# function: HANA_CALL |
|
+# params: timeout-in-seconds cmd-line |
|
+# globals: sid(r), SID(r), InstanceName(r) |
|
+# |
|
+function HANA_CALL() |
|
+{ |
|
+ # |
|
+ # TODO: PRIO 5: remove 'su - ${sidadm}' later, when SAP HANA resolved issue with 
|
+ # root-user-called hdbnsutil -sr_state (which creates root-owned shared memory file in /var/lib/hdb/SID/shmgrp) |
|
+ # TODO: PRIO 5: Maybe make "su" optional by a parameter |
|
+ local timeOut=0 |
|
+ local onTimeOut="" |
|
+ local rc=0 |
|
+ local use_su=1 # Default to be changed later (see TODO above) |
|
+ local pre_cmd="" |
|
+ local cmd="" |
|
+ local pre_script="" |
|
+ local output="" |
|
+ while [ $# -gt 0 ]; do |
|
+ case "$1" in |
|
+ --timeout ) timeOut=$2; shift;; |
|
+ --use-su ) use_su=1;; |
|
+ --on-timeout ) onTimeOut="$2"; shift;; |
|
+ --cmd ) shift; cmd="$*"; break;; |
|
+ esac |
|
+ shift |
|
+ done |
|
+ |
|
+ if [ $use_su -eq 1 ]; then |
|
+ pre_cmd="su - ${sid}adm -c" |
|
+ pre_script="true" |
|
+ else |
|
+ # as root user we need the library path to the SAP kernel to be able to call sapcontrol |
|
+ # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH |
|
+ if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] |
|
+ then |
|
+ MY_LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH |
|
+ fi |
|
+ pre_cmd="bash -c" |
|
+ pre_script="LD_LIBRARY_PATH=$MY_LD_LIBRARY_PATH; export LD_LIBRARY_PATH" |
|
+ fi |
|
+ case $timeOut in 
|
+ 0 | inf ) |
|
+ output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? |
|
+ ;; |
|
+ * ) |
|
+ output=$(timeout $timeOut $pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $cmd"); rc=$? |
|
+ # |
|
+ # on timeout ... |
|
+ # |
|
+ if [ $rc -eq 124 -a -n "$onTimeOut" ]; then |
|
+ local second_output="" |
|
+ second_output=$($pre_cmd "$pre_script; /usr/sap/$SID/$InstanceName/HDBSettings.sh $onTimeOut"); |
|
+ fi |
|
+ ;; |
|
+ esac |
|
+ echo "$output" |
|
+ return $rc; |
|
+} |
|
+ |
|
+# |
|
# function: sht_init - initialize variables for the resource agent |
|
# params: - |
|
# globals: OCF_*(r), SID(w), sid(rw), sidadm(w), InstanceName(w), InstanceNr(w), |
|
-# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w), remoteHost(w) |
|
+# globals: meta_notify_master_uname(w), HANA_SR_TOLOPOGY(w), sr_name(w) |
|
# globals: ATTR_NAME_HANA_SYNC_STATUS(w), ATTR_NAME_HANA_PRIMARY_AT(w), ATTR_NAME_HANA_CLONE_STATE(w) |
|
# globals: DIR_EXECUTABLE(w), SAPSTARTSRV(w), SAPCONTROL(w), DIR_PROFILE(w), SAPSTARTPROFILE(w), LD_LIBRARY_PATH(w), PATH(w), nodelist(w) |
|
+# globals: NODENAME(w), hdbver(w) |
|
# sht_init : Define global variables with default values, if optional parameters are not set |
|
# |
|
# |
|
@@ -331,12 +448,14 @@ |
|
local hdbANSWER="" |
|
local siteID |
|
local siteNAME |
|
+ local chkMethod="" |
|
HOSTEXECNAME=saphostexec |
|
USRSAP=/usr/sap |
|
SAPSERVICE_PATH=${USRSAP}/sapservices |
|
SAPHOSTCTRL_PATH=${USRSAP}/hostctrl/exe |
|
HOSTEXEC_PATH=${SAPHOSTCTRL_PATH}/${HOSTEXECNAME} |
|
HOSTEXEC_PROFILE_PATH=${SAPHOSTCTRL_PATH}/host_profile |
|
+ NODENAME=$(crm_node -n) |
|
SID=$OCF_RESKEY_SID |
|
InstanceNr=$OCF_RESKEY_InstanceNumber |
|
myInstanceName="${SID}_HDB${InstanceNr}" |
|
@@ -382,13 +501,6 @@ |
|
DIR_PROFILE="$OCF_RESKEY_DIR_PROFILE" |
|
fi |
|
|
|
- # as root user we need the library path to the SAP kernel to be able to call sapcontrol |
|
- # check, if we already added DIR_EXECUTABLE at the beginning of LD_LIBRARY_PATH |
|
- if [ "${LD_LIBRARY_PATH%%*:}" != "$DIR_EXECUTABLE" ] |
|
- then |
|
- LD_LIBRARY_PATH=$DIR_EXECUTABLE${LD_LIBRARY_PATH:+:}$LD_LIBRARY_PATH |
|
- export LD_LIBRARY_PATH |
|
- fi |
|
|
|
PATH=${PATH}:${DIR_EXECUTABLE} |
|
# |
|
@@ -399,12 +511,45 @@ |
|
*openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; |
|
*cman* ) nodelist=$(crm_node -l);; |
|
esac |
|
+ # |
|
+ # get HANA version |
|
+ # |
|
+ local ges_ver |
|
+ ges_ver=$(HANA_CALL --timeout 10 --cmd "HDB version" | tr -d " " | awk -F: '$1 == "version" {print $2}') |
|
+ hdbver=${ges_ver%.*.*} |
|
+ # |
|
+ # since rev 111.00 we should use a new hdbnsutil option to get the -sr_state |
|
+ # since rev 112.03 the old option is changed and we should use -sr_stateConfiguration where ever possible |
|
+ # |
|
+ hdbState="hdbnsutil -sr_state" |
|
+ hdbMap="hdbnsutil -sr_state" |
|
+ if version "$hdbver" ">=" "1.00.111"; then |
|
+ hdbState="hdbnsutil -sr_stateConfiguration" |
|
+ hdbMap="hdbnsutil -sr_stateHostMapping" |
|
+ fi |
|
#### SAP-CALL |
|
# hdbnsutil was a bit unstable in some tests so we recall the tool, if it fails to report the srmode |
|
- for i in 1 2 3 4 5 6 7 8 9; do |
|
- hdbANSWER=$(su - ${sidadm} -c "hdbnsutil -sr_state --sapcontrol=1" 2>/dev/null) |
|
- super_ocf_log debug "DBG2: hdbANSWER=\$\(su - ${sidadm} -c \"hdbnsutil -sr_state --sapcontrol=1\"\)" |
|
- srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') |
|
+ for chkMethod in hU hU hU gP ; do |
|
+ # DONE: Limit the runtime of hdbnsutil. |
|
+ # TODO: Use getParameter.py if we get no answer |
|
+ # SAP_CALL |
|
+ #super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER" |
|
+ #srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') |
|
+ case "$chkMethod" in |
|
+ gP ) # call getParameter (gP) |
|
+ local gpKeys="" |
|
+ gpKeys=$(echo --key=global.ini/system_replication/{mode,site_name,site_id}) |
|
+ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "HDBSettings.sh getParameter.py $gpKeys --sapcontrol=1" 2>&1 | awk -F/ 'BEGIN {out=0} /^SAPCONTROL-OK: <begin>/ { out=1 } /^SAPCONTROL-OK: <end>/ { out=0 } /=/ {if (out==1) {print $3} }') |
|
+ srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}') |
|
+ super_ocf_log info "ACT: hdbnsutil not answering - using global.ini as fallback - srmode=$srmode" |
|
+ ;; |
|
+ hU | * ) # call hdbnsUtil (hU) ( also for unknown chkMethod ) |
|
+ # DONE: PRIO1: Beginning from SAP HANA rev 112.03 -sr_state is no longer supported 
|
+ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbState --sapcontrol=1" 2>/dev/null) |
|
+ super_ocf_log debug "DBG2: hdbANSWER=$hdbANSWER" |
|
+ srmode=$(echo "$hdbANSWER" | awk -F= '$1=="mode" {print $2}') |
|
+ ;; |
|
+ esac |
|
case "$srmode" in |
|
primary | syncmem | sync | async | none ) |
|
# we can leave the loop as we already got a result |
|
@@ -417,27 +562,51 @@ |
|
esac |
|
done |
|
# TODO PRIO3: Implement a file lookup, if we did not get a result |
|
- siteID=$(echo "$hdbANSWER" | awk -F= '/site id/ {print $2}') |
|
- siteNAME=$(echo "$hdbANSWER" | awk -F= '/site name/ {print $2}') |
|
+ siteID=$(echo "$hdbANSWER" | awk -F= '/site.id/ {print $2}') # allow 'site_id' AND 'site id' |
|
+ siteNAME=$(echo "$hdbANSWER" | awk -F= '/site.name/ {print $2}') |
|
site=$siteNAME |
|
srmode=$(echo "$hdbANSWER" | awk -F= '/mode/ {print $2}') |
|
- MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 ~ "mapping" && $3 !~ site { print $4 }' site=$site) |
|
- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING" |
|
# |
|
- # filter all non-cluster mappings |
|
+ # for rev >= 111 we use the new mapping query |
|
# |
|
- # DONE: PRIO2: Need mapping between HANA HOSTS not cluster NODES |
|
- local hanaVHost |
|
- hanaRemoteHost=$(for n1 in $nodelist; do |
|
- hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]}) |
|
- for n2 in $MAPPING; do |
|
- if [ "$hanaVHost" == "$n2" ]; then |
|
- echo $hanaVHost; |
|
- fi; |
|
- done; |
|
- done ) |
|
- super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" |
|
- super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" |
|
+ if version "$hdbver" ">=" "1.00.111"; then |
|
+ hdbANSWER=$(HANA_CALL --timeout 60 --cmd "$hdbMap --sapcontrol=1" 2>/dev/null) |
|
+ fi |
|
+ MAPPING=$(echo "$hdbANSWER" | awk -F[=/] '$1 == "mapping" && $3 != site { print $4 }' site=$site) |
|
+ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING" |
|
+ if [ -n "$MAPPING" ]; then |
|
+ # we have a mapping from HANA, lets use it |
|
+ # |
|
+ # filter all non-cluster mappings |
|
+ # |
|
+ local hanaVHost="" |
|
+ local n1="" |
|
+ hanaRemoteHost="" |
|
+ for n1 in $nodelist; do |
|
+ hanaVHost=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_VHOST[@]}) |
|
+ for n2 in $MAPPING; do |
|
+ if [ "$hanaVHost" == "$n2" ]; then |
|
+ hanaRemoteHost="$hanaVHost" |
|
+ fi; |
|
+ done; |
|
+ done |
|
+ super_ocf_log info "DEC: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" |
|
+ super_ocf_log debug "DBG: site=$site, mode=$srmode, MAPPING=$MAPPING, hanaRemoteHost=$hanaRemoteHost" |
|
+ else |
|
+ # HANA DID NOT TELL US THE MAPPING, LET US TRY TO USE THE SITE ATTRIBUTES 
|
+ local n1="" |
|
+ local hanaSite="" |
|
+ for n1 in $nodelist; do |
|
+ # TODO: PRIO9 - For multi tier with more than 2 chain/star members IN the cluster we might need to be |
|
+ # able to catch more than one remoteHost |
|
+ # currently having more than 2 HANA in a chain/star members IN the cluster is not allowed, the third must be external |
|
+ if [ "$NODENAME" != "$n1" ]; then |
|
+ hanaSite=$(get_hana_attribute ${n1} ${ATTR_NAME_HANA_SITE[@]}) |
|
+ hanaRemoteHost="$n1" |
|
+ fi |
|
+ done |
|
+ super_ocf_log info "DEC: site=$site, mode=$srmode, hanaRemoteHost=$hanaRemoteHost - found by remote site ($hanaSite)" |
|
+ fi |
|
super_ocf_log info "FLOW $FUNCNAME rc=$OCF_SUCCESS" |
|
return $OCF_SUCCESS |
|
} |
|
@@ -446,38 +615,29 @@ |
|
# function: check_for_primary - check if local SAP HANA is configured as primary |
|
# params: - |
|
# globals: HANA_STATE_PRIMARY(r), HANA_STATE_SECONDARY(r), HANA_STATE_DEFECT(r), HANA_STATE_STANDALONE(r) |
|
+# srmode(r) |
|
# |
|
function check_for_primary() { |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=0 |
|
- node_status=$srmode |
|
- super_ocf_log debug "DBG2: check_for_primary: node_status=$node_status" |
|
- super_ocf_log debug "DBG: check_for_primary: node_status=$node_status" |
|
- for i in 1 2 3 4 5 6 7 8 9; do |
|
- case "$node_status" in |
|
- primary ) |
|
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY" |
|
- return $HANA_STATE_PRIMARY;; |
|
- syncmem | sync | async ) |
|
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY" |
|
- return $HANA_STATE_SECONDARY;; |
|
- none ) # have seen that mode on second side BEFEORE we registered it as replica |
|
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE" |
|
- return $HANA_STATE_STANDALONE;; |
|
- * ) |
|
- # TODO: PRIO1: Should we set SFAIL? |
|
- # TODO: PRIO2: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes |
|
- dump=$( echo $node_status | hexdump -C ); |
|
- super_ocf_log err "ACT: check_for_primary: we didn't expect node_status to be: DUMP: <$dump>" |
|
- #### SAP-CALL |
|
- node_full_status=$(su - ${sidadm} -c "hdbnsutil -sr_state" 2>/dev/null ) |
|
- node_status=$(echo "$node_full_status" | awk '$1=="mode:" {print $2}') |
|
- super_ocf_log info "DEC: check_for_primary: loop=$i: node_status=$node_status" |
|
- # TODO: PRIO1: Maybe we need to keep the old value for P/S/N, if hdbnsutil just crashes |
|
- esac; |
|
- done |
|
- super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_DEFECT" |
|
- return $HANA_STATE_DEFECT |
|
+ super_ocf_log debug "DBG: check_for_primary: srmode=$srmode" |
|
+ case "$srmode" in |
|
+ primary ) |
|
+ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_PRIMARY" |
|
+ rc=$HANA_STATE_PRIMARY;; |
|
+ syncmem | sync | async ) |
|
+ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_SECONDARY" |
|
+ rc=$HANA_STATE_SECONDARY;; |
|
+ none ) # have seen that mode on second side BEFORE we registered it as replica |
|
+ super_ocf_log info "FLOW $FUNCNAME rc=HANA_STATE_STANDALONE" |
|
+ rc=$HANA_STATE_STANDALONE;; |
|
+ * ) |
|
+ dump=$( echo $srmode | hexdump -C ); |
|
+ super_ocf_log err "ACT: check_for_primary: we didn't expect srmode to be: DUMP: <$dump>" |
|
+ rc=$HANA_STATE_DEFECT |
|
+ esac; |
|
+ super_ocf_log info "FLOW $FUNCNAME rc=$rc" |
|
+ return $rc |
|
} |
|
|
|
|
|
@@ -653,7 +813,7 @@ |
|
function sht_stop_clone() { |
|
super_ocf_log info "FLOW $FUNCNAME ($*)" |
|
local rc=0 |
|
- check_for_primary; primary_status=$? |
|
+ check_for_primary; primary_status=$? |
|
if [ $primary_status -eq $HANA_STATE_PRIMARY ]; then |
|
hanaPrim="P" |
|
elif [ $primary_status -eq $HANA_STATE_SECONDARY ]; then |
|
@@ -663,7 +823,7 @@ |
|
else |
|
hanaPrim="-" |
|
fi |
|
- set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]} |
|
+ set_hana_attribute "${NODENAME}" "1:$hanaPrim:-:-:-:-" ${ATTR_NAME_HANA_ROLES[@]} |
|
sht_stop; rc=$? |
|
return $rc |
|
} |
|
@@ -718,28 +878,49 @@ |
|
fi |
|
# DONE: PRIO1: ASK: Is the output format of ListInstances fix? Could we take that as an API? |
|
# try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 |
|
- # We rely on the following format: SID is word#4, NR is work#6, vHost is word#8 |
|
+ # We rely on the following format: SID is word#4, SYSNR is word#6, vHost is word#8 |
|
#### SAP-CALL |
|
vName=$(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances \ |
|
- | awk '$4 == SID && $6=NR { print $8 }' SID=$SID NR=$InstanceNr 2>/dev/null ) |
|
+ | awk '$4 == SID && $6 == SYSNR { print $8 }' SID=$SID SYSNR=$InstanceNr 2>/dev/null ) |
|
# super_ocf_log debug "DBG: ListInstances: $(/usr/sap/hostctrl/exe/saphostctrl -function ListInstances)" |
|
if [ -n "$vName" ]; then |
|
- set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]} |
|
+ set_hana_attribute ${NODENAME} "$vName" ${ATTR_NAME_HANA_VHOST[@]} |
|
else |
|
vName=$(get_hana_attribute ${NODENAME} ${ATTR_NAME_HANA_VHOST[@]}) |
|
fi |
|
#site=$(get_site_name) |
|
#### SAP-CALL |
|
- hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?" |
|
- hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName}) |
|
+ # SAP_CALL |
|
+ #hanaANSWER=$(su - $sidadm -c "python exe/python_support/landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?" |
|
+ # |
|
+ # since rev 09x SAP has added the --sapcontrol option for the landscapeHostConfiguration interface |
|
+ # we begin to use --sapcontrol with rev 100 |
|
+ # since rev 120 we need to use the --sapcontrol, because SAP changed the tool output |
|
+ # |
|
+ if version "$hdbver" ">=" "1.00.100"; then |
|
+ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py --sapcontrol=1" 2>/dev/null); hanalrc="$?" |
|
+ # TODO: PRIO9: Do we need to check the lines: 'SAPCONTROL-OK: <begin>' and 'SAPCONTROL-OK: <end>'? |
|
+ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | \ |
|
+ awk -F= '$1 == "nameServerConfigRole" {f1=$2} |
|
+ $1 == "nameServerActualRole" {f2=$2} |
|
+ $1 == "indexServerConfigRole" {f3=$2} |
|
+ $1 == "indexServerActualRole" {f4=$2} |
|
+ END { printf "%s:%s:%s:%s\n", f1, f2, f3,f4 }') |
|
+ else |
|
+ # |
|
+ # old code for backward compatibility |
|
+ # |
|
+ hanaANSWER=$(HANA_CALL --timeout 60 --cmd "landscapeHostConfiguration.py" 2>/dev/null); hanalrc="$?" |
|
+ hanarole=$(echo "$hanaANSWER" | tr -d ' ' | awk -F'|' '$2 == host { printf "%s:%s:%s:%s\n",$10,$11,$12,$13 } ' host=${vName}) |
|
+ fi |
|
#if [ -z "$MAPPING" ]; then |
|
# super_ocf_log info "ACT: Did not find remote Host at this moment" |
|
#fi |
|
# FH TODO PRIO3: TRY TO GET RID OF "ATTR_NAME_HANA_REMOTEHOST" |
|
if [ -n "$hanaRemoteHost" ]; then |
|
- set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} |
|
+ set_hana_attribute ${NODENAME} "$hanaRemoteHost" ${ATTR_NAME_HANA_REMOTEHOST[@]} |
|
fi |
|
- set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} |
|
+ set_hana_attribute ${NODENAME} "$hanalrc:$hanaPrim:$hanarole" ${ATTR_NAME_HANA_ROLES[@]} |
|
if [ -n "$site" ]; then |
|
set_hana_attribute ${NODENAME} "$site" ${ATTR_NAME_HANA_SITE[@]} |
|
fi |
|
@@ -748,8 +929,8 @@ |
|
S ) # only secondary may propargate its sync status |
|
case $(crm_attribute --type crm_config --name cluster-infrastructure -q) in |
|
*corosync* ) nodelist=$(crm_node -l | awk '{ print $2 }');; |
|
- *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; |
|
- *cman* ) nodelist=$(crm_node -l);; |
|
+ *openais* ) nodelist=$(crm_node -l | awk '/member/ {print $2}');; |
|
+ *cman* ) nodelist=$(crm_node -l);; |
|
esac |
|
|
|
for n in ${nodelist}; do |
|
@@ -789,7 +970,6 @@ |
|
InstanceNr="" |
|
DIR_EXECUTABLE="" |
|
SAPHanaFilter="ra-act-dec-lpa" |
|
-NODENAME=$(crm_node -n) |
|
|
|
if [ $# -ne 1 ] |
|
then |
|
@@ -846,8 +1026,7 @@ |
|
fi |
|
fi |
|
|
|
-THE_VERSION=$(sht_meta_data | grep '<version') |
|
-super_ocf_log info "RA ==== begin action $ACTION$CLACT ($THE_VERSION) ====" |
|
+super_ocf_log info "RA ==== begin action $ACTION$CLACT ($SAPHanaVersion) ====" |
|
ra_rc=$OCF_ERR_UNIMPLEMENTED |
|
case "$ACTION" in |
|
start|stop|monitor) # Standard controling actions |
|
@@ -865,5 +1044,5 @@ |
|
esac |
|
timeE=$(date '+%s') |
|
(( timeR = timeE - timeB )) |
|
-super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($THE_VERSION) (${timeR}s)====" |
|
+super_ocf_log info "RA ==== end action $ACTION$CLACT with rc=${ra_rc} ($SAPHanaVersion) (${timeR}s)====" |
|
exit ${ra_rc} |
|
diff -uNr a/tools/show_SAPHanaSR_attributes b/tools/show_SAPHanaSR_attributes |
|
--- a/tools/show_SAPHanaSR_attributes 2016-10-14 10:09:56.467051414 +0200 |
|
+++ b/tools/show_SAPHanaSR_attributes 2016-10-14 10:31:28.051676675 +0200 |
|
@@ -1,19 +1,78 @@ |
|
#!/usr/bin/perl |
|
# |
|
-# get_all_lnx_attributes |
|
-# |
|
-# license: GPL |
|
-# author: fabian.herschel@suse.com |
|
-# date: 2014-05-13 |
|
-# |
|
+# SAPHanaSR-showAttr |
|
+# (c) 2014 SUSE Linux Products GmbH, Nuremberg, Germany |
|
+# (c) 2015-2016 SUSE Linux GmbH, Nuremberg Germany |
|
+# Author: Fabian Herschel <fabian.herschel@suse.com> |
|
+# License: GPL v2+ |
|
+my $Version="0.18.2016.02.16.1"; |
|
# |
|
+################################################################## |
|
use POSIX; |
|
use strict; |
|
+use Sys::Syslog; |
|
+use Sys::Hostname; |
|
+use File::Path; |
|
+use Getopt::Long; |
|
+use lib '/usr/share/SAPHanaSR/tests'; |
|
+use SAPHanaSRTools; |
|
+ |
|
+################################### |
|
+## this part is not for scale out and currently NOT zero-config |
|
+ |
|
+my $ClusterNodes=2; |
|
+my $ClusterPrimaries=1; |
|
+my $ClusterSecondaries=1; |
|
+my %Name; |
|
+my %Host; |
|
+my $host = hostname(); |
|
|
|
+my $varlib='/var/lib/SAPHanaTD'; |
|
+my $testfile='SAPHanaTD.status'; |
|
+my $testcount=0; |
|
+my $first_test=1; |
|
my $sid=""; |
|
-my $table_title = "Host \\ Attr"; |
|
-my %Name; |
|
+my @sids; |
|
+my $ino=""; |
|
+my $sortBy=""; |
|
+my $table_titleH = "Host"; |
|
+#my %Name; |
|
my %Host; |
|
+my %Site; |
|
+my %Global; |
|
+my %HName; |
|
+my %SName; |
|
+my %GName; |
|
+my $help; |
|
+my $version; |
|
+my $cibFile=""; |
|
+ |
|
+sub init() |
|
+{ |
|
+ my $result = GetOptions ("sid=s" => \@sids, |
|
+ "sort=s" => \$sortBy, |
|
+ "cib=s" => \$cibFile, |
|
+ "version" => \$version, |
|
+ "help" => \$help, |
|
+ ); |
|
+ return 0; |
|
+} |
|
+ |
|
+init(); |
|
+ |
|
+if ( $help ) { |
|
+ printf "SAPHanaSR-showAttr {[--sid=<sid[:instNr]>]} [--sort=<SortBy>] [--cib=<OfflineCibFile>]\n"; |
|
+ printf ""; |
|
+ exit 0; |
|
+} |
|
+if ( $version ) { |
|
+ printf "%s\n", $Version; |
|
+ exit 0; |
|
+} |
|
+ |
|
+if ( $cibFile ne "" ) { |
|
+ printf "Using cib file %s\n", $cibFile; |
|
+} |
|
|
|
sub max { # thanks to http://www.perlunity.de/perl/forum/thread_018329.shtml |
|
my $a = shift; |
|
@@ -21,113 +80,75 @@ |
|
return $a > $b ? $a : $b; |
|
} |
|
|
|
-sub print_attr_host() |
|
-{ |
|
- my ($HKey, $AKey); |
|
- printf "%-22s", "Attribute \\ Host"; |
|
- foreach $HKey (sort keys %Host) { |
|
- printf "%-16s ", $HKey; |
|
- } |
|
- printf "\n"; |
|
- |
|
- printf "%s\n", "-" x 120 ; |
|
- |
|
- foreach $AKey (sort keys %Name) { |
|
- printf "%-22s", $AKey; |
|
- foreach $HKey (sort keys %Host) { |
|
- printf "%-16.16s ", $Host{$HKey} -> {$AKey}; |
|
- } |
|
- |
|
- printf "\n"; |
|
- } |
|
- return 0; |
|
-} |
|
- |
|
-sub print_host_attr() |
|
-{ |
|
- my ($AKey, $HKey, $len, $line_len, $hclen); |
|
- $hclen=$Name{_hosts}->{_length}; |
|
- $line_len=$hclen+1; |
|
- printf "%-$hclen.${hclen}s ", "$table_title"; |
|
- foreach $AKey (sort keys %Name) { |
|
- if ($AKey ne "_hosts") { |
|
- $len = $Name{$AKey}->{_length}; |
|
- $line_len=$line_len+$len+1; |
|
- printf "%-$len.${len}s ", $Name{$AKey}->{_title}; |
|
+sub read_cib($) { |
|
+ my $sid = shift(); |
|
+ if ( $cibFile eq "" ) { |
|
+ printf "Open live cib\n"; |
|
+ open CIB, "cibadmin -Ql |" or die "CIB could not be read from cluster"; |
|
+ } else { |
|
+ open CIB, "<$cibFile" or die "CIB file $cibFile not found or not able to read it"; |
|
+ } |
|
+ while (<CIB>) { |
|
+ chomp; |
|
+ my ($host, $name, $site, $value); |
|
+ if ( $_ =~ /cib-last-written="([^"]*)"/ ) { |
|
+ printf "CIB-time: %s\n", $1; |
|
} |
|
- } |
|
- printf "\n"; |
|
- printf "%s\n", "-" x $line_len ; |
|
- foreach $HKey (sort keys %Host) { |
|
- printf "%-$hclen.${hclen}s ", $HKey; |
|
- foreach $AKey (sort keys %Name) { |
|
- if ($AKey ne "_hosts") { |
|
- $len = $Name{$AKey}->{_length}; |
|
- printf "%-$len.${len}s ", $Host{$HKey} -> {$AKey}; |
|
- } |
|
- } |
|
- printf "\n"; |
|
- } |
|
- return 0; |
|
-} |
|
- |
|
-open ListInstances, "/usr/sap/hostctrl/exe/saphostctrl -function ListInstances|"; |
|
-while (<ListInstances>) { |
|
- # try to catch: Inst Info : LNX - 42 - lv9041 - 740, patch 36, changelist 1444691 |
|
- chomp; |
|
- if ( $_ =~ /:\s+([A-Z][A-Z0-9][A-Z0-9])\s+-/ ) { |
|
- $sid=tolower("$1"); |
|
- } |
|
-} |
|
-close ListInstances; |
|
- |
|
- |
|
-open CIB, "cibadmin -Ql |"; |
|
-while (<CIB>) { |
|
- chomp; |
|
- my ($host, $name, $value); |
|
- my $found=0; |
|
- if ( $_ =~ /nvpair.*name="(\w+_${sid}_\w+)"/ ) { |
|
- $name=$1; |
|
- # find attribute in forever and reboot store :) |
|
- if ( $_ =~ /id="(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) { |
|
- $host=$2; |
|
- } |
|
- if ( $_ =~ /value="([^"]+)"/ ) { |
|
- $value=$1; |
|
- $found=1; |
|
- } |
|
- } |
|
- if ( $found == 1 ) { |
|
- # |
|
- # handle the hosts name and table-title |
|
- # |
|
- $Host{$host}->{$name}=${value}; |
|
- if ( defined ($Name{_hosts}->{_length})) { |
|
- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length($host )); |
|
- } else { |
|
- $Name{_hosts}->{_length} = length($host ); |
|
+ if ( $_ =~ /node_state id=".+" uname="([a-zA-Z0-9\-\_]+)" .*crmd="([a-zA-Z0-9\-\_]+)"/ ) { |
|
+ insertAttribute($sid, \%Host, \%HName, $1, "node_status", $2); |
|
} |
|
- $Name{_hosts}->{_length} = max($Name{_hosts}->{_length}, length( $table_title)); |
|
- # |
|
- # now handle the attributes name and value |
|
- # |
|
- $Name{$name}->{$host}=${value}; |
|
- if ( defined ($Name{$name}->{_length})) { |
|
- $Name{$name}->{_length} = max($Name{$name}->{_length}, length($value )); |
|
- } else { |
|
- $Name{$name}->{_length} = length($value ); |
|
+ if ( $_ =~ /nvpair.*name="([a-zA-Z0-9\_\-]+_${sid}_([a-zA-Z0-9\-\_]+))"/ ) { |
|
+ $name=$1; |
|
+ if ( $_ =~ /id=.(status|nodes)-([a-zA-Z0-9\_\-]+)-/ ) { |
|
+ # found attribute in nodes forever and reboot store |
|
+ $host=$2; |
|
+ if ( $_ =~ /value="([^"]+)"/ ) { |
|
+ $value=$1; |
|
+ insertAttribute($sid, \%Host, \%HName, $host, $name, $value); |
|
+ } |
|
+ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_site_[a-zA-Z0-9\-]+_([a-zA-Z0-9\_\-]+)/) { |
|
+ # found a site attribute |
|
+ $site=$1; |
|
+ if ( $name =~ /[a-zA-Z0-9\_\-]+_site_([a-zA-Z0-9\-]+)/ ) { |
|
+ $name = $1; |
|
+ } |
|
+ if ( $_ =~ /value="([^"]+)"/ ) { |
|
+ $value=$1; |
|
+ insertAttribute($sid, \%Site, \%SName, $site, $name, $value); |
|
+ } |
|
+ } elsif ( $_ =~ /id=.SAPHanaSR-[a-zA-Z0-9\_\-]+_glob_[a-zA-Z0-9\_\-]+/) { |
|
+ # found a global attribute |
|
+ $host="GLOBAL"; |
|
+ if ( $name =~ /([a-zA-Z0-9\_\-]+)_glob_([a-zA-Z0-9\_\-]+)/ ) { |
|
+ $name = $2; |
|
+ } |
|
+ if ( $_ =~ /value="([^"]+)"/ ) { |
|
+ $value=$1; |
|
+ insertAttribute($sid, \%Global, \%GName, "global", $name, $value); |
|
+ } |
|
+ } |
|
} |
|
- if ( $name =~ /hana_${sid}_(.*)/ ) { |
|
- $Name{$name}->{_title} = $1; |
|
- } else { |
|
- $Name{$name}->{_title} = $name; |
|
- } |
|
- $Name{$name}->{_length} = max($Name{$name}->{_length}, length( $Name{$name}->{_title})); |
|
- # printf "%-8s %-20s %-30s\n", $1, $2, $3; |
|
- } |
|
+ } |
|
+ close CIB; |
|
} |
|
-close CIB; |
|
|
|
-#print_attr_host; |
|
-print_host_attr; |
|
+if ( 0 == @sids ) { |
|
+ my $sid_ino_list; |
|
+ ( $sid_ino_list ) = get_sid_and_InstNr(); |
|
+ @sids = split(",", $sid_ino_list); |
|
+ |
|
+} |
|
+ |
|
+foreach $sid (@sids) { |
|
+ ( $sid, $ino ) = split(":", $sid); |
|
+ $sid=tolower("$sid"); |
|
+ %Host=(); |
|
+ %HName=(); |
|
+ read_cib($sid); |
|
+ get_hana_attributes($sid); |
|
+ if ( keys(%Host) == 0 ) { |
|
+ printf "No attributes found for SID=%s\n", $sid; |
|
+ } else { |
|
+ print_host_attr(\%Host, \%HName, "Hosts", $sortBy); |
|
+ } |
|
+}
|
|
|