You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1134 lines
35 KiB
1134 lines
35 KiB
5 years ago
|
#!/bin/bash
|
||
|
#
|
||
|
#
|
||
|
# OCF Resource Agent compliant drbd resource script.
|
||
|
#
|
||
|
# Copyright (c) 2009 LINBIT HA-Solutions GmbH,
|
||
|
# Copyright (c) 2009 Florian Haas, Lars Ellenberg
|
||
|
# Based on the Heartbeat drbd OCF Resource Agent by Lars Marowsky-Bree
|
||
|
# (though it turned out to be an almost complete rewrite)
|
||
|
#
|
||
|
# All Rights Reserved.
|
||
|
#
|
||
|
# This program is free software; you can redistribute it and/or modify
|
||
|
# it under the terms of version 2 of the GNU General Public License as
|
||
|
# published by the Free Software Foundation.
|
||
|
#
|
||
|
# This program is distributed in the hope that it would be useful, but
|
||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||
|
#
|
||
|
# Further, this software is distributed without any warranty that it is
|
||
|
# free of the rightful claim of any third person regarding infringement
|
||
|
# or the like. Any license provided herein, whether implied or
|
||
|
# otherwise, applies only to this software file. Patent licenses, if
|
||
|
# any, provided herein do not apply to combinations of this program with
|
||
|
# other software, or any other product whatsoever.
|
||
|
#
|
||
|
# You should have received a copy of the GNU General Public License
|
||
|
# along with this program; if not, write the Free Software Foundation,
|
||
|
# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
|
||
|
#
|
||
|
#
|
||
|
|
||
|
# OCF instance parameters
|
||
|
# OCF_RESKEY_drbd_resource
|
||
|
# OCF_RESKEY_drbdconf
|
||
|
# OCF_RESKEY_stop_outdates_secondary
|
||
|
# OCF_RESKEY_adjust_master_score
|
||
|
#
|
||
|
# meta stuff this agent looks at:
|
||
|
# OCF_RESKEY_CRM_meta_clone_max
|
||
|
# OCF_RESKEY_CRM_meta_clone_node_max
|
||
|
# OCF_RESKEY_CRM_meta_master_max
|
||
|
# OCF_RESKEY_CRM_meta_master_node_max
|
||
|
#
|
||
|
# OCF_RESKEY_CRM_meta_interval
|
||
|
#
|
||
|
# OCF_RESKEY_CRM_meta_notify
|
||
|
# OCF_RESKEY_CRM_meta_notify_active_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_demote_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_master_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_operation
|
||
|
# OCF_RESKEY_CRM_meta_notify_promote_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_slave_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_start_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_stop_uname
|
||
|
# OCF_RESKEY_CRM_meta_notify_type
|
||
|
#
|
||
|
|
||
|
#######################################################################
|
||
|
# Initialization:
|
||
|
|
||
|
# Resource-agents have moved their ocf-shellfuncs file around.
|
||
|
# There are supposed to be symlinks or wrapper files in the old location,
|
||
|
# pointing to the new one, but people seem to get it wrong all the time.
|
||
|
# Try several locations.
|
||
|
|
||
|
# Load the OCF shell function library.
# An explicitly provided OCF_FUNCTIONS_DIR wins; otherwise fall back to the
# known locations below OCF_ROOT. In both cases the plain file name is
# tried before the dot-file variant, and nothing is sourced if neither exists.
if [ -n "${OCF_FUNCTIONS_DIR}" ]; then
	if [ -e "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs" ]; then
		source "${OCF_FUNCTIONS_DIR}/ocf-shellfuncs"
	elif [ -e "${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs" ]; then
		source "${OCF_FUNCTIONS_DIR}/.ocf-shellfuncs"
	fi
elif [ -e "${OCF_ROOT}/lib/heartbeat/ocf-shellfuncs" ]; then
	source "${OCF_ROOT}/lib/heartbeat/ocf-shellfuncs"
elif [ -e "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs" ]; then
	source "${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs"
fi
|
||
|
|
||
|
# Defaults
OCF_RESKEY_drbdconf_default="/etc/drbd.conf"

# The OCF_CRM_meta_notify_* environment that is passed in is not reliable
# with pacemaker up to at least 1.0.10 and 1.1.4. It should be fixed later.
# Until then, the "self-outdating feature" would base its actions on wrong
# information, and possibly not outdate when it should, or, even worse,
# outdate the last remaining valid copy.
# Therefore: disabled by default.
OCF_RESKEY_stop_outdates_secondary_default="false"

# Four master scores, one per scenario:
#
#   score | local role | local disk   | peer disk
#   ------+------------+--------------+----------
#       5 | ignored    | Consistent   | Unknown
#      10 | ignored    | NOT UpToDate | UpToDate
#    1000 | Secondary  | UpToDate     | unknown
#   10000 | ignored    | UpToDate     | known
#   10000 | Primary    | UpToDate     | ignored
OCF_RESKEY_adjust_master_score_default="5 10 1000 10000"

# Apply the defaults to anything that is unset or empty.
OCF_RESKEY_drbdconf=${OCF_RESKEY_drbdconf:-${OCF_RESKEY_drbdconf_default}}
OCF_RESKEY_stop_outdates_secondary=${OCF_RESKEY_stop_outdates_secondary:-${OCF_RESKEY_stop_outdates_secondary_default}}
OCF_RESKEY_adjust_master_score=${OCF_RESKEY_adjust_master_score:-${OCF_RESKEY_adjust_master_score_default}}

# Defaults according to "Configuration 1.0 Explained",
# "Multi-state resource configuration options".
# These only apply if the variable is unset; an explicitly empty value is kept.
OCF_RESKEY_CRM_meta_clone_node_max=${OCF_RESKEY_CRM_meta_clone_node_max-1}
OCF_RESKEY_CRM_meta_master_max=${OCF_RESKEY_CRM_meta_master_max-1}
OCF_RESKEY_CRM_meta_master_node_max=${OCF_RESKEY_CRM_meta_master_node_max-1}
|
||
|
#######################################################################
|
||
|
# Debugging aid for this RA: where the debug log lives, and whether it is
# in use. USE_DEBUG_LOG starts out false; it is only switched on further
# below once the log location passed the safety checks.
DEBUG_LOG_DIR="/tmp/drbd.ocf.ra.debug"
DEBUG_LOG="${DEBUG_LOG_DIR}/log"
USE_DEBUG_LOG="false"
|
||
|
# Check ownership and mode of the directory "$1" using `ls -ldn`.
#
# Returns 0 if the entry is a directory owned by uid 0 whose mode string is
# rwx for the owner and has no write bit for group or others; returns 1
# otherwise (including when ls fails, e.g. the path does not exist).
#
# Note: despite the "0700" in the name, the pattern only pins the owner bits
# and the two "w" positions, so modes such as 0755 are accepted as well.
# The second alternative tolerates one trailing "." after the mode string
# (presumably the SELinux context marker printed by some ls versions).
ls_stat_is_dir_0700_root() {
	local -a ls_fields
	# Deliberately unquoted: split the ls output into whitespace separated fields.
	ls_fields=( $(command ls -ldn "$1" 2>/dev/null) )

	# Field 0 is the mode string, field 2 the numeric owner (because of -n).
	local mode_and_uid="${ls_fields[0]}/${ls_fields[2]}"
	if [[ $mode_and_uid == drwx?-??-?/0 || $mode_and_uid == drwx?-??-?./0 ]]; then
		return 0
	fi
	return 1
}
|
||
|
# Try to avoid a symlink vulnerability: the debug log is only used if its
# directory passes the ownership/mode check, and the log file itself already
# exists, is writable, and is not a symlink.
# Either way, file descriptor 9 is opened: on the log, or on /dev/null.
if ! ls_stat_is_dir_0700_root $DEBUG_LOG_DIR ||
   [[ ! -w "$DEBUG_LOG" || -L "$DEBUG_LOG" ]]
then
	exec 9>/dev/null
else
	USE_DEBUG_LOG=true
	exec 9>>"$DEBUG_LOG"
	# Log header for this invocation: timestamp, arguments, OCF environment.
	{
		date
		echo "$*"
		env | grep OCF_ | sort
	} >&9
fi
|
||
|
# end of debugging aid
|
||
|
#######################################################################
|
||
|
|
||
|
# Print the OCF resource agent meta data (XML) on stdout.
# The parameter defaults shown in the XML are expanded from the
# OCF_RESKEY_*_default variables at the time this function is called.
meta_data() {
	cat <<OCF_META_DATA
<?xml version="1.0"?>
<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
<resource-agent name="drbd">
<version>1.3</version>

<longdesc lang="en">
This resource agent manages a DRBD resource as a master/slave resource.
DRBD is a shared-nothing replicated storage device.
Note that you should configure resource level fencing in DRBD,
this cannot be done from this resource agent.
See the DRBD User's Guide for more information.
http://www.drbd.org/docs/applications/
</longdesc>

<shortdesc lang="en">Manages a DRBD device as a Master/Slave resource</shortdesc>

<parameters>
<parameter name="drbd_resource" unique="1" required="1">
<longdesc lang="en">
The name of the drbd resource from the drbd.conf file.
</longdesc>
<shortdesc lang="en">drbd resource name</shortdesc>
<content type="string"/>
</parameter>

<parameter name="drbdconf">
<longdesc lang="en">
Full path to the drbd.conf file.
</longdesc>
<shortdesc lang="en">Path to drbd.conf</shortdesc>
<content type="string" default="${OCF_RESKEY_drbdconf_default}"/>
</parameter>

<parameter name="adjust_master_score">
<longdesc lang="en">
Space separated list of four master score adjustments for different scenarios:
- only access to 'consistent' data
- only remote access to 'uptodate' data
- currently Secondary, local access to 'uptodate' data, but remote is unknown
- local access to 'uptodate' data, and currently Primary or remote is known

Numeric values are expected to be non-decreasing.

Default are the previously hardcoded values.

Set the first value to 0 (and configure proper fencing methods)
to prevent pacemaker from trying to promote while it is unclear
whether the data is really the most recent copy.
(DRBD knows it is "consistent", but is unsure about "uptodate"ness).

Advanced use: Adjust the other values to better fit into complex
dependency score calculations.
</longdesc>
<shortdesc lang="en">master score adjustments</shortdesc>
<content type="string" default="${OCF_RESKEY_adjust_master_score_default}"/>
</parameter>

<parameter name="stop_outdates_secondary">
<longdesc lang="en">
Recommended setting: leave at default (disabled).

Note that this feature depends on the passed in information in
OCF_RESKEY_CRM_meta_notify_master_uname to be correct, which unfortunately is
not reliable for pacemaker versions up to at least 1.0.10 / 1.1.4.

If a Secondary is stopped (unconfigured), it may be marked as outdated in the
drbd meta data, if we know there is still a Primary running in the cluster.
Note that this does not affect fencing policies set in drbd config,
but is an additional safety feature of this resource agent only.
You can enable this behaviour by setting the parameter to true.

If this feature seems to not do what you expect, make sure you have defined
fencing policies in the drbd configuration as well.
</longdesc>
<shortdesc lang="en">outdate a secondary on stop</shortdesc>
<content type="boolean" default="${OCF_RESKEY_stop_outdates_secondary_default}"/>
</parameter>
</parameters>

<actions>
<action name="start" timeout="240" />
<action name="promote" timeout="90" />
<action name="demote" timeout="90" />
<action name="notify" timeout="90" />
<action name="stop" timeout="100" />
<action name="monitor" depth="0" timeout="20" interval="20" role="Slave" />
<action name="monitor" depth="0" timeout="20" interval="10" role="Master" />
<action name="meta-data" timeout="5" />
<action name="validate-all" timeout="30" />
</actions>
</resource-agent>
OCF_META_DATA
}
|
||
|
|
||
|
# Run the command given as arguments, log what happened, and pass its
# result on to the caller.
#
# The command's stdout is captured and then re-printed on our stdout.
# On success the exit code and output are logged at debug level; on failure
# the command line, exit code and output are logged at err level.
# Returns the exit code of the command.
do_cmd() {
	local cmd_line="$*"
	local captured exit_code level=debug

	ocf_log debug "$DRBD_RESOURCE: Calling $cmd_line"
	# Assignment kept separate from "local" so that $? is the command's status.
	captured=$( "$@" )
	exit_code=$?

	if (( exit_code != 0 )); then
		level=err
		ocf_log err "$DRBD_RESOURCE: Called $cmd_line"
	fi
	ocf_log $level "$DRBD_RESOURCE: Exit code $exit_code"
	ocf_log $level "$DRBD_RESOURCE: Command output: $captured"

	echo "$captured"

	return $exit_code
}
|
||
|
|
||
|
# Run drbdadm with the appropriate command line options via do_cmd,
# which captures and logs its output.
#
# $DRBDADM is defined during drbd_validate as "drbdadm" plus appropriate
# command line options; it is intentionally expanded unquoted so that
# those options become separate words.
# Returns the exit code of the drbdadm call.
do_drbdadm() {
	do_cmd $DRBDADM "$@"
	local drbdadm_rc=$?

	# Having the version mismatch warning once per RA invocation
	# should be enough: export the variable (empty) for all later calls.
	export DRBD_DONT_WARN_ON_VERSION_MISMATCH=

	return $drbdadm_rc
}
|
||
|
|
||
|
# Set this node's master score to "$1", or remove the score altogether
# if "$1" is not greater than zero.
#
# The comparison is an arithmetic one. A word that is not a number, such as
# "-INFINITY", is evaluated as an expression on an (unset) variable and thus
# compares as 0: it takes the "remove" branch, too.
set_master_score() {
	if [[ $1 -le 0 ]]; then
		remove_master_score
		return
	fi

	# Use quiet mode (-Q) to quench logging. Actual score updates
	# will get logged by attrd anyway.
	do_cmd ${HA_SBIN_DIR}/crm_master -Q -l reboot -v $1
}
|
||
|
|
||
|
# Delete this node's master score (crm_master -D, lifetime "reboot").
# Returns the exit code of the crm_master call as reported by do_cmd.
remove_master_score() {
	do_cmd $HA_SBIN_DIR/crm_master -l reboot -D
}
|
||
|
|
||
|
# Callback for the output of "drbdsetup sh-status".
#
# drbd_set_status_variables evals that output, which assigns the _volume,
# _role, _peer, _cstate, _disk and _pdsk variables and then calls this
# function once per volume. The values are stored in the DRBD_* status
# arrays, indexed by volume number; missing values get a default.
_sh_status_process() {
	# _volume not present should not happen,
	# but may help make this agent work even if it talks to drbd 8.3.
	: ${_volume:=0}

	# Not-yet-created volumes are reported as -1, which is not usable as an
	# array index: file them under index 65536 instead.
	(( _volume >= 0 )) || _volume=$(( 1 << 16 ))

	DRBD_ROLE_LOCAL[$_volume]=${_role:-Unconfigured}
	DRBD_ROLE_REMOTE[$_volume]=${_peer:-Unknown}
	DRBD_CSTATE[$_volume]=$_cstate
	DRBD_DSTATE_LOCAL[$_volume]=${_disk:-Unconfigured}
	DRBD_DSTATE_REMOTE[$_volume]=${_pdsk:-DUnknown}
}
|
||
|
# (Re-)populate the global status arrays DRBD_ROLE_LOCAL, DRBD_ROLE_REMOTE,
# DRBD_CSTATE, DRBD_DSTATE_LOCAL and DRBD_DSTATE_REMOTE.
#
# drbdsetup sh-status prints shell variable assignments to stdout,
# followed by a call to _sh_status_process. By eval'ing that output we do
# not need several drbdadm/drbdsetup commands to figure out the various
# aspects of the state.
drbd_set_status_variables() {
	# Everything the sh-status output may assign stays local to this function.
	local _minor _res_name _known _cstate _role _peer _disk _pdsk
	local _volume
	local _flags_susp _flags_aftr_isp _flags_peer_isp _flags_user_isp
	local _resynced_percent
	local sh_status_output

	DRBD_ROLE_LOCAL=()
	DRBD_ROLE_REMOTE=()
	DRBD_CSTATE=()
	DRBD_DSTATE_LOCAL=()
	DRBD_DSTATE_REMOTE=()

	if $DRBD_HAS_MULTI_VOLUME ; then
		sh_status_output=$($DRBDSETUP sh-status "$DRBD_RESOURCE")
	else
		# without "MULTI_VOLUME", the DRBD_DEVICES array
		# should contain exactly one value
		sh_status_output=$($DRBDSETUP "$DRBD_DEVICES" sh-status)
	fi
	eval "$sh_status_output"

	# If there was no output at all, or a weird output,
	# make sure the status arrays won't be empty.
	(( ${#DRBD_ROLE_LOCAL[@]}    != 0 )) || DRBD_ROLE_LOCAL=(Unconfigured)
	(( ${#DRBD_ROLE_REMOTE[@]}   != 0 )) || DRBD_ROLE_REMOTE=(Unknown)
	(( ${#DRBD_CSTATE[@]}        != 0 )) || DRBD_CSTATE=(Unconfigured)
	(( ${#DRBD_DSTATE_LOCAL[@]}  != 0 )) || DRBD_DSTATE_LOCAL=(Unconfigured)
	(( ${#DRBD_DSTATE_REMOTE[@]} != 0 )) || DRBD_DSTATE_REMOTE=(DUnknown)

	# No-ops; they only show up when tracing the script.
	: == DEBUG == DRBD_ROLE_LOCAL == ${DRBD_ROLE_LOCAL[@]} ==
	: == DEBUG == DRBD_ROLE_REMOTE == ${DRBD_ROLE_REMOTE[@]} ==
	: == DEBUG == DRBD_CSTATE == ${DRBD_CSTATE[@]} ==
	: == DEBUG == DRBD_DSTATE_LOCAL == ${DRBD_DSTATE_LOCAL[@]} ==
	: == DEBUG == DRBD_DSTATE_REMOTE == ${DRBD_DSTATE_REMOTE[@]} ==
}
|
||
|
|
||
|
# This is not the only fencing mechanism.
# But in addition to the drbd "fence-peer" handler, which should be configured,
# and is expected to place some appropriate constraints, this is used to
# actually store the Outdated information in DRBD on-disk meta data.
#
# Called after stop, and from post notification events.
#
# Returns 0 if this node outdated itself, 1 if it did not (feature disabled,
# no other master known, stale notification data, or we are Primary ourselves).
maybe_outdate_self()
{
	# if you claim your right to go online with stale data,
	# there you are.
	ocf_is_true $OCF_RESKEY_stop_outdates_secondary || return 1

	local host stop_uname
	# We ignore $OCF_RESKEY_CRM_meta_notify_promote_uname here
	# because: if demote and promote for a _stacked_ resource
	# (or a "floating" one, where DRBD sits on top of some SAN)
	# happen in the same transition, demote will see the promote
	# hostname here, and voluntarily outdate itself. Which would
	# result in promote failure, as it is using the same meta
	# data, which would then be outdated.
	# If that is not sufficient for you, you probably need to
	# configure fencing policies in the drbd configuration.
	#
	# Pick the first master that is not this host. -F makes grep treat
	# $HOSTNAME as a literal string: as a regular expression, the dots of
	# a fully qualified name would match any character.
	host=$(printf "%s\n" $OCF_RESKEY_CRM_meta_notify_master_uname |
		grep -F -vix -m1 -e "$HOSTNAME" )
	if [[ -z $host ]] ; then
		# no current master host found, do not outdate myself
		return 1
	fi
	for stop_uname in $OCF_RESKEY_CRM_meta_notify_stop_uname; do
		[[ $host == "$stop_uname" ]] || continue
		# post notification for stop on that host.
		# hrmpf. crm passed in stale master_uname :(
		# ignore
		return 1
	done

	# e.g. post/promote of some other peer.
	# Should not happen, fencing constraints should take care of that.
	# But in case it does, scream out loud.
	case "${DRBD_ROLE_LOCAL[*]}" in
	*Primary*)
		# I am Primary.
		# The other one is Primary (according to OCF_RESKEY_CRM_meta_notify_master_uname).
		# But we cannot talk to each other :( (otherwise this function was not called)
		# One of us has to die.
		# Which one, however, is not ours to decide.

		ocf_log crit "resource internal SPLIT BRAIN: both $HOSTNAME and $host are Primary for $DRBD_RESOURCE, but the replication link is down!"
		return 1
	esac

	# OK, I am not Primary, but there is another node that is Primary.
	# Outdate myself.
	ocf_log notice "outdating $DRBD_RESOURCE: according to OCF_RESKEY_CRM_meta_notify_master_uname, '$host' is still master"
	do_drbdadm outdate $DRBD_RESOURCE

	# on some pacemaker versions, -INFINITY may cause resource instance stop/start.
	# But in this case that is ok, it may even clear the replication link
	# problem.
	set_master_score -INFINITY

	return 0
}
|
||
|
|
||
|
# Derive this node's master score from the current DRBD state (the DRBD_*
# status arrays) and the four values in OCF_RESKEY_adjust_master_score,
# then set or remove the score accordingly. Always returns $OCF_SUCCESS.
drbd_update_master_score() {
	# Deliberately unquoted: split the parameter into its four values.
	local -a adj_scores
	adj_scores=( $OCF_RESKEY_adjust_master_score )
	local score_consistent=${adj_scores[0]} score_remote=${adj_scores[1]}
	local score_local=${adj_scores[2]} score_best=${adj_scores[3]}

	# NOTE
	# There may be constraint scores from rules on role=Master,
	# that in some ways can add to the node attribute based master score we
	# specify below. If you think you want to add personal preferences,
	# in case the scores given by this RA do not suffice, this is the
	# value space you can work with (with the default settings):
	# -INFINITY: Do not promote. Really. Won't work anyways.
	#	Too bad, at least with current (October 2009) Pacemaker,
	#	negative master scores cause instance stop; restart cycle :(
	# missing, zero: Do not promote.
	#	I think my data is not good enough.
	#	Though, of course, you may try, and it might even work.
	#     5: please, do not promote, unless this is your only option.
	#    10: promotion is probably a bad idea, our local data is no good,
	#	you'd probably run into severe performance problems, and risk
	#	application crashes or blocking IO in case you lose the
	#	replication connection.
	#  1000: Ok to be promoted, we have good data locally (though we don't
	#	know about the peer, so possibly it has even better data?).
	#	You should use the crm-fence-peer.sh handler or similar
	#	mechanism to avoid data divergence.
	# 10000: Please promote me/keep me Primary.
	#	I'm confident that my data is as good as it gets.
	#
	# For multi volume, we need to compare who is "better" a bit more sophisticated.
	# The ${XXX[*]//UpToDate}, without being in double quotes, results in a single space,
	# if all are UpToDate.
	#
	# The case word has the shape <local roles>/<local disks>/<peer disks>/
	# and the order of the branches matters: the first match wins.
	: == DEBUG == ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ ==
	case ${DRBD_ROLE_LOCAL[*]}/${DRBD_DSTATE_LOCAL[*]//UpToDate/ }/${DRBD_DSTATE_REMOTE[*]//UpToDate/ }/ in
	*Primary*/\ /*/)
		# I am Primary, all local disks are UpToDate
		set_master_score $score_best
		;;
	*/\ /*DUnknown*/)
		# all local disks are UpToDate,
		# but I'm not Primary,
		# and I'm not sure about the peer's disk state(s).
		# We may need to outdate ourselves?
		# But if we outdate in a MONITOR, and are disconnected
		# secondary because of a hard primary crash, before CRM noticed
		# that there is no more master, we'd make us utterly useless!
		# Trust that the primary will also notice the disconnect,
		# and will place an appropriate fencing constraint via
		# its fence-peer handler callback.
		set_master_score $score_local
		;;
	*/\ /*/)
		# We know something about our peer, which means that either the
		# replication link is established, or it was not even
		# consistent last time we talked to each other.
		# Also all our local disks are UpToDate, which means even if we are
		# currently synchronizing, we do so as SyncSource.
		set_master_score $score_best
		;;

	*/*/\ /)
		# At least one of our local disks is not up to date.
		# But our peer is ALL OK.
		# We can expect to have access to useful
		# data, but must expect degraded performance.
		set_master_score $score_remote
		;;

	*/*Attaching*/*/|\
	*/*Negotiating*/*/)
		# some transitional state.
		# just don't do anything
		: ;;

	Unconfigured*|\
	*/*Diskless*/*/|\
	*/*Failed*/*/|\
	*/*Inconsistent*/*/|\
	*/*Outdated*/*/)
		# ALWAYS put the cluster in MAINTENANCE MODE
		# if you add a volume to a live replication group,
		# because the new volume will typically come up as Inconsistent
		# the first time, which would cause a monitor to revoke the
		# master score!
		#
		# At least some of our local disks are not really useable.
		# Our peer is not all good either (or some previous case block
		# would have matched). We have no access to useful data.
		# DRBD would refuse to be promoted, anyways.
		#
		# set_master_score -INFINITY
		# Too bad, at least with current (October 2009) Pacemaker,
		# negative master scores cause instance stop; restart cycle :(
		# Hope that this will suffice.
		remove_master_score
		;;
	*)
		# All local disks seem to be Consistent.
		# They _may_ be up to date, or not.
		# We hope that fencing mechanisms have put constraints in
		# place, so we won't be promoted with stale data.
		# But in case this was a cluster crash,
		# at least allow _someone_ to be promoted.
		set_master_score $score_consistent
		;;
	esac

	: "$OCF_SUCCESS = OCF_SUCCESS"
	return $OCF_SUCCESS
}
|
||
|
|
||
|
# Succeeds if /proc/drbd exists as a regular file
# (which the callers take as "the drbd module is loaded").
is_drbd_enabled() {
	[[ -f /proc/drbd ]]
}
|
||
|
|
||
|
#######################################################################
|
||
|
|
||
|
# Print a short usage message listing the supported actions to stdout.
drbd_usage() {
	printf '%s\n' \
		"usage: $0 {start|stop|monitor|validate-all|promote|demote|notify|meta-data}" \
		"" \
		"Expects to have a fully populated OCF RA-compliant environment set."
}
|
||
|
|
||
|
# Determine the current state of the resource.
#
# Side effect: (re-)initializes the global DRBD_* status arrays.
# Returns
#   $OCF_RUNNING_MASTER  if any local role is Primary,
#   $OCF_SUCCESS         if any local role is Secondary (and none Primary),
#   $OCF_NOT_RUNNING     if drbd is not enabled, no device node exists,
#                        or the resource is Unconfigured,
#   $OCF_ERR_GENERIC     for any other (unexpected) role.
drbd_status() {
	local rc=$OCF_NOT_RUNNING
	local dev

	# NOT local! but "return values"
	# since 8.4 supports multi volumes per resource,
	# these are shell arrays.
	#
	# Initialize to "Unconfigured", in case this returns early.
	# They will be re-initialized and properly populated in drbd_set_status_variables.
	DRBD_ROLE_LOCAL=(Unconfigured)
	DRBD_ROLE_REMOTE=(Unknown)
	DRBD_CSTATE=(Unconfigured)
	DRBD_DSTATE_LOCAL=(Unconfigured)
	DRBD_DSTATE_REMOTE=(DUnknown)

	if ! is_drbd_enabled; then
		return $rc
	fi

	# Not running, if no block devices exist.
	#
	# FIXME what if some do, and some do not exist?
	# Adding/removing volumes to/from existing resources should only be
	# done with maintenance-mode enabled.
	# If someone does manually kill/remove only some of the volumes,
	# we tolerate that here.
	#
	# The trailing "" is a sentinel: if no listed device node exists, the
	# loop ends with $dev empty. $dev is deliberately left unquoted.
	for dev in ${DRBD_DEVICES[@]} ""; do
		if [ -b $dev ]; then
			break
		fi
	done
	[[ -n $dev ]] || return $rc

	# ok, module is loaded, block device nodes exist.
	# lets see the status
	drbd_set_status_variables
	if [[ "${DRBD_ROLE_LOCAL[*]}" == *Primary* ]]; then
		rc=$OCF_RUNNING_MASTER
	elif [[ "${DRBD_ROLE_LOCAL[*]}" == *Secondary* ]]; then
		rc=$OCF_SUCCESS
	elif [[ "${DRBD_ROLE_LOCAL[*]}" == *Unconfigured* ]]; then
		rc=$OCF_NOT_RUNNING
	else
		ocf_log err "Unexpected role(s) >>${DRBD_ROLE_LOCAL[*]}<<"
		rc=$OCF_ERR_GENERIC
	fi

	return $rc
}
|
||
|
|
||
|
# Monitor action: report the resource state as determined by drbd_status,
# and refresh the master score on the way.
# I'm sorry, but there is no $OCF_DEGRADED_MASTER or similar yet.
#
# Returns the exit code of drbd_status, unchanged.
drbd_monitor() {
	local status

	drbd_status
	status=$?

	# The master score is left alone only for a probe that found the
	# resource not running. See also linux-ha mailing list archives,
	# From: Andrew Beekhof
	# Subject: Re: pacemaker+drbd promotion delay
	# Date: 2012-04-13 01:47:37 GMT
	# e.g.: http://thread.gmane.org/gmane.linux.highavailability.user/37089/focus=37163
	if [[ $status != $OCF_NOT_RUNNING ]] || ! ocf_is_probe ; then
		drbd_update_master_score
	else
		: "do nothing" ;
	fi

	# No-ops: spell out the symbolic name of the status when tracing.
	case $status in
	0) : "OCF_SUCCESS" ;;
	1) : "OCF_ERR_GENERIC" ;;
	2) : "OCF_ERR_ARGS" ;;
	3) : "OCF_ERR_UNIMPLEMENTED" ;;
	4) : "OCF_ERR_PERM" ;;
	5) : "OCF_ERR_INSTALLED" ;;
	6) : "OCF_ERR_CONFIGURED" ;;
	7) : "OCF_NOT_RUNNING" ;;
	8) : "OCF_RUNNING_MASTER" ;;
	9) : "OCF_FAILED_MASTER" ;;
	*) : " WTF? $status " ;;
	esac

	return $status
}
|
||
|
|
||
|
# Work out the name of our peer from the notification environment and
# store the matching drbdadm option in the global DRBD_TO_PEER:
# " --peer <name>" if a peer was found, the empty string otherwise.
#
# Depending on whether or not the peer is currently configured, slave,
# master, or about to be started, it may be mentioned in various
# variables (or not at all). The first name that is not our own
# $HOSTNAME (compared case insensitively) is used.
figure_out_drbd_peer_uname()
{
	local x
	# intentionally not cared for stop_uname
	#
	# -F: $HOSTNAME must be taken literally. As a regular expression, the
	# dots of a fully qualified host name would match any character, and a
	# similarly named peer could be mistaken for ourselves.
	x=$(printf "%s\n" \
		$OCF_RESKEY_CRM_meta_notify_start_uname \
		$OCF_RESKEY_CRM_meta_notify_promote_uname \
		$OCF_RESKEY_CRM_meta_notify_master_uname \
		$OCF_RESKEY_CRM_meta_notify_slave_uname \
		$OCF_RESKEY_CRM_meta_notify_demote_uname |
		grep -F -vix -m1 -e "$HOSTNAME" )
	DRBD_TO_PEER=${x:+ --peer $x}
}
|
||
|
|
||
|
# Wait until every device listed in DRBD_DEVICES exists as a block device
# node, polling once per second. There is no timeout in here: the wait is
# only ended by the device nodes showing up (or by whoever runs this script
# terminating it). Always returns 0.
my_udevsettle()
{
	# Keep the loop variable to ourselves, regardless of whether
	# the caller happens to have declared a "dev" of its own.
	local dev
	for dev in ${DRBD_DEVICES[@]}; do
		while ! test -b $dev; do
			sleep 1;
		done
	done
	return 0
}
|
||
|
# Create the DRBD objects for $DRBD_RESOURCE, then wait for the device
# nodes to show up.
#
# With multi volume support, that is "new-resource" followed by
# "new-minor"; without, "syncer" is used instead.
# Returns 1 if any of those drbdadm calls fails, otherwise the result of
# my_udevsettle.
create_device_udev_settle() {
	# my_udevsettle uses "dev" as its loop variable; declaring it here
	# keeps it out of the global scope.
	local dev

	if $DRBD_HAS_MULTI_VOLUME; then
		do_drbdadm new-resource $DRBD_RESOURCE &&
		do_drbdadm new-minor $DRBD_RESOURCE ||
			return 1
	else
		do_drbdadm syncer $DRBD_RESOURCE || return 1
	fi

	my_udevsettle
}
|
||
|
|
||
|
# Start action: load the drbd module if necessary, then bring the
# resource up as Secondary.
#
# Returns $OCF_SUCCESS once drbd_status reports the resource as running
# (Secondary), $OCF_ERR_INSTALLED if the module cannot be loaded.
# Exits the script with $OCF_ERR_GENERIC if the DRBD objects cannot be
# created, or if attaching fails on a retry.
drbd_start() {
	local rc=$OCF_ERR_GENERIC
	local status
	local first_try=true

	if ! is_drbd_enabled; then
		if ! do_cmd modprobe -s drbd $($DRBDADM sh-mod-parms); then
			ocf_log err "Cannot load the drbd module.";
			: "$OCF_ERR_INSTALLED = OCF_ERR_INSTALLED"
			return $OCF_ERR_INSTALLED
		fi
		ocf_log debug "$DRBD_RESOURCE start: Module loaded."
	fi

	# Keep trying to bring up the resource;
	# wait for the CRM to time us out if this fails
	while true; do
		drbd_status
		status=$?
		case "$status" in
		$OCF_SUCCESS)
			# Just in case we have to adjust something, this is a
			# good place to do it. Actually, we don't expect to be
			# called to "start" an already "running" resource, so
			# this is probably dead code.
			# Also, ignore the exit code of adjust, as we are
			# "running" already, anyways, right?
			figure_out_drbd_peer_uname
			do_drbdadm $DRBD_TO_PEER adjust $DRBD_RESOURCE
			rc=$OCF_SUCCESS
			break
			;;
		$OCF_NOT_RUNNING)
			# Check for offline resize. If using internal meta data,
			# we may need to move it first to its expected location.
			if $first_try; then
				do_drbdadm check-resize $DRBD_RESOURCE
			fi
			figure_out_drbd_peer_uname
			if ! create_device_udev_settle; then
				# We cannot even create the objects
				exit $OCF_ERR_GENERIC
			fi
			if ! do_drbdadm $DRBD_TO_PEER attach $DRBD_RESOURCE ; then
				# If we cannot up it, even on the second try,
				# it is unlikely to get better. Don't wait for
				# this operation to timeout, but short circuit
				# exit with generic error.
				if ! $first_try; then
					exit $OCF_ERR_GENERIC
				fi
				sleep 1
			fi
			;;
		$OCF_RUNNING_MASTER)
			ocf_log warn "$DRBD_RESOURCE already Primary, demoting."
			do_drbdadm secondary $DRBD_RESOURCE
		esac
		# Pause between retries, but not before the first one.
		if ! $first_try; then
			sleep 1
		fi
		first_try=false
	done

	# in case someone does not configure monitor,
	# we must at least call it once after start.
	drbd_update_master_score

	return $rc
}
|
||
|
|
||
|
# Promote action: make the resource Primary.
#
# Returns $OCF_SUCCESS once drbd_status reports the resource as Primary.
# Returns $OCF_ERR_GENERIC if the resource is not running at all, or if
# "drbdadm primary" exits with 17; in that case the function does not
# return before the script has been running for at least 15 seconds.
drbd_promote() {
	local rc=$OCF_ERR_GENERIC
	local status
	local first_try=true
	local delay

	# Keep trying to promote the resource;
	# wait for the CRM to time us out if this fails
	while :; do
		drbd_status
		status=$?
		case "$status" in
		$OCF_SUCCESS)
			do_drbdadm primary $DRBD_RESOURCE
			if [[ $? = 17 ]]; then
				# All available disks are inconsistent,
				# or I am consistent, but failed to fence the peer.
				# Cannot become primary.
				# No need to retry indefinitely.
				ocf_log crit "Refusing to be promoted to Primary without UpToDate data"
				break
			fi
			;;
		$OCF_NOT_RUNNING)
			ocf_log err "Trying to promote a resource that was not started"
			break
			;;
		$OCF_RUNNING_MASTER)
			rc=$OCF_SUCCESS
			break
		esac
		# Pause between retries, but not before the first one.
		$first_try || sleep 1
		first_try=false
	done

	# avoid too tight pacemaker driven "recovery" loop,
	# if promotion keeps failing for some reason
	# ($SECONDS is the time since this shell was started).
	if [[ $rc != 0 ]] && (( $SECONDS < 15 )) ; then
		delay=$(( 15 - SECONDS ))
		ocf_log warn "promotion failed; sleep $delay # to prevent tight recovery loop"
		sleep $delay
	fi
	return $rc
}
|
||
|
|
||
|
# Demote action: make the resource Secondary.
#
# Returns $OCF_SUCCESS once drbd_status reports the resource as Secondary,
# $OCF_ERR_GENERIC if the resource is not running at all.
# As long as it is still Primary, "drbdadm secondary" is retried.
drbd_demote() {
	local rc=$OCF_ERR_GENERIC
	local status
	local first_try=true

	# Keep trying to demote the resource;
	# wait for the CRM to time us out if this fails
	while :; do
		drbd_status
		status=$?
		case "$status" in
		$OCF_SUCCESS)
			rc=$OCF_SUCCESS
			break
			;;
		$OCF_NOT_RUNNING)
			ocf_log err "Trying to demote a resource that was not started"
			break
			;;
		$OCF_RUNNING_MASTER)
			do_drbdadm secondary $DRBD_RESOURCE
		esac
		# Pause between retries, but not before the first one.
		$first_try || sleep 1
		first_try=false
	done

	return $rc
}
|
||
|
|
||
|
# Stop action: take the resource down, demoting it first if necessary.
#
# Returns $OCF_SUCCESS once drbd_status reports the resource as not running.
# Before returning, this node may outdate itself (see maybe_outdate_self),
# and its master score is removed.
drbd_stop() {
	local rc=$OCF_ERR_GENERIC
	local status
	local first_try=true

	# Keep trying to bring down the resource;
	# wait for the CRM to time us out if this fails
	while :; do
		drbd_status
		status=$?
		case "$status" in
		$OCF_SUCCESS)
			do_drbdadm down $DRBD_RESOURCE
			;;
		$OCF_NOT_RUNNING)
			# Just in case, down it anyways, in case it has been
			# deconfigured but not yet removed.
			# Relevant for >= 8.4.
			do_drbdadm down $DRBD_RESOURCE
			# But ignore any return codes,
			# we are not running, so stop is successful.
			rc=$OCF_SUCCESS
			break
			;;
		$OCF_RUNNING_MASTER)
			ocf_log warn "$DRBD_RESOURCE still Primary, demoting."
			do_drbdadm secondary $DRBD_RESOURCE
		esac
		# Pause between retries, but not before the first one.
		$first_try || sleep 1
		first_try=false
	done

	# if there is some Master (Primary) still around,
	# outdate myself in drbd on-disk meta data.
	maybe_outdate_self

	# do not let old master scores laying around.
	# they may confuse crm if this node was set to standby.
	remove_master_score

	return $rc
}
|
||
|
|
||
|
|
||
|
# Notify action: react to pre/post notifications of the clone set.
#
# Any "start" notification (pre or post) triggers a "drbdadm adjust".
# Any other post notification refreshes the master score, and, if a peer
# disk state is still unknown, may outdate this node.
# Always returns $OCF_SUCCESS.
drbd_notify() {
	local n_type=$OCF_RESKEY_CRM_meta_notify_type
	local n_op=$OCF_RESKEY_CRM_meta_notify_operation

	# No-ops; they only show up when tracing the script.
	# active_* and *_resource not really interesting
	# : "== DEBUG == active = $OCF_RESKEY_CRM_meta_notify_active_uname"
	: "== DEBUG == slave = $OCF_RESKEY_CRM_meta_notify_slave_uname"
	: "== DEBUG == master = $OCF_RESKEY_CRM_meta_notify_master_uname"
	: "== DEBUG == start = $OCF_RESKEY_CRM_meta_notify_start_uname"
	: "== DEBUG == promote = $OCF_RESKEY_CRM_meta_notify_promote_uname"
	: "== DEBUG == stop = $OCF_RESKEY_CRM_meta_notify_stop_uname"
	: "== DEBUG == demote = $OCF_RESKEY_CRM_meta_notify_demote_uname"

	if [[ $n_type/$n_op == */start ]]; then
		# We do not get a /pre/ start notification for ourself.
		# but we get a /pre/ start notification for the other side, unless both
		# are started from the same transition graph. If there are only two
		# peers (the "classic" two-node DRBD), this adjust is usually a no-op.
		#
		# In case of more than one _possible_ peer, we may still be StandAlone,
		# or configured for a meanwhile failed peer, and should now adjust our
		# network settings during pre-notification of start of the other node.
		#
		# We usually get /post/ notification for ourself and the peer.
		# In both cases adjust should be a no-op.
		drbd_set_status_variables
		figure_out_drbd_peer_uname
		do_drbdadm $DRBD_TO_PEER -v adjust $DRBD_RESOURCE
	elif [[ $n_type/$n_op == post/* ]]; then
		# After something has been done is a good time to
		# recheck our status:
		drbd_set_status_variables
		drbd_update_master_score

		: == DEBUG == ${DRBD_DSTATE_REMOTE[*]} ==
		if [[ ${DRBD_DSTATE_REMOTE[*]} == *DUnknown* ]]; then
			# Still not communicating.
			# Maybe someone else is primary (too)?
			maybe_outdate_self
		fi
	fi

	: "$OCF_SUCCESS = OCF_SUCCESS"
	return $OCF_SUCCESS
}
|
||
|
|
||
|
# "macro" to be able to give useful error messages
|
||
|
# on clone resource configuration error.
|
||
|
# meta_expect WHAT OP EXPECT
#
#   WHAT    meta attribute name as written in the CIB, e.g. "clone-max";
#           dashes are mapped to underscores to build the name of the
#           OCF_RESKEY_CRM_meta_* environment variable that is looked up.
#   OP      a binary operator understood by "[", e.g. "-le" or "=".
#   EXPECT  the right hand side of the comparison.
#
# Returns 0 if the variable is set, non-empty and "[ value OP EXPECT ]" holds.
# Otherwise logs an error and EXITS the whole script with
# $OCF_ERR_CONFIGURED.
meta_expect()
{
	local what=$1 whatvar=OCF_RESKEY_CRM_meta_${1//-/_} op=$2 expect=$3
	local val=${!whatvar}
	if [[ -n $val ]]; then
		# [, not [[, or it won't work ;)
		# ([[ does not take its operator from an expansion.)
		# All three operands are quoted so "[" always sees exactly
		# three arguments: an unquoted value such as "5 -o 1" would
		# otherwise be word-split into extra operators and could make
		# the check pass.
		[ "$val" "$op" "$expect" ] && return
	fi
	ocf_log err "meta parameter misconfigured, expected $what $op $expect, but found ${val:-unset}."
	exit $OCF_ERR_CONFIGURED
}
|
||
|
|
||
|
# ls_stat_is_block_maj_147 PATH
#
# Succeeds (returns 0) if "ls -L -l PATH" -- i.e. PATH after following
# symlinks -- describes a block device whose major number is 147.
# Fails if PATH does not exist, is not a block device, or has another major.
# NOTE(review): 147 is presumably the major number used by DRBD devices;
# confirm against the rest of the agent.
ls_stat_is_block_maj_147() {
	# Reuse the positional parameters as the fields of the ls output.
	# "command" bypasses any alias or function named ls; "--" keeps a
	# path starting with "-" from being parsed as an option.
	set -- $(command ls -L -l -- "$1" 2>/dev/null)
	# Field 1 is the mode string ("b..." for block devices); for device
	# nodes field 5 holds "MAJOR," instead of a size.
	[[ $1 = b* ]] && [[ $5 == 147,* ]]
}
|
||
|
|
||
|
# check_crm_feature_set
#
# Parses $OCF_RESKEY_crm_feature_set as up to three numeric components
# (anything that is not a digit acts as a separator, missing components
# count as 0) and logs a warning unless the result is greater than 3.0.0.
# Never fails hard; it only warns.
check_crm_feature_set()
{
	set -- ${OCF_RESKEY_crm_feature_set//[!0-9]/ }
	# Force base 10, so a component with a leading zero (e.g. "08") is
	# neither read as octal nor rejected by the arithmetic evaluation.
	local a=$(( 10#${1:-0} )) b=$(( 10#${2:-0} )) c=$(( 10#${3:-0} ))

	(( a > 3 )) ||
	(( a == 3 && b > 0 )) ||
	(( a == 3 && b == 0 && c > 0 )) ||
	ocf_log warn "You may be disappointed: This RA is intended for pacemaker 1.0 or better!"
}
|
||
|
|
||
|
# drbd_validate_all: sanity-check environment and configuration, and set up
# the globals used by the other actions.
#
# Sets (globally): DRBDADM, DRBDSETUP, DRBD_HAS_MULTI_VOLUME, DRBD_RESOURCE
# (exported), DRBD_DEVICES (array), HOSTNAME, and possibly resets
# OCF_RESKEY_adjust_master_score to OCF_RESKEY_adjust_master_score_default.
#
# Returns:
#   $OCF_SUCCESS         everything looks plausible
#   $OCF_ERR_CONFIGURED  bad drbdconf/drbd_resource value, master-max = 2
#                        without allow-two-primaries, or notify not enabled
#   $OCF_ERR_INSTALLED   config file unreadable, or resource not in the config
#   $OCF_NOT_RUNNING     resource not in the config, but this is only a probe
# May also EXIT the script via check_binary or meta_expect.
drbd_validate_all () {
	DRBDADM="drbdadm"
	DRBDSETUP="drbdsetup"
	DRBD_HAS_MULTI_VOLUME=false

	# these will _exit_ if they don't find the binaries
	check_binary $DRBDADM
	check_binary $DRBDSETUP
	# XXX I really take cibadmin, sed, grep, etc. for granted.

	local VERSION DRBD_KERNEL_VERSION_CODE=0
	if VERSION="$($DRBDADM --version 2>/dev/null)"; then
		# The output is evaluated as shell variable assignments;
		# only DRBD_KERNEL_VERSION_CODE is used below.
		eval $VERSION
	fi
	if (( $DRBD_KERNEL_VERSION_CODE == 0x0 )) ; then
		# Maybe the DRBD module was not loaded (yet).
		# I don't want to load the module here,
		# maybe this is just a probe or stop.
		# It will be loaded on "start", though.
		# Instead, look at modinfo output.
		# Newer drbdadm does this implicitly, but may reexec older
		# drbdadm versions for compatibility reasons.
		DRBD_KERNEL_VERSION_CODE=$(printf "0x%02x%02x%02x" $(
			modinfo -F version drbd |
			sed -ne 's/^\([0-9]\+\)\.\([0-9]\+\)\.\([0-9]\+\).*$/\1 \2 \3/p'))
	fi
	if (( $DRBD_KERNEL_VERSION_CODE >= 0x080400 )); then
		DRBD_HAS_MULTI_VOLUME=true
	fi
	check_crm_feature_set

	# Check clone and M/S options.
	meta_expect clone-max -le 2
	meta_expect clone-node-max = 1
	meta_expect master-node-max = 1
	meta_expect master-max -le 2

	# Rather than returning $OCF_ERR_CONFIGURED, we sometimes return
	# $OCF_ERR_INSTALLED here: the local config may be broken, but some
	# other node may have a valid config.

	# check drbdconf plausibility
	case "$OCF_RESKEY_drbdconf" in
	"")
		# this is actually ok. drbdadm has its own builtin defaults.
		# but as long as we assign an explicit default above,
		# this cannot happen anyways.
		: ;;
	*[!-%+./0-9:=@A-Z_a-z]*)
		# no, I do not trust the configurable cib parameters.
		ocf_log err "drbdconf name must only contain [-%+./0-9:=@A-Z_a-z]"
		: "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED"
		return $OCF_ERR_CONFIGURED
		;;
	*)
		# Check if we can read the configuration file.
		if [ ! -r "${OCF_RESKEY_drbdconf}" ]; then
			ocf_log err "Configuration file ${OCF_RESKEY_drbdconf} does not exist or is not readable!"
			: "$OCF_ERR_INSTALLED = OCF_ERR_INSTALLED"
			return $OCF_ERR_INSTALLED
		fi
		DRBDADM="$DRBDADM -c $OCF_RESKEY_drbdconf"
	esac

	# check drbd_resource plausibility
	case "$OCF_RESKEY_drbd_resource" in
	"")
		ocf_log err "No resource name specified!"
		: "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED"
		return $OCF_ERR_CONFIGURED
		;;
	*[!-%+./0-9:=@A-Z_a-z]*)
		# no, I do not trust the configurable cib parameters.
		ocf_log err "Resource name must only contain [-%+./0-9:=@A-Z_a-z]"
		: "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED"
		return $OCF_ERR_CONFIGURED
	esac
	# exporting this is useful for "drbdsetup show".
	# and it makes it all a little bit more readable.
	export DRBD_RESOURCE=$OCF_RESKEY_drbd_resource

	# The resource should appear in the config file,
	# otherwise something's fishy
	# NOTE
	# since 8.4 has multi volume support,
	# DRBD_DEVICES will be a shell array!
	# FIXME we should double check that we explicitly restrict the set of
	# valid characters in device names...
	if DRBD_DEVICES=($($DRBDADM --stacked sh-dev $DRBD_RESOURCE 2>/dev/null)); then
		# apparently a "stacked" resource. Remember for future DRBDADM calls.
		DRBDADM="$DRBDADM -S"
	elif DRBD_DEVICES=($($DRBDADM sh-dev $DRBD_RESOURCE 2>/dev/null)); then
		: # nothing to do.
	else
		if [[ $__OCF_ACTION = "monitor" && $OCF_RESKEY_CRM_meta_interval = 0 ]]; then
			# ok, this was a probe. That may happen on any node,
			# to enforce configuration.
			: "$OCF_NOT_RUNNING = OCF_NOT_RUNNING"
			return $OCF_NOT_RUNNING
		else
			# hm. probably misconfigured constraint somewhere.
			# sorry. don't retry anywhere.
			ocf_log err "DRBD resource ${DRBD_RESOURCE} not found in configuration file ${OCF_RESKEY_drbdconf}."
			remove_master_score
			: "$OCF_ERR_INSTALLED = OCF_ERR_INSTALLED"
			return $OCF_ERR_INSTALLED
		fi
	fi

	# check for master-max and allow-two-primaries on start|promote only,
	# so it could be stopped still, if someone re-configured while running.
	case $__OCF_ACTION:$OCF_RESKEY_CRM_meta_master_max in
	start:2|promote:2)
		if ! $DRBDADM -d -v dump $DRBD_RESOURCE 2>/dev/null |
			grep -q -Ee '^[[:space:]]*allow-two-primaries([[:space:]]+yes)?;$'
		then
			ocf_log err "master-max = 2, but DRBD resource $DRBD_RESOURCE does not allow-two-primaries."
			: "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED"
			return $OCF_ERR_CONFIGURED
		fi
	esac

	# detect whether notify is configured or not.
	# for probes, the meta_notify* namespace is not exported.
	case $__OCF_ACTION in
	monitor|validate-all)
		:;;
	*)
		# Test if the environment variables for either the notify
		# enabled, or one of its effects, are set.
		# If both are unset, we complain.
		if ! ocf_is_true ${OCF_RESKEY_CRM_meta_notify} &&
		   [[ ${OCF_RESKEY_CRM_meta_notify_start_uname- NOT SET } = " NOT SET " ]]; then
			ocf_log err "you really should enable notify when using this RA"
			: "$OCF_ERR_CONFIGURED = OCF_ERR_CONFIGURED"
			return $OCF_ERR_CONFIGURED
		fi
	esac

	# adjust_master_score must be exactly four non-negative integers in
	# non-decreasing order; anything else falls back to the default.
	# i: current value, j: previous *numeric* value, n: number of values.
	local i j n=0 fallback=false
	for i in $OCF_RESKEY_adjust_master_score; do
		n=$(( n+1 ))
		if [[ $i = *[!0-9]* ]]; then
			fallback=true
			ocf_log err "BAD adjust_master_score value $i ; falling back to default"
			# Not a number: keep it out of the arithmetic comparison
			# below, which would only produce shell error messages.
			continue
		fi
		# Compare in base 10, so values with leading zeros are not
		# interpreted as octal.
		if [[ $j ]] && (( 10#$i < 10#$j )); then
			fallback=true
			ocf_log err "BAD adjust_master_score value $j > $i ; falling back to default"
		fi
		j=$i
	done
	[[ $n != 4 ]] && fallback=true && ocf_log err "Wrong number of adjust_master_score values ($n != 4); falling back to default"
	$fallback && OCF_RESKEY_adjust_master_score=$OCF_RESKEY_adjust_master_score_default

	# we use it in various places,
	# just make sure it contains what we expect.
	HOSTNAME=$(uname -n)

	: "$OCF_SUCCESS = OCF_SUCCESS"
	return $OCF_SUCCESS
}
|
||
|
|
||
|
#######################################################################
|
||
|
|
||
|
# Main: exactly one argument is expected; anything else prints the usage
# text and exits with $OCF_ERR_ARGS.  The action itself is read from
# $__OCF_ACTION, not from the argument.
if [ $# != 1 ]; then
	drbd_usage
	exit $OCF_ERR_ARGS
fi

# if $__OCF_ACTION = monitor, but meta_interval not set,
# this is a "probe". we could change behaviour.
: ${OCF_RESKEY_CRM_meta_interval=0}

# These two actions must work without any validation.
case $__OCF_ACTION in
meta-data)
	meta_data
	exit $OCF_SUCCESS
	;;
usage)
	drbd_usage
	exit $OCF_SUCCESS
esac

# NOTE(review): USE_DEBUG_LOG and file descriptor 9 are not set up in this
# part of the file; presumably both are prepared further up -- confirm.
if $USE_DEBUG_LOG ; then
	# send stderr (and with it the "set -x" trace) to fd 9
	exec 2>&9
	set -x
fi

# Everything except usage and meta-data must pass the validate test
drbd_validate_all || exit

case $__OCF_ACTION in
start)
	drbd_start
	;;
stop)
	drbd_stop
	;;
notify)
	drbd_notify
	;;
promote)
	drbd_promote
	;;
demote)
	drbd_demote
	;;
status)
	drbd_status
	;;
monitor)
	drbd_monitor
	;;
validate-all)
	# nothing more to do: drbd_validate_all already succeeded above
	;;
*)
	drbd_usage
	exit $OCF_ERR_UNIMPLEMENTED
esac
# exit code is the exit code (return code) of the last command (shell function)
|