You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

564 lines
16 KiB

From d83b9a9394ef69ca2801c84dee46094a224ca654 Mon Sep 17 00:00:00 2001
From: David Vossel <dvossel@redhat.com>
Date: Thu, 5 Mar 2015 13:47:58 -0600
Subject: [PATCH] redis agent support
---
doc/man/Makefile.am | 1 +
heartbeat/Makefile.am | 1 +
heartbeat/redis | 519 ++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 521 insertions(+)
create mode 100644 heartbeat/redis
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 43d60d9..653e818 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -125,6 +125,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
ocf_heartbeat_pound.7 \
ocf_heartbeat_proftpd.7 \
ocf_heartbeat_rabbitmq-cluster.7 \
+ ocf_heartbeat_redis.7 \
ocf_heartbeat_rsyncd.7 \
ocf_heartbeat_rsyslog.7 \
ocf_heartbeat_scsi2reservation.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 3bcf2d9..e4ed4fd 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -105,6 +105,7 @@ ocf_SCRIPTS = ClusterMon \
rabbitmq-cluster \
Raid1 \
Route \
+ redis \
rsyncd \
rsyslog \
SAPDatabase \
diff --git a/heartbeat/redis b/heartbeat/redis
new file mode 100644
index 0000000..6b479b2
--- /dev/null
+++ b/heartbeat/redis
@@ -0,0 +1,519 @@
+#!/bin/bash
+# OCF resource agent for redis-server, including master/slave replication.
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
+# Defaults for any resource parameters that were not supplied.
+: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
+: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
+: ${OCF_RESKEY_user:=redis}
+: ${OCF_RESKEY_rundir:=/var/run/redis}
+: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
+: ${OCF_RESKEY_socket_name:=redis.sock}
+: ${OCF_RESKEY_port:=6379}
+
+if [ -z "$OCF_RESKEY_config" ]; then  # no config given: prefer /etc/redis.conf if it exists
+ if [ -f "/etc/redis.conf" ]; then
+ OCF_RESKEY_config="/etc/redis.conf"
+ else
+ OCF_RESKEY_config="/etc/redis/redis.conf"
+ fi
+fi
+
+CHECK_SLAVE_STATE=0  # set to 1 by demote() so that monitor() also checks the replication link
+# Working names for the effective settings used by the functions below.
+REDIS_SERVER="$OCF_RESKEY_bin"
+REDIS_CLIENT="$OCF_RESKEY_client_bin"
+REDIS_CONFIG="$OCF_RESKEY_config"
+REDIS_USER="$OCF_RESKEY_user"
+REDIS_RUNDIR="$OCF_RESKEY_rundir"
+REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"  # pidfile and socket both live in the rundir
+REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
+REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
+
+function meta_data() {  # Print the OCF resource-agent metadata XML for this agent on stdout
+    cat <<EOI  # unquoted delimiter: the \${OCF_RESKEY_*} defaults below are expanded into the XML
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="redis">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent script for redis server.
+
+This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
+When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
+</longdesc>
+
+<shortdesc lang="en">Redis server</shortdesc>
+
+<parameters>
+<parameter name="bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-server\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_bin}" />
+</parameter>
+
+<parameter name="client_bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-cli\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
+<content type="string" default="${OCF_RESKEY_client_bin}" />
+</parameter>
+
+<parameter name="config" unique="1" required="0">
+<longdesc lang="en">
+Path to 'redis.conf'
+</longdesc>
+<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
+<content type="string" default="${OCF_RESKEY_config}" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User to run redis as
+</longdesc>
+<shortdesc lang="en">Redis user</shortdesc>
+<content type="string" default="${OCF_RESKEY_user}" />
+</parameter>
+
+<parameter name="rundir" unique="1" required="0">
+<longdesc lang="en">
+Directory to store socket and pid file in
+</longdesc>
+<shortdesc lang="en">Redis var/run dir</shortdesc>
+<content type="string" default="${OCF_RESKEY_rundir}"/>
+</parameter>
+
+<parameter name="pidfile_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the pidfile. Will be created in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis pidfile name</shortdesc>
+<content type="string" default="${OCF_RESKEY_pidfile_name}"/>
+</parameter>
+
+<parameter name="socket_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the socket. Will be created in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis socket name</shortdesc>
+<content type="string" default="${OCF_RESKEY_socket_name}"/>
+</parameter>
+
+<parameter name="port" unique="0" required="0">
+<longdesc lang="en">
+Port for replication client to connect to on remote server
+</longdesc>
+<shortdesc lang="en">Replication port</shortdesc>
+<content type="string" default="${OCF_RESKEY_port}"/>
+</parameter>
+
+<parameter name="wait_last_known_master" unique="0" required="0">
+<longdesc lang="en">
+During redis cluster bootstrap, wait for the last known master to be
+promoted before allowing any other instances in the cluster to be
+promoted. This lessens the risk of data loss when persistent data
+is in use.
+</longdesc>
+<shortdesc lang="en">Wait for last known master</shortdesc>
+<content type="boolean" default="false"/>
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120" />
+<action name="stop" timeout="120" />
+<action name="status" timeout="60" />
+<action name="monitor" depth="0" timeout="60" interval="45" />
+<action name="monitor" role="Master" depth="0" timeout="60" interval="20" />
+<action name="monitor" role="Slave" depth="0" timeout="60" interval="60" />
+<action name="promote" timeout="120" />
+<action name="demote" timeout="120" />
+<action name="notify" timeout="90" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+EOI
+}
+
+INSTANCE_ATTR_NAME="${OCF_RESOURCE_INSTANCE%%:*}"  # instance name up to the first ':' (no echo/awk fork needed)
+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"
+MASTER_HOST=""  # cache used by set_master / last_known_master
+MASTER_ACTIVE_CACHED=""  # non-empty once master_is_active has run its check
+MASTER_ACTIVE=""  # cached exit status of that check
+
+master_is_active()
+{
+ if [ -z "$MASTER_ACTIVE_CACHED" ]; then
+ # determine if a master instance is already up and is healthy
+ crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
+ MASTER_ACTIVE=$?
+ MASTER_ACTIVE_CACHED="true"
+ fi
+ return $MASTER_ACTIVE
+}
+
+# Record node $1 as the master: cache it in MASTER_HOST and store it in the cluster attribute.
+function set_master() {
+    MASTER_HOST="$1"
+    ${CRM_ATTR_REPL_INFO} -q -v "$MASTER_HOST"
+}
+
+# Print the node name last recorded as master (empty when none is known).
+# The cluster attribute is only queried while MASTER_HOST is still empty;
+# stderr of that query is discarded.
+function last_known_master() {
+    [ -n "$MASTER_HOST" ] || MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)"
+    echo "$MASTER_HOST"
+}
+
+function crm_master_reboot() {  # crm_master with "-l reboot", passing every argument through
+    "${HA_SBIN_DIR}"/crm_master -l reboot "$@"
+}
+
+function calculate_score()
+{
+ perf_score="$1"
+ connected_clients="$2"
+
+ if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
+ # only set perferred score by slave_priority if
+ # we are not waiting for the last known master. Otherwise
+ # we want the agent to have complete control over the scoring.
+ perf_score=""
+ connected_clients="0"
+ fi
+
+ if [[ -z "$perf_score" ]]; then
+ if [[ "$(last_known_master)" == "$NODENAME" ]]; then
+ perf_score=1000
+ else
+ perf_score=1
+ fi
+ fi
+ perf_score=$(( perf_score + connected_clients ))
+ echo "$perf_score"
+}
+
+function set_score()
+{
+ local score="$1"
+
+ if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then
+ local last_master="$(last_known_master)"
+ if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then
+ ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
+ return
+ fi
+ fi
+
+ ocf_log debug "monitor: Setting master score to '$score'"
+ crm_master_reboot -v "$score"
+}
+
+function redis_client() {  # Run redis-cli against the local unix socket with the given arguments; CRs are stripped from its output
+    ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*"
+    "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'  # NB: the pipeline's status is sed's, not redis-cli's
+}
+
+# Cheap liveness check: succeeds only if the pidfile exists and the pid it
+# holds is among the pids that pidof reports for the redis-server binary.
+# Returns OCF_SUCCESS or OCF_NOT_RUNNING.
+function simple_status() {
+    local pid
+
+    [ -f "$REDIS_PIDFILE" ] || return $OCF_NOT_RUNNING
+    pid="$(<"$REDIS_PIDFILE")"
+    if ! pidof "$REDIS_SERVER" | grep -q "\<$pid\>"; then
+        return $OCF_NOT_RUNNING
+    fi
+    ocf_log debug "monitor: redis-server running under pid $pid"
+    return $OCF_SUCCESS
+}
+
+function monitor() {  # Report redis state: OCF_NOT_RUNNING, OCF_SUCCESS (running, not master), OCF_RUNNING_MASTER or OCF_ERR_GENERIC
+ local res
+
+ simple_status  # cheap pidfile/process check first; anything but success is returned as-is
+ res=$?
+ if (( res != OCF_SUCCESS )); then
+ return $res
+ fi
+
+ typeset -A info  # fields of the redis INFO reply, keyed by field name
+ while read line; do
+ [[ "$line" == "#"* ]] && continue  # skip lines starting with '#'
+ [[ "$line" != *":"* ]] && continue  # skip anything that is not key:value
+ IFS=':' read -r key value <<< "$line"
+ info[$key]="$value"
+ done < <(redis_client info)
+ if [[ -z "${info[role]}" ]]; then  # no role field: the INFO query did not produce a usable reply
+ ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
+ return $OCF_ERR_GENERIC
+ fi
+
+ if ocf_is_ms; then
+ # Here we see if a score has already been set.
+ # If score isn't set we use the redis setting 'slave_priority'.
+ # If that isn't set, we default to 1000 for a master, and 1 for slave.
+ # We then add 1 for each connected client
+ score="$(crm_master_reboot --get-value --quiet 2>/dev/null)"
+ if [[ -z "$score" ]]; then
+ score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
+ set_score "$score"
+ fi
+
+ if [[ "${info[role]}" == "master" ]]; then
+ if ocf_is_probe; then  # on a probe, record this node as the master
+ set_master "$NODENAME"
+ fi
+ return $OCF_RUNNING_MASTER
+ fi
+
+ if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then  # only set by demote(): also verify the replication link
+ if [[ "${info[master_link_status]}" != "up" ]]; then
+ ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
+ return $OCF_ERR_GENERIC
+ fi
+ if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
+ ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
+ return $OCF_ERR_GENERIC
+ fi
+ fi
+ fi
+ return $OCF_SUCCESS
+}
+
+# Start redis-server as REDIS_USER and wait until it answers INFO with
+# loading:0. In master/slave mode the instance is then put into slave mode.
+function start() {
+    monitor
+    status=$?
+    if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+        ocf_log info "start: redis is already running"
+        return $OCF_SUCCESS
+    fi
+
+    [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
+    chown -R "$REDIS_USER" "$REDIS_RUNDIR"
+
+    ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
+    output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
+
+    while true; do
+        # wait for redis to start
+        typeset -A info=()  # reset, so a failed poll cannot reuse the previous answer
+        while read -r line; do
+            [[ "$line" == "#"* ]] && continue
+            [[ "$line" != *":"* ]] && continue
+            IFS=':' read -r key value <<< "$line"
+            info[$key]="$value"
+        done < <(redis_client info)
+
+        # An empty 'loading' (no INFO reply yet) would count as 0 inside (( )); require a real value.
+        if [[ -n "${info[loading]}" ]] && (( info[loading] == 0 )); then
+            break
+        elif (( info[loading] == 1 )); then
+            sleep "${info[loading_eta_seconds]}"
+        elif pidof "$REDIS_SERVER" >/dev/null; then
+            # unknown error, but the process still exists.
+            # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
+            # See https://github.com/antirez/redis/issues/2368
+            # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
+            sleep 1
+        else
+            ocf_log err "start: Unknown error waiting for redis to start"
+            return $OCF_ERR_GENERIC
+        fi
+    done
+
+    ocf_is_ms && demote # pacemaker expects resources to start in slave mode
+    monitor
+    status=$?
+    if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+        return $OCF_SUCCESS
+    fi
+    ocf_log err "start: Unknown error starting redis. output=${output//$'\n'/; }"
+    return $status
+}
+
+function stop() {
+ monitor
+ status=$?
+
+ if (( status == OCF_NOT_RUNNING )); then
+ ocf_log info "stop: redis is already stopped"
+ crm_master_reboot -D
+ return $OCF_SUCCESS
+ fi
+
+ pid="$(<"$REDIS_PIDFILE")"
+ kill -TERM "$pid"
+
+ while true; do
+ simple_status
+ status=$?
+ if (( status == OCF_NOT_RUNNING )); then
+ crm_master_reboot -D
+ return $OCF_SUCCESS
+ fi
+ sleep 1
+ done
+}
+
+# Turn this instance into the replication master via "slaveof no one" and
+# record this node as master. Succeeds immediately if redis is already master;
+# fails with OCF_ERR_GENERIC if redis is not running as a slave to begin with
+# or does not report the master role afterwards.
+function promote() {
+    monitor
+    status=$?
+    if (( status != OCF_RUNNING_MASTER )) && (( status != OCF_SUCCESS )); then
+        ocf_log err "promote: Node is not running as a slave"
+        return $OCF_ERR_GENERIC
+    fi
+    if (( status == OCF_RUNNING_MASTER )); then
+        ocf_log info "promote: Already running as master"
+    else
+        redis_client slaveof no one
+        monitor
+        status=$?
+    fi
+    if (( status != OCF_RUNNING_MASTER )); then
+        ocf_log err "promote: Unknown error while promoting to master (status=$status)"
+        return $OCF_ERR_GENERIC
+    fi
+    set_master "$NODENAME"
+    return $OCF_SUCCESS
+}
+
+function demote() {  # Put this instance into slave mode, replicating from the last known master (or a placeholder host)
+ local master_host
+ local master_port
+
+ CHECK_SLAVE_STATE=1  # make monitor also verify the replication link and the master host
+ monitor
+ status=$?
+
+ if (( status == OCF_SUCCESS )); then
+ ocf_log info "demote: Already running as slave"
+ return $OCF_SUCCESS
+ elif (( status == OCF_NOT_RUNNING )); then
+ ocf_log err "demote: Failed to demote, redis not running."
+ return $OCF_NOT_RUNNING
+ fi
+
+ master_host="$(last_known_master)"
+ master_port="${REDIS_REPLICATION_PORT}"
+
+ # The elected master has to remain a slave during startup.
+ # During this period a placeholder master host is assigned.
+ if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
+ CHECK_SLAVE_STATE=0  # the placeholder host can never link up, so skip the link check
+ master_host="no-such-master"
+ elif ! master_is_active; then
+ # no master has been promoted yet. we'll be notified when the
+ # master starts.
+ CHECK_SLAVE_STATE=0
+ master_host="no-such-master"
+ fi
+
+ ocf_log info "demote: Setting master to '$master_host'"
+
+ redis_client slaveof "$master_host" "$master_port"
+
+ # wait briefly for the slave to connect to the master
+ for (( c=1; c <= 20; c++ ))  # up to 20 polls, one second apart
+ do
+ monitor
+ status=$?
+ if (( status == OCF_SUCCESS )); then
+ return $OCF_SUCCESS
+ fi
+ sleep 1
+ done
+
+ ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
+ return $OCF_ERR_GENERIC
+}
+
+# React to clone notifications. After any instance has been promoted or
+# demoted, a node that is currently a slave calls demote again, which points
+# its replication at the newly appointed master. Always returns OCF_SUCCESS.
+function notify() {
+    mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+    if [[ "$mode" == "post-demote" || "$mode" == "post-promote" ]]; then
+        monitor
+        status=$?
+        if (( status == OCF_SUCCESS )); then
+            # we're a slave: re-attach to whichever node is master now
+            demote
+        fi
+    fi
+    return $OCF_SUCCESS
+}
+
+function validate() {  # Check binaries, config file and user; returns OCF_ERR_INSTALLED / OCF_ERR_CONFIGURED on the first problem, 0 otherwise
+    if [[ ! -x "$REDIS_SERVER" ]]; then
+        ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
+        return $OCF_ERR_INSTALLED
+    fi
+    if [[ ! -x "$REDIS_CLIENT" ]]; then
+        ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
+        return $OCF_ERR_INSTALLED
+    fi
+    if [[ ! -f "$REDIS_CONFIG" ]]; then
+        ocf_log err "validate: $REDIS_CONFIG does not exist"
+        return $OCF_ERR_CONFIGURED
+    fi
+    if ! getent passwd "$REDIS_USER" &>/dev/null; then
+        ocf_log err "validate: $REDIS_USER is not a valid user"
+        return $OCF_ERR_CONFIGURED
+    fi
+}
+
+NODENAME=$(ocf_local_nodename)  # ocf_local_nodename is not defined in this file; presumably from the sourced OCF shell functions
+
+ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"
+
+case "${1:-$__OCF_ACTION}" in  # action: first command-line argument, else $__OCF_ACTION
+ status|monitor)
+ monitor
+ ;;
+ start)
+ start
+ ;;
+ stop)
+ stop
+ ;;
+ restart)  # accepted here although meta_data does not list it as an action
+ stop && start
+ ;;
+ promote)
+ promote
+ ;;
+ demote)
+ demote
+ ;;
+ notify)
+ notify
+ ;;
+ meta-data)
+ meta_data
+ ;;
+ validate-all)
+ validate
+ ;;
+ *)  # unknown action: print usage and exit right away
+ echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+status=$?  # exit status of whichever action ran above
+ocf_log debug "exit_status=$status"
+exit $status
--
1.8.4.2