From d83b9a9394ef69ca2801c84dee46094a224ca654 Mon Sep 17 00:00:00 2001
From: David Vossel
Date: Thu, 5 Mar 2015 13:47:58 -0600
Subject: [PATCH] redis agent support

---
 doc/man/Makefile.am   |   1 +
 heartbeat/Makefile.am |   1 +
 heartbeat/redis       | 558 ++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 560 insertions(+)
 create mode 100644 heartbeat/redis

diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
index 43d60d9..653e818 100644
--- a/doc/man/Makefile.am
+++ b/doc/man/Makefile.am
@@ -125,6 +125,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
                           ocf_heartbeat_pound.7 \
                           ocf_heartbeat_proftpd.7 \
                           ocf_heartbeat_rabbitmq-cluster.7 \
+                          ocf_heartbeat_redis.7 \
                           ocf_heartbeat_rsyncd.7 \
                           ocf_heartbeat_rsyslog.7 \
                           ocf_heartbeat_scsi2reservation.7 \
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
index 3bcf2d9..e4ed4fd 100644
--- a/heartbeat/Makefile.am
+++ b/heartbeat/Makefile.am
@@ -105,6 +105,7 @@ ocf_SCRIPTS = ClusterMon \
                         rabbitmq-cluster \
                         Raid1 \
                         Route \
+                        redis \
                         rsyncd \
                         rsyslog \
                         SAPDatabase \
diff --git a/heartbeat/redis b/heartbeat/redis
new file mode 100644
index 0000000..6b479b2
--- /dev/null
+++ b/heartbeat/redis
@@ -0,0 +1,558 @@
+#!/bin/bash
+#
+# OCF resource agent managing a redis server, including master/slave roles.
+#
+
+# Shared OCF shell helpers.
+. ${OCF_ROOT}/resource.d/heartbeat/.ocf-shellfuncs
+
+# Defaults for resource parameters that were not supplied.
+: ${OCF_RESKEY_bin:=/usr/bin/redis-server}
+: ${OCF_RESKEY_client_bin:=/usr/bin/redis-cli}
+: ${OCF_RESKEY_user:=redis}
+: ${OCF_RESKEY_rundir:=/var/run/redis}
+: ${OCF_RESKEY_pidfile_name:=redis-server.pid}
+: ${OCF_RESKEY_socket_name:=redis.sock}
+: ${OCF_RESKEY_port:=6379}
+
+# Prefer /etc/redis.conf when present, else fall back to /etc/redis/redis.conf.
+if [ -z "$OCF_RESKEY_config" ]; then
+    if [ -f "/etc/redis.conf" ]; then
+        OCF_RESKEY_config="/etc/redis.conf"
+    else
+        OCF_RESKEY_config="/etc/redis/redis.conf"
+    fi
+fi
+
+# When set to 1, monitor() additionally checks the slave's replication link.
+CHECK_SLAVE_STATE=0
+
+REDIS_SERVER="$OCF_RESKEY_bin"
+REDIS_CLIENT="$OCF_RESKEY_client_bin"
+REDIS_CONFIG="$OCF_RESKEY_config"
+REDIS_USER="$OCF_RESKEY_user"
+REDIS_RUNDIR="$OCF_RESKEY_rundir"
+REDIS_PIDFILE="$OCF_RESKEY_rundir/$OCF_RESKEY_pidfile_name"
+REDIS_SOCKET="$OCF_RESKEY_rundir/$OCF_RESKEY_socket_name"
+REDIS_REPLICATION_PORT="$OCF_RESKEY_port"
+
+# Print the OCF meta-data XML describing this agent's parameters and actions.
+# NOTE(review): the XML markup was rebuilt around the surviving description
+# text; confirm attribute values and action timeouts before merging.
+function meta_data() {
+    cat <<EOI
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="redis">
+<version>1.0</version>
+
+<longdesc lang="en">
+Resource agent script for redis server.
+
+This resource fully supports master/slave replication. The master preference of a node is determined by the 'slave_priority' parameter of the redis config.
+When taking the resource from 'unmanaged' to 'managed', the currently active master will be given a priority of 1000 (plus 1 for each active connection). The default 'slave_priority' is 100, so the master will stay master. For a slave to become master after converting the resource to managed, set a slave_priority greater than 1000.
+</longdesc>
+
+<shortdesc lang="en">Redis server</shortdesc>
+
+<parameters>
+<parameter name="bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-server\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-server\`</shortdesc>
+<content type="string" default="$REDIS_SERVER" />
+</parameter>
+
+<parameter name="client_bin" unique="0" required="0">
+<longdesc lang="en">
+Path to \`redis-cli\`
+</longdesc>
+<shortdesc lang="en">Path to \`redis-cli\`</shortdesc>
+<content type="string" default="$REDIS_CLIENT" />
+</parameter>
+
+<parameter name="config" unique="1" required="0">
+<longdesc lang="en">
+Path to 'redis.conf'
+</longdesc>
+<shortdesc lang="en">Path to 'redis.conf'</shortdesc>
+<content type="string" default="$REDIS_CONFIG" />
+</parameter>
+
+<parameter name="user" unique="0" required="0">
+<longdesc lang="en">
+User to run redis as
+</longdesc>
+<shortdesc lang="en">Redis user</shortdesc>
+<content type="string" default="$REDIS_USER" />
+</parameter>
+
+<parameter name="rundir" unique="1" required="0">
+<longdesc lang="en">
+Directory to store socket and pid file in
+</longdesc>
+<shortdesc lang="en">Redis var/run dir</shortdesc>
+<content type="string" default="$REDIS_RUNDIR" />
+</parameter>
+
+<parameter name="pidfile_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the pidfile. Will be created in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis pidfile name</shortdesc>
+<content type="string" default="$OCF_RESKEY_pidfile_name" />
+</parameter>
+
+<parameter name="socket_name" unique="0" required="0">
+<longdesc lang="en">
+The filename to use for the socket. Will be crated in the rundir.
+Should only be a basename, not a full path.
+</longdesc>
+<shortdesc lang="en">Redis socket name</shortdesc>
+<content type="string" default="$OCF_RESKEY_socket_name" />
+</parameter>
+
+<parameter name="port" unique="0" required="0">
+<longdesc lang="en">
+Port for replication client to connect to on remote server
+</longdesc>
+<shortdesc lang="en">Replication port</shortdesc>
+<content type="string" default="$REDIS_REPLICATION_PORT" />
+</parameter>
+
+<parameter name="wait_last_known_master" unique="0" required="0">
+<longdesc lang="en">
+During redis cluster bootstrap, wait for the last known master to be
+promoted before allowing any other instances in the cluster to be
+promoted. This lessens the risk of data loss when persistent data
+is in use.
+</longdesc>
+<shortdesc lang="en">Wait for last known master</shortdesc>
+<content type="boolean" default="false" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="120" />
+<action name="stop" timeout="120" />
+<action name="status" timeout="60" />
+<action name="monitor" depth="0" timeout="60" interval="45" />
+<action name="monitor" role="Master" depth="0" timeout="60" interval="20" />
+<action name="monitor" role="Slave" depth="0" timeout="60" interval="60" />
+<action name="promote" timeout="120" />
+<action name="demote" timeout="120" />
+<action name="notify" timeout="90" />
+<action name="validate-all" timeout="5" />
+<action name="meta-data" timeout="5" />
+</actions>
+</resource-agent>
+EOI
+}
+
+# crm_config attribute that records the last known master host; it is named
+# after the resource instance with everything from the first ':' removed.
+INSTANCE_ATTR_NAME=`echo ${OCF_RESOURCE_INSTANCE}| awk -F : '{print $1}'`
+CRM_ATTR_REPL_INFO="${HA_SBIN_DIR}/crm_attribute --type crm_config --name ${INSTANCE_ATTR_NAME}_REPL_INFO -s redis_replication"
+MASTER_HOST=""
+MASTER_ACTIVE_CACHED=""
+MASTER_ACTIVE=""
+
+# Return 0 when crm_mon lists an active, non-failed Master of this resource.
+# The crm_mon result is cached for the rest of this invocation.
+master_is_active()
+{
+    if [ -z "$MASTER_ACTIVE_CACHED" ]; then
+        # determine if a master instance is already up and is healthy
+        crm_mon --as-xml | grep "resource.*id=\"${OCF_RESOURCE_INSTANCE}\".*role=\"Master\".*active=\"true\".*orphaned=\"false\".*failed=\"false\"" > /dev/null 2>&1
+        MASTER_ACTIVE=$?
+        MASTER_ACTIVE_CACHED="true"
+    fi
+    return $MASTER_ACTIVE
+}
+
+# Record $1 as the master host, both locally and in the cluster attribute.
+function set_master()
+{
+    MASTER_HOST="$1"
+    ${CRM_ATTR_REPL_INFO} -v "$1" -q
+}
+
+# Print the recorded master host, querying the cluster attribute if unknown.
+function last_known_master()
+{
+    if [ -z "$MASTER_HOST" ]; then
+        MASTER_HOST="$(${CRM_ATTR_REPL_INFO} --query -q 2>/dev/null)"
+    fi
+    echo "$MASTER_HOST"
+}
+
+# Run crm_master with '-l reboot' followed by the given arguments.
+function crm_master_reboot() {
+    "${HA_SBIN_DIR}/crm_master" -l reboot "$@"
+}
+
+# Print the master score for this node.
+#   $1  redis 'slave_priority' value (may be empty)
+#   $2  number of connected clients, added on top of the base score
+function calculate_score()
+{
+    perf_score="$1"
+    connected_clients="$2"
+
+    if ocf_is_true "$OCF_RESKEY_wait_last_known_master"; then
+        # only set preferred score by slave_priority if
+        # we are not waiting for the last known master. Otherwise
+        # we want the agent to have complete control over the scoring.
+        perf_score=""
+        connected_clients="0"
+    fi
+
+    if [[ -z "$perf_score" ]]; then
+        if [[ "$(last_known_master)" == "$NODENAME" ]]; then
+            perf_score=1000
+        else
+            perf_score=1
+        fi
+    fi
+    perf_score=$(( perf_score + connected_clients ))
+    echo "$perf_score"
+}
+
+# Publish $1 as this node's master score, unless wait_last_known_master is on
+# and a different node, the last known master, has not been promoted yet.
+function set_score()
+{
+    local score="$1"
+
+    if ocf_is_true "$OCF_RESKEY_wait_last_known_master" && ! master_is_active; then
+        local last_master="$(last_known_master)"
+        if [ -n "$last_master" ] && [[ "$last_master" != "$NODENAME" ]]; then
+            ocf_log info "Postponing setting master score for ${NODENAME} until last known master instance [${last_master}] is promoted"
+            return
+        fi
+    fi
+
+    ocf_log debug "monitor: Setting master score to '$score'"
+    crm_master_reboot -v "$score"
+}
+
+# Run redis-cli against the local unix socket, dropping carriage returns.
+function redis_client() {
+    ocf_log debug "redis_client: '$REDIS_CLIENT' -s '$REDIS_SOCKET' $*"
+    "$REDIS_CLIENT" -s "$REDIS_SOCKET" "$@" | sed 's/\r//'
+}
+
+# Check that the pid stored in the pidfile is a running redis-server process.
+function simple_status() {
+    local pid
+
+    if ! [ -f "$REDIS_PIDFILE" ]; then
+        return $OCF_NOT_RUNNING
+    fi
+
+    pid="$(<"$REDIS_PIDFILE")"
+    pidof "$REDIS_SERVER" | grep -q "\<$pid\>" || return $OCF_NOT_RUNNING
+
+    ocf_log debug "monitor: redis-server running under pid $pid"
+
+    return $OCF_SUCCESS
+}
+
+# Report the instance state: the simple_status failure code when not running,
+# OCF_RUNNING_MASTER or OCF_SUCCESS when healthy, OCF_ERR_GENERIC when no role
+# can be read or (with CHECK_SLAVE_STATE=1) the replication link is not right.
+function monitor() {
+    local res
+
+    simple_status
+    res=$?
+    if (( res != OCF_SUCCESS )); then
+        return $res
+    fi
+
+    typeset -A info
+    while read line; do
+        [[ "$line" == "#"* ]] && continue
+        [[ "$line" != *":"* ]] && continue
+        IFS=':' read -r key value <<< "$line"
+        info[$key]="$value"
+    done < <(redis_client info)
+    if [[ -z "${info[role]}" ]]; then
+        ocf_log err "monitor: Could not get role from \`$REDIS_CLIENT -s $REDIS_SOCKET info\`"
+        return $OCF_ERR_GENERIC
+    fi
+
+    if ocf_is_ms; then
+        # Here we see if a score has already been set.
+        # If score isn't set we use the redis setting 'slave_priority'.
+        # If that isn't set, we default to 1000 for a master, and 1 for slave.
+        # We then add 1 for each connected client
+        score="$(crm_master_reboot --get-value --quiet 2>/dev/null)"
+        if [[ -z "$score" ]]; then
+            score=$(calculate_score "${info[slave_priority]}" "${info[connected_clients]}")
+            set_score "$score"
+        fi
+
+        if [[ "${info[role]}" == "master" ]]; then
+            if ocf_is_probe; then
+                set_master "$NODENAME"
+            fi
+            return $OCF_RUNNING_MASTER
+        fi
+
+        if [ "$CHECK_SLAVE_STATE" -eq 1 ]; then
+            if [[ "${info[master_link_status]}" != "up" ]]; then
+                ocf_log info "monitor: Slave mode link has not yet been established (link=${info[master_link_status]})"
+                return $OCF_ERR_GENERIC
+            fi
+            if [[ "${info[master_host]}" != "$(last_known_master)" ]]; then
+                ocf_log err "monitor: Slave mode current master does not match running master. current=${info[master_host]}, running=$(last_known_master)"
+                return $OCF_ERR_GENERIC
+            fi
+        fi
+    fi
+    return $OCF_SUCCESS
+}
+
+# Start redis-server as REDIS_USER, wait until it reports loading=0, and put
+# it into slave mode when running as a master/slave resource.
+function start() {
+    monitor
+    status=$?
+
+    if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+        ocf_log info "start: redis is already running"
+        return $OCF_SUCCESS
+    fi
+
+    [[ ! -d "$REDIS_RUNDIR" ]] && mkdir -p "$REDIS_RUNDIR"
+    chown -R "$REDIS_USER" "$REDIS_RUNDIR"
+
+    ocf_log info "start: $REDIS_SERVER --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'"
+    output="$(su "$REDIS_USER" -s /bin/sh -c "cd '$REDIS_RUNDIR'; exec '$REDIS_SERVER' '$REDIS_CONFIG' --daemonize yes --unixsocket '$REDIS_SOCKET' --pidfile '$REDIS_PIDFILE'" 2>&1)"
+
+    while true; do
+        # wait for redis to start
+        typeset -A info
+        while read line; do
+            [[ "$line" == "#"* ]] && continue
+            [[ "$line" != *":"* ]] && continue
+            IFS=':' read -r key value <<< "$line"
+            info[$key]="$value"
+        done < <(redis_client info)
+
+        # Compare as strings: in (( )) an unset info[loading] evaluates to 0,
+        # so an empty reply would wrongly be treated as "finished loading".
+        if [[ "${info[loading]}" == "0" ]]; then
+            break
+        elif [[ "${info[loading]}" == "1" ]]; then
+            sleep "${info[loading_eta_seconds]}"
+        elif pidof "$REDIS_SERVER" >/dev/null; then
+            # unknown error, but the process still exists.
+            # This check is mainly because redis daemonizes before it starts listening, causing `redis-cli` to fail
+            # See https://github.com/antirez/redis/issues/2368
+            # It's possible that the `pidof` will pick up a different redis, but in that case, the start operation will just time out
+            sleep 1
+        else
+            ocf_log err "start: Unknown error waiting for redis to start"
+            return $OCF_ERR_GENERIC
+        fi
+    done
+
+    ocf_is_ms && demote # pacemaker expects resources to start in slave mode
+
+    monitor
+    status=$?
+    if (( status == OCF_SUCCESS )) || (( status == OCF_RUNNING_MASTER )); then
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log err "start: Unknown error starting redis. output=${output//$'\n'/; }"
+    return $status
+}
+
+# Send SIGTERM to redis-server, wait for it to exit, then drop the master score.
+function stop() {
+    monitor
+    status=$?
+
+    if (( status == OCF_NOT_RUNNING )); then
+        ocf_log info "stop: redis is already stopped"
+        crm_master_reboot -D
+        return $OCF_SUCCESS
+    fi
+
+    pid="$(<"$REDIS_PIDFILE")"
+    kill -TERM "$pid"
+
+    while true; do
+        simple_status
+        status=$?
+        if (( status == OCF_NOT_RUNNING )); then
+            crm_master_reboot -D
+            return $OCF_SUCCESS
+        fi
+        sleep 1
+    done
+}
+
+# Turn a running slave into the master ('slaveof no one') and record it.
+function promote() {
+    monitor
+    status=$?
+
+    if (( status == OCF_RUNNING_MASTER )); then
+        ocf_log info "promote: Already running as master"
+        set_master "$NODENAME"
+        return $OCF_SUCCESS
+    elif (( status != OCF_SUCCESS )); then
+        ocf_log err "promote: Node is not running as a slave"
+        return $OCF_ERR_GENERIC
+    fi
+
+    redis_client slaveof no one
+
+    monitor
+    status=$?
+    if (( status == OCF_RUNNING_MASTER )); then
+        set_master "$NODENAME"
+        return $OCF_SUCCESS
+    fi
+
+    ocf_log err "promote: Unknown error while promoting to master (status=$status)"
+    return $OCF_ERR_GENERIC
+}
+
+# Make this instance a slave of the last known master, or of a placeholder
+# host while no usable master exists; waits up to 20 tries for monitor success.
+function demote() {
+    local master_host
+    local master_port
+
+    CHECK_SLAVE_STATE=1
+    monitor
+    status=$?
+
+    if (( status == OCF_SUCCESS )); then
+        ocf_log info "demote: Already running as slave"
+        return $OCF_SUCCESS
+    elif (( status == OCF_NOT_RUNNING )); then
+        ocf_log err "demote: Failed to demote, redis not running."
+        return $OCF_NOT_RUNNING
+    fi
+
+    master_host="$(last_known_master)"
+    master_port="${REDIS_REPLICATION_PORT}"
+
+    # The elected master has to remain a slave during startup.
+    # During this period a placeholder master host is assigned.
+    if [ -z "$master_host" ] || [[ "$master_host" == "$NODENAME" ]]; then
+        CHECK_SLAVE_STATE=0
+        master_host="no-such-master"
+    elif ! master_is_active; then
+        # no master has been promoted yet. we'll be notified when the
+        # master starts.
+        CHECK_SLAVE_STATE=0
+        master_host="no-such-master"
+    fi
+
+    ocf_log info "demote: Setting master to '$master_host'"
+
+    redis_client slaveof "$master_host" "$master_port"
+
+    # wait briefly for the slave to connect to the master
+    for (( c=1; c <= 20; c++ ))
+    do
+        monitor
+        status=$?
+        if (( status == OCF_SUCCESS )); then
+            return $OCF_SUCCESS
+        fi
+        sleep 1
+    done
+
+    ocf_log err "demote: Unexpected error setting slave mode (status=$status)"
+    return $OCF_ERR_GENERIC
+}
+
+# On post-demote/post-promote notifications, re-run demote on a healthy slave.
+function notify() {
+    mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+    case "$mode" in
+        post-demote|post-promote) # change the master
+            monitor
+            status=$?
+            if (( status == OCF_SUCCESS )); then # we're a slave
+                # calling demote updates the slave's connection
+                # to the newly appointed Master instance.
+                demote
+            fi
+            ;;
+    esac
+    return $OCF_SUCCESS
+}
+
+# Verify that the binaries, the config file and the redis user all exist.
+function validate() {
+    if [[ ! -x "$REDIS_SERVER" ]]; then
+        ocf_log err "validate: $REDIS_SERVER does not exist or is not executable"
+        return $OCF_ERR_INSTALLED
+    fi
+    if [[ ! -x "$REDIS_CLIENT" ]]; then
+        ocf_log err "validate: $REDIS_CLIENT does not exist or is not executable"
+        return $OCF_ERR_INSTALLED
+    fi
+    if [[ ! -f "$REDIS_CONFIG" ]]; then
+        ocf_log err "validate: $REDIS_CONFIG does not exist"
+        return $OCF_ERR_CONFIGURED
+    fi
+    if ! getent passwd "$REDIS_USER" &>/dev/null; then
+        ocf_log err "validate: $REDIS_USER is not a valid user"
+        return $OCF_ERR_CONFIGURED
+    fi
+    return $OCF_SUCCESS
+}
+
+NODENAME=$(ocf_local_nodename)
+
+ocf_log debug "action=${1:-$__OCF_ACTION} notify_type=${OCF_RESKEY_CRM_meta_notify_type} notify_operation=${OCF_RESKEY_CRM_meta_notify_operation} master_host=${OCF_RESKEY_CRM_meta_notify_master_uname} slave_host=${OCF_RESKEY_CRM_meta_notify_slave_uname} promote_host=${OCF_RESKEY_CRM_meta_notify_promote_uname} demote_host=${OCF_RESKEY_CRM_meta_notify_demote_uname}; params: bin=${OCF_RESKEY_bin} client_bin=${OCF_RESKEY_client_bin} config=${OCF_RESKEY_config} user=${OCF_RESKEY_user} rundir=${OCF_RESKEY_rundir} port=${OCF_RESKEY_port}"
+
+# Dispatch the requested action; its return code becomes the exit status.
+case "${1:-$__OCF_ACTION}" in
+    status|monitor)
+        monitor
+        ;;
+    start)
+        start
+        ;;
+    stop)
+        stop
+        ;;
+    restart)
+        stop && start
+        ;;
+    promote)
+        promote
+        ;;
+    demote)
+        demote
+        ;;
+    notify)
+        notify
+        ;;
+    meta-data)
+        meta_data
+        ;;
+    validate-all)
+        validate
+        ;;
+    *)
+        echo "Usage: $0 {monitor|start|stop|restart|promote|demote|notify|validate-all|meta-data}"
+        exit $OCF_ERR_UNIMPLEMENTED
+        ;;
+esac
+status=$?
+ocf_log debug "exit_status=$status"
+exit $status
-- 
1.8.4.2