You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
495 lines
14 KiB
495 lines
14 KiB
7 years ago
|
From 8c92227bce9cc4fe177eea5b2f7c9016e96434f9 Mon Sep 17 00:00:00 2001
|
||
|
From: David Vossel <dvossel@redhat.com>
|
||
|
Date: Mon, 29 Jun 2015 13:03:17 -0500
|
||
|
Subject: [PATCH 1/3] bz1214360-NovaCompute-update1.patch
|
||
|
|
||
|
---
|
||
|
doc/man/Makefile.am | 1 +
|
||
|
heartbeat/Makefile.am | 3 +-
|
||
|
heartbeat/NovaCompute | 73 ++++++------
|
||
|
heartbeat/NovaEvacuate | 311 +++++++++++++++++++++++++++++++++++++++++++++++++
|
||
|
4 files changed, 352 insertions(+), 36 deletions(-)
|
||
|
create mode 100755 heartbeat/NovaEvacuate
|
||
|
|
||
|
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am
|
||
|
index 42a57fe..d32426b 100644
|
||
|
--- a/doc/man/Makefile.am
|
||
|
+++ b/doc/man/Makefile.am
|
||
|
@@ -74,6 +74,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \
|
||
|
ocf_heartbeat_ManageRAID.7 \
|
||
|
ocf_heartbeat_ManageVE.7 \
|
||
|
ocf_heartbeat_NovaCompute.7 \
|
||
|
+ ocf_heartbeat_NovaEvacuate.7 \
|
||
|
ocf_heartbeat_Pure-FTPd.7 \
|
||
|
ocf_heartbeat_Raid1.7 \
|
||
|
ocf_heartbeat_Route.7 \
|
||
|
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am
|
||
|
index 0bebf97..1034632 100644
|
||
|
--- a/heartbeat/Makefile.am
|
||
|
+++ b/heartbeat/Makefile.am
|
||
|
@@ -52,7 +52,8 @@ send_ua_SOURCES = send_ua.c IPv6addr_utils.c
|
||
|
IPv6addr_LDADD = -lplumb $(LIBNETLIBS)
|
||
|
send_ua_LDADD = $(LIBNETLIBS)
|
||
|
|
||
|
-osp_SCRIPTS = NovaCompute
|
||
|
+osp_SCRIPTS = NovaCompute \
|
||
|
+ NovaEvacuate
|
||
|
|
||
|
ocf_SCRIPTS = ClusterMon \
|
||
|
CTDB \
|
||
|
diff --git a/heartbeat/NovaCompute b/heartbeat/NovaCompute
|
||
|
index f71abeb..09eee38 100644
|
||
|
--- a/heartbeat/NovaCompute
|
||
|
+++ b/heartbeat/NovaCompute
|
||
|
@@ -107,15 +107,26 @@ Disable shared storage recovery for instances. Use at your own risk!
|
||
|
<content type="boolean" default="0" />
|
||
|
</parameter>
|
||
|
|
||
|
+<parameter name="evacuation_delay" unique="0" required="0">
|
||
|
+<longdesc lang="en">
|
||
|
+How long to wait for nova to finish evacuating instances elsewhere
|
||
|
+before starting nova-compute. Only used when the agent detects
|
||
|
+evacuations might be in progress.
|
||
|
+
|
||
|
+You may need to increase the start timeout when increasing this value.
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc>
|
||
|
+<content type="integer" default="120" />
|
||
|
+</parameter>
|
||
|
+
|
||
|
</parameters>
|
||
|
|
||
|
<actions>
|
||
|
-<action name="start" timeout="120" />
|
||
|
+<action name="start" timeout="600" />
|
||
|
<action name="stop" timeout="300" />
|
||
|
<action name="monitor" timeout="20" interval="10" depth="0"/>
|
||
|
<action name="validate-all" timeout="20" />
|
||
|
<action name="meta-data" timeout="5" />
|
||
|
-<action name="notify" timeout="600" />
|
||
|
</actions>
|
||
|
</resource-agent>
|
||
|
END
|
||
|
@@ -132,7 +143,7 @@ sigterm_handler() {
|
||
|
|
||
|
nova_usage() {
|
||
|
cat <<END
|
||
|
-usage: $0 {start|stop|monitor|notify|validate-all|meta-data}
|
||
|
+usage: $0 {start|stop|monitor|validate-all|meta-data}
|
||
|
|
||
|
Expects to have a fully populated OCF RA-compliant environment set.
|
||
|
END
|
||
|
@@ -148,6 +159,26 @@ nova_start() {
|
||
|
return $OCF_SUCCESS
|
||
|
fi
|
||
|
|
||
|
+ state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
|
||
|
+ if [ "x$state" = x ]; then
|
||
|
+ : never been fenced
|
||
|
+
|
||
|
+ elif [ "x$state" = xno ]; then
|
||
|
+ : has been evacuated, however it could have been 1s ago
|
||
|
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
|
||
|
+ sleep ${OCF_RESKEY_evacuation_delay}
|
||
|
+
|
||
|
+ else
|
||
|
+ ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}"
|
||
|
+ while [ "x$state" != "xno" ]; do
|
||
|
+ state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' )
|
||
|
+ sleep 5
|
||
|
+ done
|
||
|
+
|
||
|
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete"
|
||
|
+ sleep ${OCF_RESKEY_evacuation_delay}
|
||
|
+ fi
|
||
|
+
|
||
|
export LIBGUESTFS_ATTACH_METHOD=appliance
|
||
|
su nova -s /bin/sh -c /usr/bin/nova-compute &
|
||
|
|
||
|
@@ -212,33 +243,7 @@ nova_monitor() {
|
||
|
}
|
||
|
|
||
|
nova_notify() {
|
||
|
- if [ "x${OCF_RESKEY_CRM_meta_notify_operation}" != "xstop" ]; then
|
||
|
- return $OCF_SUCCESS
|
||
|
- elif [ "x${OCF_RESKEY_CRM_meta_notify_type}" != "xpost" ]; then
|
||
|
- return $OCF_SUCCESS
|
||
|
- fi
|
||
|
-
|
||
|
- # Only the first node not stopping performs evacuates for now
|
||
|
- # Can we allow all of them to do it? It would make this block much simpler.
|
||
|
- for host in ${OCF_RESKEY_CRM_meta_notify_active_uname}; do
|
||
|
- for stop in ${OCF_RESKEY_CRM_meta_notify_stop_uname}; do
|
||
|
- if [ "$stop" = "$host" ]; then
|
||
|
- : $host is one of the nodes that is stopping
|
||
|
-
|
||
|
- elif [ "x$(echo ${host} | awk -F. '{print $1}')" != "x$(uname -n | awk -F. '{print $1}')" ]; then
|
||
|
- : We are not the first non-stopping node
|
||
|
- return $OCF_SUCCESS
|
||
|
-
|
||
|
- else
|
||
|
- # Also repeat for any peer NOT in active_uname somehow?
|
||
|
- for node in $OCF_RESKEY_CRM_meta_notify_stop_uname; do
|
||
|
- ocf_log info "Performing evacuations for $node"
|
||
|
- fence_compute ${fence_options} -o reboot -n $node
|
||
|
- done
|
||
|
- return $OCF_SUCCESS
|
||
|
- fi
|
||
|
- done
|
||
|
- done
|
||
|
+ return $OCF_SUCCESS
|
||
|
}
|
||
|
|
||
|
nova_validate() {
|
||
|
@@ -246,7 +251,6 @@ nova_validate() {
|
||
|
fence_options=""
|
||
|
|
||
|
check_binary openstack-config
|
||
|
- check_binary fence_compute
|
||
|
check_binary nova-compute
|
||
|
|
||
|
if [ ! -f /etc/nova/nova.conf ]; then
|
||
|
@@ -337,6 +341,7 @@ nova_validate() {
|
||
|
return $rc
|
||
|
}
|
||
|
|
||
|
+: ${OCF_RESKEY_evacuation_delay=120}
|
||
|
case $__OCF_ACTION in
|
||
|
meta-data) meta_data
|
||
|
exit $OCF_SUCCESS
|
||
|
@@ -346,12 +351,10 @@ usage|help) nova_usage
|
||
|
;;
|
||
|
esac
|
||
|
|
||
|
-nova_validate
|
||
|
-
|
||
|
case $__OCF_ACTION in
|
||
|
-start) nova_start;;
|
||
|
+start) nova_validate; nova_start;;
|
||
|
stop) nova_stop;;
|
||
|
-monitor) nova_monitor;;
|
||
|
+monitor) nova_validate; nova_monitor;;
|
||
|
notify) nova_notify;;
|
||
|
validate-all) exit $OCF_SUCCESS;;
|
||
|
*) nova_usage
|
||
|
diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate
|
||
|
new file mode 100755
|
||
|
index 0000000..f9a24f1
|
||
|
--- /dev/null
|
||
|
+++ b/heartbeat/NovaEvacuate
|
||
|
@@ -0,0 +1,311 @@
|
||
|
+#!/bin/sh
|
||
|
+#
|
||
|
+#
|
||
|
+# NovaEvacuate agent tracks compute nodes and evacuates flagged ones.
|
||
|
+#
|
||
|
+# Copyright (c) 2015
|
||
|
+#
|
||
|
+# This program is free software; you can redistribute it and/or modify
|
||
|
+# it under the terms of version 2 of the GNU General Public License as
|
||
|
+# published by the Free Software Foundation.
|
||
|
+#
|
||
|
+# This program is distributed in the hope that it would be useful, but
|
||
|
+# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||
|
+#
|
||
|
+# Further, this software is distributed without any warranty that it is
|
||
|
+# free of the rightful claim of any third person regarding infringement
|
||
|
+# or the like. Any license provided herein, whether implied or
|
||
|
+# otherwise, applies only to this software file. Patent licenses, if
|
||
|
+# any, provided herein do not apply to combinations of this program with
|
||
|
+# other software, or any other product whatsoever.
|
||
|
+#
|
||
|
+# You should have received a copy of the GNU General Public License
|
||
|
+# along with this program; if not, write the Free Software Foundation,
|
||
|
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
|
||
|
+#
|
||
|
+
|
||
|
+#######################################################################
|
||
|
+# Initialization:
|
||
|
+
|
||
|
+###
|
||
|
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
|
||
|
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
|
||
|
+###
|
||
|
+
|
||
|
+: ${__OCF_ACTION=$1}
|
||
|
+
|
||
|
+#######################################################################
|
||
|
+
|
||
|
+meta_data() {
|
||
|
+ cat <<END
|
||
|
+<?xml version="1.0"?>
|
||
|
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
|
||
|
+<resource-agent name="NovaEvacuate" version="1.0">
|
||
|
+<version>1.0</version>
|
||
|
+
|
||
|
+<longdesc lang="en">
|
||
|
+Facility for tracking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged.
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Evacuator for OpenStack Nova Compute Server</shortdesc>
|
||
|
+
|
||
|
+<parameters>
|
||
|
+
|
||
|
+<parameter name="auth_url" unique="0" required="1">
|
||
|
+<longdesc lang="en">
|
||
|
+Authorization URL for connecting to keystone in admin context
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Authorization URL</shortdesc>
|
||
|
+<content type="string" default="" />
|
||
|
+</parameter>
|
||
|
+
|
||
|
+<parameter name="username" unique="0" required="1">
|
||
|
+<longdesc lang="en">
|
||
|
+Username for connecting to keystone in admin context
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Username</shortdesc>
|
||
|
+</parameter>
|
||
|
+
|
||
|
+<parameter name="password" unique="0" required="1">
|
||
|
+<longdesc lang="en">
|
||
|
+Password for connecting to keystone in admin context
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Password</shortdesc>
|
||
|
+<content type="string" default="" />
|
||
|
+</parameter>
|
||
|
+
|
||
|
+<parameter name="tenant_name" unique="0" required="1">
|
||
|
+<longdesc lang="en">
|
||
|
+Tenant name for connecting to keystone in admin context.
|
||
|
+Note that with Keystone V3 tenant names are only unique within a domain.
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Tenant name</shortdesc>
|
||
|
+<content type="string" default="" />
|
||
|
+</parameter>
|
||
|
+
|
||
|
+<parameter name="endpoint_type" unique="0" required="0">
|
||
|
+<longdesc lang="en">
|
||
|
+Nova API location (internal, public or admin URL)
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc>
|
||
|
+<content type="string" default="" />
|
||
|
+</parameter>
|
||
|
+
|
||
|
+<parameter name="no_shared_storage" unique="0" required="0">
|
||
|
+<longdesc lang="en">
|
||
|
+Disable shared storage recovery for instances. Use at your own risk!
|
||
|
+</longdesc>
|
||
|
+<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc>
|
||
|
+<content type="boolean" default="0" />
|
||
|
+</parameter>
|
||
|
+
|
||
|
+</parameters>
|
||
|
+
|
||
|
+<actions>
|
||
|
+<action name="start" timeout="20" />
|
||
|
+<action name="stop" timeout="20" />
|
||
|
+<action name="monitor" timeout="600" interval="10" depth="0"/>
|
||
|
+<action name="validate-all" timeout="20" />
|
||
|
+<action name="meta-data" timeout="5" />
|
||
|
+</actions>
|
||
|
+</resource-agent>
|
||
|
+END
|
||
|
+}
|
||
|
+
|
||
|
+#######################################################################
|
||
|
+
|
||
|
+# don't exit on TERM, to test that lrmd makes sure that we do exit
|
||
|
+trap sigterm_handler TERM
|
||
|
+sigterm_handler() {
|
||
|
+ ocf_log info "They use TERM to bring us down. No such luck."
|
||
|
+ return
|
||
|
+}
|
||
|
+
|
||
|
+evacuate_usage() {
|
||
|
+ cat <<END
|
||
|
+usage: $0 {start|stop|monitor|validate-all|meta-data}
|
||
|
+
|
||
|
+Expects to have a fully populated OCF RA-compliant environment set.
|
||
|
+END
|
||
|
+}
|
||
|
+
|
||
|
+evacuate_stop() {
|
||
|
+ rm -f "$statefile"
|
||
|
+ return $OCF_SUCCESS
|
||
|
+}
|
||
|
+
|
||
|
+evacuate_start() {
|
||
|
+ touch "$statefile"
|
||
|
+ # Do not invoke monitor here so that the start timeout can be low
|
||
|
+ return $?
|
||
|
+}
|
||
|
+
|
||
|
+update_evacuation() {
|
||
|
+ attrd_updater -p -n evacute -Q -N ${1} -v ${2}
|
||
|
+ arc=$?
|
||
|
+ if [ ${arc} != 0 ]; then
|
||
|
+ ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}"
|
||
|
+ fi
|
||
|
+ return ${arc}
|
||
|
+}
|
||
|
+
|
||
|
+handle_evacuations() {
|
||
|
+ while [ $# -gt 0 ]; do
|
||
|
+ node=$1
|
||
|
+ state=$2
|
||
|
+ shift; shift;
|
||
|
+ need_evacuate=0
|
||
|
+
|
||
|
+ case $state in
|
||
|
+ "") ;;
|
||
|
+ no) ocf_log debug "$node is either fine or already handled";;
|
||
|
+ yes) need_evacuate=1;;
|
||
|
+ *@*)
|
||
|
+ where=$(echo $state | awk -F@ '{print $1}')
|
||
|
+ when=$(echo $state | awk -F@ '{print $2}')
|
||
|
+ now=$(date +%s)
|
||
|
+
|
||
|
+ if [ $(($now - $when)) -gt 60 ]; then
|
||
|
+ ocf_log info "Processing partial evacuation of $node by $where at $when"
|
||
|
+ need_evacuate=1
|
||
|
+ else
|
||
|
+ # Give some time for any in-flight evacuations to either complete or fail
|
||
|
+ # Nova won't react well if there are two overlapping requests
|
||
|
+ ocf_log info "Deferring processing partial evacuation of $node by $where at $when"
|
||
|
+ fi
|
||
|
+ ;;
|
||
|
+ esac
|
||
|
+
|
||
|
+ if [ $need_evacuate = 1 ]; then
|
||
|
+ found=0
|
||
|
+ ocf_log notice "Initiating evacuation of $node"
|
||
|
+
|
||
|
+ for known in $(fence_compute ${fence_options} -o list | tr -d ','); do
|
||
|
+ if [ ${known} = ${node} ]; then
|
||
|
+ found=1
|
||
|
+ break
|
||
|
+ fi
|
||
|
+ done
|
||
|
+
|
||
|
+ if [ $found = 0 ]; then
|
||
|
+ ocf_log info "Nova does not know about ${node}"
|
||
|
+ # Don't mark as "no" because perhaps nova is unavailable right now
|
||
|
+ continue
|
||
|
+ fi
|
||
|
+
|
||
|
+ update_evacuation ${node} "$(uname -n)@$(date +%s)"
|
||
|
+ if [ $? != 0 ]; then
|
||
|
+ return $OCF_SUCCESS
|
||
|
+ fi
|
||
|
+
|
||
|
+ fence_compute ${fence_options} -o reboot -n $node
|
||
|
+ rc=$?
|
||
|
+
|
||
|
+ if [ $rc = 0 ]; then
|
||
|
+ update_evacuation ${node} no
|
||
|
+ ocf_log notice "Completed evacuation of $node"
|
||
|
+ else
|
||
|
+ ocf_log warn "Evacuation of $node failed: $rc"
|
||
|
+ update_evacuation ${node} yes
|
||
|
+ fi
|
||
|
+ fi
|
||
|
+ done
|
||
|
+
|
||
|
+ return $OCF_SUCCESS
|
||
|
+}
|
||
|
+
|
||
|
+evacuate_monitor() {
|
||
|
+ if [ ! -f "$statefile" ]; then
|
||
|
+ return $OCF_NOT_RUNNING
|
||
|
+ fi
|
||
|
+
|
||
|
+ handle_evacuations $(attrd_updater -n evacute -A | tr '="' ' ' | awk '{print $4" "$6}')
|
||
|
+ return $OCF_SUCCESS
|
||
|
+}
|
||
|
+
|
||
|
+evacuate_validate() {
|
||
|
+ rc=$OCF_SUCCESS
|
||
|
+ fence_options=""
|
||
|
+
|
||
|
+ check_binary fence_compute
|
||
|
+
|
||
|
+ # Is the state directory writable?
|
||
|
+ state_dir=$(dirname $statefile)
|
||
|
+ touch "$state_dir/$$"
|
||
|
+ if [ $? != 0 ]; then
|
||
|
+ ocf_exit_reason "Invalid state directory: $state_dir"
|
||
|
+ return $OCF_ERR_ARGS
|
||
|
+ fi
|
||
|
+ rm -f "$state_dir/$$"
|
||
|
+
|
||
|
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then
|
||
|
+ ocf_exit_reason "auth_url not configured"
|
||
|
+ exit $OCF_ERR_CONFIGURED
|
||
|
+ fi
|
||
|
+
|
||
|
+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}"
|
||
|
+
|
||
|
+ if [ -z "${OCF_RESKEY_username}" ]; then
|
||
|
+ ocf_exit_reason "username not configured"
|
||
|
+ exit $OCF_ERR_CONFIGURED
|
||
|
+ fi
|
||
|
+
|
||
|
+ fence_options="${fence_options} -l ${OCF_RESKEY_username}"
|
||
|
+
|
||
|
+ if [ -z "${OCF_RESKEY_password}" ]; then
|
||
|
+ ocf_exit_reason "password not configured"
|
||
|
+ exit $OCF_ERR_CONFIGURED
|
||
|
+ fi
|
||
|
+
|
||
|
+ fence_options="${fence_options} -p ${OCF_RESKEY_password}"
|
||
|
+
|
||
|
+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then
|
||
|
+ ocf_exit_reason "tenant_name not configured"
|
||
|
+ exit $OCF_ERR_CONFIGURED
|
||
|
+ fi
|
||
|
+
|
||
|
+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}"
|
||
|
+
|
||
|
+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then
|
||
|
+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then
|
||
|
+ fence_options="${fence_options} --no-shared-storage"
|
||
|
+ fi
|
||
|
+ fi
|
||
|
+
|
||
|
+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then
|
||
|
+ case ${OCF_RESKEY_endpoint_type} in
|
||
|
+ adminURL|publicURL|internalURL) ;;
|
||
|
+ *)
|
||
|
+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL"
|
||
|
+ exit $OCF_ERR_CONFIGURED
|
||
|
+ ;;
|
||
|
+ esac
|
||
|
+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}"
|
||
|
+ fi
|
||
|
+
|
||
|
+ if [ $rc != $OCF_SUCCESS ]; then
|
||
|
+ exit $rc
|
||
|
+ fi
|
||
|
+ return $rc
|
||
|
+}
|
||
|
+
|
||
|
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active"
|
||
|
+
|
||
|
+case $__OCF_ACTION in
|
||
|
+start) evacuate_validate; evacuate_start;;
|
||
|
+stop) evacuate_stop;;
|
||
|
+monitor) evacuate_validate; evacuate_monitor;;
|
||
|
+meta-data) meta_data
|
||
|
+ exit $OCF_SUCCESS
|
||
|
+ ;;
|
||
|
+usage|help) evacuate_usage
|
||
|
+ exit $OCF_SUCCESS
|
||
|
+ ;;
|
||
|
+validate-all) exit $OCF_SUCCESS;;
|
||
|
+*) evacuate_usage
|
||
|
+ exit $OCF_ERR_UNIMPLEMENTED
|
||
|
+ ;;
|
||
|
+esac
|
||
|
+rc=$?
|
||
|
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc"
|
||
|
+exit $rc
|
||
|
--
|
||
|
1.8.4.2
|
||
|
|