You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
494 lines
14 KiB
494 lines
14 KiB
From 8c92227bce9cc4fe177eea5b2f7c9016e96434f9 Mon Sep 17 00:00:00 2001 |
|
From: David Vossel <dvossel@redhat.com> |
|
Date: Mon, 29 Jun 2015 13:03:17 -0500 |
|
Subject: [PATCH 1/3] bz1214360-NovaCompute-update1.patch |
|
|
|
--- |
|
doc/man/Makefile.am | 1 + |
|
heartbeat/Makefile.am | 3 +- |
|
heartbeat/NovaCompute | 73 ++++++------ |
|
heartbeat/NovaEvacuate | 311 +++++++++++++++++++++++++++++++++++++++++++++++++ |
|
4 files changed, 352 insertions(+), 36 deletions(-) |
|
create mode 100755 heartbeat/NovaEvacuate |
|
|
|
diff --git a/doc/man/Makefile.am b/doc/man/Makefile.am |
|
index 42a57fe..d32426b 100644 |
|
--- a/doc/man/Makefile.am |
|
+++ b/doc/man/Makefile.am |
|
@@ -74,6 +74,7 @@ man_MANS = ocf_heartbeat_AoEtarget.7 \ |
|
ocf_heartbeat_ManageRAID.7 \ |
|
ocf_heartbeat_ManageVE.7 \ |
|
ocf_heartbeat_NovaCompute.7 \ |
|
+ ocf_heartbeat_NovaEvacuate.7 \ |
|
ocf_heartbeat_Pure-FTPd.7 \ |
|
ocf_heartbeat_Raid1.7 \ |
|
ocf_heartbeat_Route.7 \ |
|
diff --git a/heartbeat/Makefile.am b/heartbeat/Makefile.am |
|
index 0bebf97..1034632 100644 |
|
--- a/heartbeat/Makefile.am |
|
+++ b/heartbeat/Makefile.am |
|
@@ -52,7 +52,8 @@ send_ua_SOURCES = send_ua.c IPv6addr_utils.c |
|
IPv6addr_LDADD = -lplumb $(LIBNETLIBS) |
|
send_ua_LDADD = $(LIBNETLIBS) |
|
|
|
-osp_SCRIPTS = NovaCompute |
|
+osp_SCRIPTS = NovaCompute \ |
|
+ NovaEvacuate |
|
|
|
ocf_SCRIPTS = ClusterMon \ |
|
CTDB \ |
|
diff --git a/heartbeat/NovaCompute b/heartbeat/NovaCompute |
|
index f71abeb..09eee38 100644 |
|
--- a/heartbeat/NovaCompute |
|
+++ b/heartbeat/NovaCompute |
|
@@ -107,15 +107,26 @@ Disable shared storage recovery for instances. Use at your own risk! |
|
<content type="boolean" default="0" /> |
|
</parameter> |
|
|
|
+<parameter name="evacuation_delay" unique="0" required="0"> |
|
+<longdesc lang="en"> |
|
+How long to wait for nova to finish evacuating instances elsewhere |
|
+before starting nova-compute. Only used when the agent detects |
|
+evacuations might be in progress. |
|
+ |
|
+You may need to increase the start timeout when increasing this value. |
|
+</longdesc> |
|
+<shortdesc lang="en">Delay to allow evacuations time to complete</shortdesc> |
|
+<content type="integer" default="120" /> |
|
+</parameter> |
|
+ |
|
</parameters> |
|
|
|
<actions> |
|
-<action name="start" timeout="120" /> |
|
+<action name="start" timeout="600" /> |
|
<action name="stop" timeout="300" /> |
|
<action name="monitor" timeout="20" interval="10" depth="0"/> |
|
<action name="validate-all" timeout="20" /> |
|
<action name="meta-data" timeout="5" /> |
|
-<action name="notify" timeout="600" /> |
|
</actions> |
|
</resource-agent> |
|
END |
|
@@ -132,7 +143,7 @@ sigterm_handler() { |
|
|
|
nova_usage() { |
|
cat <<END |
|
-usage: $0 {start|stop|monitor|notify|validate-all|meta-data} |
|
+usage: $0 {start|stop|monitor|validate-all|meta-data} |
|
|
|
Expects to have a fully populated OCF RA-compliant environment set. |
|
END |
|
@@ -148,6 +159,26 @@ nova_start() { |
|
return $OCF_SUCCESS |
|
fi |
|
|
|
+ state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) |
|
+ if [ "x$state" = x ]; then |
|
+ : never been fenced |
|
+ |
|
+ elif [ "x$state" = xno ]; then |
|
+ : has been evacuated, however it could have been 1s ago |
|
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete" |
|
+ sleep ${OCF_RESKEY_evacuation_delay} |
|
+ |
|
+ else |
|
+ ocf_log info "Waiting for pending evacuations from ${NOVA_HOST}" |
|
+ while [ "x$state" != "xno" ]; do |
|
+ state=$(attrd_updater -p -n evacute -N ${NOVA_HOST} | sed -e 's/.*value=//' | tr -d '"' ) |
|
+ sleep 5 |
|
+ done |
|
+ |
|
+ ocf_log info "Pausing to give evacuations from ${NOVA_HOST} time to complete" |
|
+ sleep ${OCF_RESKEY_evacuation_delay} |
|
+ fi |
|
+ |
|
export LIBGUESTFS_ATTACH_METHOD=appliance |
|
su nova -s /bin/sh -c /usr/bin/nova-compute & |
|
|
|
@@ -212,33 +243,7 @@ nova_monitor() { |
|
} |
|
|
|
nova_notify() { |
|
- if [ "x${OCF_RESKEY_CRM_meta_notify_operation}" != "xstop" ]; then |
|
- return $OCF_SUCCESS |
|
- elif [ "x${OCF_RESKEY_CRM_meta_notify_type}" != "xpost" ]; then |
|
- return $OCF_SUCCESS |
|
- fi |
|
- |
|
- # Only the first node not stopping performs evacuates for now |
|
- # Can we allow all of them to do it? It would make this block much simpler. |
|
- for host in ${OCF_RESKEY_CRM_meta_notify_active_uname}; do |
|
- for stop in ${OCF_RESKEY_CRM_meta_notify_stop_uname}; do |
|
- if [ "$stop" = "$host" ]; then |
|
- : $host is one of the nodes that is stopping |
|
- |
|
- elif [ "x$(echo ${host} | awk -F. '{print $1}')" != "x$(uname -n | awk -F. '{print $1}')" ]; then |
|
- : We are not the first non-stopping node |
|
- return $OCF_SUCCESS |
|
- |
|
- else |
|
- # Also repeat for any peer NOT in active_uname somehow? |
|
- for node in $OCF_RESKEY_CRM_meta_notify_stop_uname; do |
|
- ocf_log info "Performing evacuations for $node" |
|
- fence_compute ${fence_options} -o reboot -n $node |
|
- done |
|
- return $OCF_SUCCESS |
|
- fi |
|
- done |
|
- done |
|
+ return $OCF_SUCCESS |
|
} |
|
|
|
nova_validate() { |
|
@@ -246,7 +251,6 @@ nova_validate() { |
|
fence_options="" |
|
|
|
check_binary openstack-config |
|
- check_binary fence_compute |
|
check_binary nova-compute |
|
|
|
if [ ! -f /etc/nova/nova.conf ]; then |
|
@@ -337,6 +341,7 @@ nova_validate() { |
|
return $rc |
|
} |
|
|
|
+: ${OCF_RESKEY_evacuation_delay=120} |
|
case $__OCF_ACTION in |
|
meta-data) meta_data |
|
exit $OCF_SUCCESS |
|
@@ -346,12 +351,10 @@ usage|help) nova_usage |
|
;; |
|
esac |
|
|
|
-nova_validate |
|
- |
|
case $__OCF_ACTION in |
|
-start) nova_start;; |
|
+start) nova_validate; nova_start;; |
|
stop) nova_stop;; |
|
-monitor) nova_monitor;; |
|
+monitor) nova_validate; nova_monitor;; |
|
notify) nova_notify;; |
|
validate-all) exit $OCF_SUCCESS;; |
|
*) nova_usage |
|
diff --git a/heartbeat/NovaEvacuate b/heartbeat/NovaEvacuate |
|
new file mode 100755 |
|
index 0000000..f9a24f1 |
|
--- /dev/null |
|
+++ b/heartbeat/NovaEvacuate |
|
@@ -0,0 +1,311 @@ |
|
+#!/bin/sh |
|
+# |
|
+# |
|
+# NovaEvacuate agent handles evacuation of flagged compute nodes. |
|
+# |
|
+# Copyright (c) 2015 |
|
+# |
|
+# This program is free software; you can redistribute it and/or modify |
|
+# it under the terms of version 2 of the GNU General Public License as |
|
+# published by the Free Software Foundation. |
|
+# |
|
+# This program is distributed in the hope that it would be useful, but |
|
+# WITHOUT ANY WARRANTY; without even the implied warranty of |
|
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
|
+# |
|
+# Further, this software is distributed without any warranty that it is |
|
+# free of the rightful claim of any third person regarding infringement |
|
+# or the like. Any license provided herein, whether implied or |
|
+# otherwise, applies only to this software file. Patent licenses, if |
|
+# any, provided herein do not apply to combinations of this program with |
|
+# other software, or any other product whatsoever. |
|
+# |
|
+# You should have received a copy of the GNU General Public License |
|
+# along with this program; if not, write the Free Software Foundation, |
|
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA. |
|
+# |
|
+ |
|
+####################################################################### |
|
+# Initialization: |
|
+ |
|
+### |
|
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} |
|
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs |
|
+### |
|
+ |
|
+: ${__OCF_ACTION=$1} |
|
+ |
|
+####################################################################### |
|
+ |
|
+meta_data() { |
|
+ cat <<END |
|
+<?xml version="1.0"?> |
|
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> |
|
+<resource-agent name="NovaEvacuate" version="1.0"> |
|
+<version>1.0</version> |
|
+ |
|
+<longdesc lang="en"> |
|
+Facility for tracking a list of compute nodes and reliably evacuating the ones that fence_evacuate has flagged. |
|
+</longdesc> |
|
+<shortdesc lang="en">Evacuator for OpenStack Nova Compute Server</shortdesc> |
|
+ |
|
+<parameters> |
|
+ |
|
+<parameter name="auth_url" unique="0" required="1"> |
|
+<longdesc lang="en"> |
|
+Authorization URL for connecting to keystone in admin context |
|
+</longdesc> |
|
+<shortdesc lang="en">Authorization URL</shortdesc> |
|
+<content type="string" default="" /> |
|
+</parameter> |
|
+ |
|
+<parameter name="username" unique="0" required="1"> |
|
+<longdesc lang="en"> |
|
+Username for connecting to keystone in admin context |
|
+</longdesc> |
|
+<shortdesc lang="en">Username</shortdesc> |
|
+</parameter> |
|
+ |
|
+<parameter name="password" unique="0" required="1"> |
|
+<longdesc lang="en"> |
|
+Password for connecting to keystone in admin context |
|
+</longdesc> |
|
+<shortdesc lang="en">Password</shortdesc> |
|
+<content type="string" default="" /> |
|
+</parameter> |
|
+ |
|
+<parameter name="tenant_name" unique="0" required="1"> |
|
+<longdesc lang="en"> |
|
+Tenant name for connecting to keystone in admin context. |
|
+Note that with Keystone V3 tenant names are only unique within a domain. |
|
+</longdesc> |
|
+<shortdesc lang="en">Tenant name</shortdesc> |
|
+<content type="string" default="" /> |
|
+</parameter> |
|
+ |
|
+<parameter name="endpoint_type" unique="0" required="0"> |
|
+<longdesc lang="en"> |
|
+Nova API location (internal, public or admin URL) |
|
+</longdesc> |
|
+<shortdesc lang="en">Nova API location (internal, public or admin URL)</shortdesc> |
|
+<content type="string" default="" /> |
|
+</parameter> |
|
+ |
|
+<parameter name="no_shared_storage" unique="0" required="0"> |
|
+<longdesc lang="en"> |
|
+Disable shared storage recovery for instances. Use at your own risk! |
|
+</longdesc> |
|
+<shortdesc lang="en">Disable shared storage recovery for instances</shortdesc> |
|
+<content type="boolean" default="0" /> |
|
+</parameter> |
|
+ |
|
+</parameters> |
|
+ |
|
+<actions> |
|
+<action name="start" timeout="20" /> |
|
+<action name="stop" timeout="20" /> |
|
+<action name="monitor" timeout="600" interval="10" depth="0"/> |
|
+<action name="validate-all" timeout="20" /> |
|
+<action name="meta-data" timeout="5" /> |
|
+</actions> |
|
+</resource-agent> |
|
+END |
|
+} |
|
+ |
|
+####################################################################### |
|
+ |
|
+# don't exit on TERM, to test that lrmd makes sure that we do exit |
|
+trap sigterm_handler TERM |
|
+sigterm_handler() { |
|
+ ocf_log info "They use TERM to bring us down. No such luck." |
|
+ return |
|
+} |
|
+ |
|
+evacuate_usage() { |
|
+ cat <<END |
|
+usage: $0 {start|stop|monitor|validate-all|meta-data} |
|
+ |
|
+Expects to have a fully populated OCF RA-compliant environment set. |
|
+END |
|
+} |
|
+ |
|
+evacuate_stop() { |
|
+ rm -f "$statefile" |
|
+ return $OCF_SUCCESS |
|
+} |
|
+ |
|
+evacuate_start() { |
|
+ touch "$statefile" |
|
+ # Do not invoke monitor here so that the start timeout can be low |
|
+ return $? |
|
+} |
|
+ |
|
+update_evacuation() { |
|
+ attrd_updater -p -n evacute -Q -N ${1} -v ${2} |
|
+ arc=$? |
|
+ if [ ${arc} != 0 ]; then |
|
+ ocf_log warn "Can not set evacuation state of ${1} to ${2}: ${arc}" |
|
+ fi |
|
+ return ${arc} |
|
+} |
|
+ |
|
+handle_evacuations() { |
|
+ while [ $# -gt 0 ]; do |
|
+ node=$1 |
|
+ state=$2 |
|
+ shift; shift; |
|
+ need_evacuate=0 |
|
+ |
|
+ case $state in |
|
+ "") ;; |
|
+ no) ocf_log debug "$node is either fine or already handled";; |
|
+ yes) need_evacuate=1;; |
|
+ *@*) |
|
+ where=$(echo $state | awk -F@ '{print $1}') |
|
+ when=$(echo $state | awk -F@ '{print $2}') |
|
+ now=$(date +%s) |
|
+ |
|
+ if [ $(($now - $when)) -gt 60 ]; then |
|
+ ocf_log info "Processing partial evacuation of $node by $where at $when" |
|
+ need_evacuate=1 |
|
+ else |
|
+ # Give some time for any in-flight evacuations to either complete or fail |
|
+ # Nova won't react well if there are two overlapping requests |
|
+ ocf_log info "Deferring processing partial evacuation of $node by $where at $when" |
|
+ fi |
|
+ ;; |
|
+ esac |
|
+ |
|
+ if [ $need_evacuate = 1 ]; then |
|
+ found=0 |
|
+ ocf_log notice "Initiating evacuation of $node" |
|
+ |
|
+ for known in $(fence_compute ${fence_options} -o list | tr -d ','); do |
|
+ if [ ${known} = ${node} ]; then |
|
+ found=1 |
|
+ break |
|
+ fi |
|
+ done |
|
+ |
|
+ if [ $found = 0 ]; then |
|
+ ocf_log info "Nova does not know about ${node}" |
|
+ # Don't mark as 'no' because perhaps nova is unavailable right now |
|
+ continue |
|
+ fi |
|
+ |
|
+ update_evacuation ${node} "$(uname -n)@$(date +%s)" |
|
+ if [ $? != 0 ]; then |
|
+ return $OCF_SUCCESS |
|
+ fi |
|
+ |
|
+ fence_compute ${fence_options} -o reboot -n $node |
|
+ rc=$? |
|
+ |
|
+ if [ $rc = 0 ]; then |
|
+ update_evacuation ${node} no |
|
+ ocf_log notice "Completed evacuation of $node" |
|
+ else |
|
+ ocf_log warn "Evacuation of $node failed: $rc" |
|
+ update_evacuation ${node} yes |
|
+ fi |
|
+ fi |
|
+ done |
|
+ |
|
+ return $OCF_SUCCESS |
|
+} |
|
+ |
|
+evacuate_monitor() { |
|
+ if [ ! -f "$statefile" ]; then |
|
+ return $OCF_NOT_RUNNING |
|
+ fi |
|
+ |
|
+ handle_evacuations $(attrd_updater -n evacute -A | tr '="' ' ' | awk '{print $4" "$6}') |
|
+ return $OCF_SUCCESS |
|
+} |
|
+ |
|
+evacuate_validate() { |
|
+ rc=$OCF_SUCCESS |
|
+ fence_options="" |
|
+ |
|
+ check_binary fence_compute |
|
+ |
|
+ # Is the state directory writable? |
|
+ state_dir=$(dirname $statefile) |
|
+ touch "$state_dir/$$" |
|
+ if [ $? != 0 ]; then |
|
+ ocf_exit_reason "Invalid state directory: $state_dir" |
|
+ return $OCF_ERR_ARGS |
|
+ fi |
|
+ rm -f "$state_dir/$$" |
|
+ |
|
+ if [ -z "${OCF_RESKEY_auth_url}" ]; then |
|
+ ocf_exit_reason "auth_url not configured" |
|
+ exit $OCF_ERR_CONFIGURED |
|
+ fi |
|
+ |
|
+ fence_options="${fence_options} -k ${OCF_RESKEY_auth_url}" |
|
+ |
|
+ if [ -z "${OCF_RESKEY_username}" ]; then |
|
+ ocf_exit_reason "username not configured" |
|
+ exit $OCF_ERR_CONFIGURED |
|
+ fi |
|
+ |
|
+ fence_options="${fence_options} -l ${OCF_RESKEY_username}" |
|
+ |
|
+ if [ -z "${OCF_RESKEY_password}" ]; then |
|
+ ocf_exit_reason "password not configured" |
|
+ exit $OCF_ERR_CONFIGURED |
|
+ fi |
|
+ |
|
+ fence_options="${fence_options} -p ${OCF_RESKEY_password}" |
|
+ |
|
+ if [ -z "${OCF_RESKEY_tenant_name}" ]; then |
|
+ ocf_exit_reason "tenant_name not configured" |
|
+ exit $OCF_ERR_CONFIGURED |
|
+ fi |
|
+ |
|
+ fence_options="${fence_options} -t ${OCF_RESKEY_tenant_name}" |
|
+ |
|
+ if [ -n "${OCF_RESKEY_no_shared_storage}" ]; then |
|
+ if ocf_is_true "${OCF_RESKEY_no_shared_storage}"; then |
|
+ fence_options="${fence_options} --no-shared-storage" |
|
+ fi |
|
+ fi |
|
+ |
|
+ if [ -n "${OCF_RESKEY_endpoint_type}" ]; then |
|
+ case ${OCF_RESKEY_endpoint_type} in |
|
+ adminURL|publicURL|internalURL) ;; |
|
+ *) |
|
+ ocf_exit_reason "endpoint_type ${OCF_RESKEY_endpoint_type} not valid. Use adminURL or publicURL or internalURL" |
|
+ exit $OCF_ERR_CONFIGURED |
|
+ ;; |
|
+ esac |
|
+ fence_options="${fence_options} -e ${OCF_RESKEY_endpoint_type}" |
|
+ fi |
|
+ |
|
+ if [ $rc != $OCF_SUCCESS ]; then |
|
+ exit $rc |
|
+ fi |
|
+ return $rc |
|
+} |
|
+ |
|
+statefile="${HA_RSCTMP}/${OCF_RESOURCE_INSTANCE}.active" |
|
+ |
|
+case $__OCF_ACTION in |
|
+start) evacuate_validate; evacuate_start;; |
|
+stop) evacuate_stop;; |
|
+monitor) evacuate_validate; evacuate_monitor;; |
|
+meta-data) meta_data |
|
+ exit $OCF_SUCCESS |
|
+ ;; |
|
+usage|help) evacuate_usage |
|
+ exit $OCF_SUCCESS |
|
+ ;; |
|
+validate-all) exit $OCF_SUCCESS;; |
|
+*) evacuate_usage |
|
+ exit $OCF_ERR_UNIMPLEMENTED |
|
+ ;; |
|
+esac |
|
+rc=$? |
|
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION : $rc" |
|
+exit $rc |
|
-- |
|
1.8.4.2 |
|
|
|
|