You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
89 lines
3.2 KiB
89 lines
3.2 KiB
From 4d98bbcdadda60166faf7ccc512b9095b439e2bd Mon Sep 17 00:00:00 2001 |
|
From: Damien Ciabrini <dciabrin@redhat.com> |
|
Date: Tue, 2 Feb 2016 16:29:10 +0100 |
|
Subject: [PATCH] galera: prevent recovered nodes from bootstrapping cluster |
|
when possible |
|
|
|
--- |
|
heartbeat/README.galera | 19 ++++++++++++++++++- |
|
heartbeat/galera | 41 +++++++++++++++++++++++++++++++++++++++++ |
|
2 files changed, 59 insertions(+), 1 deletion(-) |
|
|
|
diff --git a/heartbeat/galera b/heartbeat/galera |
|
index ca94c21..84c92fd 100755 |
|
--- a/heartbeat/galera |
|
+++ b/heartbeat/galera |
|
@@ -276,6 +276,22 @@ is_bootstrap() |
|
|
|
} |
|
|
|
+set_heuristic_recovered() |
|
+{ |
|
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -v "true" |
|
+} |
|
+ |
|
+clear_heuristic_recovered() |
|
+{ |
|
+ ${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -D |
|
+} |
|
+ |
|
+is_heuristic_recovered() |
|
+{ |
|
+ local node=$1 |
|
+ ${HA_SBIN_DIR}/crm_attribute -N $node -l reboot --name "${INSTANCE_ATTR_NAME}-heuristic-recovered" -Q 2>/dev/null |
|
+} |
|
+ |
|
clear_last_commit() |
|
{ |
|
${HA_SBIN_DIR}/crm_attribute -N $NODENAME -l reboot --name "${INSTANCE_ATTR_NAME}-last-committed" -D |
|
@@ -398,8 +414,19 @@ detect_first_master() |
|
local best_node="$NODENAME" |
|
local last_commit=0 |
|
local missing_nodes=0 |
|
+ local nodes="" |
|
+ local nodes_recovered="" |
|
|
|
+ # avoid selecting a recovered node as bootstrap if possible |
|
for node in $(echo "$OCF_RESKEY_wsrep_cluster_address" | sed 's/gcomm:\/\///g' | tr -d ' ' | tr -s ',' ' '); do |
|
+ if is_heuristic_recovered $node; then |
|
+ nodes_recovered="$nodes_recovered $node" |
|
+ else |
|
+ nodes="$nodes $node" |
|
+ fi |
|
+ done |
|
+ |
|
+ for node in $nodes_recovered $nodes; do |
|
last_commit=$(get_last_commit $node) |
|
|
|
if [ -z "$last_commit" ]; then |
|
@@ -466,6 +493,12 @@ detect_last_commit() |
|
--tc-heuristic-recover=rollback > $tmp 2>/dev/null |
|
|
|
last_commit="$(cat $tmp | sed -n $recovered_position_regex)" |
|
+ if [ ! -z "$last_commit" ]; then |
|
+ ocf_log warn "State recovered. force SST at next restart for full resynchronization" |
|
+ rm -f ${OCF_RESKEY_datadir}/grastate.dat |
|
+ # try not to use this node if bootstrap is needed |
|
+ set_heuristic_recovered |
|
+ fi |
|
fi |
|
fi |
|
rm -f $tmp $tmperr |
|
@@ -549,11 +582,17 @@ galera_promote() |
|
if ocf_is_true $bootstrap; then |
|
promote_everyone |
|
clear_bootstrap_node |
|
+ # clear attribute heuristic-recovered. if the last shutdown was |
|
+ # not clean, we cannot be extra cautious and request an SST, |
|
+ # since this is the bootstrap node |
|
+ clear_heuristic_recovered |
|
ocf_log info "Bootstrap complete, promoting the rest of the galera instances." |
|
else |
|
# if this is not the bootstrap node, make sure this instance |
|
# syncs with the rest of the cluster before promotion returns. |
|
wait_for_sync |
|
+ # sync is done, clear info about last recovery |
|
+ clear_heuristic_recovered |
|
fi |
|
|
|
ocf_log info "Galera started"
|
|
|