From d9833b68498e306d181be11adf9eee14b646a899 Mon Sep 17 00:00:00 2001
From: Damien Ciabrini
Date: Tue, 2 Feb 2016 14:34:36 +0100
Subject: [PATCH] galera: force crash recovery if needed during last commit detection

Move the last-commit detection out of galera_start() into a new
detect_last_commit() helper and call it from both galera_start() and
galera_demote(), so that demotion no longer goes through galera_start().

When --wsrep-recover yields no recovered position and the server's
stderr reports prepared transactions, run the recovery a second time
with --tc-heuristic-recover=rollback.

A recovered position of -1 is no longer rewritten to 0.
---
Review changes on top of the submitted patch:
 - quote the sed script and the temporary-file operands in
   detect_last_commit()
 - keep only the last "Recovered position" match so that a single value
   is handed to set_last_commit
 - use [ -n ... ] instead of [ ! -z ... ]
 - add a header comment to detect_last_commit()
 - recompute diffstat and new-side hunk offsets (the submitted header
   counted 60 insertions while the hunks added 61); the post-image blob
   id on the index line is carried over unchanged

 heartbeat/galera | 103 ++++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 73 insertions(+), 30 deletions(-)

diff --git a/heartbeat/galera b/heartbeat/galera
index 7be2b00..ca94c21 100755
--- a/heartbeat/galera
+++ b/heartbeat/galera
@@ -525,6 +525,70 @@ detect_first_master()
     set_bootstrap_node $best_node
 }
 
+# Detect the last known write sequence number (seqno) of the local node
+# and store it with set_last_commit.
+#
+# The seqno line of ${OCF_RESKEY_datadir}/grastate.dat is tried first. If
+# it is absent or -1, ${OCF_RESKEY_binary} is run with --wsrep-recover and
+# the "Recovered position" line of its output is parsed (the last such
+# line if there are several). If that yields nothing and stderr reports
+# prepared transactions, the recovery is run once more with
+# --tc-heuristic-recover=rollback.
+#
+# Returns $OCF_SUCCESS once a value has been stored; otherwise clears the
+# stored value and returns $OCF_ERR_GENERIC.
+detect_last_commit()
+{
+    local last_commit
+    local recover_args="--defaults-file=$OCF_RESKEY_config \
+                        --pid-file=$OCF_RESKEY_pid \
+                        --socket=$OCF_RESKEY_socket \
+                        --datadir=$OCF_RESKEY_datadir \
+                        --user=$OCF_RESKEY_user"
+    local recovered_position_regex='s/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p'
+
+    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
+    last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
+    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
+        local tmp=$(mktemp)
+        local tmperr=$(mktemp)
+
+        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
+
+        ${OCF_RESKEY_binary} $recover_args --wsrep-recover > "$tmp" 2> "$tmperr"
+
+        last_commit="$(cat "$tmp" | sed -n "$recovered_position_regex" | tail -n 1)"
+        if [ -z "$last_commit" ]; then
+            # Galera uses InnoDB's 2pc transactions internally. If
+            # server was stopped in the middle of a replication, the
+            # recovery may find a "prepared" XA transaction in the
+            # redo log, and mysql won't recover automatically
+
+            cat "$tmperr" | grep -q -E '\[ERROR\]\s+Found\s+[0-9]+\s+prepared\s+transactions!' 2>/dev/null
+            if [ $? -eq 0 ]; then
+                # we can only rollback the transaction, but that's OK
+                # since the DB will get resynchronized anyway
+                ocf_log warn "local node <${NODENAME}> was not shutdown properly. Rollback stuck transaction with --tc-heuristic-recover"
+                ${OCF_RESKEY_binary} $recover_args --wsrep-recover \
+                                     --tc-heuristic-recover=rollback > "$tmp" 2>/dev/null
+
+                last_commit="$(cat "$tmp" | sed -n "$recovered_position_regex" | tail -n 1)"
+            fi
+        fi
+        rm -f "$tmp" "$tmperr"
+    fi
+
+    if [ -n "$last_commit" ]; then
+        ocf_log info "Last commit version found: $last_commit"
+        set_last_commit "$last_commit"
+        return $OCF_SUCCESS
+    else
+        ocf_exit_reason "Unable to detect last known write sequence number"
+        clear_last_commit
+        return $OCF_ERR_GENERIC
+    fi
+}
+
 # For galera, promote is really start
 galera_promote()
 {
@@ -569,13 +633,15 @@ galera_demote()
     clear_bootstrap_node
     clear_last_commit
 
-    # record last commit by "starting" galera. start is just detection of the last sequence number
-    galera_start
+    # record last commit for next promotion
+    detect_last_commit
+    rc=$?
+    return $rc
 }
 
 galera_start()
 {
-    local last_commit
+    local rc
 
     echo $OCF_RESKEY_wsrep_cluster_address | grep -q $NODENAME
     if [ $? -ne 0 ]; then
@@ -591,34 +657,11 @@ galera_start()
 
     mysql_common_prepare_dirs
 
-    ocf_log info "attempting to detect last commit version by reading ${OCF_RESKEY_datadir}/grastate.dat"
-    last_commit="$(cat ${OCF_RESKEY_datadir}/grastate.dat | sed -n 's/^seqno.\s*\(.*\)\s*$/\1/p')"
-    if [ -z "$last_commit" ] || [ "$last_commit" = "-1" ]; then
-        ocf_log info "now attempting to detect last commit version using 'mysqld_safe --wsrep-recover'"
-        local tmp=$(mktemp)
-        ${OCF_RESKEY_binary} --defaults-file=$OCF_RESKEY_config \
-        --pid-file=$OCF_RESKEY_pid \
-        --socket=$OCF_RESKEY_socket \
-        --datadir=$OCF_RESKEY_datadir \
-        --user=$OCF_RESKEY_user \
-        --wsrep-recover > $tmp 2>&1
-
-        last_commit="$(cat $tmp | sed -n 's/.*WSREP\:\s*[R|r]ecovered\s*position.*\:\(.*\)\s*$/\1/p')"
-        rm -f $tmp
-
-        if [ "$last_commit" = "-1" ]; then
-            last_commit="0"
-        fi
-    fi
-
-    if [ -z "$last_commit" ]; then
-        ocf_exit_reason "Unable to detect last known write sequence number"
-        clear_last_commit
-        return $OCF_ERR_GENERIC
+    detect_last_commit
+    rc=$?
+    if [ $rc -ne $OCF_SUCCESS ]; then
+        return $rc
     fi
-    ocf_log info "Last commit version found: $last_commit"
-
-    set_last_commit $last_commit
 
     master_exists
     if [ $? -eq 0 ]; then