From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Mon, 29 Apr 2019 13:22:32 +0530
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
 fini

We were not properly cleaning up self-heal daemon resources
during ec fini. With shd multiplexing, it is absolutely
necessary to clean up all the resources during ec fini.

Backport of
upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/

>Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
>fixes: bz#1703948
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>

BUG: 1703434
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 libglusterfs/src/syncop-utils.c          |  2 +
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
 6 files changed, 124 insertions(+), 13 deletions(-)

diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index b842142..4167db4 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
 
     if (frame) {
         this = frame->this;
+    } else {
+        this = THIS;
     }
 
     /*For this functionality to be implemented in general, we need
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 8bc4720..522fe5d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     afr_private_t *priv = NULL;
 
     priv = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (!priv->shd.enabled)
         return -EBUSY;
 
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cba111a..edf5e11 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -71,6 +71,11 @@ disabled_loop:
             break;
     }
 
+    if (ec->shutdown) {
+        healer->running = _gf_false;
+        return -1;
+    }
+
     ret = healer->rerun;
     healer->rerun = 0;
 
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
         goto out;
     }
 
+    _mask_cancellation();
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
                              healer, ec_shd_index_heal, xdata,
                              ec->shd.max_threads, ec->shd.wait_qlength);
+    _unmask_cancellation();
 out:
     if (xdata)
         dict_unref(xdata);
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     int ret = 0;
 
     ec = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (ec->xl_up_count <= ec->fragments) {
         return -ENOTCONN;
     }
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
 {
     ec_t *ec = NULL;
     loc_t loc = {0};
+    int ret = -1;
 
     ec = healer->this->private;
     loc.inode = inode;
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _mask_cancellation();
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _unmask_cancellation();
+    return ret;
 }
 
 void *
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
 {
     struct subvol_healer *healer = NULL;
     xlator_t *this = NULL;
+    int run = 0;
 
     healer = data;
     THIS = this = healer->this;
     ec_t *ec = this->private;
 
     for (;;) {
-        ec_shd_healer_wait(healer);
+        run = ec_shd_healer_wait(healer);
+        if (run == -1)
+            break;
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
 
     rootloc.inode = this->itable->root;
     for (;;) {
-        pthread_mutex_lock(&healer->mutex);
-        {
-            run = __ec_shd_healer_wait(healer);
-            if (!run)
-                healer->running = _gf_false;
-        }
-        pthread_mutex_unlock(&healer->mutex);
-
-        if (!run)
+        run = ec_shd_healer_wait(healer);
+        if (run < 0) {
             break;
+        } else if (run == 0) {
+            continue;
+        }
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -562,3 +577,41 @@ out:
     dict_del(output, this->name);
     return ret;
 }
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (!healer)
+        return;
+
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        healer = &shd->index_healers[i];
+        ec_destroy_healer_object(this, healer);
+
+        healer = &shd->full_healers[i];
+        ec_destroy_healer_object(this, healer);
+    }
+
+    GF_FREE(shd->index_healers);
+    GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a7..8184cf4 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
 void
 ec_shd_index_healer_wake(ec_t *ec);
 
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
 #endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c28808..ce299bb 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+           EC_MSG_THREAD_CLEANUP_FAILED);
 
 #endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 3c8013e..264582a 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
 }
 
 void
+ec_cleanup_healer_object(ec_t *ec)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    void *res = NULL;
+    int i = 0;
+    gf_boolean_t is_join = _gf_false;
+
+    shd = &ec->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < ec->nodes; i++) {
+        healer = &shd->index_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+
+        healer = &shd->full_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+    }
+}
+void
 ec_pending_fops_completed(ec_t *ec)
 {
     if (ec->shutdown) {
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
         /* If there aren't pending fops running after we have waken up
          * them, we immediately propagate the notification. */
         propagate = ec_disable_delays(ec);
+        ec_cleanup_healer_object(ec);
         goto unlock;
     }
 
@@ -759,6 +805,7 @@ failed:
 void
 fini(xlator_t *this)
 {
+    ec_selfheal_daemon_fini(this);
     __ec_destroy_private(this);
 }
 
-- 
1.8.3.1