From edc238e40060773f5f5fd59fcdad8ae27d65749f Mon Sep 17 00:00:00 2001
From: Mohammed Rafi KC <rkavunga@redhat.com>
Date: Mon, 29 Apr 2019 13:22:32 +0530
Subject: [PATCH 139/141] ec/shd: Cleanup self heal daemon resources during ec
 fini

We were not properly cleaning up self-heal daemon resources
during ec fini. With shd multiplexing, it is absolutely
necessary to clean up all the resources during ec fini.

Backport of
upstream patch: https://review.gluster.org/#/c/glusterfs/+/22644/

>Change-Id: Iae4f1bce7d8c2e1da51ac568700a51088f3cc7f2
>fixes: bz#1703948
>Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>

BUG: 1703434
Change-Id: I98ae03178d3176772c62e34baa08a5c35b8f7217
Signed-off-by: Mohammed Rafi KC <rkavunga@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/169994
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
 libglusterfs/src/syncop-utils.c          |  2 +
 xlators/cluster/afr/src/afr-self-heald.c |  5 +++
 xlators/cluster/ec/src/ec-heald.c        | 77 +++++++++++++++++++++++++++-----
 xlators/cluster/ec/src/ec-heald.h        |  3 ++
 xlators/cluster/ec/src/ec-messages.h     |  3 +-
 xlators/cluster/ec/src/ec.c              | 47 +++++++++++++++++++
 6 files changed, 124 insertions(+), 13 deletions(-)

diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
index b842142..4167db4 100644
--- a/libglusterfs/src/syncop-utils.c
+++ b/libglusterfs/src/syncop-utils.c
@@ -354,6 +354,8 @@ syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
 
     if (frame) {
         this = frame->this;
+    } else {
+        this = THIS;
     }
 
     /*For this functionality to be implemented in general, we need
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index 8bc4720..522fe5d 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -524,6 +524,11 @@ afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     afr_private_t *priv = NULL;
 
     priv = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (!priv->shd.enabled)
         return -EBUSY;
 
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
index cba111a..edf5e11 100644
--- a/xlators/cluster/ec/src/ec-heald.c
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -71,6 +71,11 @@ disabled_loop:
             break;
     }
 
+    if (ec->shutdown) {
+        healer->running = _gf_false;
+        return -1;
+    }
+
     ret = healer->rerun;
     healer->rerun = 0;
 
@@ -241,9 +246,11 @@ ec_shd_index_sweep(struct subvol_healer *healer)
         goto out;
     }
 
+    _mask_cancellation();
     ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
                              healer, ec_shd_index_heal, xdata,
                              ec->shd.max_threads, ec->shd.wait_qlength);
+    _unmask_cancellation();
 out:
     if (xdata)
         dict_unref(xdata);
@@ -263,6 +270,11 @@ ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
     int ret = 0;
 
     ec = this->private;
+
+    if (this->cleanup_starting) {
+        return -ENOTCONN;
+    }
+
     if (ec->xl_up_count <= ec->fragments) {
         return -ENOTCONN;
     }
@@ -305,11 +317,15 @@ ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
 {
     ec_t *ec = NULL;
     loc_t loc = {0};
+    int ret = -1;
 
     ec = healer->this->private;
     loc.inode = inode;
-    return syncop_ftw(ec->xl_list[healer->subvol], &loc,
-                      GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _mask_cancellation();
+    ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+                     GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+    _unmask_cancellation();
+    return ret;
 }
 
 void *
@@ -317,13 +333,16 @@ ec_shd_index_healer(void *data)
 {
     struct subvol_healer *healer = NULL;
     xlator_t *this = NULL;
+    int run = 0;
 
     healer = data;
     THIS = this = healer->this;
     ec_t *ec = this->private;
 
     for (;;) {
-        ec_shd_healer_wait(healer);
+        run = ec_shd_healer_wait(healer);
+        if (run == -1)
+            break;
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
@@ -352,16 +371,12 @@ ec_shd_full_healer(void *data)
 
     rootloc.inode = this->itable->root;
     for (;;) {
-        pthread_mutex_lock(&healer->mutex);
-        {
-            run = __ec_shd_healer_wait(healer);
-            if (!run)
-                healer->running = _gf_false;
-        }
-        pthread_mutex_unlock(&healer->mutex);
-
-        if (!run)
+        run = ec_shd_healer_wait(healer);
+        if (run < 0) {
             break;
+        } else if (run == 0) {
+            continue;
+        }
 
         if (ec->xl_up_count > ec->fragments) {
             gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
@@ -562,3 +577,41 @@ out:
     dict_del(output, this->name);
     return ret;
 }
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+    if (!healer)
+        return;
+
+    pthread_cond_destroy(&healer->cond);
+    pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    ec_t *priv = NULL;
+    int i = 0;
+
+    priv = this->private;
+    if (!priv)
+        return;
+
+    shd = &priv->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < priv->nodes; i++) {
+        healer = &shd->index_healers[i];
+        ec_destroy_healer_object(this, healer);
+
+        healer = &shd->full_healers[i];
+        ec_destroy_healer_object(this, healer);
+    }
+
+    GF_FREE(shd->index_healers);
+    GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
index 2eda2a7..8184cf4 100644
--- a/xlators/cluster/ec/src/ec-heald.h
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -24,4 +24,7 @@ ec_selfheal_daemon_init(xlator_t *this);
 void
 ec_shd_index_healer_wake(ec_t *ec);
 
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
 #endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
index 7c28808..ce299bb 100644
--- a/xlators/cluster/ec/src/ec-messages.h
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -55,6 +55,7 @@ GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
            EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
            EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
            EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
-           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED);
+           EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+           EC_MSG_THREAD_CLEANUP_FAILED);
 
 #endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
index 3c8013e..264582a 100644
--- a/xlators/cluster/ec/src/ec.c
+++ b/xlators/cluster/ec/src/ec.c
@@ -429,6 +429,51 @@ ec_disable_delays(ec_t *ec)
 }
 
 void
+ec_cleanup_healer_object(ec_t *ec)
+{
+    struct subvol_healer *healer = NULL;
+    ec_self_heald_t *shd = NULL;
+    void *res = NULL;
+    int i = 0;
+    gf_boolean_t is_join = _gf_false;
+
+    shd = &ec->shd;
+    if (!shd->iamshd)
+        return;
+
+    for (i = 0; i < ec->nodes; i++) {
+        healer = &shd->index_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+
+        healer = &shd->full_healers[i];
+        pthread_mutex_lock(&healer->mutex);
+        {
+            healer->rerun = 1;
+            if (healer->running) {
+                pthread_cond_signal(&healer->cond);
+                is_join = _gf_true;
+            }
+        }
+        pthread_mutex_unlock(&healer->mutex);
+        if (is_join) {
+            pthread_join(healer->thread, &res);
+            is_join = _gf_false;
+        }
+    }
+}
+void
 ec_pending_fops_completed(ec_t *ec)
 {
     if (ec->shutdown) {
@@ -544,6 +589,7 @@ ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
         /* If there aren't pending fops running after we have waken up
          * them, we immediately propagate the notification. */
         propagate = ec_disable_delays(ec);
+        ec_cleanup_healer_object(ec);
         goto unlock;
     }
 
@@ -759,6 +805,7 @@ failed:
 void
 fini(xlator_t *this)
 {
+    ec_selfheal_daemon_fini(this);
     __ec_destroy_private(this);
 }
 
-- 
1.8.3.1