You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
7.3 KiB
182 lines
7.3 KiB
From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001 |
|
From: Raghavendra Bhat <raghavendra@redhat.com> |
|
Date: Mon, 15 Apr 2019 14:09:34 -0400 |
|
Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after |
|
cancelling the signer thread |
|
|
|
When bit-rot feature is disabled, the signer thread from the bit-rot-stub |
|
xlator (the thread which performs the setxattr of the signature on to the |
|
disk) is cancelled. But, if the cancelled signer thread had already held |
|
the mutex (&priv->lock) which it uses to monitor the queue of files to |
|
be signed, then the mutex is never released. This creates problems in |
|
future when the feature is enabled again. Both the new instance of the |
|
signer thread and the regular thread which enqueues the files to be |
|
signed will be blocked on this mutex. |
|
|
|
So, as part of cancelling the signer thread, unlock the mutex associated |
|
with it as well using pthread_cleanup_push and pthread_cleanup_pop. |
|
|
|
Upstream patch: |
|
> patch: https://review.gluster.org/22572 |
|
> fixes: #bz1700078 |
|
> Change-Id: Ib761910caed90b268e69794ddeb108165487af40 |
|
|
|
Change-Id: Ib761910caed90b268e69794ddeb108165487af40 |
|
BUG: 1851424 |
|
Signed-off-by: Raghavendra M <raghavendra@redhat.com> |
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/208304 |
|
Tested-by: RHGS Build Bot <nigelb@redhat.com> |
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> |
|
--- |
|
.../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +- |
|
xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++--- |
|
2 files changed, 59 insertions(+), 7 deletions(-) |
|
|
|
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h |
|
index 7f07f29..155802b 100644 |
|
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h |
|
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h |
|
@@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, |
|
BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, |
|
BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, |
|
BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, |
|
- BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); |
|
+ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, |
|
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, |
|
+ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL); |
|
|
|
#endif /* !_BITROT_STUB_MESSAGES_H_ */ |
|
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c |
|
index 3f48a4b..c3f81bc 100644 |
|
--- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c |
|
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c |
|
@@ -26,6 +26,15 @@ |
|
|
|
#define BR_STUB_REQUEST_COOKIE 0x1 |
|
|
|
+void |
|
+br_stub_lock_cleaner(void *arg) |
|
+{ |
|
+ pthread_mutex_t *clean_mutex = arg; |
|
+ |
|
+ pthread_mutex_unlock(clean_mutex); |
|
+ return; |
|
+} |
|
+ |
|
void * |
|
br_stub_signth(void *); |
|
|
|
@@ -166,8 +175,11 @@ init(xlator_t *this) |
|
|
|
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, |
|
"brssign"); |
|
- if (ret != 0) |
|
+ if (ret != 0) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, |
|
+ "failed to create the new thread for signer"); |
|
goto cleanup_lock; |
|
+ } |
|
|
|
ret = br_stub_bad_object_container_init(this, priv); |
|
if (ret) { |
|
@@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options) |
|
priv = this->private; |
|
|
|
GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); |
|
- if (priv->do_versioning) { |
|
+ if (priv->do_versioning && !priv->signth) { |
|
ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, |
|
"brssign"); |
|
- if (ret != 0) |
|
+ if (ret != 0) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, |
|
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, |
|
+ "failed to create the new thread for signer"); |
|
goto err; |
|
+ } |
|
|
|
ret = br_stub_bad_object_container_init(this, priv); |
|
if (ret) { |
|
@@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options) |
|
gf_msg(this->name, GF_LOG_ERROR, 0, |
|
BRS_MSG_CANCEL_SIGN_THREAD_FAILED, |
|
"Could not cancel sign serializer thread"); |
|
+ } else { |
|
+ gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, |
|
+ "killed the signer thread"); |
|
+ priv->signth = 0; |
|
} |
|
- priv->signth = 0; |
|
} |
|
|
|
if (priv->container.thread) { |
|
@@ -902,6 +921,24 @@ br_stub_signth(void *arg) |
|
|
|
THIS = this; |
|
while (1) { |
|
+ /* |
|
+ * Disabling bit-rot feature leads to this particular thread |
|
+ * getting cleaned up by reconfigure via a call to the function |
|
+ * gf_thread_cleanup_xint (which in turn calls pthread_cancel |
|
+ * and pthread_join). But, if this thread had held the mutex |
|
+ * &priv->lock at the time of cancellation, then it leads to |
|
+ * deadlock in future when bit-rot feature is enabled (which |
|
+ * again spawns this thread which cant hold the lock as the |
|
+ * mutex is still held by the previous instance of the thread |
|
+ * which got killed). Also, the br_stub_handle_object_signature |
|
+ * function which is called whenever file has to be signed |
|
+ * also gets blocked as it too attempts to acquire &priv->lock. |
|
+ * |
|
+ * So, arrange for the lock to be unlocked as part of the |
|
+ * cleanup of this thread using pthread_cleanup_push and |
|
+ * pthread_cleanup_pop. |
|
+ */ |
|
+ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); |
|
pthread_mutex_lock(&priv->lock); |
|
{ |
|
while (list_empty(&priv->squeue)) |
|
@@ -912,6 +949,7 @@ br_stub_signth(void *arg) |
|
list_del_init(&sigstub->list); |
|
} |
|
pthread_mutex_unlock(&priv->lock); |
|
+ pthread_cleanup_pop(0); |
|
|
|
call_resume(sigstub->stub); |
|
|
|
@@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, |
|
|
|
priv = this->private; |
|
|
|
- if (frame->root->pid != GF_CLIENT_PID_BITD) |
|
+ if (frame->root->pid != GF_CLIENT_PID_BITD) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, |
|
+ "PID %d from where signature request" |
|
+ "came, does not belong to bit-rot daemon." |
|
+ "Unwinding the fop", |
|
+ frame->root->pid); |
|
goto dofop; |
|
+ } |
|
|
|
ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); |
|
- if (ret) |
|
+ if (ret) { |
|
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, |
|
+ "failed to prepare the signature for %s. Unwinding the fop", |
|
+ uuid_utoa(fd->inode->gfid)); |
|
goto dofop; |
|
+ } |
|
if (fakesuccess) { |
|
op_ret = op_errno = 0; |
|
goto dofop; |
|
@@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, |
|
/* object signature request */ |
|
ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); |
|
if (!ret) { |
|
+ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", |
|
+ uuid_utoa(fd->inode->gfid)); |
|
br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); |
|
goto done; |
|
} |
|
-- |
|
1.8.3.1 |
|
|
|
|