From ed73f2046dd3fbb22341bf9fc004087d90dfbe6d Mon Sep 17 00:00:00 2001 From: Raghavendra Bhat Date: Mon, 15 Apr 2019 14:09:34 -0400 Subject: [PATCH 458/465] features/bit-rot-stub: clean the mutex after cancelling the signer thread When bit-rot feature is disabled, the signer thread from the bit-rot-stub xlator (the thread which performs the setxattr of the signature on to the disk) is cancelled. But, if the cancelled signer thread had already held the mutex (&priv->lock) which it uses to monitor the queue of files to be signed, then the mutex is never released. This creates problems in future when the feature is enabled again. Both the new instance of the signer thread and the regular thread which enqueues the files to be signed will be blocked on this mutex. So, as part of cancelling the signer thread, unlock the mutex associated with it as well using pthread_cleanup_push and pthread_cleanup_pop. Upstream patch: > patch: https://review.gluster.org/22572 > fixes: #bz1700078 > Change-Id: Ib761910caed90b268e69794ddeb108165487af40 Change-Id: Ib761910caed90b268e69794ddeb108165487af40 BUG: 1851424 Signed-off-by: Raghavendra M Reviewed-on: https://code.engineering.redhat.com/gerrit/208304 Tested-by: RHGS Build Bot Reviewed-by: Sunil Kumar Heggodu Gopala Acharya --- .../bit-rot/src/stub/bit-rot-stub-messages.h | 4 +- xlators/features/bit-rot/src/stub/bit-rot-stub.c | 62 +++++++++++++++++++--- 2 files changed, 59 insertions(+), 7 deletions(-) diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h index 7f07f29..155802b 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h @@ -39,6 +39,8 @@ GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED, BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL, BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED, - BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL); + BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD, + BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL); #endif /* !_BITROT_STUB_MESSAGES_H_ */ diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c index 3f48a4b..c3f81bc 100644 --- a/xlators/features/bit-rot/src/stub/bit-rot-stub.c +++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c @@ -26,6 +26,15 @@ #define BR_STUB_REQUEST_COOKIE 0x1 +void +br_stub_lock_cleaner(void *arg) +{ + pthread_mutex_t *clean_mutex = arg; + + pthread_mutex_unlock(clean_mutex); + return; +} + void * br_stub_signth(void *); @@ -166,8 +175,11 @@ init(xlator_t *this) ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, "brssign"); - if (ret != 0) + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED, + "failed to create the new thread for signer"); goto cleanup_lock; + } ret = br_stub_bad_object_container_init(this, priv); if (ret) { @@ -214,11 +226,15 @@ reconfigure(xlator_t *this, dict_t *options) priv = this->private; GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err); - if (priv->do_versioning) { + if (priv->do_versioning && !priv->signth) { ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this, "brssign"); - if (ret != 0) + if (ret != 0) { + gf_msg(this->name, GF_LOG_WARNING, 0, + BRS_MSG_SPAWN_SIGN_THRD_FAILED, + "failed to create the new thread for signer"); goto err; + } ret = br_stub_bad_object_container_init(this, priv); if (ret) { @@ -232,8 +248,11 @@ reconfigure(xlator_t *this, dict_t *options) gf_msg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED, "Could not cancel sign serializer thread"); + } else { + gf_msg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD, + "killed the signer thread"); + priv->signth = 0; } - priv->signth = 0; } if (priv->container.thread) { @@ -902,6 +921,24 @@ br_stub_signth(void *arg) THIS = this; while (1) { + /* + * Disabling bit-rot feature leads to this particular thread + * getting cleaned up by reconfigure via a call to the function + * gf_thread_cleanup_xint (which in turn calls pthread_cancel + * and pthread_join). But, if this thread had held the mutex + * &priv->lock at the time of cancellation, then it leads to + * deadlock in future when bit-rot feature is enabled (which + * again spawns this thread which cant hold the lock as the + * mutex is still held by the previous instance of the thread + * which got killed). Also, the br_stub_handle_object_signature + * function which is called whenever file has to be signed + * also gets blocked as it too attempts to acquire &priv->lock. + * + * So, arrange for the lock to be unlocked as part of the + * cleanup of this thread using pthread_cleanup_push and + * pthread_cleanup_pop. + */ + pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock); pthread_mutex_lock(&priv->lock); { while (list_empty(&priv->squeue)) @@ -912,6 +949,7 @@ br_stub_signth(void *arg) list_del_init(&sigstub->list); } pthread_mutex_unlock(&priv->lock); + pthread_cleanup_pop(0); call_resume(sigstub->stub); @@ -1042,12 +1080,22 @@ br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd, priv = this->private; - if (frame->root->pid != GF_CLIENT_PID_BITD) + if (frame->root->pid != GF_CLIENT_PID_BITD) { + gf_msg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID, + "PID %d from where signature request" + "came, does not belong to bit-rot daemon." + "Unwinding the fop", + frame->root->pid); goto dofop; + } ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess); - if (ret) + if (ret) { + gf_msg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL, + "failed to prepare the signature for %s. Unwinding the fop", + uuid_utoa(fd->inode->gfid)); goto dofop; + } if (fakesuccess) { op_ret = op_errno = 0; goto dofop; @@ -1387,6 +1435,8 @@ br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict, /* object signature request */ ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign); if (!ret) { + gf_msg_debug(this->name, 0, "got SIGNATURE request on %s", + uuid_utoa(fd->inode->gfid)); br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata); goto done; } -- 1.8.3.1