From 75a9d946d252ce70460144615ca17dbdf2e80fab Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Fri, 7 Feb 2020 10:19:57 +0100
Subject: [PATCH 354/355] core: fix memory pool management races

Objects allocated from a per-thread memory pool keep a reference to it
to be able to return the object to the pool when not used anymore. The
object holding this reference can have a long life cycle that could
survive a glfs_fini() call.

This means that it's unsafe to destroy memory pools from glfs_fini().

Another side effect of destroying memory pools from glfs_fini() is that
the TLS variable that points to one of those pools cannot be reset for
all alive threads. This means that any attempt to allocate memory from
those threads will access already free'd memory, which is very
dangerous.
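
The lifetime problem can be illustrated with a minimal, self-contained
sketch (hypothetical names and structures, not the actual glusterfs
code): a worker thread caches a pointer to a shared pool in a
thread-local variable, the main thread destroys the pool, and any later
use from the worker dereferences freed memory.

    /* Illustrative only: simplified stand-in for a per-thread memory pool. */
    #include <pthread.h>
    #include <stdlib.h>
    #include <unistd.h>

    typedef struct pool { void *objects; } pool_t;

    static __thread pool_t *my_pool = NULL;  /* TLS pointer, one per thread */
    static pool_t *global_pool = NULL;

    static void *worker(void *arg)
    {
        my_pool = global_pool;   /* thread keeps a reference to the pool   */
        sleep(2);                /* ... and keeps running after the "fini" */
        free(my_pool->objects);  /* use-after-free once the pool is gone   */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        global_pool = calloc(1, sizeof(*global_pool));
        global_pool->objects = malloc(64);
        pthread_create(&t, NULL, worker, NULL);
        sleep(1);
        /* Equivalent of destroying pools from glfs_fini(): the TLS pointer
         * held by the worker cannot be reset from here, so it dangles. */
        free(global_pool);
        pthread_join(t, NULL);
        return 0;
    }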
|
To fix these issues, mem_pools_fini() doesn't destroy pool lists
anymore. The pools are only destroyed at process termination.
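
The fix relies on two standard mechanisms, sketched below with
simplified, hypothetical names: a library destructor that runs once at
process termination (or dlclose()), and a pthread key whose destructor
is only invoked at thread exit for threads that stored a non-NULL value
in it, which is why every thread that touches a pool has to mark itself
for cleanup first.

    /* Illustrative only: the cleanup hooks the fix depends on. */
    #include <pthread.h>
    #include <stdio.h>

    static pthread_key_t cleanup_key;

    static void per_thread_cleanup(void *value)
    {
        /* Runs at thread exit, but only for threads holding a non-NULL
         * value in cleanup_key. */
        printf("releasing this thread's pool resources\n");
    }

    static void mark_thread_for_cleanup(void)
    {
        /* The stored value is never read; it only has to be non-NULL so
         * that the pthread destructor fires for this thread. */
        (void)pthread_setspecific(cleanup_key, (void *)1);
    }

    static __attribute__((constructor)) void pools_preinit(void)
    {
        (void)pthread_key_create(&cleanup_key, per_thread_cleanup);
    }

    static __attribute__((destructor)) void pools_postfini(void)
    {
        /* Runs at process termination (or dlclose()), so the remaining
         * pool lists can be freed here instead of in glfs_fini(). */
        printf("destroying remaining pool lists\n");
    }

    static void *worker(void *arg)
    {
        mark_thread_for_cleanup();  /* done on first pool allocation */
        return NULL;
    }

    int main(void)
    {
        pthread_t t;

        pthread_create(&t, NULL, worker, NULL);
        pthread_join(t, NULL);      /* per_thread_cleanup() runs for t */
        return 0;
    }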
|
Upstream patch:
> Upstream patch link: https://review.gluster.org/c/glusterfs/+/24099
> Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
> Fixes: bz#1801684
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>

Change-Id: Ib189a5510ab6bdac78983c6c65a022e9634b0965
BUG: 1800703
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/192262
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
---
|
libglusterfs/src/globals.c | 13 ++- |
|
libglusterfs/src/glusterfs/globals.h | 3 + |
|
libglusterfs/src/glusterfs/mem-pool.h | 28 ++--- |
|
libglusterfs/src/mem-pool.c | 201 ++++++++++++++++++---------------- |
|
libglusterfs/src/syncop.c | 7 ++ |
|
5 files changed, 146 insertions(+), 106 deletions(-) |
|
|
|
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c |
|
index 02098e6..e433ee8 100644 |
|
--- a/libglusterfs/src/globals.c |
|
+++ b/libglusterfs/src/globals.c |
|
@@ -319,7 +319,18 @@ glusterfs_cleanup(void *ptr) |
|
GF_FREE(thread_syncopctx.groups); |
|
} |
|
|
|
- mem_pool_thread_destructor(); |
|
+ mem_pool_thread_destructor(NULL); |
|
+} |
|
+ |
|
+void |
|
+gf_thread_needs_cleanup(void) |
|
+{ |
|
+ /* The value stored in free_key TLS is not really used for anything, but |
|
+ * pthread implementation doesn't call the TLS destruction function unless |
|
+ * it's != NULL. This function must be called whenever something is |
|
+ * allocated for this thread so that glusterfs_cleanup() will be called |
|
+ * and resources can be released. */ |
|
+ (void)pthread_setspecific(free_key, (void *)1); |
|
} |
|
|
|
static void |
|
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h |
|
index e218285..31717ed 100644 |
|
--- a/libglusterfs/src/glusterfs/globals.h |
|
+++ b/libglusterfs/src/glusterfs/globals.h |
|
@@ -181,6 +181,9 @@ glusterfs_leaseid_exist(void); |
|
int |
|
glusterfs_globals_init(glusterfs_ctx_t *ctx); |
|
|
|
+void |
|
+gf_thread_needs_cleanup(void); |
|
+ |
|
struct tvec_base * |
|
glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx); |
|
void |
|
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h |
|
index be0a26d..97bf76c 100644 |
|
--- a/libglusterfs/src/glusterfs/mem-pool.h |
|
+++ b/libglusterfs/src/glusterfs/mem-pool.h |
|
@@ -245,24 +245,26 @@ typedef struct per_thread_pool { |
|
} per_thread_pool_t; |
|
|
|
typedef struct per_thread_pool_list { |
|
- /* |
|
- * These first two members are protected by the global pool lock. When |
|
- * a thread first tries to use any pool, we create one of these. We |
|
- * link it into the global list using thr_list so the pool-sweeper |
|
- * thread can find it, and use pthread_setspecific so this thread can |
|
- * find it. When the per-thread destructor runs, we "poison" the pool |
|
- * list to prevent further allocations. This also signals to the |
|
- * pool-sweeper thread that the list should be detached and freed after |
|
- * the next time it's swept. |
|
- */ |
|
+ /* thr_list is used to place the TLS pool_list into the active global list |
|
+ * (pool_threads) or the inactive global list (pool_free_threads). It's |
|
+ * protected by the global pool_lock. */ |
|
struct list_head thr_list; |
|
- unsigned int poison; |
|
+ |
|
+ /* This lock is used to update poison and the hot/cold lists of members |
|
+ * of 'pools' array. */ |
|
+ pthread_spinlock_t lock; |
|
+ |
|
+ /* This field is used to mark a pool_list as not being owned by any thread. |
|
+ * This means that the sweeper thread won't be cleaning objects stored in |
|
+ * its pools. mem_put() uses it to decide if the object being released is |
|
+ * placed into its original pool_list or directly destroyed. */ |
|
+ bool poison; |
|
+ |
|
/* |
|
* There's really more than one pool, but the actual number is hidden |
|
* in the implementation code so we just make it a single-element array |
|
* here. |
|
*/ |
|
- pthread_spinlock_t lock; |
|
per_thread_pool_t pools[1]; |
|
} per_thread_pool_list_t; |
|
|
|
@@ -307,7 +309,7 @@ void |
|
mem_pool_destroy(struct mem_pool *pool); |
|
|
|
void |
|
-mem_pool_thread_destructor(void); |
|
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list); |
|
|
|
void |
|
gf_mem_acct_enable_set(void *ctx); |
|
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c |
|
index d88041d..2b41c01 100644 |
|
--- a/libglusterfs/src/mem-pool.c |
|
+++ b/libglusterfs/src/mem-pool.c |
|
@@ -367,7 +367,6 @@ static __thread per_thread_pool_list_t *thread_pool_list = NULL; |
|
#define POOL_SWEEP_SECS 30 |
|
|
|
typedef struct { |
|
- struct list_head death_row; |
|
pooled_obj_hdr_t *cold_lists[N_COLD_LISTS]; |
|
unsigned int n_cold_lists; |
|
} sweep_state_t; |
|
@@ -384,36 +383,33 @@ static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER; |
|
static unsigned int init_count = 0; |
|
static pthread_t sweeper_tid; |
|
|
|
-gf_boolean_t |
|
+static bool |
|
collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list) |
|
{ |
|
unsigned int i; |
|
per_thread_pool_t *pt_pool; |
|
- gf_boolean_t poisoned; |
|
|
|
(void)pthread_spin_lock(&pool_list->lock); |
|
|
|
- poisoned = pool_list->poison != 0; |
|
- if (!poisoned) { |
|
- for (i = 0; i < NPOOLS; ++i) { |
|
- pt_pool = &pool_list->pools[i]; |
|
- if (pt_pool->cold_list) { |
|
- if (state->n_cold_lists >= N_COLD_LISTS) { |
|
- break; |
|
- } |
|
- state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; |
|
+ for (i = 0; i < NPOOLS; ++i) { |
|
+ pt_pool = &pool_list->pools[i]; |
|
+ if (pt_pool->cold_list) { |
|
+ if (state->n_cold_lists >= N_COLD_LISTS) { |
|
+ (void)pthread_spin_unlock(&pool_list->lock); |
|
+ return true; |
|
} |
|
- pt_pool->cold_list = pt_pool->hot_list; |
|
- pt_pool->hot_list = NULL; |
|
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list; |
|
} |
|
+ pt_pool->cold_list = pt_pool->hot_list; |
|
+ pt_pool->hot_list = NULL; |
|
} |
|
|
|
(void)pthread_spin_unlock(&pool_list->lock); |
|
|
|
- return poisoned; |
|
+ return false; |
|
} |
|
|
|
-void |
|
+static void |
|
free_obj_list(pooled_obj_hdr_t *victim) |
|
{ |
|
pooled_obj_hdr_t *next; |
|
@@ -425,82 +421,96 @@ free_obj_list(pooled_obj_hdr_t *victim) |
|
} |
|
} |
|
|
|
-void * |
|
+static void * |
|
pool_sweeper(void *arg) |
|
{ |
|
sweep_state_t state; |
|
per_thread_pool_list_t *pool_list; |
|
- per_thread_pool_list_t *next_pl; |
|
- per_thread_pool_t *pt_pool; |
|
- unsigned int i; |
|
- gf_boolean_t poisoned; |
|
+ uint32_t i; |
|
+ bool pending; |
|
|
|
/* |
|
* This is all a bit inelegant, but the point is to avoid doing |
|
* expensive things (like freeing thousands of objects) while holding a |
|
- * global lock. Thus, we split each iteration into three passes, with |
|
+ * global lock. Thus, we split each iteration into two passes, with |
|
* only the first and fastest holding the lock. |
|
*/ |
|
|
|
+ pending = true; |
|
+ |
|
for (;;) { |
|
- sleep(POOL_SWEEP_SECS); |
|
+ /* If we know there's pending work to do (or it's the first run), we |
|
+ * do collect garbage more often. */ |
|
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS); |
|
+ |
|
(void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); |
|
- INIT_LIST_HEAD(&state.death_row); |
|
state.n_cold_lists = 0; |
|
+ pending = false; |
|
|
|
/* First pass: collect stuff that needs our attention. */ |
|
(void)pthread_mutex_lock(&pool_lock); |
|
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, thr_list) |
|
+ list_for_each_entry(pool_list, &pool_threads, thr_list) |
|
{ |
|
- (void)pthread_mutex_unlock(&pool_lock); |
|
- poisoned = collect_garbage(&state, pool_list); |
|
- (void)pthread_mutex_lock(&pool_lock); |
|
- |
|
- if (poisoned) { |
|
- list_move(&pool_list->thr_list, &state.death_row); |
|
+ if (collect_garbage(&state, pool_list)) { |
|
+ pending = true; |
|
} |
|
} |
|
(void)pthread_mutex_unlock(&pool_lock); |
|
|
|
- /* Second pass: free dead pools. */ |
|
- (void)pthread_mutex_lock(&pool_free_lock); |
|
- list_for_each_entry_safe(pool_list, next_pl, &state.death_row, thr_list) |
|
- { |
|
- for (i = 0; i < NPOOLS; ++i) { |
|
- pt_pool = &pool_list->pools[i]; |
|
- free_obj_list(pt_pool->cold_list); |
|
- free_obj_list(pt_pool->hot_list); |
|
- pt_pool->hot_list = pt_pool->cold_list = NULL; |
|
- } |
|
- list_del(&pool_list->thr_list); |
|
- list_add(&pool_list->thr_list, &pool_free_threads); |
|
- } |
|
- (void)pthread_mutex_unlock(&pool_free_lock); |
|
- |
|
- /* Third pass: free cold objects from live pools. */ |
|
+ /* Second pass: free cold objects from live pools. */ |
|
for (i = 0; i < state.n_cold_lists; ++i) { |
|
free_obj_list(state.cold_lists[i]); |
|
} |
|
(void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); |
|
} |
|
+ |
|
+ return NULL; |
|
} |
|
|
|
void |
|
-mem_pool_thread_destructor(void) |
|
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) |
|
{ |
|
- per_thread_pool_list_t *pool_list = thread_pool_list; |
|
- |
|
- /* The pool-sweeper thread will take it from here. |
|
- * |
|
- * We can change 'poison' here without taking locks because the change |
|
- * itself doesn't interact with other parts of the code and a simple write |
|
- * is already atomic from the point of view of the processor. |
|
- * |
|
- * This change can modify what mem_put() does, but both possibilities are |
|
- * fine until the sweeper thread kicks in. The real synchronization must be |
|
- * between mem_put() and the sweeper thread. */ |
|
+ per_thread_pool_t *pt_pool; |
|
+ uint32_t i; |
|
+ |
|
+ if (pool_list == NULL) { |
|
+ pool_list = thread_pool_list; |
|
+ } |
|
+ |
|
+ /* The current thread is terminating. None of the allocated objects will |
|
+ * be used again. We can directly destroy them here instead of delaying |
|
+ * it until the next sweeper loop. */ |
|
if (pool_list != NULL) { |
|
- pool_list->poison = 1; |
|
+ /* Remove pool_list from the global list to avoid that sweeper |
|
+ * could touch it. */ |
|
+ pthread_mutex_lock(&pool_lock); |
|
+ list_del(&pool_list->thr_list); |
|
+ pthread_mutex_unlock(&pool_lock); |
|
+ |
|
+ /* We need to protect hot/cold changes from potential mem_put() calls |
|
+ * that reference this pool_list. Once poison is set to true, we are |
|
+ * sure that no one else will touch hot/cold lists. The only possible |
|
+ * race is when at the same moment a mem_put() is adding a new item |
|
+ * to the hot list. We protect from that by taking pool_list->lock. |
|
+ * After that we don't need the lock to destroy the hot/cold lists. */ |
|
+ pthread_spin_lock(&pool_list->lock); |
|
+ pool_list->poison = true; |
|
+ pthread_spin_unlock(&pool_list->lock); |
|
+ |
|
+ for (i = 0; i < NPOOLS; i++) { |
|
+ pt_pool = &pool_list->pools[i]; |
|
+ |
|
+ free_obj_list(pt_pool->hot_list); |
|
+ pt_pool->hot_list = NULL; |
|
+ |
|
+ free_obj_list(pt_pool->cold_list); |
|
+ pt_pool->cold_list = NULL; |
|
+ } |
|
+ |
|
+ pthread_mutex_lock(&pool_free_lock); |
|
+ list_add(&pool_list->thr_list, &pool_free_threads); |
|
+ pthread_mutex_unlock(&pool_free_lock); |
|
+ |
|
thread_pool_list = NULL; |
|
} |
|
} |
|
@@ -528,6 +538,30 @@ mem_pools_preinit(void) |
|
init_done = GF_MEMPOOL_INIT_EARLY; |
|
} |
|
|
|
+static __attribute__((destructor)) void |
|
+mem_pools_postfini(void) |
|
+{ |
|
+ per_thread_pool_list_t *pool_list, *next; |
|
+ |
|
+ /* This is part of a process shutdown (or dlclose()) which means that |
|
+ * most probably all threads should be stopped. However this is not the |
|
+ * case for gluster and there are even legitimate situations in which we |
|
+ * could have some threads alive. What is sure is that none of those |
|
+ * threads should be using anything from this library, so destroying |
|
+ * everything here should be fine and safe. */ |
|
+ |
|
+ list_for_each_entry_safe(pool_list, next, &pool_threads, thr_list) |
|
+ { |
|
+ mem_pool_thread_destructor(pool_list); |
|
+ } |
|
+ |
|
+ list_for_each_entry_safe(pool_list, next, &pool_free_threads, thr_list) |
|
+ { |
|
+ list_del(&pool_list->thr_list); |
|
+ FREE(pool_list); |
|
+ } |
|
+} |
|
+ |
|
/* Call mem_pools_init() once threading has been configured completely. This |
|
* prevent the pool_sweeper thread from getting killed once the main() thread |
|
* exits during deamonizing. */ |
|
@@ -560,10 +594,6 @@ mem_pools_fini(void) |
|
*/ |
|
break; |
|
case 1: { |
|
- per_thread_pool_list_t *pool_list; |
|
- per_thread_pool_list_t *next_pl; |
|
- unsigned int i; |
|
- |
|
/* if mem_pools_init() was not called, sweeper_tid will be invalid |
|
* and the functions will error out. That is not critical. In all |
|
* other cases, the sweeper_tid will be valid and the thread gets |
|
@@ -571,32 +601,11 @@ mem_pools_fini(void) |
|
(void)pthread_cancel(sweeper_tid); |
|
(void)pthread_join(sweeper_tid, NULL); |
|
|
|
- /* At this point all threads should have already terminated, so |
|
- * it should be safe to destroy all pending per_thread_pool_list_t |
|
- * structures that are stored for each thread. */ |
|
- mem_pool_thread_destructor(); |
|
- |
|
- /* free all objects from all pools */ |
|
- list_for_each_entry_safe(pool_list, next_pl, &pool_threads, |
|
- thr_list) |
|
- { |
|
- for (i = 0; i < NPOOLS; ++i) { |
|
- free_obj_list(pool_list->pools[i].hot_list); |
|
- free_obj_list(pool_list->pools[i].cold_list); |
|
- pool_list->pools[i].hot_list = NULL; |
|
- pool_list->pools[i].cold_list = NULL; |
|
- } |
|
- |
|
- list_del(&pool_list->thr_list); |
|
- FREE(pool_list); |
|
- } |
|
- |
|
- list_for_each_entry_safe(pool_list, next_pl, &pool_free_threads, |
|
- thr_list) |
|
- { |
|
- list_del(&pool_list->thr_list); |
|
- FREE(pool_list); |
|
- } |
|
+ /* There could be threads still running in some cases, so we can't |
|
+ * destroy pool_lists in use. We can also not destroy unused |
|
+ * pool_lists because some allocated objects may still be pointing |
|
+ * to them. */ |
|
+ mem_pool_thread_destructor(NULL); |
|
|
|
init_done = GF_MEMPOOL_INIT_DESTROY; |
|
/* Fall through. */ |
|
@@ -617,7 +626,7 @@ mem_pools_fini(void) |
|
{ |
|
} |
|
void |
|
-mem_pool_thread_destructor(void) |
|
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list) |
|
{ |
|
} |
|
|
|
@@ -738,13 +747,21 @@ mem_get_pool_list(void) |
|
} |
|
} |
|
|
|
+ /* There's no need to take pool_list->lock, because this is already an |
|
+ * atomic operation and we don't need to synchronize it with any change |
|
+ * in hot/cold lists. */ |
|
+ pool_list->poison = false; |
|
+ |
|
(void)pthread_mutex_lock(&pool_lock); |
|
- pool_list->poison = 0; |
|
list_add(&pool_list->thr_list, &pool_threads); |
|
(void)pthread_mutex_unlock(&pool_lock); |
|
|
|
thread_pool_list = pool_list; |
|
|
|
+ /* Ensure that all memory objects associated to the new pool_list are |
|
+ * destroyed when the thread terminates. */ |
|
+ gf_thread_needs_cleanup(); |
|
+ |
|
return pool_list; |
|
} |
|
|
|
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c |
|
index 2eb7b49..0de53c6 100644 |
|
--- a/libglusterfs/src/syncop.c |
|
+++ b/libglusterfs/src/syncop.c |
|
@@ -97,6 +97,13 @@ syncopctx_setfsgroups(int count, const void *groups) |
|
|
|
/* set/reset the ngrps, this is where reset of groups is handled */ |
|
opctx->ngrps = count; |
|
+ |
|
+ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) { |
|
+ /* This is the first time we are storing groups into the TLS structure |
|
+ * so we mark the current thread so that it will be properly cleaned |
|
+ * up when the thread terminates. */ |
|
+ gf_thread_needs_cleanup(); |
|
+ } |
|
opctx->valid |= SYNCOPCTX_GROUPS; |
|
|
|
out: |
|
-- |
|
1.8.3.1 |
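
The hand-off between mem_put() and the per-thread destructor that the
new 'poison' flag implements can be summarized with a short sketch
(illustrative only; field and function names are condensed from the
real mem-pool.c): once the owning thread sets poison under the
spinlock, no concurrent mem_put() will queue objects on that pool_list
again, so its hot/cold lists can be freed without further locking, and
later releases of surviving objects fall back to freeing them directly.

    /* Illustrative only: simplified poison hand-off. */
    #include <pthread.h>
    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct obj { struct obj *next; } obj_t;

    typedef struct pool_list {
        pthread_spinlock_t lock;
        bool poison;       /* true once the owning thread has gone away */
        obj_t *hot_list;
    } pool_list_t;

    /* Called from the owning thread's destructor. */
    static void pool_list_retire(pool_list_t *pl)
    {
        pthread_spin_lock(&pl->lock);
        pl->poison = true;           /* from now on mem_put() won't queue */
        pthread_spin_unlock(&pl->lock);

        while (pl->hot_list != NULL) {       /* safe without the lock now */
            obj_t *next = pl->hot_list->next;
            free(pl->hot_list);
            pl->hot_list = next;
        }
    }

    /* Called by any thread releasing an object allocated from 'pl'. */
    static void mem_put_sketch(pool_list_t *pl, obj_t *obj)
    {
        pthread_spin_lock(&pl->lock);
        if (!pl->poison) {
            obj->next = pl->hot_list;  /* owner alive: recycle the object */
            pl->hot_list = obj;
            pthread_spin_unlock(&pl->lock);
            return;
        }
        pthread_spin_unlock(&pl->lock);
        free(obj);                     /* owner gone: destroy it directly */
    }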
|
|
|
|