You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
455 lines
16 KiB
455 lines
16 KiB
From ddb0038de77a4269fa7eed1bb217bfb6bed1b7ba Mon Sep 17 00:00:00 2001 |
|
From: N Balachandran <nbalacha@redhat.com> |
|
Date: Fri, 9 Aug 2019 14:34:22 +0530 |
|
Subject: [PATCH 337/344] fuse: Set limit on invalidate queue size |
|
|
|
If the glusterfs fuse client process is unable to |
|
process the invalidate requests quickly enough, the |
|
number of such requests quickly grows large enough |
|
to use a significant amount of memory. |
|
We are now introducing another option to set an upper |
|
limit on these to prevent runaway memory usage. |
|
|
|
> Upstream https://review.gluster.org/23187 |
|
> Change-Id: Iddfff1ee2de1466223e6717f7abd4b28ed947788 |
|
> Fixes: bz#1732717 |
|
> Signed-off-by: N Balachandran <nbalacha@redhat.com> |
|
|
|
BUG: 1763208 |
|
Change-Id: I666cdf6c70999a0f0bc79969e8df0a9dde93b6e4 |
|
Signed-off-by: Csaba Henk <csaba@redhat.com> |
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/187529 |
|
Tested-by: RHGS Build Bot <nigelb@redhat.com> |
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> |
|
--- |
|
doc/mount.glusterfs.8 | 5 +++ |
|
glusterfsd/src/glusterfsd.c | 21 ++++++++++ |
|
glusterfsd/src/glusterfsd.h | 3 +- |
|
libglusterfs/src/glusterfs/glusterfs.h | 1 + |
|
libglusterfs/src/glusterfs/inode.h | 1 + |
|
libglusterfs/src/inode.c | 31 +++++++++++---- |
|
xlators/mount/fuse/src/fuse-bridge.c | 60 ++++++++++++++++++++++------- |
|
xlators/mount/fuse/src/fuse-bridge.h | 3 +- |
|
xlators/mount/fuse/utils/mount.glusterfs.in | 7 ++++ |
|
9 files changed, 108 insertions(+), 24 deletions(-) |
|
|
|
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8 |
|
index 286631b..b35b362 100644 |
|
--- a/doc/mount.glusterfs.8 |
|
+++ b/doc/mount.glusterfs.8 |
|
@@ -126,6 +126,11 @@ Provide list of backup volfile servers in the following format [default: None] |
|
Set fuse module's limit for number of inodes kept in LRU list to N [default: 131072] |
|
.TP |
|
.TP |
|
+\fBinvalidate-limit=\fRN |
|
+Suspend fuse invalidations implied by 'lru-limit' if number of outstanding |
|
+invalidations reaches N |
|
+.TP |
|
+.TP |
|
\fBbackground-qlen=\fRN |
|
Set fuse module's background queue length to N [default: 64] |
|
.TP |
|
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c |
|
index 5b5e996..0856471 100644 |
|
--- a/glusterfsd/src/glusterfsd.c |
|
+++ b/glusterfsd/src/glusterfsd.c |
|
@@ -212,6 +212,9 @@ static struct argp_option gf_options[] = { |
|
{"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0, |
|
"Set fuse module's limit for number of inodes kept in LRU list to N " |
|
"[default: 131072]"}, |
|
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0, |
|
+ "Suspend inode invalidations implied by 'lru-limit' if the number of " |
|
+ "outstanding invalidations reaches N"}, |
|
{"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0, |
|
"Set fuse module's background queue length to N " |
|
"[default: 64]"}, |
|
@@ -504,6 +507,16 @@ set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options) |
|
} |
|
} |
|
|
|
+ if (cmd_args->invalidate_limit >= 0) { |
|
+ ret = dict_set_int32(options, "invalidate-limit", |
|
+ cmd_args->invalidate_limit); |
|
+ if (ret < 0) { |
|
+ gf_msg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_4, |
|
+ "invalidate-limit"); |
|
+ goto err; |
|
+ } |
|
+ } |
|
+ |
|
if (cmd_args->background_qlen) { |
|
ret = dict_set_int32(options, "background-qlen", |
|
cmd_args->background_qlen); |
|
@@ -1283,6 +1296,14 @@ parse_opts(int key, char *arg, struct argp_state *state) |
|
argp_failure(state, -1, 0, "unknown LRU limit option %s", arg); |
|
break; |
|
|
|
+ case ARGP_FUSE_INVALIDATE_LIMIT_KEY: |
|
+ if (!gf_string2int32(arg, &cmd_args->invalidate_limit)) |
|
+ break; |
|
+ |
|
+ argp_failure(state, -1, 0, "unknown invalidate limit option %s", |
|
+ arg); |
|
+ break; |
|
+ |
|
case ARGP_FUSE_BACKGROUND_QLEN_KEY: |
|
if (!gf_string2int(arg, &cmd_args->background_qlen)) |
|
break; |
|
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h |
|
index fa55789..ee655f0 100644 |
|
--- a/glusterfsd/src/glusterfsd.h |
|
+++ b/glusterfsd/src/glusterfsd.h |
|
@@ -111,7 +111,8 @@ enum argp_option_keys { |
|
ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189, |
|
ARGP_FUSE_LRU_LIMIT_KEY = 190, |
|
ARGP_FUSE_AUTO_INVAL_KEY = 191, |
|
- ARGP_BRICK_MUX_KEY = 192 |
|
+ ARGP_BRICK_MUX_KEY = 192, |
|
+ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195, |
|
}; |
|
|
|
struct _gfd_vol_top_priv { |
|
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h |
|
index 79c93ae..3b594c0 100644 |
|
--- a/libglusterfs/src/glusterfs/glusterfs.h |
|
+++ b/libglusterfs/src/glusterfs/glusterfs.h |
|
@@ -541,6 +541,7 @@ struct _cmd_args { |
|
int client_pid_set; |
|
unsigned uid_map_root; |
|
int32_t lru_limit; |
|
+ int32_t invalidate_limit; |
|
int background_qlen; |
|
int congestion_threshold; |
|
char *fuse_mountopts; |
|
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h |
|
index 52efdd8..4421c47 100644 |
|
--- a/libglusterfs/src/glusterfs/inode.h |
|
+++ b/libglusterfs/src/glusterfs/inode.h |
|
@@ -107,6 +107,7 @@ struct _inode { |
|
struct list_head list; /* active/lru/purge */ |
|
|
|
struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */ |
|
+ bool in_invalidate_list; /* Set if inode is in table invalidate list */ |
|
bool invalidate_sent; /* Set it if invalidator_fn is called for inode */ |
|
}; |
|
|
|
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c |
|
index 96ddea5..5331e93 100644 |
|
--- a/libglusterfs/src/inode.c |
|
+++ b/libglusterfs/src/inode.c |
|
@@ -558,8 +558,8 @@ __inode_unref(inode_t *inode, bool clear) |
|
|
|
this = THIS; |
|
|
|
- if (clear && inode->invalidate_sent) { |
|
- inode->invalidate_sent = false; |
|
+ if (clear && inode->in_invalidate_list) { |
|
+ inode->in_invalidate_list = false; |
|
inode->table->invalidate_size--; |
|
__inode_activate(inode); |
|
} |
|
@@ -573,7 +573,7 @@ __inode_unref(inode_t *inode, bool clear) |
|
inode->_ctx[index].ref--; |
|
} |
|
|
|
- if (!inode->ref && !inode->invalidate_sent) { |
|
+ if (!inode->ref && !inode->in_invalidate_list) { |
|
inode->table->active_size--; |
|
|
|
nlookup = GF_ATOMIC_GET(inode->nlookup); |
|
@@ -609,14 +609,14 @@ __inode_ref(inode_t *inode, bool is_invalidate) |
|
return inode; |
|
|
|
if (!inode->ref) { |
|
- if (inode->invalidate_sent) { |
|
- inode->invalidate_sent = false; |
|
+ if (inode->in_invalidate_list) { |
|
+ inode->in_invalidate_list = false; |
|
inode->table->invalidate_size--; |
|
} else { |
|
inode->table->lru_size--; |
|
} |
|
if (is_invalidate) { |
|
- inode->invalidate_sent = true; |
|
+ inode->in_invalidate_list = true; |
|
inode->table->invalidate_size++; |
|
list_move_tail(&inode->list, &inode->table->invalidate); |
|
} else { |
|
@@ -1609,6 +1609,7 @@ static int |
|
inode_table_prune(inode_table_t *table) |
|
{ |
|
int ret = 0; |
|
+ int ret1 = 0; |
|
struct list_head purge = { |
|
0, |
|
}; |
|
@@ -1647,6 +1648,10 @@ inode_table_prune(inode_table_t *table) |
|
/* check for valid inode with 'nlookup' */ |
|
nlookup = GF_ATOMIC_GET(entry->nlookup); |
|
if (nlookup) { |
|
+ if (entry->invalidate_sent) { |
|
+ list_move_tail(&entry->list, &table->lru); |
|
+ continue; |
|
+ } |
|
__inode_ref(entry, true); |
|
tmp = entry; |
|
break; |
|
@@ -1668,9 +1673,19 @@ inode_table_prune(inode_table_t *table) |
|
if (tmp) { |
|
xlator_t *old_THIS = THIS; |
|
THIS = table->invalidator_xl; |
|
- table->invalidator_fn(table->invalidator_xl, tmp); |
|
+ ret1 = table->invalidator_fn(table->invalidator_xl, tmp); |
|
THIS = old_THIS; |
|
- inode_unref(tmp); |
|
+ pthread_mutex_lock(&table->lock); |
|
+ { |
|
+ if (!ret1) { |
|
+ tmp->invalidate_sent = true; |
|
+ __inode_unref(tmp, false); |
|
+ } else { |
|
+ /* Move this back to the lru list*/ |
|
+ __inode_unref(tmp, true); |
|
+ } |
|
+ } |
|
+ pthread_mutex_unlock(&table->lock); |
|
} |
|
|
|
/* Just so that if purge list is handled too, then clear it off */ |
|
diff --git a/xlators/mount/fuse/src/fuse-bridge.c b/xlators/mount/fuse/src/fuse-bridge.c |
|
index 1c946a2..8b2e7f0 100644 |
|
--- a/xlators/mount/fuse/src/fuse-bridge.c |
|
+++ b/xlators/mount/fuse/src/fuse-bridge.c |
|
@@ -26,7 +26,7 @@ static int gf_fuse_xattr_enotsup_log; |
|
void |
|
fini(xlator_t *this_xl); |
|
|
|
-static void |
|
+static int32_t |
|
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino); |
|
|
|
/* |
|
@@ -312,7 +312,7 @@ send_fuse_data(xlator_t *this, fuse_in_header_t *finh, void *data, size_t size) |
|
#define send_fuse_obj(this, finh, obj) \ |
|
send_fuse_data(this, finh, obj, sizeof(*(obj))) |
|
|
|
-static void |
|
+static int32_t |
|
fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) |
|
{ |
|
#if FUSE_KERNEL_MINOR_VERSION >= 11 |
|
@@ -328,17 +328,22 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) |
|
|
|
priv = this->private; |
|
if (!priv->reverse_fuse_thread_started) |
|
- return; |
|
+ return -1; |
|
+ |
|
+ if (priv->invalidate_limit && |
|
+ (priv->invalidate_count >= priv->invalidate_limit)) { |
|
+ return -1; |
|
+ } |
|
|
|
inode = (inode_t *)(unsigned long)fuse_ino; |
|
if (inode == NULL) |
|
- return; |
|
+ return -1; |
|
|
|
list_for_each_entry_safe(dentry, tmp, &inode->dentry_list, inode_list) |
|
{ |
|
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); |
|
if (node == NULL) |
|
- break; |
|
+ return -1; |
|
|
|
INIT_LIST_HEAD(&node->next); |
|
|
|
@@ -375,20 +380,21 @@ fuse_invalidate_entry(xlator_t *this, uint64_t fuse_ino) |
|
pthread_mutex_lock(&priv->invalidate_mutex); |
|
{ |
|
list_add_tail(&node->next, &priv->invalidate_list); |
|
+ priv->invalidate_count++; |
|
pthread_cond_signal(&priv->invalidate_cond); |
|
} |
|
pthread_mutex_unlock(&priv->invalidate_mutex); |
|
} |
|
|
|
#endif |
|
- return; |
|
+ return 0; |
|
} |
|
|
|
/* |
|
* Send an inval inode notification to fuse. This causes an invalidation of the |
|
* entire page cache mapping on the inode. |
|
*/ |
|
-static void |
|
+static int32_t |
|
fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) |
|
{ |
|
#if FUSE_KERNEL_MINOR_VERSION >= 11 |
|
@@ -401,15 +407,20 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) |
|
priv = this->private; |
|
|
|
if (!priv->reverse_fuse_thread_started) |
|
- return; |
|
+ return -1; |
|
+ |
|
+ if (priv->invalidate_limit && |
|
+ (priv->invalidate_count >= priv->invalidate_limit)) { |
|
+ return -1; |
|
+ } |
|
|
|
inode = (inode_t *)(unsigned long)fuse_ino; |
|
if (inode == NULL) |
|
- return; |
|
+ return -1; |
|
|
|
node = GF_CALLOC(1, sizeof(*node), gf_fuse_mt_invalidate_node_t); |
|
if (node == NULL) |
|
- return; |
|
+ return -1; |
|
|
|
INIT_LIST_HEAD(&node->next); |
|
|
|
@@ -435,6 +446,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) |
|
pthread_mutex_lock(&priv->invalidate_mutex); |
|
{ |
|
list_add_tail(&node->next, &priv->invalidate_list); |
|
+ priv->invalidate_count++; |
|
pthread_cond_signal(&priv->invalidate_cond); |
|
} |
|
pthread_mutex_unlock(&priv->invalidate_mutex); |
|
@@ -443,7 +455,7 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) |
|
gf_log("glusterfs-fuse", GF_LOG_WARNING, |
|
"fuse_invalidate_inode not implemented on this system"); |
|
#endif |
|
- return; |
|
+ return 0; |
|
} |
|
|
|
#if FUSE_KERNEL_MINOR_VERSION >= 11 |
|
@@ -451,8 +463,9 @@ fuse_invalidate_inode(xlator_t *this, uint64_t fuse_ino) |
|
static int32_t |
|
fuse_inode_invalidate_fn(xlator_t *this, inode_t *inode) |
|
{ |
|
- fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); |
|
- return 0; |
|
+ int32_t ret = 0; |
|
+ ret = fuse_invalidate_entry(this, (uint64_t)(uintptr_t)inode); |
|
+ return ret; |
|
} |
|
#endif |
|
|
|
@@ -4003,7 +4016,9 @@ fuse_setxattr(xlator_t *this, fuse_in_header_t *finh, void *msg, |
|
gf_log("fuse", GF_LOG_TRACE, "got request to invalidate %" PRIu64, |
|
finh->nodeid); |
|
#if FUSE_KERNEL_MINOR_VERSION >= 11 |
|
- fuse_invalidate_entry(this, finh->nodeid); |
|
+ ret = fuse_invalidate_entry(this, finh->nodeid); |
|
+ if (ret) |
|
+ op_errno = EBUSY; |
|
#endif |
|
goto done; |
|
} |
|
@@ -4812,6 +4827,7 @@ notify_kernel_loop(void *data) |
|
fuse_invalidate_node_t, next); |
|
|
|
list_del_init(&node->next); |
|
+ priv->invalidate_count--; |
|
} |
|
pthread_mutex_unlock(&priv->invalidate_mutex); |
|
|
|
@@ -4855,6 +4871,7 @@ notify_kernel_loop(void *data) |
|
list_del_init(&node->next); |
|
GF_FREE(node); |
|
} |
|
+ priv->invalidate_count = 0; |
|
} |
|
pthread_mutex_unlock(&priv->invalidate_mutex); |
|
|
|
@@ -6080,6 +6097,9 @@ fuse_priv_dump(xlator_t *this) |
|
(int)private->timed_response_fuse_thread_started); |
|
gf_proc_dump_write("reverse_thread_started", "%d", |
|
(int)private->reverse_fuse_thread_started); |
|
+ gf_proc_dump_write("invalidate_limit", "%u", private->invalidate_limit); |
|
+ gf_proc_dump_write("invalidate_queue_length", "%" PRIu64, |
|
+ private->invalidate_count); |
|
gf_proc_dump_write("use_readdirp", "%d", private->use_readdirp); |
|
|
|
return 0; |
|
@@ -6619,6 +6639,9 @@ init(xlator_t *this_xl) |
|
|
|
GF_OPTION_INIT("lru-limit", priv->lru_limit, uint32, cleanup_exit); |
|
|
|
+ GF_OPTION_INIT("invalidate-limit", priv->invalidate_limit, uint32, |
|
+ cleanup_exit); |
|
+ |
|
GF_OPTION_INIT("event-history", priv->event_history, bool, cleanup_exit); |
|
|
|
GF_OPTION_INIT("thin-client", priv->thin_client, bool, cleanup_exit); |
|
@@ -6955,6 +6978,15 @@ struct volume_options options[] = { |
|
"reaching this limit (0 means 'unlimited')", |
|
}, |
|
{ |
|
+ .key = {"invalidate-limit"}, |
|
+ .type = GF_OPTION_TYPE_INT, |
|
+ .default_value = "0", |
|
+ .min = 0, |
|
+ .description = "suspend invalidations as of 'lru-limit' if the number " |
|
+ "of outstanding invalidations reaches this limit " |
|
+ "(0 means 'unlimited')", |
|
+ }, |
|
+ { |
|
.key = {"auto-invalidation"}, |
|
.type = GF_OPTION_TYPE_BOOL, |
|
.default_value = "true", |
|
diff --git a/xlators/mount/fuse/src/fuse-bridge.h b/xlators/mount/fuse/src/fuse-bridge.h |
|
index 697bd88..2311582 100644 |
|
--- a/xlators/mount/fuse/src/fuse-bridge.h |
|
+++ b/xlators/mount/fuse/src/fuse-bridge.h |
|
@@ -139,7 +139,7 @@ struct fuse_private { |
|
pthread_cond_t invalidate_cond; |
|
pthread_mutex_t invalidate_mutex; |
|
gf_boolean_t reverse_fuse_thread_started; |
|
- |
|
+ uint64_t invalidate_count; |
|
/* For communicating with separate mount thread. */ |
|
int status_pipe[2]; |
|
|
|
@@ -191,6 +191,7 @@ struct fuse_private { |
|
|
|
/* LRU Limit, if not set, default is 128k for now */ |
|
uint32_t lru_limit; |
|
+ uint32_t invalidate_limit; |
|
}; |
|
typedef struct fuse_private fuse_private_t; |
|
|
|
diff --git a/xlators/mount/fuse/utils/mount.glusterfs.in b/xlators/mount/fuse/utils/mount.glusterfs.in |
|
index cbde42d..61d7422 100755 |
|
--- a/xlators/mount/fuse/utils/mount.glusterfs.in |
|
+++ b/xlators/mount/fuse/utils/mount.glusterfs.in |
|
@@ -257,6 +257,10 @@ start_glusterfs () |
|
cmd_line=$(echo "$cmd_line --lru-limit=$lru_limit"); |
|
fi |
|
|
|
+ if [ -n "$invalidate_limit" ]; then |
|
+ cmd_line=$(echo "$cmd_line --invalidate-limit=$invalidate_limit"); |
|
+ fi |
|
+ |
|
if [ -n "$bg_qlen" ]; then |
|
cmd_line=$(echo "$cmd_line --background-qlen=$bg_qlen"); |
|
fi |
|
@@ -505,6 +509,9 @@ with_options() |
|
"lru-limit") |
|
lru_limit=$value |
|
;; |
|
+ "invalidate-limit") |
|
+ invalidate_limit=$value |
|
+ ;; |
|
"background-qlen") |
|
bg_qlen=$value |
|
;; |
|
-- |
|
1.8.3.1 |
|
|
|
|