You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
216 lines
7.3 KiB
216 lines
7.3 KiB
From 91936fe5ef854bd9d2f91e643795d0e7791b97ba Mon Sep 17 00:00:00 2001 |
|
From: Harpreet Kaur <hlalwani@redhat.com> |
|
Date: Mon, 7 Jan 2019 16:38:25 +0530 |
|
Subject: [PATCH 426/449] geo-rep: Fix for "Transport End Point not connected" |
|
issue |
|
|
|
problem: Geo-rep gsyncd process mounts the master and slave volumes |
|
on master nodes and slave nodes respectively and starts |
|
the sync. But it doesn't wait for the mount to be in ready |
|
state to accept I/O. The gluster mount is considered to be |
|
ready when all the distribute sub-volumes are up. If any of |
|
the distribute subvolumes is not up, it can cause ENOTCONN |
|
error when a lookup arrives for a file that is on the subvol |
|
that is down. |
|
|
|
solution: Added a Virtual Xattr "dht.subvol.status" which returns "1" |
|
if all subvols are up and "0" if any subvol is down. |
|
Geo-rep then uses this virtual xattr after a fresh mount, to |
|
check whether all subvols are up or not and then starts the |
|
I/O. |
|
|
|
>fixes: bz#1664335 |
|
>Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 |
|
>Signed-off-by: Harpreet Kaur <hlalwani@redhat.com> |
|
>Signed-off-by: Kotresh HR <khiremat@redhat.com> |
|
|
|
backport of https://review.gluster.org/#/c/glusterfs/+/22001/ |
|
BUG: 1640573 |
|
Change-Id: If3ad01d728b1372da7c08ccbe75a45bdc1ab2a91 |
|
Signed-off-by: Shwetha K Acharya <sacharya@redhat.com> |
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/202554 |
|
Tested-by: RHGS Build Bot <nigelb@redhat.com> |
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> |
|
--- |
|
geo-replication/syncdaemon/resource.py | 11 ++++++ |
|
geo-replication/syncdaemon/syncdutils.py | 20 +++++++++-- |
|
xlators/cluster/dht/src/dht-common.c | 59 ++++++++++++++++++++++++++++++++ |
|
xlators/cluster/dht/src/dht-common.h | 4 +++ |
|
4 files changed, 91 insertions(+), 3 deletions(-) |
|
|
|
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py |
|
index 189d8a1..0c61de9 100644 |
|
--- a/geo-replication/syncdaemon/resource.py |
|
+++ b/geo-replication/syncdaemon/resource.py |
|
@@ -37,6 +37,7 @@ from syncdutils import ChangelogException, ChangelogHistoryNotAvailable |
|
from syncdutils import get_changelog_log_level, get_rsync_version |
|
from syncdutils import CHANGELOG_AGENT_CLIENT_VERSION |
|
from syncdutils import GX_GFID_CANONICAL_LEN |
|
+from syncdutils import gf_mount_ready |
|
from gsyncdstatus import GeorepStatus |
|
from syncdutils import lf, Popen, sup |
|
from syncdutils import Xattr, matching_disk_gfid, get_gfid_from_mnt |
|
@@ -950,6 +951,16 @@ class Mounter(object): |
|
logging.exception('mount cleanup failure:') |
|
rv = 200 |
|
os._exit(rv) |
|
+ |
|
+ #Polling the dht.subvol.status value. |
|
+ RETRIES = 10 |
|
+ while not gf_mount_ready(): |
|
+ if RETRIES < 0: |
|
+ logging.error('Subvols are not up') |
|
+ break |
|
+ RETRIES -= 1 |
|
+ time.sleep(0.2) |
|
+ |
|
logging.debug('auxiliary glusterfs mount prepared') |
|
|
|
|
|
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py |
|
index b08098e..7560fa1 100644 |
|
--- a/geo-replication/syncdaemon/syncdutils.py |
|
+++ b/geo-replication/syncdaemon/syncdutils.py |
|
@@ -21,8 +21,8 @@ import subprocess |
|
import socket |
|
from subprocess import PIPE |
|
from threading import Lock, Thread as baseThread |
|
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED |
|
-from errno import EINTR, ENOENT, ESTALE, EBUSY, errorcode |
|
+from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED |
|
+from errno import EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode |
|
from signal import signal, SIGTERM |
|
import select as oselect |
|
from os import waitpid as owaitpid |
|
@@ -55,6 +55,8 @@ from rconf import rconf |
|
|
|
from hashlib import sha256 as sha256 |
|
|
|
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP') |
|
+ |
|
# auxiliary gfid based access prefix |
|
_CL_AUX_GFID_PFX = ".gfid/" |
|
ROOT_GFID = "00000000-0000-0000-0000-000000000001" |
|
@@ -100,6 +102,19 @@ def unescape_space_newline(s): |
|
.replace(NEWLINE_ESCAPE_CHAR, "\n")\ |
|
.replace(PERCENTAGE_ESCAPE_CHAR, "%") |
|
|
|
+# gf_mount_ready() returns 1 if all subvols are up, else 0 |
|
+def gf_mount_ready(): |
|
+ ret = errno_wrap(Xattr.lgetxattr, |
|
+ ['.', 'dht.subvol.status', 16], |
|
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM]) |
|
+ |
|
+ if isinstance(ret, int): |
|
+ logging.error("failed to get the xattr value") |
|
+ return 1 |
|
+ ret = ret.rstrip('\x00') |
|
+ if ret == "1": |
|
+ return 1 |
|
+ return 0 |
|
|
|
def norm(s): |
|
if s: |
|
@@ -564,7 +579,6 @@ def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]): |
|
def lstat(e): |
|
return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY]) |
|
|
|
- |
|
def get_gfid_from_mnt(gfidpath): |
|
return errno_wrap(Xattr.lgetxattr, |
|
[gfidpath, 'glusterfs.gfid.string', |
|
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c |
|
index 6aa18f3..23cc80c 100644 |
|
--- a/xlators/cluster/dht/src/dht-common.c |
|
+++ b/xlators/cluster/dht/src/dht-common.c |
|
@@ -4858,6 +4858,60 @@ out: |
|
return 0; |
|
} |
|
|
|
+/* Virtual Xattr which returns 1 if all subvols are up, |
|
+ else returns 0. Geo-rep then uses this virtual xattr |
|
+ after a fresh mount and starts the I/O. |
|
+*/ |
|
+ |
|
+enum dht_vxattr_subvol { |
|
+ DHT_VXATTR_SUBVOLS_UP = 1, |
|
+ DHT_VXATTR_SUBVOLS_DOWN = 0, |
|
+}; |
|
+ |
|
+int |
|
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this, |
|
+ const char *key) |
|
+{ |
|
+ dht_local_t *local = NULL; |
|
+ int ret = -1; |
|
+ int op_errno = ENODATA; |
|
+ int value = DHT_VXATTR_SUBVOLS_UP; |
|
+ int i = 0; |
|
+ dht_conf_t *conf = NULL; |
|
+ |
|
+ conf = this->private; |
|
+ local = frame->local; |
|
+ |
|
+ if (!key) { |
|
+ op_errno = EINVAL; |
|
+ goto out; |
|
+ } |
|
+ local->xattr = dict_new(); |
|
+ if (!local->xattr) { |
|
+ op_errno = ENOMEM; |
|
+ goto out; |
|
+ } |
|
+ for (i = 0; i < conf->subvolume_cnt; i++) { |
|
+ if (!conf->subvolume_status[i]) { |
|
+ value = DHT_VXATTR_SUBVOLS_DOWN; |
|
+ gf_msg_debug(this->name, 0, "subvol %s is down ", |
|
+ conf->subvolumes[i]->name); |
|
+ break; |
|
+ } |
|
+ } |
|
+ ret = dict_set_int8(local->xattr, (char *)key, value); |
|
+ if (ret < 0) { |
|
+ op_errno = -ret; |
|
+ ret = -1; |
|
+ goto out; |
|
+ } |
|
+ ret = 0; |
|
+ |
|
+out: |
|
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL); |
|
+ return 0; |
|
+} |
|
+ |
|
int |
|
dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, |
|
dict_t *xdata) |
|
@@ -4915,6 +4969,11 @@ dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key, |
|
goto err; |
|
} |
|
|
|
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) { |
|
+ dht_vgetxattr_subvol_status(frame, this, key); |
|
+ return 0; |
|
+ } |
|
+ |
|
/* skip over code which is irrelevant if !DHT_IS_DIR(layout) */ |
|
if (!DHT_IS_DIR(layout)) |
|
goto no_dht_is_dir; |
|
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h |
|
index 1b3e826..9ec5b51 100644 |
|
--- a/xlators/cluster/dht/src/dht-common.h |
|
+++ b/xlators/cluster/dht/src/dht-common.h |
|
@@ -45,6 +45,10 @@ |
|
#define DHT_DIR_STAT_BLOCKS 8 |
|
#define DHT_DIR_STAT_SIZE 4096 |
|
|
|
+/* Virtual xattr for subvols status */ |
|
+ |
|
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status" |
|
+ |
|
/* Virtual xattrs for debugging */ |
|
|
|
#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*" |
|
-- |
|
1.8.3.1 |
|
|
|
|