You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
289 lines
12 KiB
289 lines
12 KiB
From 69ac1fd2da7a57f2f0854412863911959bf71fde Mon Sep 17 00:00:00 2001 |
|
From: Sunny Kumar <sunkumar@redhat.com> |
|
Date: Tue, 2 Apr 2019 12:38:09 +0530 |
|
Subject: [PATCH 161/169] geo-rep: Fix rename with existing destination with |
|
same gfid |
|
|
|
Problem: |
|
Geo-rep fails to sync the rename properly if destination exists. |
|
It results in source to be remained on slave causing more number of |
|
files on slave. Also heavy rename workload like logrotate caused |
|
lot of ESTALE errors |
|
|
|
Cause: |
|
Geo-rep fails to sync rename if destination exists if creation |
|
of source file also falls into single batch of changelogs being |
|
processed. This is because, after fixing problematic gfids verifying |
|
from master, while re-processing original entries, CREATE also was |
|
re-processed causing more files on slave and rename to be failed. |
|
|
|
Solution: |
|
Entries need to be removed from retrial list after fixing |
|
problematic gfids on slave so that it's not re-created again on slave. |
|
Also treat ESTALE as EEXIST so that the error is properly handled |
|
verifying the op on master volume. |
|
|
|
Backport of: |
|
> Patch: https://review.gluster.org/22519 |
|
> Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9 |
|
> BUG: 1694820 |
|
> Signed-off-by: Kotresh HR <khiremat@redhat.com> |
|
> Signed-off-by: Sunny Kumar <sunkumar@redhat.com> |
|
|
|
Change-Id: I50cf289e06b997adddff0552bf2466d9201dd1f9 |
|
fixes: bz#1708121 |
|
Signed-off-by: Kotresh HR <khiremat@redhat.com> |
|
Signed-off-by: Sunny Kumar <sunkumar@redhat.com> |
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/172393 |
|
Tested-by: RHGS Build Bot <nigelb@redhat.com> |
|
Reviewed-by: Atin Mukherjee <amukherj@redhat.com> |
|
--- |
|
geo-replication/syncdaemon/master.py | 41 +++++++++++++++++++++-- |
|
geo-replication/syncdaemon/resource.py | 10 ++++-- |
|
tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t | 5 +++ |
|
tests/00-geo-rep/georep-basic-dr-rsync.t | 5 +++ |
|
tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t | 5 +++ |
|
tests/00-geo-rep/georep-basic-dr-tarssh.t | 5 +++ |
|
tests/geo-rep.rc | 36 ++++++++++++++++++++ |
|
7 files changed, 102 insertions(+), 5 deletions(-) |
|
|
|
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py |
|
index 3da7610..42c86d7 100644 |
|
--- a/geo-replication/syncdaemon/master.py |
|
+++ b/geo-replication/syncdaemon/master.py |
|
@@ -65,6 +65,9 @@ def _volinfo_hook_relax_foreign(self): |
|
def edct(op, **ed): |
|
dct = {} |
|
dct['op'] = op |
|
+ # This is used in automatic gfid conflict resolution. |
|
+ # When marked True, it's skipped during re-processing. |
|
+ dct['skip_entry'] = False |
|
for k in ed: |
|
if k == 'stat': |
|
st = ed[k] |
|
@@ -792,6 +795,7 @@ class GMasterChangelogMixin(GMasterCommon): |
|
pfx = gauxpfx() |
|
fix_entry_ops = [] |
|
failures1 = [] |
|
+ remove_gfids = set() |
|
for failure in failures: |
|
if failure[2]['name_mismatch']: |
|
pbname = failure[2]['slave_entry'] |
|
@@ -822,6 +826,18 @@ class GMasterChangelogMixin(GMasterCommon): |
|
edct('UNLINK', |
|
gfid=failure[2]['slave_gfid'], |
|
entry=pbname)) |
|
+ remove_gfids.add(slave_gfid) |
|
+ if op in ['RENAME']: |
|
+ # If renamed gfid doesn't exists on master, remove |
|
+ # rename entry and unlink src on slave |
|
+ st = lstat(os.path.join(pfx, failure[0]['gfid'])) |
|
+ if isinstance(st, int) and st == ENOENT: |
|
+ logging.debug("Unlink source %s" % repr(failure)) |
|
+ remove_gfids.add(failure[0]['gfid']) |
|
+ fix_entry_ops.append( |
|
+ edct('UNLINK', |
|
+ gfid=failure[0]['gfid'], |
|
+ entry=failure[0]['entry'])) |
|
# Takes care of scenarios of hardlinks/renames on master |
|
elif not isinstance(st, int): |
|
if matching_disk_gfid(slave_gfid, pbname): |
|
@@ -831,7 +847,12 @@ class GMasterChangelogMixin(GMasterCommon): |
|
' Safe to ignore, take out entry', |
|
retry_count=retry_count, |
|
entry=repr(failure))) |
|
- entries.remove(failure[0]) |
|
+ remove_gfids.add(failure[0]['gfid']) |
|
+ if op == 'RENAME': |
|
+ fix_entry_ops.append( |
|
+ edct('UNLINK', |
|
+ gfid=failure[0]['gfid'], |
|
+ entry=failure[0]['entry'])) |
|
# The file exists on master but with different name. |
|
# Probably renamed and got missed during xsync crawl. |
|
elif failure[2]['slave_isdir']: |
|
@@ -856,7 +877,10 @@ class GMasterChangelogMixin(GMasterCommon): |
|
'take out entry', |
|
retry_count=retry_count, |
|
entry=repr(failure))) |
|
- entries.remove(failure[0]) |
|
+ try: |
|
+ entries.remove(failure[0]) |
|
+ except ValueError: |
|
+ pass |
|
else: |
|
rename_dict = edct('RENAME', gfid=slave_gfid, |
|
entry=src_entry, |
|
@@ -896,7 +920,10 @@ class GMasterChangelogMixin(GMasterCommon): |
|
'ignore, take out entry', |
|
retry_count=retry_count, |
|
entry=repr(failure))) |
|
- entries.remove(failure[0]) |
|
+ try: |
|
+ entries.remove(failure[0]) |
|
+ except ValueError: |
|
+ pass |
|
else: |
|
logging.info(lf('Fixing ENOENT error in slave. Create ' |
|
'parent directory on slave.', |
|
@@ -913,6 +940,14 @@ class GMasterChangelogMixin(GMasterCommon): |
|
edct('MKDIR', gfid=pargfid, entry=dir_entry, |
|
mode=st.st_mode, uid=st.st_uid, gid=st.st_gid)) |
|
|
|
+ logging.debug("remove_gfids: %s" % repr(remove_gfids)) |
|
+ if remove_gfids: |
|
+ for e in entries: |
|
+ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \ |
|
+ and e['gfid'] in remove_gfids: |
|
+ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e)) |
|
+ e['skip_entry'] = True |
|
+ |
|
if fix_entry_ops: |
|
# Process deletions of entries whose gfids are mismatched |
|
failures1 = self.slave.server.entry_ops(fix_entry_ops) |
|
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py |
|
index c290d86..f54ccd9 100644 |
|
--- a/geo-replication/syncdaemon/resource.py |
|
+++ b/geo-replication/syncdaemon/resource.py |
|
@@ -426,7 +426,7 @@ class Server(object): |
|
e['stat']['uid'] = uid |
|
e['stat']['gid'] = gid |
|
|
|
- if cmd_ret == EEXIST: |
|
+ if cmd_ret in [EEXIST, ESTALE]: |
|
if dst: |
|
en = e['entry1'] |
|
else: |
|
@@ -510,6 +510,12 @@ class Server(object): |
|
entry = e['entry'] |
|
uid = 0 |
|
gid = 0 |
|
+ |
|
+ # Skip entry processing if it's marked true during gfid |
|
+ # conflict resolution |
|
+ if e['skip_entry']: |
|
+ continue |
|
+ |
|
if e.get("stat", {}): |
|
# Copy UID/GID value and then reset to zero. Copied UID/GID |
|
# will be used to run chown once entry is created. |
|
@@ -688,7 +694,7 @@ class Server(object): |
|
if blob: |
|
cmd_ret = errno_wrap(Xattr.lsetxattr, |
|
[pg, 'glusterfs.gfid.newfile', blob], |
|
- [EEXIST, ENOENT], |
|
+ [EEXIST, ENOENT, ESTALE], |
|
[ESTALE, EINVAL, EBUSY]) |
|
collect_failure(e, cmd_ret, uid, gid) |
|
|
|
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t |
|
index 67ac167..1a55ed2 100644 |
|
--- a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t |
|
+++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t |
|
@@ -203,6 +203,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} |
|
TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file" |
|
TEST "echo sampledata > $master_mnt/rsync_option_test_file" |
|
|
|
+#rename with existing destination case BUG:1694820 |
|
+TEST create_rename_with_existing_destination ${master_mnt} |
|
+#verify rename with existing destination case BUG:1694820 |
|
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} |
|
+ |
|
#Verify arequal for whole volume |
|
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} |
|
|
|
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t |
|
index 8b64370..d0c0fc9 100644 |
|
--- a/tests/00-geo-rep/georep-basic-dr-rsync.t |
|
+++ b/tests/00-geo-rep/georep-basic-dr-rsync.t |
|
@@ -204,6 +204,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} |
|
TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file" |
|
TEST "echo sampledata > $master_mnt/rsync_option_test_file" |
|
|
|
+#rename with existing destination case BUG:1694820 |
|
+TEST create_rename_with_existing_destination ${master_mnt} |
|
+#verify rename with existing destination case BUG:1694820 |
|
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} |
|
+ |
|
#Verify arequal for whole volume |
|
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} |
|
|
|
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t |
|
index 1726d0b..cb530ad 100644 |
|
--- a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t |
|
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t |
|
@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/s |
|
#rsnapshot usecase |
|
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} |
|
|
|
+#rename with existing destination case BUG:1694820 |
|
+TEST create_rename_with_existing_destination ${master_mnt} |
|
+#verify rename with existing destination case BUG:1694820 |
|
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} |
|
+ |
|
#Verify arequal for whole volume |
|
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} |
|
|
|
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t |
|
index c5d16ac..9e2f613 100644 |
|
--- a/tests/00-geo-rep/georep-basic-dr-tarssh.t |
|
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t |
|
@@ -202,6 +202,11 @@ EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt} |
|
#rsnapshot usecase |
|
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt} |
|
|
|
+#rename with existing destination case BUG:1694820 |
|
+TEST create_rename_with_existing_destination ${master_mnt} |
|
+#verify rename with existing destination case BUG:1694820 |
|
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt} |
|
+ |
|
#Verify arequal for whole volume |
|
EXPECT_WITHIN $GEO_REP_TIMEOUT 0 arequal_checksum ${master_mnt} ${slave_mnt} |
|
|
|
diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc |
|
index d723129..e357ba8 100644 |
|
--- a/tests/geo-rep.rc |
|
+++ b/tests/geo-rep.rc |
|
@@ -403,3 +403,39 @@ function check_slave_read_only() |
|
gluster volume info $1 | grep 'features.read-only: on' |
|
echo $? |
|
} |
|
+ |
|
+function create_rename_with_existing_destination() |
|
+{ |
|
+ dir=$1/rename_with_existing_destination |
|
+ mkdir $dir |
|
+ for i in {1..5} |
|
+ do |
|
+ echo "Data_set$i" > $dir/data_set$i |
|
+ mv $dir/data_set$i $dir/data_set -f |
|
+ done |
|
+} |
|
+ |
|
+function verify_rename_with_existing_destination() |
|
+{ |
|
+ dir=$1/rename_with_existing_destination |
|
+ |
|
+ if [ ! -d $dir ]; then |
|
+ echo 1 |
|
+ elif [ ! -f $dir/data_set ]; then |
|
+ echo 2 |
|
+ elif [ -f $dir/data_set1 ]; then |
|
+ echo 3 |
|
+ elif [ -f $dir/data_set2 ]; then |
|
+ echo 4 |
|
+ elif [ -f $dir/data_set3 ]; then |
|
+ echo 5 |
|
+ elif [ -f $dir/data_set4 ]; then |
|
+ echo 6 |
|
+ elif [ -f $dir/data_set5 ]; then |
|
+ echo 7 |
|
+ elif test "XData_set5" != "X$(cat $dir/data_set)"; then |
|
+ echo 8 |
|
+ else |
|
+ echo 0 |
|
+ fi |
|
+} |
|
-- |
|
1.8.3.1 |
|
|
|
|