You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
6.9 KiB
182 lines
6.9 KiB
3 years ago
|
From 80eef2f52bb92ed740ac00eeb11ee7a3e7fffff2 Mon Sep 17 00:00:00 2001
|
||
|
From: Raghavendra Bhat <raghavendra@redhat.com>
|
||
|
Date: Mon, 11 Mar 2019 12:16:50 -0400
|
||
|
Subject: [PATCH 459/465] features/bit-rot: Unconditionally sign the files
|
||
|
during oneshot crawl
|
||
|
|
||
|
Currently the bit-rot feature has an issue with disabling and re-enabling it
|
||
|
on the same volume. Consider enabling bit-rot detection which goes on to
|
||
|
crawl and sign all the files present in the volume. Then some files are
|
||
|
modified and the bit-rot daemon goes on to sign the modified files with
|
||
|
the correct signature. Now, disable bit-rot feature. While signing and
|
||
|
scrubbing are not happening, previous checksums of the files continue to
|
||
|
exist as extended attributes. Now, if some files with checksum xattrs get
|
||
|
modified, they are not signed with new signature as the feature is off.
|
||
|
|
||
|
At this point, if the feature is enabled again, the bit rot daemon will
|
||
|
go and sign those files which do not have any bit-rot specific xattrs
|
||
|
(i.e. those files which were created after bit-rot was disabled). Whereas
|
||
|
the files with bit-rot xattrs won't get signed with the proper new checksum.
|
||
|
At this point, if the scrubber runs, it finds the on-disk checksum and the actual
|
||
|
checksum of the file to be different (because the file got modified) and
|
||
|
marks the file as corrupted.
|
||
|
|
||
|
FIX:
|
||
|
|
||
|
The fix is to unconditionally sign the files when the bit-rot daemon
|
||
|
comes up (instead of skipping the files with bit-rot xattrs).
|
||
|
|
||
|
upstream fix:
|
||
|
> patch: https://review.gluster.org/#/c/glusterfs/+/22360/
|
||
|
> fixes: #bz1700078
|
||
|
> Change-ID: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
|
||
|
|
||
|
Change-Id: Iadfb47dd39f7e2e77f22d549a4a07a385284f4f5
|
||
|
BUG: 1851424
|
||
|
Signed-off-by: Raghavendra M <raghavendra@redhat.com>
|
||
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/208305
|
||
|
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
||
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com>
|
||
|
---
|
||
|
tests/bitrot/bug-1700078.t | 87 +++++++++++++++++++++++++++++
|
||
|
xlators/features/bit-rot/src/bitd/bit-rot.c | 15 ++++-
|
||
|
2 files changed, 101 insertions(+), 1 deletion(-)
|
||
|
create mode 100644 tests/bitrot/bug-1700078.t
|
||
|
|
||
|
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
|
||
|
new file mode 100644
|
||
|
index 0000000..f273742
|
||
|
--- /dev/null
|
||
|
+++ b/tests/bitrot/bug-1700078.t
|
||
|
@@ -0,0 +1,87 @@
|
||
|
+#!/bin/bash
|
||
|
+
|
||
|
+. $(dirname $0)/../include.rc
|
||
|
+. $(dirname $0)/../volume.rc
|
||
|
+
|
||
|
+cleanup;
|
||
|
+
|
||
|
+## Start glusterd
|
||
|
+TEST glusterd;
|
||
|
+TEST pidof glusterd;
|
||
|
+
|
||
|
+## Lets create and start the volume
|
||
|
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
|
||
|
+TEST $CLI volume start $V0
|
||
|
+
|
||
|
+## Enable bitrot for volume $V0
|
||
|
+TEST $CLI volume bitrot $V0 enable
|
||
|
+
|
||
|
+## Turn off quick-read so that it won't cache the contents
|
||
|
+# of the file in lookup. For corrupted files, it might
|
||
|
+# end up in reads being served from the cache instead of
|
||
|
+# an error.
|
||
|
+TEST $CLI volume set $V0 performance.quick-read off
|
||
|
+
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
|
||
|
+
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
|
||
|
+
|
||
|
+## Set expiry-timeout to 1 sec
|
||
|
+TEST $CLI volume set $V0 features.expiry-time 1
|
||
|
+
|
||
|
+##Mount $V0
|
||
|
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
|
||
|
+
|
||
|
+## Turn off quick-read xlator so that the contents are not served from the
|
||
|
+# quick-read cache.
|
||
|
+TEST $CLI volume set $V0 performance.quick-read off
|
||
|
+
|
||
|
+#Create sample file
|
||
|
+TEST `echo "1234" > $M0/FILE1`
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
|
||
|
+
|
||
|
+##disable bitrot
|
||
|
+TEST $CLI volume bitrot $V0 disable
|
||
|
+
|
||
|
+## modify the file
|
||
|
+TEST `echo "write" >> $M0/FILE1`
|
||
|
+
|
||
|
+# unmount and remount when the file has to be accessed.
|
||
|
+# This is to ensure that, when the remount happens,
|
||
|
+# and the file is read, its contents are served from the
|
||
|
+# brick instead of cache.
|
||
|
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
|
||
|
+
|
||
|
+##enable bitrot
|
||
|
+TEST $CLI volume bitrot $V0 enable
|
||
|
+
|
||
|
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
|
||
|
+# oneshot crawler to finish its crawling and sign the file properly.
|
||
|
+sleep 2
|
||
|
+
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
|
||
|
+
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
|
||
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
|
||
|
+
|
||
|
+## Ondemand scrub
|
||
|
+TEST $CLI volume bitrot $V0 scrub ondemand
|
||
|
+
|
||
|
+# the scrub ondemand CLI command just ensures that
|
||
|
+# the scrubber has received the ondemand scrub directive
|
||
|
+# and started. sleep for 2 seconds for scrubber to finish
|
||
|
+# crawling and marking file(s) as bad (if it finds that
|
||
|
+# corruption has happened) which are filesystem operations.
|
||
|
+sleep 2
|
||
|
+
|
||
|
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
|
||
|
+
|
||
|
+##Mount $V0
|
||
|
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
|
||
|
+
|
||
|
+TEST cat $M0/FILE1
|
||
|
+
|
||
|
+cleanup;
|
||
|
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
||
|
index b8feef7..424c0d5 100644
|
||
|
--- a/xlators/features/bit-rot/src/bitd/bit-rot.c
|
||
|
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
|
||
|
@@ -973,6 +973,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
|
||
|
int32_t ret = -1;
|
||
|
inode_t *linked_inode = NULL;
|
||
|
gf_boolean_t need_signing = _gf_false;
|
||
|
+ gf_boolean_t need_reopen = _gf_true;
|
||
|
|
||
|
GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
|
||
|
GF_VALIDATE_OR_GOTO("bit-rot", data, out);
|
||
|
@@ -1046,6 +1047,18 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
|
||
|
uuid_utoa(linked_inode->gfid));
|
||
|
} else {
|
||
|
need_signing = br_check_object_need_sign(this, xattr, child);
|
||
|
+
|
||
|
+ /*
|
||
|
+ * If we are here, the bitrot daemon has started. Is it just
|
||
|
+ * a simple restart of the daemon or is it started because the
|
||
|
+ * feature is enabled is something hard to determine. Hence,
|
||
|
+ * if need_signing is false (because bit-rot version and signature
|
||
|
+ * are present), then still go ahead and sign it.
|
||
|
+ */
|
||
|
+ if (!need_signing) {
|
||
|
+ need_signing = _gf_true;
|
||
|
+ need_reopen = _gf_true;
|
||
|
+ }
|
||
|
}
|
||
|
|
||
|
if (!need_signing)
|
||
|
@@ -1054,7 +1067,7 @@ bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
|
||
|
gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN,
|
||
|
"Triggering signing for %s [GFID: %s | Brick: %s]", loc.path,
|
||
|
uuid_utoa(linked_inode->gfid), child->brick_path);
|
||
|
- br_trigger_sign(this, child, linked_inode, &loc, _gf_true);
|
||
|
+ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
|
||
|
|
||
|
ret = 0;
|
||
|
|
||
|
--
|
||
|
1.8.3.1
|
||
|
|