You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
165 lines
6.1 KiB
165 lines
6.1 KiB
From babbd49cc053993a4ecff8eaf178d5a29f3a0bf0 Mon Sep 17 00:00:00 2001 |
|
From: karthik-us <ksubrahm@redhat.com> |
|
Date: Wed, 20 Nov 2019 12:26:11 +0530 |
|
Subject: [PATCH 329/335] cluster/afr: Heal entries when there is a source & no |
|
healed_sinks |
|
|
|
Backport of: https://review.gluster.org/#/c/glusterfs/+/23364/ |
|
|
|
Problem: |
|
In a situation where B1 blames B2, B2 blames B1, and B3 doesn't blame |
|
anything for entry heal, the heal will not complete even though we have |
|
a clear source and sinks. This happens because, in |
|
afr_selfheal_find_direction(), only the bricks which are blamed by |
|
non-accused bricks are considered as sinks. Later, when |
|
__afr_selfheal_entry_finalize_source() tries to mark all the |
|
non-sources as sinks, it fails to do so because there are no |
|
healed_sinks marked, no witness is present, and there is a source. |
|
|
|
Fix: |
|
If there is a source and no healed_sinks, then for each locked brick |
|
reset sources to 0 and healed_sinks to 1 to do a conservative merge. |
|
|
|
Change-Id: I8831603ac037b6a3000bee092abfdcc92f7f2e57 |
|
Signed-off-by: karthik-us <ksubrahm@redhat.com> |
|
BUG: 1764095 |
|
Reviewed-on: https://code.engineering.redhat.com/gerrit/185834 |
|
Tested-by: RHGS Build Bot <nigelb@redhat.com> |
|
Reviewed-by: Sunil Kumar Heggodu Gopala Acharya <sheggodu@redhat.com> |
|
--- |
|
.../bug-1749322-entry-heal-not-happening.t | 89 ++++++++++++++++++++++ |
|
xlators/cluster/afr/src/afr-self-heal-entry.c | 15 ++++ |
|
2 files changed, 104 insertions(+) |
|
create mode 100644 tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t |
|
|
|
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t |
|
new file mode 100644 |
|
index 0000000..9627908 |
|
--- /dev/null |
|
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t |
|
@@ -0,0 +1,89 @@ |
|
+#!/bin/bash |
|
+ |
|
+. $(dirname $0)/../../include.rc |
|
+. $(dirname $0)/../../volume.rc |
|
+. $(dirname $0)/../../afr.rc |
|
+ |
|
+cleanup |
|
+ |
|
+function check_gfid_and_link_count |
|
+{ |
|
+ local file=$1 |
|
+ |
|
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file) |
|
+ TEST [ ! -z $file_gfid_b0 ] |
|
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file) |
|
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file) |
|
+ EXPECT $file_gfid_b0 echo $file_gfid_b1 |
|
+ EXPECT $file_gfid_b0 echo $file_gfid_b2 |
|
+ |
|
+ EXPECT "2" stat -c %h $B0/${V0}0/$file |
|
+ EXPECT "2" stat -c %h $B0/${V0}1/$file |
|
+ EXPECT "2" stat -c %h $B0/${V0}2/$file |
|
+} |
|
+TESTS_EXPECTED_IN_LOOP=18 |
|
+ |
|
+################################################################################ |
|
+## Start and create a volume |
|
+TEST glusterd; |
|
+TEST pidof glusterd; |
|
+TEST $CLI volume info; |
|
+ |
|
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}; |
|
+TEST $CLI volume start $V0; |
|
+TEST $CLI volume set $V0 cluster.heal-timeout 5 |
|
+TEST $CLI volume heal $V0 disable |
|
+EXPECT 'Started' volinfo_field $V0 'Status'; |
|
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 |
|
+ |
|
+TEST mkdir $M0/dir |
|
+TEST `echo "File 1 " > $M0/dir/file1` |
|
+TEST touch $M0/dir/file{2..4} |
|
+ |
|
+# Remove file2 from 1st & 3rd bricks |
|
+TEST rm -f $B0/$V0"0"/dir/file2 |
|
+TEST rm -f $B0/$V0"2"/dir/file2 |
|
+ |
|
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks |
|
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3) |
|
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3) |
|
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 |
|
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3 |
|
+TEST rm -f $B0/$V0"0"/dir/file3 |
|
+TEST rm -f $B0/$V0"1"/dir/file3 |
|
+ |
|
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick |
|
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4) |
|
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4) |
|
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4 |
|
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4 |
|
+ |
|
+# B0 and B2 blame each other |
|
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir |
|
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir |
|
+ |
|
+# Add entry to xattrop dir on first brick. |
|
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0") |
|
+base_entry_b0=`ls $xattrop_dir0` |
|
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/)) |
|
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str |
|
+ |
|
+EXPECT "^1$" get_pending_heal_count $V0 |
|
+ |
|
+# Launch heal |
|
+TEST $CLI volume heal $V0 enable |
|
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status |
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0 |
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1 |
|
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2 |
|
+TEST $CLI volume heal $V0 |
|
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 |
|
+ |
|
+# All the files must be present on all the bricks after conservative merge and |
|
+# should have the gfid xattr and the .glusterfs hardlink. |
|
+check_gfid_and_link_count dir/file1 |
|
+check_gfid_and_link_count dir/file2 |
|
+check_gfid_and_link_count dir/file3 |
|
+check_gfid_and_link_count dir/file4 |
|
+ |
|
+cleanup |
|
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c |
|
index 35b600f..3ce882e 100644 |
|
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c |
|
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c |
|
@@ -479,6 +479,7 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, |
|
afr_private_t *priv = NULL; |
|
int source = -1; |
|
int sources_count = 0; |
|
+ int i = 0; |
|
|
|
priv = this->private; |
|
|
|
@@ -492,6 +493,20 @@ __afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources, |
|
} |
|
|
|
source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION); |
|
+ |
|
+ /* If a source was selected but no brick is marked in healed_sinks, |
|
+ * mark every locked brick as a sink to trigger a conservative merge. |
|
+ */ |
|
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) { |
|
+ for (i = 0; i < priv->child_count; i++) { |
|
+ if (locked_on[i]) { |
|
+ sources[i] = 0; |
|
+ healed_sinks[i] = 1; |
|
+ } |
|
+ } |
|
+ return -1; |
|
+ } |
|
+ |
|
return source; |
|
} |
|
|
|
-- |
|
1.8.3.1 |
|
|
|
|