You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
182 lines
6.1 KiB
182 lines
6.1 KiB
7 years ago
|
From fb12a7454000c56aa0439c5bc07fb29de2f3f2a1 Mon Sep 17 00:00:00 2001
|
||
|
From: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
|
||
|
Date: Thu, 10 Aug 2017 15:47:22 +0200
|
||
|
Subject: [RHEL7.5 PATCH 163/169] imsm: rebuild from 2-disk RAID10
|
||
|
|
||
|
When RAID10 loses 2 disks and it is still operational, it cannot be
|
||
|
rebuilt. The rebuild process starts for the first disk and completes,
|
||
|
however completion is not recorded in metadata. There is an assumption
|
||
|
that rebuild completion corresponds to transition from degraded to
|
||
|
normal state. It's not the case for 2-disk RAID10 as it's still degraded
|
||
|
after the rebuild of the first disk completes.
|
||
|
|
||
|
Check if disk rebuild flag is set in the second map and clear it. So far it
|
||
|
has been checked only in the first map (where it was not set). The flag in
|
||
|
the second map has not been cleared but rebuild completion dropped second
|
||
|
map so the problem was not visible.
|
||
|
|
||
|
If rebuild completion is notified and array still has failed disks and is in
|
||
|
degraded state, check first if rebuild position is really unset (the same
|
||
|
check as for array in normal state). If so, mark migration as done but don't
|
||
|
change array state (it should remain degraded). Update failed disk number.
|
||
|
|
||
|
On rebuild start don't clear the rebuild flag in the destination map for all
|
||
|
the drives because failed state is lost for one of them. Just do a copy of
|
||
|
a map and clear the flag in the destination map for the disk that goes into
|
||
|
rebuild. Similarly, preserve the rebuild flag in the map during disk removal.
|
||
|
|
||
|
If the disk is missing on array start and migration has been in progress,
|
||
|
don't just cancel it. Check first if maybe one of the disks was not under
|
||
|
rebuild (rebuild flag present both in source and destination map). If so,
|
||
|
rebuild was running despite the failed disk, so there is no need to cancel
|
||
|
migration.
|
||
|
|
||
|
Signed-off-by: Tomasz Majchrzak <tomasz.majchrzak@intel.com>
|
||
|
Signed-off-by: Jes Sorensen <jsorensen@fb.com>
|
||
|
---
|
||
|
super-intel.c | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++--------
|
||
|
1 file changed, 67 insertions(+), 10 deletions(-)
|
||
|
|
||
|
diff --git a/super-intel.c b/super-intel.c
|
||
|
index 51b7cc3..125c3a9 100644
|
||
|
--- a/super-intel.c
|
||
|
+++ b/super-intel.c
|
||
|
@@ -4023,7 +4023,7 @@ static void migrate(struct imsm_dev *dev, struct intel_super *super,
|
||
|
|
||
|
/* duplicate and then set the target end state in map[0] */
|
||
|
memcpy(dest, src, sizeof_imsm_map(src));
|
||
|
- if (migr_type == MIGR_REBUILD || migr_type == MIGR_GEN_MIGR) {
|
||
|
+ if (migr_type == MIGR_GEN_MIGR) {
|
||
|
__u32 ord;
|
||
|
int i;
|
||
|
|
||
|
@@ -7936,14 +7936,35 @@ static void handle_missing(struct intel_super *super, struct imsm_dev *dev)
|
||
|
/* end process for initialization and rebuild only
|
||
|
*/
|
||
|
if (is_gen_migration(dev) == 0) {
|
||
|
- __u8 map_state;
|
||
|
- int failed;
|
||
|
+ int failed = imsm_count_failed(super, dev, MAP_0);
|
||
|
|
||
|
- failed = imsm_count_failed(super, dev, MAP_0);
|
||
|
- map_state = imsm_check_degraded(super, dev, failed, MAP_0);
|
||
|
+ if (failed) {
|
||
|
+ __u8 map_state;
|
||
|
+ struct imsm_map *map = get_imsm_map(dev, MAP_0);
|
||
|
+ struct imsm_map *map1;
|
||
|
+ int i, ord, ord_map1;
|
||
|
+ int rebuilt = 1;
|
||
|
|
||
|
- if (failed)
|
||
|
- end_migration(dev, super, map_state);
|
||
|
+ for (i = 0; i < map->num_members; i++) {
|
||
|
+ ord = get_imsm_ord_tbl_ent(dev, i, MAP_0);
|
||
|
+ if (!(ord & IMSM_ORD_REBUILD))
|
||
|
+ continue;
|
||
|
+
|
||
|
+ map1 = get_imsm_map(dev, MAP_1);
|
||
|
+ if (!map1)
|
||
|
+ continue;
|
||
|
+
|
||
|
+ ord_map1 = __le32_to_cpu(map1->disk_ord_tbl[i]);
|
||
|
+ if (ord_map1 & IMSM_ORD_REBUILD)
|
||
|
+ rebuilt = 0;
|
||
|
+ }
|
||
|
+
|
||
|
+ if (rebuilt) {
|
||
|
+ map_state = imsm_check_degraded(super, dev,
|
||
|
+ failed, MAP_0);
|
||
|
+ end_migration(dev, super, map_state);
|
||
|
+ }
|
||
|
+ }
|
||
|
}
|
||
|
for (dl = super->missing; dl; dl = dl->next)
|
||
|
mark_missing(super, dev, &dl->disk, dl->index);
|
||
|
@@ -8225,8 +8246,10 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
||
|
int failed;
|
||
|
int ord;
|
||
|
__u8 map_state;
|
||
|
+ int rebuild_done = 0;
|
||
|
+ int i;
|
||
|
|
||
|
- ord = imsm_disk_slot_to_ord(a, n);
|
||
|
+ ord = get_imsm_ord_tbl_ent(dev, n, MAP_X);
|
||
|
if (ord < 0)
|
||
|
return;
|
||
|
|
||
|
@@ -8244,6 +8267,7 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
||
|
struct imsm_map *migr_map = get_imsm_map(dev, MAP_1);
|
||
|
|
||
|
set_imsm_ord_tbl_ent(migr_map, n, ord_to_idx(ord));
|
||
|
+ rebuild_done = 1;
|
||
|
super->updates_pending++;
|
||
|
}
|
||
|
|
||
|
@@ -8306,7 +8330,39 @@ static void imsm_set_disk(struct active_array *a, int n, int state)
|
||
|
dprintf_cont(" Map state change");
|
||
|
end_migration(dev, super, map_state);
|
||
|
super->updates_pending++;
|
||
|
+ } else if (!rebuild_done) {
|
||
|
+ break;
|
||
|
+ }
|
||
|
+
|
||
|
+ /* check if recovery is really finished */
|
||
|
+ for (mdi = a->info.devs; mdi ; mdi = mdi->next)
|
||
|
+ if (mdi->recovery_start != MaxSector) {
|
||
|
+ recovery_not_finished = 1;
|
||
|
+ break;
|
||
|
+ }
|
||
|
+ if (recovery_not_finished) {
|
||
|
+ dprintf_cont("\n");
|
||
|
+ dprintf("Rebuild has not finished yet, state not changed");
|
||
|
+ if (a->last_checkpoint < mdi->recovery_start) {
|
||
|
+ a->last_checkpoint =
|
||
|
+ mdi->recovery_start;
|
||
|
+ super->updates_pending++;
|
||
|
+ }
|
||
|
+ break;
|
||
|
}
|
||
|
+
|
||
|
+ dprintf_cont(" Rebuild done, still degraded");
|
||
|
+ dev->vol.migr_state = 0;
|
||
|
+ set_migr_type(dev, 0);
|
||
|
+ dev->vol.curr_migr_unit = 0;
|
||
|
+
|
||
|
+ for (i = 0; i < map->num_members; i++) {
|
||
|
+ int idx = get_imsm_ord_tbl_ent(dev, i, MAP_0);
|
||
|
+
|
||
|
+ if (idx & IMSM_ORD_REBUILD)
|
||
|
+ map->failed_disk_num = i;
|
||
|
+ }
|
||
|
+ super->updates_pending++;
|
||
|
break;
|
||
|
}
|
||
|
if (is_gen_migration(dev)) {
|
||
|
@@ -9936,7 +9992,7 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
|
||
|
struct imsm_dev *dev;
|
||
|
struct imsm_map *map;
|
||
|
unsigned int i, j, num_members;
|
||
|
- __u32 ord;
|
||
|
+ __u32 ord, ord_map0;
|
||
|
struct bbm_log *log = super->bbm_log;
|
||
|
|
||
|
dprintf("deleting device[%d] from imsm_super\n", index);
|
||
|
@@ -9958,12 +10014,13 @@ static void imsm_delete(struct intel_super *super, struct dl **dlp, unsigned ind
|
||
|
* ord-flags to the first map
|
||
|
*/
|
||
|
ord = get_imsm_ord_tbl_ent(dev, j, MAP_X);
|
||
|
+ ord_map0 = get_imsm_ord_tbl_ent(dev, j, MAP_0);
|
||
|
|
||
|
if (ord_to_idx(ord) <= index)
|
||
|
continue;
|
||
|
|
||
|
map = get_imsm_map(dev, MAP_0);
|
||
|
- set_imsm_ord_tbl_ent(map, j, ord_to_idx(ord - 1));
|
||
|
+ set_imsm_ord_tbl_ent(map, j, ord_map0 - 1);
|
||
|
map = get_imsm_map(dev, MAP_1);
|
||
|
if (map)
|
||
|
set_imsm_ord_tbl_ent(map, j, ord - 1);
|
||
|
--
|
||
|
2.7.4
|
||
|
|