mirror of
https://github.com/torvalds/linux.git
synced 2026-01-25 07:47:50 +00:00
md: add a new recovery_flag MD_RECOVERY_LAZY_RECOVER
This flag is used by llbitmap in later patches to skip raid456 initial recover and delay building initial xor data to first write. https://lore.kernel.org/linux-raid/20250829080426.1441678-10-yukuai1@huaweicloud.com Signed-off-by: Yu Kuai <yukuai3@huawei.com>
This commit is contained in:
@@ -9199,6 +9199,39 @@ static sector_t md_sync_max_sectors(struct mddev *mddev,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If lazy recovery is requested and all rdevs are in sync, select the rdev with
|
||||
* the higest index to perfore recovery to build initial xor data, this is the
|
||||
* same as old bitmap.
|
||||
*/
|
||||
static bool mddev_select_lazy_recover_rdev(struct mddev *mddev)
|
||||
{
|
||||
struct md_rdev *recover_rdev = NULL;
|
||||
struct md_rdev *rdev;
|
||||
bool ret = false;
|
||||
|
||||
rcu_read_lock();
|
||||
rdev_for_each_rcu(rdev, mddev) {
|
||||
if (rdev->raid_disk < 0)
|
||||
continue;
|
||||
|
||||
if (test_bit(Faulty, &rdev->flags) ||
|
||||
!test_bit(In_sync, &rdev->flags))
|
||||
break;
|
||||
|
||||
if (!recover_rdev || recover_rdev->raid_disk < rdev->raid_disk)
|
||||
recover_rdev = rdev;
|
||||
}
|
||||
|
||||
if (recover_rdev) {
|
||||
clear_bit(In_sync, &recover_rdev->flags);
|
||||
ret = true;
|
||||
}
|
||||
|
||||
rcu_read_unlock();
|
||||
return ret;
|
||||
}
|
||||
|
||||
static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
|
||||
{
|
||||
sector_t start = 0;
|
||||
@@ -9230,6 +9263,14 @@ static sector_t md_sync_position(struct mddev *mddev, enum sync_action action)
|
||||
start = rdev->recovery_offset;
|
||||
rcu_read_unlock();
|
||||
|
||||
/*
|
||||
* If there are no spares, and raid456 lazy initial recover is
|
||||
* requested, recover the selected rdev starting from sector 0.
|
||||
*/
|
||||
if (test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery) &&
|
||||
start == MaxSector && mddev_select_lazy_recover_rdev(mddev))
|
||||
start = 0;
|
||||
|
||||
/* If there is a bitmap, we need to make sure all
|
||||
* writes that started before we added a spare
|
||||
* complete before we start doing a recovery.
|
||||
@@ -9791,6 +9832,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
|
||||
|
||||
set_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -9799,6 +9841,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
|
||||
remove_spares(mddev, NULL);
|
||||
set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -9808,7 +9851,7 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
|
||||
* re-add.
|
||||
*/
|
||||
*spares = remove_and_add_spares(mddev, NULL);
|
||||
if (*spares) {
|
||||
if (*spares || test_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery)) {
|
||||
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
@@ -10021,6 +10064,7 @@ void md_check_recovery(struct mddev *mddev)
|
||||
}
|
||||
|
||||
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
|
||||
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
|
||||
|
||||
@@ -10131,6 +10175,7 @@ void md_reap_sync_thread(struct mddev *mddev)
|
||||
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
|
||||
clear_bit(MD_RECOVERY_LAZY_RECOVER, &mddev->recovery);
|
||||
/*
|
||||
* We call mddev->cluster_ops->update_size here because sync_size could
|
||||
* be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
|
||||
|
||||
@@ -667,6 +667,8 @@ enum recovery_flags {
|
||||
MD_RECOVERY_RESHAPE,
|
||||
/* remote node is running resync thread */
|
||||
MD_RESYNCING_REMOTE,
|
||||
/* raid456 lazy initial recover */
|
||||
MD_RECOVERY_LAZY_RECOVER,
|
||||
};
|
||||
|
||||
enum md_ro_state {
|
||||
|
||||
@@ -4705,10 +4705,21 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
|
||||
}
|
||||
} else if (test_bit(In_sync, &rdev->flags))
|
||||
set_bit(R5_Insync, &dev->flags);
|
||||
else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <= rdev->recovery_offset)
|
||||
/* in sync if before recovery_offset */
|
||||
set_bit(R5_Insync, &dev->flags);
|
||||
else if (test_bit(R5_UPTODATE, &dev->flags) &&
|
||||
else if (sh->sector + RAID5_STRIPE_SECTORS(conf) <=
|
||||
rdev->recovery_offset) {
|
||||
/*
|
||||
* in sync if:
|
||||
* - normal IO, or
|
||||
* - resync IO that is not lazy recovery
|
||||
*
|
||||
* For lazy recovery, we have to mark the rdev without
|
||||
* In_sync as failed, to build initial xor data.
|
||||
*/
|
||||
if (!test_bit(STRIPE_SYNCING, &sh->state) ||
|
||||
!test_bit(MD_RECOVERY_LAZY_RECOVER,
|
||||
&conf->mddev->recovery))
|
||||
set_bit(R5_Insync, &dev->flags);
|
||||
} else if (test_bit(R5_UPTODATE, &dev->flags) &&
|
||||
test_bit(R5_Expanded, &dev->flags))
|
||||
/* If we've reshaped into here, we assume it is Insync.
|
||||
* We will shortly update recovery_offset to make
|
||||
|
||||
Reference in New Issue
Block a user