btrfs: scrub: update device stats when an error is detected
[ Upstream commit ec1f3a207cdf314eae4d4ae145f1ffdb829f0652 ]
[BUG]
Since the migration to the new scrub_stripe interface, scrub no longer
updates the device stats when hitting an error, regardless of whether it is
a read error or a checksum mismatch error. E.g.:
BTRFS info (device dm-2): scrub: started on devid 1
BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
BTRFS info (device dm-2): scrub: finished on devid 1 with status: 0
Note there is no line showing the device stats error update.
[CAUSE]
In the migration to the new scrub_stripe interface, we no longer call
btrfs_dev_stat_inc_and_print().
[FIX]
- Introduce a new bitmap for metadata generation errors
  * A new bitmap, @meta_gen_error_bitmap, is introduced to record which
    blocks have metadata generation mismatch errors.
  * A new counter for that bitmap, @init_nr_meta_gen_errors, is also
    introduced to store the number of generation mismatch errors that are
    found during the initial read.
    This is for the error reporting at scrub_stripe_report_errors().
  * New dedicated error message for unrepaired generation mismatches
  * Update @meta_gen_error_bitmap if a transid mismatch is hit
- Add btrfs_dev_stat_inc_and_print() calls to the following call sites
  * scrub_stripe_report_errors()
  * scrub_write_endio()
    This is only for the write errors.
This means there is a minor behavior change:
- The timing of the device stats error messages
  Since we concentrate the error messages at
  scrub_stripe_report_errors(), the device stats error messages will all
  show up in one go, after the detailed scrub error messages:
BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
BTRFS error (device dm-2): unable to fixup (regular) error at logical 13631488 on dev /dev/mapper/test-scratch1 physical 13631488
BTRFS warning (device dm-2): checksum error at logical 13631488 on dev /dev/mapper/test-scratch1, physical 13631488, root 5, inode 257, offset 0, length 4096, links 1 (path: file)
BTRFS error (device dm-2): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 1, gen 0
BTRFS error (device dm-2): bdev /dev/mapper/test-scratch1 errs: wr 0, rd 0, flush 0, corrupt 2, gen 0
Fixes: e02ee89baa ("btrfs: scrub: switch scrub_simple_mirror() to scrub_stripe infrastructure")
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
committed by
Greg Kroah-Hartman
parent
3cf4d9cae4
commit
7cfb9086b6
@@ -153,12 +153,14 @@ struct scrub_stripe {
|
|||||||
unsigned int init_nr_io_errors;
|
unsigned int init_nr_io_errors;
|
||||||
unsigned int init_nr_csum_errors;
|
unsigned int init_nr_csum_errors;
|
||||||
unsigned int init_nr_meta_errors;
|
unsigned int init_nr_meta_errors;
|
||||||
|
unsigned int init_nr_meta_gen_errors;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following error bitmaps are all for the current status.
|
* The following error bitmaps are all for the current status.
|
||||||
* Every time we submit a new read, these bitmaps may be updated.
|
* Every time we submit a new read, these bitmaps may be updated.
|
||||||
*
|
*
|
||||||
* error_bitmap = io_error_bitmap | csum_error_bitmap | meta_error_bitmap;
|
* error_bitmap = io_error_bitmap | csum_error_bitmap |
|
||||||
|
* meta_error_bitmap | meta_gen_error_bitmap;
|
||||||
*
|
*
|
||||||
* IO and csum errors can happen for both metadata and data.
|
* IO and csum errors can happen for both metadata and data.
|
||||||
*/
|
*/
|
||||||
@@ -166,6 +168,7 @@ struct scrub_stripe {
|
|||||||
unsigned long io_error_bitmap;
|
unsigned long io_error_bitmap;
|
||||||
unsigned long csum_error_bitmap;
|
unsigned long csum_error_bitmap;
|
||||||
unsigned long meta_error_bitmap;
|
unsigned long meta_error_bitmap;
|
||||||
|
unsigned long meta_gen_error_bitmap;
|
||||||
|
|
||||||
/* For writeback (repair or replace) error reporting. */
|
/* For writeback (repair or replace) error reporting. */
|
||||||
unsigned long write_error_bitmap;
|
unsigned long write_error_bitmap;
|
||||||
@@ -673,7 +676,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
|
|||||||
}
|
}
|
||||||
if (stripe->sectors[sector_nr].generation !=
|
if (stripe->sectors[sector_nr].generation !=
|
||||||
btrfs_stack_header_generation(header)) {
|
btrfs_stack_header_generation(header)) {
|
||||||
bitmap_set(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
|
bitmap_set(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
|
||||||
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
|
bitmap_set(&stripe->error_bitmap, sector_nr, sectors_per_tree);
|
||||||
btrfs_warn_rl(fs_info,
|
btrfs_warn_rl(fs_info,
|
||||||
"tree block %llu mirror %u has bad generation, has %llu want %llu",
|
"tree block %llu mirror %u has bad generation, has %llu want %llu",
|
||||||
@@ -685,6 +688,7 @@ static void scrub_verify_one_metadata(struct scrub_stripe *stripe, int sector_nr
|
|||||||
bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
|
bitmap_clear(&stripe->error_bitmap, sector_nr, sectors_per_tree);
|
||||||
bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
|
bitmap_clear(&stripe->csum_error_bitmap, sector_nr, sectors_per_tree);
|
||||||
bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
|
bitmap_clear(&stripe->meta_error_bitmap, sector_nr, sectors_per_tree);
|
||||||
|
bitmap_clear(&stripe->meta_gen_error_bitmap, sector_nr, sectors_per_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
|
static void scrub_verify_one_sector(struct scrub_stripe *stripe, int sector_nr)
|
||||||
@@ -973,8 +977,22 @@ skip:
|
|||||||
if (__ratelimit(&rs) && dev)
|
if (__ratelimit(&rs) && dev)
|
||||||
scrub_print_common_warning("header error", dev, false,
|
scrub_print_common_warning("header error", dev, false,
|
||||||
stripe->logical, physical);
|
stripe->logical, physical);
|
||||||
|
if (test_bit(sector_nr, &stripe->meta_gen_error_bitmap))
|
||||||
|
if (__ratelimit(&rs) && dev)
|
||||||
|
scrub_print_common_warning("generation error", dev, false,
|
||||||
|
stripe->logical, physical);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Update the device stats. */
|
||||||
|
for (int i = 0; i < stripe->init_nr_io_errors; i++)
|
||||||
|
btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_READ_ERRS);
|
||||||
|
for (int i = 0; i < stripe->init_nr_csum_errors; i++)
|
||||||
|
btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_CORRUPTION_ERRS);
|
||||||
|
/* Generation mismatch error is based on each metadata, not each block. */
|
||||||
|
for (int i = 0; i < stripe->init_nr_meta_gen_errors;
|
||||||
|
i += (fs_info->nodesize >> fs_info->sectorsize_bits))
|
||||||
|
btrfs_dev_stat_inc_and_print(stripe->dev, BTRFS_DEV_STAT_GENERATION_ERRS);
|
||||||
|
|
||||||
spin_lock(&sctx->stat_lock);
|
spin_lock(&sctx->stat_lock);
|
||||||
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
|
sctx->stat.data_extents_scrubbed += stripe->nr_data_extents;
|
||||||
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
|
sctx->stat.tree_extents_scrubbed += stripe->nr_meta_extents;
|
||||||
@@ -983,7 +1001,8 @@ skip:
|
|||||||
sctx->stat.no_csum += nr_nodatacsum_sectors;
|
sctx->stat.no_csum += nr_nodatacsum_sectors;
|
||||||
sctx->stat.read_errors += stripe->init_nr_io_errors;
|
sctx->stat.read_errors += stripe->init_nr_io_errors;
|
||||||
sctx->stat.csum_errors += stripe->init_nr_csum_errors;
|
sctx->stat.csum_errors += stripe->init_nr_csum_errors;
|
||||||
sctx->stat.verify_errors += stripe->init_nr_meta_errors;
|
sctx->stat.verify_errors += stripe->init_nr_meta_errors +
|
||||||
|
stripe->init_nr_meta_gen_errors;
|
||||||
sctx->stat.uncorrectable_errors +=
|
sctx->stat.uncorrectable_errors +=
|
||||||
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
|
bitmap_weight(&stripe->error_bitmap, stripe->nr_sectors);
|
||||||
sctx->stat.corrected_errors += nr_repaired_sectors;
|
sctx->stat.corrected_errors += nr_repaired_sectors;
|
||||||
@@ -1029,6 +1048,8 @@ static void scrub_stripe_read_repair_worker(struct work_struct *work)
|
|||||||
stripe->nr_sectors);
|
stripe->nr_sectors);
|
||||||
stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
|
stripe->init_nr_meta_errors = bitmap_weight(&stripe->meta_error_bitmap,
|
||||||
stripe->nr_sectors);
|
stripe->nr_sectors);
|
||||||
|
stripe->init_nr_meta_gen_errors = bitmap_weight(&stripe->meta_gen_error_bitmap,
|
||||||
|
stripe->nr_sectors);
|
||||||
|
|
||||||
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
|
if (bitmap_empty(&stripe->init_error_bitmap, stripe->nr_sectors))
|
||||||
goto out;
|
goto out;
|
||||||
@@ -1143,6 +1164,9 @@ static void scrub_write_endio(struct btrfs_bio *bbio)
|
|||||||
bitmap_set(&stripe->write_error_bitmap, sector_nr,
|
bitmap_set(&stripe->write_error_bitmap, sector_nr,
|
||||||
bio_size >> fs_info->sectorsize_bits);
|
bio_size >> fs_info->sectorsize_bits);
|
||||||
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
|
spin_unlock_irqrestore(&stripe->write_error_lock, flags);
|
||||||
|
for (int i = 0; i < (bio_size >> fs_info->sectorsize_bits); i++)
|
||||||
|
btrfs_dev_stat_inc_and_print(stripe->dev,
|
||||||
|
BTRFS_DEV_STAT_WRITE_ERRS);
|
||||||
}
|
}
|
||||||
bio_put(&bbio->bio);
|
bio_put(&bbio->bio);
|
||||||
|
|
||||||
@@ -1505,10 +1529,12 @@ static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe)
|
|||||||
stripe->init_nr_io_errors = 0;
|
stripe->init_nr_io_errors = 0;
|
||||||
stripe->init_nr_csum_errors = 0;
|
stripe->init_nr_csum_errors = 0;
|
||||||
stripe->init_nr_meta_errors = 0;
|
stripe->init_nr_meta_errors = 0;
|
||||||
|
stripe->init_nr_meta_gen_errors = 0;
|
||||||
stripe->error_bitmap = 0;
|
stripe->error_bitmap = 0;
|
||||||
stripe->io_error_bitmap = 0;
|
stripe->io_error_bitmap = 0;
|
||||||
stripe->csum_error_bitmap = 0;
|
stripe->csum_error_bitmap = 0;
|
||||||
stripe->meta_error_bitmap = 0;
|
stripe->meta_error_bitmap = 0;
|
||||||
|
stripe->meta_gen_error_bitmap = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
Reference in New Issue
Block a user