[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check
From: |
Max Reitz |
Subject: |
[Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check |
Date: |
Mon, 26 Aug 2013 12:18:25 +0200 |
Adds a new function checking for overlapping cluster allocations.
Furthermore, qcow2_check now marks the image as consistent if no
corruptions have been found.
Signed-off-by: Max Reitz <address@hidden>
---
block/qcow2-cluster.c | 414 ++++++++++++++++++++++++++++++++++++++++++++++++++
block/qcow2.c | 15 +-
block/qcow2.h | 2 +
3 files changed, 429 insertions(+), 2 deletions(-)
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
index be35983..ea7d334 100644
--- a/block/qcow2-cluster.c
+++ b/block/qcow2-cluster.c
@@ -1499,3 +1499,417 @@ fail:
return ret;
}
+
+static const char *const overlap_strings[8] = {
+ "main header",
+ "active L1 table",
+ "active L2 table",
+ "refcount table",
+ "refcount block",
+ "snapshot table",
+ "inactive L1 table",
+ "inactive L2 table"
+};
+
+/*
+ * Checks the table and cluster allocations for inconsistencies.
+ */
+int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix)
+{
+ BDRVQcowState *s = bs->opaque;
+ int ret;
+ int i, j;
+ uint8_t *bm; /* bitmap */
+ /* bitmap byte shift (bytes per bitmap-bit = 1 << bm_shift) */
+ int bm_shift = s->cluster_bits;
+ uint64_t bm_size;
+
+ /* Checking every data cluster through qcow2_check_metadata_overlap takes a
+ * considerable amount of time; a bitmap would help out very much, although
+ * it quickly assumes ridiculous sizes if not limited; therefore we're
+ * grouping several clusters together to not exceed the size of the L1
table
+ * in RAM (which seems a reasonable limit). */
+ while ((bm_size = bs->total_sectors >> (bm_shift++ - BDRV_SECTOR_BITS)) >
+ s->l1_size * sizeof(uint64_t));
+
+ bm = g_malloc0(++bm_size);
+
+ bm[0] = 1; /* main header */
+ /* active L1 */
+ for (i = 0; i < s->l1_size * sizeof(uint64_t) >> bm_shift; i++) {
+ uintptr_t bit = i + (s->l1_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR L1 table offset is after EOF\n");
+ result->corruptions++;
+ /* this is such a fundamental error that it will probably crash
+ * qcow2_check_metadata_overlap if this function doesn't return
+ * already here (qcow2_check_metadata_overlap is for keeping
+ * consistency, therefore it always assumes a pretty consistent
+ * image) */
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->l1_table) {
+ /* active L2 */
+ for (i = 0; i < s->l1_size; i++) {
+ uintptr_t bit = (s->l1_table[i] & L1E_OFFSET_MASK) >> bm_shift;
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR L2 table %i offset is after EOF\n", i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ /* the test for j != 0 is unneccessary, since that bit is set
+ * anyway (because of the main header) */
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+
+ /* snapshot table */
+ for (i = 0; i < s->snapshots_size >> bm_shift; i++) {
+ uintptr_t bit = i + (s->snapshots_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR snapshot table offset is after EOF\n");
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->snapshots) {
+ /* inactive L1 */
+ for (i = 0; i < s->nb_snapshots; i++) {
+ for (j = 0; j < s->snapshots[i].l1_size * sizeof(uint64_t) >>
+ bm_shift; j++) {
+ uintptr_t bit =
+ j + (s->snapshots[i].l1_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR inactive L1 table %i offset is "
+ "after EOF\n", i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+ }
+
+ /* refcount table */
+ for (i = 0; i < s->refcount_table_size * sizeof(uint64_t) >> bm_shift; i++)
+ {
+ uintptr_t bit = i + (s->refcount_table_offset >> bm_shift);
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR refcount table offset is after EOF\n");
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+
+ if (s->refcount_table) {
+ int initial_corruptions = result->corruptions;
+
+ /* Do the refcount blocks last; that way, we can directly use the
bitmap
+ * to check for overlaps */
+ for (i = 0; i < s->refcount_table_size; i++) {
+ uint64_t rb_offset = s->refcount_table[i] & REFT_OFFSET_MASK;
+ uintptr_t bit = rb_offset >> bm_shift;
+
+ if (!rb_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (rb_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR refcount block %i is not cluster "
+ "aligned in image\n", i);
+ result->corruptions++;
+ continue;
+ }
+
+ if (bit / 8 >= bm_size) {
+ fprintf(stderr, "ERROR refcount block %i offset is after
EOF\n",
+ i);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ }
+
+ if (bm[bit / 8] & (1 << (bit % 8))) {
+ ret = qcow2_check_metadata_overlap(bs,
+ QCOW2_OL_CACHED & ~QCOW2_OL_REFCOUNT_BLOCK, rb_offset,
+ s->cluster_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR refcount block %i overlaps with
%s\n", i,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check refcount block %i
overlap: "
+ "%s\n", i, strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+ }
+ }
+
+ if (result->corruptions != initial_corruptions) {
+ ret = 0;
+ goto fail;
+ }
+
+ /* Now, enter the refcount blocks into the bitmap */
+ for (i = 0; i < s->refcount_table_size; i++) {
+ uint64_t rb_offset = s->refcount_table[i] & REFT_OFFSET_MASK;
+ uintptr_t bit = rb_offset >> bm_shift;
+ bm[bit / 8] |= 1 << (bit % 8);
+ }
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_REFCOUNT_TABLE | QCOW2_OL_SNAPSHOT_TABLE,
+ s->l1_table_offset, s->l1_size * sizeof(uint64_t));
+ if (ret > 0) {
+ fprintf(stderr, "ERROR L1 offset intersects %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check L1 overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ if (s->l1_table) {
+ for (i = 0; i < s->l1_size; i++) {
+ uint64_t l2_offset = s->l1_table[i] & L1E_OFFSET_MASK;
+ uint64_t *l2;
+
+ if (!l2_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ if (l2_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR L2 %i is not cluster aligned; L1 table "
+ "entry corrupted\n", i);
+ result->corruptions++;
+ continue;
+ }
+
+ ret = qcow2_check_metadata_overlap(bs,
+ QCOW2_OL_CACHED & ~QCOW2_OL_ACTIVE_L2, l2_offset,
+ s->cluster_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR L2 %i overlaps with %s\n", i,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ continue;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check L2 %i overlap: %s\n", i,
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ ret = l2_load(bs, l2_offset, &l2);
+ if ((ret < 0) || !l2) {
+ fprintf(stderr, "ERROR could not load L2 %i: %s\n", i,
+ strerror(-ret));
+ result->check_errors++;
+ if (ret < 0) {
+ goto fail;
+ } else {
+ continue;
+ }
+ }
+
+ for (j = 0; j < s->l2_size; j++) {
+ uint64_t cl_offset, cl_size;
+ unsigned long long cl_index;
+ uint64_t l2_entry = be64_to_cpu(l2[j]);
+
+ if ((s->qcow_version >= 3) && (l2_entry & 1)) {
+ /* cluster reads as all zeroes */
+ continue;
+ }
+
+ if (l2_entry & (1ULL << 62)) { /* compressed */
+ int xp1 = 62 - (s->cluster_bits - 9);
+ cl_offset = l2_entry & ((1 << xp1) - 1);
+ cl_size = ((l2_entry & L2E_COMPRESSED_OFFSET_SIZE_MASK) >>
+ xp1) * BDRV_SECTOR_SIZE;
+ } else { /* uncompressed */
+ cl_offset = l2_entry & L2E_OFFSET_MASK;
+ cl_size = s->cluster_size;
+ }
+
+ if (!cl_offset) {
+ /* unallocated */
+ continue;
+ }
+
+ cl_index = ((unsigned long long)i << s->l2_bits) | j;
+
+ if (!(l2_entry & (1ULL << 62)) &&
+ cl_offset & (s->cluster_size - 1)) {
+ fprintf(stderr, "ERROR data cluster 0x%llx is not cluster "
+ "aligned in image\n", cl_index);
+ result->corruptions++;
+ continue;
+ }
+
+ if (!(bm[(cl_offset >> bm_shift) / 8] &
+ (1 << ((cl_offset >> bm_shift) % 8)))) {
+ continue;
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_CACHED,
cl_offset,
+ cl_size);
+ if (ret > 0) {
+ /* TODO: fix compressed clusters */
+ if ((fix & BDRV_FIX_ERRORS) && !(l2_entry & (1ULL << 62)))
{
+ void *content = g_malloc(s->cluster_size);
+ uint64_t sector = cl_index <<
+ (s->cluster_bits - BDRV_SECTOR_BITS);
+
+ fprintf(stderr, "Reallocating data cluster 0x%llx "
+ "(currently overlapping with %s)\n", cl_index,
+ overlap_strings[ffs(ret) - 1]);
+
+ ret = bdrv_read(bs, sector, content,
+ s->cluster_sectors);
+ if (ret < 0) {
+ fprintf(stderr, "ERROR could not read cluster: "
+ "%s\n", strerror(-ret));
+ result->corruptions++;
+ g_free(content);
+ qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2);
+ goto fail;
+ }
+
+ /* unallocate cluster (but leaving the refcount for
+ * now) */
+ l2[j] = 0;
+
+ /* FIXME: the image obviously is inconsistent, so this
+ * write access may trigger new inconsistencies
+ * (although the refcount check, which is supposed to
be
+ * performed before these checks, should take care of
+ * this) */
+ ret = bdrv_write(bs, sector, content,
+ s->cluster_sectors);
+ if (ret < 0) {
+ /* at least leave it as it was */
+ l2[j] = cpu_to_be64(l2_entry);
+
+ fprintf(stderr, "ERROR could not write cluster: "
+ "%s\n", strerror(-ret));
+ result->corruptions++;
+ g_free(content);
+ qcow2_cache_put(bs, s->l2_table_cache,
+ (void **)&l2);
+ goto fail;
+ }
+
+ g_free(content);
+ result->corruptions_fixed++;
+
+ /* Now decrement the refcount (although an invalid
+ * refcount might have been the problem in the first
+ * place, the refcount check (which is running before
+ * this) will have fixed that) */
+ qcow2_free_any_clusters(bs, l2_entry, 1,
+ QCOW2_DISCARD_ALWAYS);
+ } else {
+ fprintf(stderr, "ERROR data cluster 0x%llx overlaps "
+ "with %s\n", cl_index,
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ }
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check data cluster 0x%llx
"
+ "overlap: %s\n", cl_index, strerror(-ret));
+ result->check_errors++;
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2);
+ goto fail;
+ }
+ }
+
+ qcow2_cache_put(bs, s->l2_table_cache, (void **)&l2);
+ }
+ }
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_ACTIVE_L1 | QCOW2_OL_SNAPSHOT_TABLE,
+ s->refcount_table_offset,
+ s->refcount_table_size * sizeof(uint64_t));
+ if (ret > 0) {
+ fprintf(stderr, "ERROR refcount table overlaps with %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check refcount table overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ if (!s->refcount_table) {
+ fprintf(stderr, "ERROR could not check refcount block overlap: "
+ "refcount table not loaded\n");
+ result->check_errors++;
+ ret = -EIO;
+ goto fail;
+ }
+
+ /* refcount blocks have already been checked (when creating the bitmap) */
+
+ ret = qcow2_check_metadata_overlap(bs, QCOW2_OL_MAIN_HEADER |
+ QCOW2_OL_ACTIVE_L1 | QCOW2_OL_REFCOUNT_TABLE, s->snapshots_offset,
+ s->snapshots_size);
+ if (ret > 0) {
+ fprintf(stderr, "ERROR snapshot table overlaps with %s\n",
+ overlap_strings[ffs(ret) - 1]);
+ result->corruptions++;
+ ret = 0;
+ goto fail;
+ } else if (ret < 0) {
+ fprintf(stderr, "ERROR could not check snapshot table overlap: %s\n",
+ strerror(-ret));
+ result->check_errors++;
+ goto fail;
+ }
+
+ /* We now know for sure that nothing overlaps with the inactive L1 tables -
+ * therefore, we can skip explicitly checking them. */
+
+ ret = 0;
+
+fail:
+ g_free(bm);
+
+ if (fix & BDRV_FIX_ERRORS) {
+ /* qemu-img check will double check if some corruptions could be fixed
+ * - without reopening the underlying image file. Since the refcount
+ * check doesn't use the L2 cache for some reason, it must be flushed
+ * to disk before the second check can commence (else it will report
+ * inexistent errors); and if we're already at it, the refcount cache
+ * may be flushed just as well */
+ qcow2_cache_flush(bs, s->l2_table_cache);
+ qcow2_cache_flush(bs, s->refcount_block_cache);
+ }
+
+ return ret;
+}
diff --git a/block/qcow2.c b/block/qcow2.c
index 95497c6..0b0f0ac 100644
--- a/block/qcow2.c
+++ b/block/qcow2.c
@@ -311,8 +311,19 @@ static int qcow2_check(BlockDriverState *bs,
BdrvCheckResult *result,
return ret;
}
- if (fix && result->check_errors == 0 && result->corruptions == 0) {
- return qcow2_mark_clean(bs);
+ ret = qcow2_check_allocations(bs, result, fix);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (result->check_errors == 0 && result->corruptions == 0) {
+ if (fix) {
+ ret = qcow2_mark_clean(bs);
+ if (ret < 0) {
+ return ret;
+ }
+ }
+ return qcow2_mark_consistent(bs);
}
return ret;
}
diff --git a/block/qcow2.h b/block/qcow2.h
index 8a55da0..3772ab6 100644
--- a/block/qcow2.h
+++ b/block/qcow2.h
@@ -410,6 +410,8 @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
+int qcow2_check_allocations(BlockDriverState *bs, BdrvCheckResult *result,
+ BdrvCheckMode fix);
void qcow2_process_discards(BlockDriverState *bs, int ret);
--
1.8.3.1
- [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata overlap checks, (continued)
- [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check, Max Reitz, 2013/08/26
- [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks, Max Reitz, 2013/08/26
- [Qemu-devel] [PATCH 1/5] qcow2: Add corrupt bit, Max Reitz, 2013/08/26
- [Qemu-devel] [PATCH 2/5] qcow2: Metadata overlap checks, Max Reitz, 2013/08/26
- [Qemu-devel] [PATCH 4/5] qcow2: Check allocations in qcow2_check,
Max Reitz <=
- [Qemu-devel] [PATCH 3/5] qcow2: Employ metadata overlap checks, Max Reitz, 2013/08/26
- [Qemu-devel] [PATCH 5/5] qemu-iotests: Overlapping cluster allocations, Max Reitz, 2013/08/26
- Re: [Qemu-devel] [PATCH 0/5] qcow2: Add metadata overlap checks, Max Reitz, 2013/08/26