|
From: | Max Reitz |
Subject: | Re: [Qemu-devel] [PATCH v2 4/5] qcow2: More complete consistency check |
Date: | Thu, 29 Aug 2013 14:09:43 +0200 |
User-agent: | Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8 |
Am 29.08.2013 13:36, schrieb Kevin Wolf:
Am 28.08.2013 um 16:55 hat Max Reitz geschrieben:The qcow2_check_refcounts function has been extended to be able to fix OFLAG_COPIED errors and multiple references on refcount blocks. If no corruptions remain after an image repair (and no errors have been encountered), clear the corrupt flag in qcow2_check. Signed-off-by: Max Reitz <address@hidden>This would be easier to review if you kept the code changes of the actual check (mostly code movement, I guess) and the repair of each type of errors in separate patches.block/qcow2-cluster.c | 4 +- block/qcow2-refcount.c | 249 ++++++++++++++++++++++++++++++++++++++++++------- block/qcow2.c | 6 +- block/qcow2.h | 1 + include/block/block.h | 1 + 5 files changed, 222 insertions(+), 39 deletions(-) @@ -1240,7 +1243,8 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, { BDRVQcowState *s = bs->opaque; int64_t size, i, highest_cluster; - int nb_clusters, refcount1, refcount2; + uint64_t *l2_table = NULL; + int nb_clusters, refcount1, refcount2, j; QCowSnapshot *sn; uint16_t *refcount_table; int ret; @@ -1305,10 +1309,85 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, inc_refcounts(bs, res, refcount_table, nb_clusters, offset, s->cluster_size); if (refcount_table[cluster] != 1) { - fprintf(stderr, "ERROR refcount block %" PRId64 + fprintf(stderr, "%s refcount block %" PRId64 " refcount=%d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : + "ERROR", i, refcount_table[cluster]); - res->corruptions++; + if (fix & BDRV_FIX_ERRORS) {This is a quite long if block. Probably worth splitting into its own function. (The res->corruptions++ part could be in a common fail: part then.)
Seems reasonable.
I personally don't really see any other way to do this than to just fail without any attempt on repairing. If the refcounts are completely broken, I don't see a reasonable way of allocating a free cluster. Furthermore, the following pre-write check should at least prevent this from overwriting any metadata.+ int64_t new_offset; + void *refcount_block; + + /* allocate new refcount block */ + new_offset = qcow2_alloc_clusters(bs, s->cluster_size);How trustworthy is qcow2_alloc_clusters() when we know that our refcount information is corrupted? Couldn't we end up accidentally overwriting things?
+ if (new_offset < 0) { + fprintf(stderr, "Could not allocate new cluster\n"); + res->corruptions++; + continue; + } + /* fetch current content */ + ret = qcow2_cache_get(bs, s->refcount_block_cache, offset, + &refcount_block); + if (ret < 0) { + fprintf(stderr, "Could not fetch refcount block\n");strerror(-ret) would be useful information in almost all of the error cases.
Yes, right.
+ qcow2_free_clusters(bs, new_offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + res->corruptions++; + continue; + } + /* new block has not yet been entered into refcount table, + * therefore it is no refcount block yet (regarding this + * check) */ + ret = qcow2_pre_write_overlap_check(bs, QCOW2_OL_DEFAULT, + new_offset, s->cluster_sectors * BDRV_SECTOR_SIZE); + if (ret < 0) { + fprintf(stderr, "Could not write refcount block (would " + "overlap with existing metadata)\n"); + /* the image will be marked corrupt here, so don't even + * attempt on freeing the cluster */ + res->corruptions++; + goto fail; + } + /* write to new block */ + ret = bdrv_write(bs->file, new_offset >> BDRV_SECTOR_BITS, + refcount_block, s->cluster_sectors); + if (ret < 0) { + fprintf(stderr, "Could not write refcount block\n"); + qcow2_free_clusters(bs, new_offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + res->corruptions++; + continue; + } + /* update refcount table */ + assert(!(new_offset & (s->cluster_size - 1))); + s->refcount_table[i] = new_offset; + ret = write_reftable_entry(bs, i); + if (ret < 0) { + fprintf(stderr, "Could not update refcount table\n"); + s->refcount_table[i] = offset; + qcow2_free_clusters(bs, new_offset, s->cluster_size, + QCOW2_DISCARD_ALWAYS); + res->corruptions++; + continue; + } + qcow2_cache_put(bs, s->refcount_block_cache, + &refcount_block); + /* update refcounts */ + if ((new_offset >> s->cluster_bits) >= nb_clusters) { + /* increase refcount_table size if necessary */ + int old_nb_clusters = nb_clusters; + nb_clusters = (new_offset >> s->cluster_bits) + 1; + refcount_table = g_realloc(refcount_table, + nb_clusters * sizeof(uint16_t)); + memset(&refcount_table[old_nb_clusters], 0, (nb_clusters + - old_nb_clusters) * sizeof(uint16_t)); + } + refcount_table[cluster]--; + inc_refcounts(bs, res, refcount_table, nb_clusters, + new_offset, s->cluster_size);res->corruptions_fixed++?
Oh, yes, I forgot that.
+ } else { + res->corruptions++; + } } } } @@ -1364,10 +1443,108 @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, } }+ l2_table = g_malloc(s->l2_size * sizeof(uint64_t));qemu_blockalign() is better than g_malloc() because it avoids using a bounce buffer for O_DIRECT images.
Ah, okay. I'll include that in my other series as well.
+ + /* check OFLAG_COPIED */ + for (i = 0; i < s->l1_size; i++) { + uint64_t l1_entry = s->l1_table[i]; + uint64_t l2_offset = l1_entry & L1E_OFFSET_MASK; + bool l2_dirty = false; + int refcount; + + if (!l2_offset) { + continue; + } + + refcount = get_refcount(bs, l2_offset >> s->cluster_bits); + if (refcount < 0) { + /* don't print message nor increment check_errors, since the above + * loop will have done this already */ + continue; + } + if ((refcount == 1) != ((l1_entry & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "%s OFLAG_COPIED L2 cluster: l1_entry=%" PRIx64 + " refcount=%d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : + "ERROR", + l1_entry, refcount); + if (fix & BDRV_FIX_ERRORS) { + s->l1_table[i] = refcount == 1 + ? l1_entry | QCOW_OFLAG_COPIED + : l1_entry & ~QCOW_OFLAG_COPIED; + ret = qcow2_write_l1_entry(bs, i); + if (ret < 0) { + res->check_errors++; + goto fail; + }else res->corruptions_fixed++?+ } else { + res->corruptions++; + } + } + + ret = bdrv_pread(bs->file, l2_offset, l2_table, + s->l2_size * sizeof(uint64_t)); + if (ret != s->l2_size * sizeof(uint64_t)) { + fprintf(stderr, "ERROR: Could not read L2 table\n"); + res->check_errors++; + if (ret >= 0) { + ret = -EIO; + }Doesn't happen. You can just check ret < 0 instead of ret != ... in the first place, bdrv_pread() never returns short reads.
Okay, I think I just saw it that way in some other code.
I didn't want to duplicate the whole comment from above, but maybe it makes sense if it's not obvious:+ goto fail; + } + + for (j = 0; j < s->l2_size; j++) { + uint64_t l2_entry = be64_to_cpu(l2_table[j]); + uint64_t data_offset; + + if (qcow2_get_cluster_type(l2_entry) != QCOW2_CLUSTER_NORMAL) { + continue; + } + + data_offset = l2_entry & L2E_OFFSET_MASK; + + refcount = get_refcount(bs, data_offset >> s->cluster_bits); + if (refcount < 0) { + /* don't print message nor increment check_errors */Why?
/* don't print message nor increment check_errors, since the above * loop will have done this already */
+ continue; + } + if ((refcount == 1) != ((l2_entry & QCOW_OFLAG_COPIED) != 0)) { + fprintf(stderr, "%s OFLAG_COPIED data cluster: l2_entry=%" + PRIx64 " refcount=%d\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : + "ERROR", + l2_entry, refcount); + if (fix & BDRV_FIX_ERRORS) { + l2_table[j] = cpu_to_be64(refcount == 1 + ? l2_entry | QCOW_OFLAG_COPIED + : l2_entry & ~QCOW_OFLAG_COPIED); + l2_dirty = true;res->corruptions_fixed+++ } else { + res->corruptions++; + } + } + } + + if (l2_dirty) { + ret = bdrv_pwrite(bs->file, l2_offset, l2_table, + s->l2_size * sizeof(uint64_t)); + if (ret != s->l2_size * sizeof(uint64_t)) { + fprintf(stderr, "ERROR: Could not write L2 table\n"); + res->check_errors++; + if (ret >= 0) { + ret = -EIO; + }bdrv_pwrite() also doesn't return short writes.+ goto fail; + } + } + } + + res->image_end_offset = (highest_cluster + 1) * s->cluster_size; ret = 0;fail:+ g_free(l2_table); g_free(refcount_table);return ret;Kevin
Max
[Prev in Thread] | Current Thread | [Next in Thread] |