qemu-block
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-block] [PATCH 6/7] nbd/server: Avoid unaligned read/block_status f


From: Eric Blake
Subject: [Qemu-block] [PATCH 6/7] nbd/server: Avoid unaligned read/block_status from backing
Date: Tue, 2 Apr 2019 22:05:25 -0500

The NBD server code used bdrv_block_status_above() to determine where
to fragment structured read and block status replies.  However, the
protocol can only advertise the active layer's minimum block size; if
the active layer is backed by another file with smaller alignment,
then we can end up leaking unaligned results back through to the
client, in violation of the spec.

Fix this by exposing a new bdrv_block_status_aligned() function around
the recently-added internal bdrv_co_block_status_aligned, to guarantee
that all block status answers from backing layers are rounded up to
the alignment of the current layer.  Note that the underlying function
still requires aligned boundaries, but the public function copes with
unaligned inputs.

iotest 241 does not change in output, but running it manually with
traces shows the improved behavior; furthermore, reverting 737d3f5244
but leaving this patch in place lets the test pass (whereas before the
test would fail because the client had to work around the server's
non-compliance).

Note that while this fixes NBD_CMD_READ and NBD_CMD_BLOCK_STATUS for
base:allocation (because those rely on bdrv_block_status*), we still
have a theoretical compliance problem with NBD_CMD_BLOCK_STATUS for
qemu:dirty-bitmap:NN when visiting a bitmap created at a smaller
granularity than what we advertised. On the other hand, dirty bitmap
granularity cannot go below 512, and without the use of blkdebug
(which in turn needs patches before it can properly find dirty bitmaps
or track status from qcow2 backing chains), I can't provoke an NBD
server to advertise an alignment larger than 512.

Signed-off-by: Eric Blake <address@hidden>
---
 include/block/block.h |  2 ++
 block/io.c            | 47 ++++++++++++++++++++++++++++++++++++++-----
 nbd/server.c          | 14 ++++++-------
 3 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/include/block/block.h b/include/block/block.h
index c7a26199aa0..3f38fa57f2d 100644
--- a/include/block/block.h
+++ b/include/block/block.h
@@ -444,6 +444,8 @@ int bdrv_block_status(BlockDriverState *bs, int64_t offset,
 int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
                             int64_t offset, int64_t bytes, int64_t *pnum,
                             int64_t *map, BlockDriverState **file);
+int bdrv_block_status_aligned(BlockDriverState *bs, int64_t offset,
+                              int64_t bytes, int64_t *pnum);
 int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
                       int64_t *pnum);
 int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
diff --git a/block/io.c b/block/io.c
index 936877d3745..5e6c0d06018 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1981,6 +1981,7 @@ int bdrv_flush_all(void)
 typedef struct BdrvCoBlockStatusData {
     BlockDriverState *bs;
     BlockDriverState *base;
+    uint32_t align;
     bool want_zero;
     int64_t offset;
     int64_t bytes;
@@ -2298,6 +2299,7 @@ early_out:

 static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
                                                    BlockDriverState *base,
+                                                   uint32_t align,
                                                    bool want_zero,
                                                    int64_t offset,
                                                    int64_t bytes,
@@ -2311,8 +2313,8 @@ static int coroutine_fn 
bdrv_co_block_status_above(BlockDriverState *bs,

     assert(bs != base);
     for (p = bs; p != base; p = backing_bs(p)) {
-        ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
-                                   file);
+        ret = bdrv_co_block_status_aligned(p, align, want_zero, offset, bytes,
+                                           pnum, map, file);
         if (ret < 0) {
             break;
         }
@@ -2342,7 +2344,7 @@ static void coroutine_fn 
bdrv_block_status_above_co_entry(void *opaque)
     BdrvCoBlockStatusData *data = opaque;

     data->ret = bdrv_co_block_status_above(data->bs, data->base,
-                                           data->want_zero,
+                                           data->align, data->want_zero,
                                            data->offset, data->bytes,
                                            data->pnum, data->map, data->file);
     data->done = true;
@@ -2356,6 +2358,7 @@ static void coroutine_fn 
bdrv_block_status_above_co_entry(void *opaque)
  */
 static int bdrv_common_block_status_above(BlockDriverState *bs,
                                           BlockDriverState *base,
+                                          uint32_t align,
                                           bool want_zero, int64_t offset,
                                           int64_t bytes, int64_t *pnum,
                                           int64_t *map,
@@ -2365,6 +2368,7 @@ static int 
bdrv_common_block_status_above(BlockDriverState *bs,
     BdrvCoBlockStatusData data = {
         .bs = bs,
         .base = base,
+        .align = align,
         .want_zero = want_zero,
         .offset = offset,
         .bytes = bytes,
@@ -2389,7 +2393,7 @@ int bdrv_block_status_above(BlockDriverState *bs, 
BlockDriverState *base,
                             int64_t offset, int64_t bytes, int64_t *pnum,
                             int64_t *map, BlockDriverState **file)
 {
-    return bdrv_common_block_status_above(bs, base, true, offset, bytes,
+    return bdrv_common_block_status_above(bs, base, 1, true, offset, bytes,
                                           pnum, map, file);
 }

@@ -2400,13 +2404,46 @@ int bdrv_block_status(BlockDriverState *bs, int64_t 
offset, int64_t bytes,
                                    offset, bytes, pnum, map, file);
 }

+/*
+ * Similar to bdrv_block_status_above(bs, NULL, ...), but ensures that
+ * the answer matches the minimum alignment of bs (smaller alignments
+ * in layers above will not leak through to the active layer). It is
+ * assumed that callers do not care about the resulting mapping of
+ * offsets to an underlying BDS.
+ */
+int bdrv_block_status_aligned(BlockDriverState *bs, int64_t offset,
+                              int64_t bytes, int64_t *pnum)
+{
+    /* Widen the request to aligned boundaries */
+    int64_t aligned_offset, aligned_bytes;
+    uint32_t align = bs->bl.request_alignment;
+    int ret;
+
+    assert(pnum);
+    aligned_offset = QEMU_ALIGN_DOWN(offset, align);
+    aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
+    ret = bdrv_common_block_status_above(bs, NULL, align, true, aligned_offset,
+                                         aligned_bytes, pnum, NULL, NULL);
+    if (ret < 0) {
+        *pnum = 0;
+        return ret;
+    }
+    assert(*pnum && QEMU_IS_ALIGNED(*pnum, align) &&
+           align > offset - aligned_offset);
+    *pnum -= offset - aligned_offset;
+    if (*pnum > bytes) {
+        *pnum = bytes;
+    }
+    return ret;
+}
+
 int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
                                    int64_t bytes, int64_t *pnum)
 {
     int ret;
     int64_t dummy;

-    ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset,
+    ret = bdrv_common_block_status_above(bs, backing_bs(bs), 1, false, offset,
                                          bytes, pnum ? pnum : &dummy, NULL,
                                          NULL);
     if (ret < 0) {
diff --git a/nbd/server.c b/nbd/server.c
index faa3c7879fd..576deb931c8 100644
--- a/nbd/server.c
+++ b/nbd/server.c
@@ -1792,7 +1792,7 @@ static int coroutine_fn 
nbd_co_send_structured_error(NBDClient *client,
 }

 /* Do a sparse read and send the structured reply to the client.
- * Returns -errno if sending fails. bdrv_block_status_above() failure is
+ * Returns -errno if sending fails. bdrv_block_status_aligned() failure is
  * reported to the client, at which point this function succeeds.
  */
 static int coroutine_fn nbd_co_send_sparse_read(NBDClient *client,
@@ -1808,10 +1808,9 @@ static int coroutine_fn 
nbd_co_send_sparse_read(NBDClient *client,

     while (progress < size) {
         int64_t pnum;
-        int status = bdrv_block_status_above(blk_bs(exp->blk), NULL,
-                                             offset + progress,
-                                             size - progress, &pnum, NULL,
-                                             NULL);
+        int status = bdrv_block_status_aligned(blk_bs(exp->blk),
+                                               offset + progress,
+                                               size - progress, &pnum);
         bool final;

         if (status < 0) {
@@ -1864,7 +1863,7 @@ static int coroutine_fn nbd_co_send_sparse_read(NBDClient 
*client,
  * length encoded (which may be smaller than the original), and update
  * @nb_extents to the number of extents used.
  *
- * Returns zero on success and -errno on bdrv_block_status_above failure.
+ * Returns zero on success and -errno on bdrv_block_status_aligned failure.
  */
 static int blockstatus_to_extents(BlockDriverState *bs, uint64_t offset,
                                   uint64_t *bytes, NBDExtent *extents,
@@ -1878,8 +1877,7 @@ static int blockstatus_to_extents(BlockDriverState *bs, 
uint64_t offset,
     while (remaining_bytes) {
         uint32_t flags;
         int64_t num;
-        int ret = bdrv_block_status_above(bs, NULL, offset, remaining_bytes,
-                                          &num, NULL, NULL);
+        int ret = bdrv_block_status_aligned(bs, offset, remaining_bytes, &num);

         if (ret < 0) {
             return ret;
-- 
2.20.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]