qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH] block: optimize zero writes with bdrv_write_zeroes


From: Peter Lieven
Subject: [Qemu-devel] [PATCH] block: optimize zero writes with bdrv_write_zeroes
Date: Thu, 27 Mar 2014 14:22:10 +0100

this patch tries to optimize zero write requests
by automatically using bdrv_write_zeroes if it is
supported by the format.

this should significantly speed up file system initialization and
should speed zero write test used to test backend storage performance.

the difference can simply be tested by e.g.

   dd if=/dev/zero of=/dev/vdX bs=1M

or

   mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/vdX

Signed-off-by: Peter Lieven <address@hidden>
---
RFCv2->v1  - fixed paramter parsing strncmp -> strcmp (Eric)
           - fixed typo (choosen->chosen)
           - added second example to commit msg

RFCv1->RFCv2: - add detect-zeroes=off|on|unmap knob to drive cmdline parameter
              - call zero detection only for format (bs->file != NULL)

 block.c                   |   39 ++++++++++++++++++++++++++++++++++++++-
 include/block/block_int.h |   12 ++++++++++++
 include/qemu-common.h     |    1 +
 qemu-options.hx           |    6 ++++++
 util/iov.c                |   21 +++++++++++++++++++++
 5 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index acb70fd..3f61f76 100644
--- a/block.c
+++ b/block.c
@@ -817,6 +817,25 @@ static int bdrv_assign_node_name(BlockDriverState *bs,
     return 0;
 }
 
+static int bdrv_set_detect_zeroes(BlockDriverState *bs,
+                                  const char *detect_zeroes,
+                                  Error **errp)
+{
+    if (!detect_zeroes || !strcmp(detect_zeroes, "off")) {
+        bs->detect_zeroes = BDRV_DETECT_ZEROES_OFF;
+    } else if (!strcmp(detect_zeroes, "on")) {
+        bs->detect_zeroes = BDRV_DETECT_ZEROES_ON;
+    } else if (!strcmp(detect_zeroes, "unmap")) {
+        bs->detect_zeroes = BDRV_DETECT_ZEROES_UNMAP;
+    } else {
+        error_setg(errp, "invalid value for detect-zeroes: %s",
+                   detect_zeroes);
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
 /*
  * Common part for opening disk images and files
  *
@@ -827,7 +846,7 @@ static int bdrv_open_common(BlockDriverState *bs, 
BlockDriverState *file,
 {
     int ret, open_flags;
     const char *filename;
-    const char *node_name = NULL;
+    const char *node_name = NULL, *detect_zeroes = NULL;
     Error *local_err = NULL;
 
     assert(drv != NULL);
@@ -855,6 +874,13 @@ static int bdrv_open_common(BlockDriverState *bs, 
BlockDriverState *file,
     }
     qdict_del(options, "node-name");
 
+    detect_zeroes = qdict_get_try_str(options, "detect-zeroes");
+    ret = bdrv_set_detect_zeroes(bs, detect_zeroes, errp);
+    if (ret < 0) {
+        return ret;
+    }
+    qdict_del(options, "detect-zeroes");
+
     /* bdrv_open() with directly using a protocol as drv. This layer is already
      * opened, so assign it to bs (while file becomes a closed 
BlockDriverState)
      * and return immediately. */
@@ -3179,6 +3205,17 @@ static int coroutine_fn 
bdrv_aligned_pwritev(BlockDriverState *bs,
 
     ret = notifier_with_return_list_notify(&bs->before_write_notifiers, req);
 
+    if (!ret && bs->detect_zeroes != BDRV_DETECT_ZEROES_OFF &&
+        !(flags & BDRV_REQ_ZERO_WRITE) && bs->file &&
+        drv->bdrv_co_write_zeroes && qemu_iovec_is_zero(qiov)) {
+        flags |= BDRV_REQ_ZERO_WRITE;
+        /* if the device was not opened with discard=on the below flag
+         * is immediately cleared again in bdrv_co_do_write_zeroes */
+        if (bs->detect_zeroes == BDRV_DETECT_ZEROES_UNMAP) {
+            flags |= BDRV_REQ_MAY_UNMAP;
+        }
+    }
+
     if (ret < 0) {
         /* Do nothing, write notifier decided to fail this request */
     } else if (flags & BDRV_REQ_ZERO_WRITE) {
diff --git a/include/block/block_int.h b/include/block/block_int.h
index cd5bc73..7a3013a 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -271,6 +271,17 @@ typedef struct BlockLimits {
 } BlockLimits;
 
 /*
+ * Different operation modes for automatic zero detection
+ * to speed the write operation up with bdrv_write_zeroes.
+ */
+typedef enum {
+    BDRV_DETECT_ZEROES_OFF   = 0x0,
+    BDRV_DETECT_ZEROES_ON    = 0x1,
+    /* also set the BDRV_MAY_UNMAP flag with bdrv_write_zeroes */
+    BDRV_DETECT_ZEROES_UNMAP = 0x2,
+} BdrvDetectZeroes;
+
+/*
  * Note: the function bdrv_append() copies and swaps contents of
  * BlockDriverStates, so if you add new fields to this struct, please
  * inspect bdrv_append() to determine if the new fields need to be
@@ -365,6 +376,7 @@ struct BlockDriverState {
     BlockJob *job;
 
     QDict *options;
+    BdrvDetectZeroes detect_zeroes;
 };
 
 int get_tmp_filename(char *filename, int size);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index c8a58a8..574da73 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -330,6 +330,7 @@ void qemu_iovec_concat(QEMUIOVector *dst,
 void qemu_iovec_concat_iov(QEMUIOVector *dst,
                            struct iovec *src_iov, unsigned int src_cnt,
                            size_t soffset, size_t sbytes);
+bool qemu_iovec_is_zero(QEMUIOVector *qiov);
 void qemu_iovec_destroy(QEMUIOVector *qiov);
 void qemu_iovec_reset(QEMUIOVector *qiov);
 size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
diff --git a/qemu-options.hx b/qemu-options.hx
index ee5437b..f824d9b 100644
--- a/qemu-options.hx
+++ b/qemu-options.hx
@@ -410,6 +410,7 @@ DEF("drive", HAS_ARG, QEMU_OPTION_drive,
     "       
[,cache=writethrough|writeback|none|directsync|unsafe][,format=f]\n"
     "       [,serial=s][,addr=A][,id=name][,aio=threads|native]\n"
     "       [,readonly=on|off][,copy-on-read=on|off]\n"
+    "       [,detect-zeroes=on|off|unmap]\n"
     "       [[,bps=b]|[[,bps_rd=r][,bps_wr=w]]]\n"
     "       [[,iops=i]|[[,iops_rd=r][,iops_wr=w]]]\n"
     "       [[,bps_max=bm]|[[,bps_rd_max=rm][,bps_wr_max=wm]]]\n"
@@ -470,6 +471,11 @@ Open drive @option{file} as read-only. Guest write 
attempts will fail.
 @item address@hidden
 @var{copy-on-read} is "on" or "off" and enables whether to copy read backing
 file sectors into the image file.
address@hidden address@hidden
address@hidden is "off", "on" or "unmap" and enables the automatic
+conversion of plain zero writes by the OS to driver specific optimized
+zero write commands. If "unmap" is chosen and @var{discard} is "on"
+a zero write may even be converted to an UNMAP operation.
 @end table
 
 By default, the @option{cache=writeback} mode is used. It will report data
diff --git a/util/iov.c b/util/iov.c
index 6569b5a..0b17392 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -335,6 +335,27 @@ void qemu_iovec_concat(QEMUIOVector *dst,
     qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
 }
 
+/*
+ *  check if the contents of the iovecs is all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = qiov->iov[i].iov_len & ~(4 * sizeof(long) - 1);
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
 void qemu_iovec_destroy(QEMUIOVector *qiov)
 {
     assert(qiov->nalloc != -1);
-- 
1.7.9.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]