[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algori
From: |
Zhi Yong Wu |
Subject: |
[Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm |
Date: |
Thu, 8 Sep 2011 18:11:07 +0800 |
Note:
1.) When the bps/iops limits are set to a very small value, such as 511
bytes/s, the VM will hang. We are considering how to handle this scenario.
2.) When the "dd" command is issued in the guest with its bs option set to a
large value such as "bs=1024K", the resulting speed will be slightly higher than
the limits.
If you have any good ideas for solving these problems, please let us know. :)
Signed-off-by: Zhi Yong Wu <address@hidden>
---
block.c | 259 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
block.h | 1 -
2 files changed, 248 insertions(+), 12 deletions(-)
diff --git a/block.c b/block.c
index cd75183..c08fde8 100644
--- a/block.c
+++ b/block.c
@@ -30,6 +30,9 @@
#include "qemu-objects.h"
#include "qemu-coroutine.h"
+#include "qemu-timer.h"
+#include "block/blk-queue.h"
+
#ifdef CONFIG_BSD
#include <sys/types.h>
#include <sys/stat.h>
@@ -72,6 +75,13 @@ static int coroutine_fn bdrv_co_writev_em(BlockDriverState
*bs,
QEMUIOVector *iov);
static int coroutine_fn bdrv_co_flush_em(BlockDriverState *bs);
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+ double elapsed_time, uint64_t *wait);
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+ bool is_write, int64_t *wait);
+
static QTAILQ_HEAD(, BlockDriverState) bdrv_states =
QTAILQ_HEAD_INITIALIZER(bdrv_states);
@@ -745,6 +755,11 @@ int bdrv_open(BlockDriverState *bs, const char *filename,
int flags,
bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
}
+ /* throttling disk I/O limits */
+ if (bs->io_limits_enabled) {
+ bdrv_io_limits_enable(bs);
+ }
+
return 0;
unlink_and_fail:
@@ -783,6 +798,18 @@ void bdrv_close(BlockDriverState *bs)
if (bs->change_cb)
bs->change_cb(bs->change_opaque, CHANGE_MEDIA);
}
+
+ /* throttling disk I/O limits */
+ if (bs->block_queue) {
+ qemu_del_block_queue(bs->block_queue);
+ bs->block_queue = NULL;
+ }
+
+ if (bs->block_timer) {
+ qemu_del_timer(bs->block_timer);
+ qemu_free_timer(bs->block_timer);
+ bs->block_timer = NULL;
+ }
}
void bdrv_close_all(void)
@@ -2341,16 +2368,48 @@ BlockDriverAIOCB *bdrv_aio_readv(BlockDriverState *bs,
int64_t sector_num,
BlockDriverCompletionFunc *cb, void *opaque)
{
BlockDriver *drv = bs->drv;
-
+ BlockDriverAIOCB *ret;
+ int64_t wait_time = -1;
+printf("sector_num=%ld, nb_sectors=%d\n", sector_num, nb_sectors);
trace_bdrv_aio_readv(bs, sector_num, nb_sectors, opaque);
- if (!drv)
- return NULL;
- if (bdrv_check_request(bs, sector_num, nb_sectors))
+ if (!drv || bdrv_check_request(bs, sector_num, nb_sectors)) {
return NULL;
+ }
+
+ /* throttling disk read I/O */
+ if (bs->io_limits_enabled) {
+ if (bdrv_exceed_io_limits(bs, nb_sectors, false, &wait_time)) {
+ ret = qemu_block_queue_enqueue(bs->block_queue, bs, bdrv_aio_readv,
+ sector_num, qiov, nb_sectors, cb, opaque);
+ printf("wait_time=%ld\n", wait_time);
+ if (wait_time != -1) {
+ printf("reset block timer\n");
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ }
+
+ if (ret) {
+ printf("ori ret is not null\n");
+ } else {
+ printf("ori ret is null\n");
+ }
+
+ return ret;
+ }
+ }
- return drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
+ ret = drv->bdrv_aio_readv(bs, sector_num, qiov, nb_sectors,
cb, opaque);
+ if (ret) {
+ if (bs->io_limits_enabled) {
+ bs->io_disps.bytes[BLOCK_IO_LIMIT_READ] +=
+ (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+ bs->io_disps.ios[BLOCK_IO_LIMIT_READ]++;
+ }
+ }
+
+ return ret;
}
typedef struct BlockCompleteData {
@@ -2396,15 +2455,14 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs,
int64_t sector_num,
BlockDriver *drv = bs->drv;
BlockDriverAIOCB *ret;
BlockCompleteData *blk_cb_data;
+ int64_t wait_time = -1;
trace_bdrv_aio_writev(bs, sector_num, nb_sectors, opaque);
- if (!drv)
- return NULL;
- if (bs->read_only)
- return NULL;
- if (bdrv_check_request(bs, sector_num, nb_sectors))
+ if (!drv || bs->read_only
+ || bdrv_check_request(bs, sector_num, nb_sectors)) {
return NULL;
+ }
if (bs->dirty_bitmap) {
blk_cb_data = blk_dirty_cb_alloc(bs, sector_num, nb_sectors, cb,
@@ -2413,13 +2471,32 @@ BlockDriverAIOCB *bdrv_aio_writev(BlockDriverState *bs,
int64_t sector_num,
opaque = blk_cb_data;
}
+ /* throttling disk write I/O */
+ if (bs->io_limits_enabled) {
+ if (bdrv_exceed_io_limits(bs, nb_sectors, true, &wait_time)) {
+ ret = qemu_block_queue_enqueue(bs->block_queue, bs,
bdrv_aio_writev,
+ sector_num, qiov, nb_sectors, cb, opaque);
+ if (wait_time != -1) {
+ qemu_mod_timer(bs->block_timer,
+ wait_time + qemu_get_clock_ns(vm_clock));
+ }
+
+ return ret;
+ }
+ }
+
ret = drv->bdrv_aio_writev(bs, sector_num, qiov, nb_sectors,
cb, opaque);
-
if (ret) {
if (bs->wr_highest_sector < sector_num + nb_sectors - 1) {
bs->wr_highest_sector = sector_num + nb_sectors - 1;
}
+
+ if (bs->io_limits_enabled) {
+ bs->io_disps.bytes[BLOCK_IO_LIMIT_WRITE] +=
+ (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+ bs->io_disps.ios[BLOCK_IO_LIMIT_WRITE]++;
+ }
}
return ret;
@@ -2684,6 +2761,166 @@ void bdrv_aio_cancel(BlockDriverAIOCB *acb)
acb->pool->cancel(acb);
}
+/*
+ * Decide whether a request of nb_sectors sectors would exceed the
+ * bytes-per-second limit within the current slice.
+ *
+ * The limit applied is io_limits.bps[BLOCK_IO_LIMIT_TOTAL] when it is
+ * non-zero (bytes dispatched in both directions are then counted together),
+ * otherwise io_limits.bps[is_write]. When neither is set the request is
+ * never throttled by this check.
+ *
+ * elapsed_time is compared against a value in seconds (bytes divided by
+ * bytes-per-second), so it is expected to be the time since slice_start in
+ * seconds.
+ *
+ * If wait is non-NULL it receives 0 when the request may be dispatched now,
+ * or the approximate delay before it can be dispatched otherwise.
+ *
+ * Returns true if the request has to be delayed, false otherwise.
+ */
+static bool bdrv_exceed_bps_limits(BlockDriverState *bs, int nb_sectors,
+                 bool is_write, double elapsed_time, uint64_t *wait) {
+    uint64_t bps_limit = 0;
+    double   bytes_limit, bytes_disp, bytes_res;
+    double   slice_time, wait_time;
+
+    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+        bps_limit = bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL];
+    } else if (bs->io_limits.bps[is_write]) {
+        bps_limit = bs->io_limits.bps[is_write];
+    } else {
+        /* No bps limit configured for this direction. */
+        if (wait) {
+            *wait = 0;
+        }
+
+        return false;
+    }
+
+    /* Bytes allowed over the whole slice: limit * slice length in seconds. */
+    slice_time = bs->slice_end - bs->slice_start;
+    slice_time /= (NANOSECONDS_PER_SECOND);
+    bytes_limit = bps_limit * slice_time;
+    bytes_disp  = bs->io_disps.bytes[is_write];
+    if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]) {
+        /* A total limit covers reads and writes together. */
+        bytes_disp += bs->io_disps.bytes[!is_write];
+    }
+
+    bytes_res = (unsigned) nb_sectors * BDRV_SECTOR_SIZE;
+
+    if (bytes_disp + bytes_res <= bytes_limit) {
+        if (wait) {
+            *wait = 0;
+        }
+
+        return false;
+    }
+
+    /*
+     * Calc approx time to dispatch. Clamp at zero, as the iops check does:
+     * converting a negative double to uint64_t is undefined behaviour.
+     */
+    wait_time = (bytes_disp + bytes_res) / bps_limit;
+    if (wait_time > elapsed_time) {
+        wait_time = wait_time - elapsed_time;
+    } else {
+        wait_time = 0;
+    }
+
+    if (wait) {
+        /*
+         * NOTE(review): this scales seconds to nanoseconds only if
+         * BLOCK_IO_SLICE_TIME * 10 equals NANOSECONDS_PER_SECOND - confirm.
+         */
+        *wait = wait_time * BLOCK_IO_SLICE_TIME * 10;
+    }
+
+    return true;
+}
+
+/*
+ * Decide whether dispatching one more request would exceed the
+ * operations-per-second limit within the current slice.
+ *
+ * io_limits.iops[BLOCK_IO_LIMIT_TOTAL] is used when it is non-zero (requests
+ * dispatched in both directions are then counted together); otherwise
+ * io_limits.iops[is_write] is used. With neither set nothing is throttled.
+ *
+ * If wait is non-NULL it receives 0 when the request may go now, or the
+ * approximate delay before it can be dispatched otherwise.
+ *
+ * Returns true if the request has to be delayed, false otherwise.
+ */
+static bool bdrv_exceed_iops_limits(BlockDriverState *bs, bool is_write,
+                             double elapsed_time, uint64_t *wait) {
+    bool     total_set = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+    bool     dir_set   = bs->io_limits.iops[is_write];
+    bool     exceeded  = false;
+    uint64_t limit     = 0;
+    double   slice_secs, allowed, dispatched;
+    double   eta = 0;
+
+    if (total_set || dir_set) {
+        /* The combined limit takes precedence over the per-direction one. */
+        if (total_set) {
+            limit = bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL];
+        } else {
+            limit = bs->io_limits.iops[is_write];
+        }
+
+        /* Requests allowed over the slice: limit * slice length in seconds. */
+        slice_secs  = bs->slice_end - bs->slice_start;
+        slice_secs /= (NANOSECONDS_PER_SECOND);
+        allowed     = limit * slice_secs;
+
+        dispatched = bs->io_disps.ios[is_write];
+        if (total_set) {
+            dispatched += bs->io_disps.ios[!is_write];
+        }
+
+        if (!(dispatched + 1 <= allowed)) {
+            /* Calc approx time to dispatch, never going below zero. */
+            eta = (dispatched + 1) / limit;
+            if (eta > elapsed_time) {
+                eta = eta - elapsed_time;
+            } else {
+                eta = 0;
+            }
+
+            exceeded = true;
+        }
+    }
+
+    if (wait) {
+        if (exceeded) {
+            *wait = eta * BLOCK_IO_SLICE_TIME * 10;
+        } else {
+            *wait = 0;
+        }
+    }
+
+    return exceeded;
+}
+
+/*
+ * Decide whether a request must be throttled, combining the bps and iops
+ * checks, and maintain the accounting slice on bs.
+ *
+ * Slice handling: if the current vm_clock time lies strictly inside
+ * (slice_start, slice_end) the slice is extended to end BLOCK_IO_SLICE_TIME
+ * from now; otherwise a new slice starting now is opened and the dispatched
+ * byte and request counters for both directions are reset.
+ *
+ * If wait is non-NULL it receives:
+ *   -1  when requests are already pending in bs->block_queue and a limit
+ *       applies to this direction (the request is queued behind them),
+ *    0  when the request may be dispatched now,
+ *   >0  the larger of the bps and iops wait values otherwise.
+ *
+ * Returns true if the request must be queued, false if it may go now.
+ */
+static bool bdrv_exceed_io_limits(BlockDriverState *bs, int nb_sectors,
+                           bool is_write, int64_t *wait) {
+    int64_t  now, max_wait;
+    uint64_t bps_wait = 0, iops_wait = 0;
+    double   elapsed_time;
+    bool     bps_ret, iops_ret;
+
+    now = qemu_get_clock_ns(vm_clock);
+    if ((bs->slice_start < now)
+        && (bs->slice_end > now)) {
+        bs->slice_end = now + BLOCK_IO_SLICE_TIME;
+    } else {
+        bs->slice_start = now;
+        bs->slice_end   = now + BLOCK_IO_SLICE_TIME;
+
+        bs->io_disps.bytes[is_write]  = 0;
+        bs->io_disps.bytes[!is_write] = 0;
+
+        bs->io_disps.ios[is_write]    = 0;
+        bs->io_disps.ios[!is_write]   = 0;
+    }
+
+    /* If a limit was exceeded, immediately queue this request */
+    if (qemu_block_queue_has_pending(bs->block_queue)) {
+        if (bs->io_limits.bps[BLOCK_IO_LIMIT_TOTAL]
+            || bs->io_limits.bps[is_write] || bs->io_limits.iops[is_write]
+            || bs->io_limits.iops[BLOCK_IO_LIMIT_TOTAL]) {
+            if (wait) {
+                /* -1: queue the request; no wait time is reported. */
+                *wait = -1;
+            }
+
+            return true;
+        }
+    }
+
+    /* Time spent in the current slice, in seconds. */
+    elapsed_time  = now - bs->slice_start;
+    elapsed_time /= (NANOSECONDS_PER_SECOND);
+
+    bps_ret  = bdrv_exceed_bps_limits(bs, nb_sectors,
+                                      is_write, elapsed_time, &bps_wait);
+    iops_ret = bdrv_exceed_iops_limits(bs, is_write,
+                                      elapsed_time, &iops_wait);
+    if (bps_ret || iops_ret) {
+        max_wait = bps_wait > iops_wait ? bps_wait : iops_wait;
+        if (wait) {
+            *wait = max_wait;
+        }
+
+        /* Make sure the slice does not end before the computed wait. */
+        now = qemu_get_clock_ns(vm_clock);
+        if (bs->slice_end < now + max_wait) {
+            bs->slice_end = now + max_wait;
+        }
+
+        return true;
+    }
+
+    if (wait) {
+        *wait = 0;
+    }
+
+    return false;
+}
/**************************************************************/
/* async block device emulation */
diff --git a/block.h b/block.h
index a3e69db..10d2828 100644
--- a/block.h
+++ b/block.h
@@ -107,7 +107,6 @@ int bdrv_change_backing_file(BlockDriverState *bs,
const char *backing_file, const char *backing_fmt);
void bdrv_register(BlockDriver *bdrv);
-
typedef struct BdrvCheckResult {
int corruptions;
int leaks;
--
1.7.6
- [Qemu-devel] [PATCH v8 0/4] The intro of QEMU block I/O throttling, Zhi Yong Wu, 2011/09/07
- [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm,
Zhi Yong Wu <=
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Marcelo Tosatti, 2011/09/09
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Zhi Yong Wu, 2011/09/12
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Marcelo Tosatti, 2011/09/14
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Zhi Yong Wu, 2011/09/19
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Marcelo Tosatti, 2011/09/20
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Zhi Yong Wu, 2011/09/20
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Zhi Yong Wu, 2011/09/21
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Zhi Yong Wu, 2011/09/21
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Zhi Yong Wu, 2011/09/26
- Re: [Qemu-devel] [PATCH v8 3/4] block: add block timer and throttling algorithm, Kevin Wolf, 2011/09/23