[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v2 08/24] raw: Probe required direct I/O alignment
From: |
Kevin Wolf |
Subject: |
[Qemu-devel] [PATCH v2 08/24] raw: Probe required direct I/O alignment |
Date: |
Fri, 13 Dec 2013 14:22:43 +0100 |
From: Paolo Bonzini <address@hidden>
Add a bs->request_alignment field that contains the required
offset/length alignment for I/O requests and fill it in the raw block
drivers. Use ioctls if possible, else see what alignment it takes for
O_DIRECT to succeed.
While at it, also expose the memory alignment requirements, which may be
(and in practice are) different from the disk alignment requirements.
Signed-off-by: Paolo Bonzini <address@hidden>
Signed-off-by: Kevin Wolf <address@hidden>
---
block.c | 3 ++
block/raw-posix.c | 102 ++++++++++++++++++++++++++++++++++++++--------
block/raw-win32.c | 41 +++++++++++++++++++
include/block/block_int.h | 3 ++
4 files changed, 132 insertions(+), 17 deletions(-)
diff --git a/block.c b/block.c
index 3504d17..b86e754 100644
--- a/block.c
+++ b/block.c
@@ -813,6 +813,7 @@ static int bdrv_open_common(BlockDriverState *bs,
BlockDriverState *file,
bs->open_flags = flags;
bs->guest_block_size = 512;
+ bs->request_alignment = 512;
bs->zero_beyond_eof = true;
open_flags = bdrv_open_flags(bs, flags);
bs->read_only = !(open_flags & BDRV_O_RDWR);
@@ -881,6 +882,8 @@ static int bdrv_open_common(BlockDriverState *bs,
BlockDriverState *file,
}
bdrv_refresh_limits(bs);
+ assert(bdrv_opt_mem_align(bs) != 0);
+ assert(bs->request_alignment != 0);
#ifndef _WIN32
if (bs->is_temporary) {
diff --git a/block/raw-posix.c b/block/raw-posix.c
index 10c6b34..e8e75a7 100644
--- a/block/raw-posix.c
+++ b/block/raw-posix.c
@@ -127,6 +127,8 @@ typedef struct BDRVRawState {
int fd;
int type;
int open_flags;
+ size_t buf_align;
+
#if defined(__linux__)
/* linux floppy specific */
int64_t fd_open_time;
@@ -213,6 +215,76 @@ static int raw_normalize_devicepath(const char **filename)
}
#endif
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ char *buf;
+ unsigned int sector_size;
+
+ /* For /dev/sg devices the alignment is not really used.
+ With buffered I/O, we don't have any restrictions. */
+ if (bs->sg || !(s->open_flags & O_DIRECT)) {
+ bs->request_alignment = 1;
+ s->buf_align = 1;
+ return;
+ }
+
+ /* Try a few ioctls to get the right size */
+ bs->request_alignment = 0;
+ s->buf_align = 0;
+
+#ifdef BLKSSZGET
+ if (ioctl(s->fd, BLKSSZGET, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DKIOCGETBLOCKSIZE
+ if (ioctl(s->fd, DKIOCGETBLOCKSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef DIOCGSECTORSIZE
+ if (ioctl(s->fd, DIOCGSECTORSIZE, §or_size) >= 0) {
+ bs->request_alignment = sector_size;
+ }
+#endif
+#ifdef CONFIG_XFS
+ if (s->is_xfs) {
+ struct dioattr da;
+ if (xfsctl(NULL, s->fd, XFS_IOC_DIOINFO, &da) >= 0) {
+ bs->request_alignment = da.d_miniosz;
+ /* The kernel returns wrong information for d_mem */
+ /* s->buf_align = da.d_mem; */
+ }
+ }
+#endif
+
+ /* If we could not get the sizes so far, we can only guess them */
+ if (!s->buf_align) {
+ size_t align;
+ buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf + align, MAX_BLOCKSIZE, 0) >= 0) {
+ s->buf_align = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+
+ if (!bs->request_alignment) {
+ size_t align;
+ buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE);
+ for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) {
+ if (pread(s->fd, buf, align, 0) >= 0) {
+ bs->request_alignment = align;
+ break;
+ }
+ }
+ qemu_vfree(buf);
+ }
+}
+
static void raw_parse_flags(int bdrv_flags, int *open_flags)
{
assert(open_flags != NULL);
@@ -463,7 +535,6 @@ static int raw_reopen_prepare(BDRVReopenState *state,
return ret;
}
-
static void raw_reopen_commit(BDRVReopenState *state)
{
BDRVRawReopenState *raw_s = state->opaque;
@@ -499,23 +570,15 @@ static void raw_reopen_abort(BDRVReopenState *state)
state->opaque = NULL;
}
+static int raw_refresh_limits(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
-/* XXX: use host sector size if necessary with:
-#ifdef DIOCGSECTORSIZE
- {
- unsigned int sectorsize = 512;
- if (!ioctl(fd, DIOCGSECTORSIZE, §orsize) &&
- sectorsize > bufsize)
- bufsize = sectorsize;
- }
-#endif
-#ifdef CONFIG_COCOA
- uint32_t blockSize = 512;
- if ( !ioctl( fd, DKIOCGETBLOCKSIZE, &blockSize ) && blockSize >
bufsize) {
- bufsize = blockSize;
- }
-#endif
-*/
+ raw_probe_alignment(bs);
+ bs->bl.opt_mem_alignment = s->buf_align;
+
+ return 0;
+}
static ssize_t handle_aiocb_ioctl(RawPosixAIOData *aiocb)
{
@@ -1363,6 +1426,7 @@ static BlockDriver bdrv_file = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = raw_aio_discard,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1740,6 +1804,7 @@ static BlockDriver bdrv_host_device = {
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
.bdrv_aio_discard = hdev_aio_discard,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1871,6 +1936,7 @@ static BlockDriver bdrv_host_floppy = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -1981,6 +2047,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
@@ -2110,6 +2177,7 @@ static BlockDriver bdrv_host_cdrom = {
.bdrv_aio_readv = raw_aio_readv,
.bdrv_aio_writev = raw_aio_writev,
.bdrv_aio_flush = raw_aio_flush,
+ .bdrv_refresh_limits = raw_refresh_limits,
.bdrv_truncate = raw_truncate,
.bdrv_getlength = raw_getlength,
diff --git a/block/raw-win32.c b/block/raw-win32.c
index 2bad5a3..6fe64d2 100644
--- a/block/raw-win32.c
+++ b/block/raw-win32.c
@@ -202,6 +202,35 @@ static int set_sparse(int fd)
NULL, 0, NULL, 0, &returned, NULL);
}
+static void raw_probe_alignment(BlockDriverState *bs)
+{
+ BDRVRawState *s = bs->opaque;
+ DWORD sectorsPerCluster, freeClusters, totalClusters, count;
+ DISK_GEOMETRY_EX dg;
+ BOOL status;
+
+ if (s->type == FTYPE_CD) {
+ bs->request_alignment = 2048;
+ return;
+ }
+ if (s->type == FTYPE_HARDDISK) {
+ status = DeviceIoControl(s->hfile, IOCTL_DISK_GET_DRIVE_GEOMETRY_EX,
+ NULL, 0, &dg, sizeof(dg), &count, NULL);
+ if (status != 0) {
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ return;
+ }
+ /* try GetDiskFreeSpace too */
+ }
+
+ if (s->drive_path[0]) {
+ GetDiskFreeSpace(s->drive_path, §orsPerCluster,
+ &dg.Geometry.BytesPerSector,
+ &freeClusters, &totalClusters);
+ bs->request_alignment = dg.Geometry.BytesPerSector;
+ }
+}
+
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
{
assert(access_flags != NULL);
@@ -269,6 +298,17 @@ static int raw_open(BlockDriverState *bs, QDict *options,
int flags,
}
}
+ if (filename[0] && filename[1] == ':') {
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
+ } else if (filename[0] == '\\' && filename[1] == '\\') {
+ s->drive_path[0] = 0;
+ } else {
+ /* Relative path. */
+ char buf[MAX_PATH];
+ GetCurrentDirectory(MAX_PATH, buf);
+ snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", buf[0]);
+ }
+
s->hfile = CreateFile(filename, access_flags,
FILE_SHARE_READ, NULL,
OPEN_EXISTING, overlapped, NULL);
@@ -293,6 +333,7 @@ static int raw_open(BlockDriverState *bs, QDict *options,
int flags,
s->aio = aio;
}
+ raw_probe_alignment(bs);
ret = 0;
fail:
qemu_opts_del(opts);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 9921cd3..0a01b69 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -319,6 +319,9 @@ struct BlockDriverState {
/* Whether produces zeros when read beyond eof */
bool zero_beyond_eof;
+ /* Alignment requirement for offset/length of I/O requests */
+ unsigned int request_alignment;
+
/* the block size for which the guest device expects atomicity */
int guest_block_size;
--
1.8.1.4
- [Qemu-devel] [PATCH v2 00/24] block: Support for 512b-on-4k emulatio, Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 02/24] block: Inherit opt_transfer_length, Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 03/24] block: Update BlockLimits when they might have changed, Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 01/24] block: Move initialisation of BlockLimits to bdrv_refresh_limits(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 04/24] qemu_memalign: Allow small alignments, Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 06/24] block: Don't use guest sector size for qemu_blockalign(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 07/24] block: rename buffer_alignment to guest_block_size, Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 08/24] raw: Probe required direct I/O alignment,
Kevin Wolf <=
- [Qemu-devel] [PATCH v2 05/24] block: Detect unaligned length in bdrv_qiov_is_aligned(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 09/24] block: Introduce bdrv_aligned_preadv(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 10/24] block: Introduce bdrv_co_do_preadv(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 11/24] block: Introduce bdrv_aligned_pwritev(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 12/24] block: write: Handle COR dependency after I/O throttling, Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 13/24] block: Introduce bdrv_co_do_pwritev(), Kevin Wolf, 2013/12/13
- [Qemu-devel] [PATCH v2 14/24] block: Switch BdrvTrackedRequest to byte granularity, Kevin Wolf, 2013/12/13