[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 16/26] vfio-user: region read/write
From: |
John Levon |
Subject: |
[PATCH 16/26] vfio-user: region read/write |
Date: |
Wed, 8 Jan 2025 11:50:22 +0000 |
From: Jagannathan Raman <jag.raman@oracle.com>
Add support for posted writes on remote devices
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/helpers.c | 20 ++++--
hw/vfio/pci.c | 5 +-
hw/vfio/trace-events | 1 +
hw/vfio/user-pci.c | 5 ++
hw/vfio/user-protocol.h | 12 ++++
hw/vfio/user.c | 120 ++++++++++++++++++++++++++++++++++
hw/vfio/user.h | 1 +
include/hw/vfio/vfio-common.h | 3 +-
8 files changed, 158 insertions(+), 9 deletions(-)
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 802d6ae101..ea3dbfa96d 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -183,12 +183,15 @@ void vfio_region_write(void *opaque, hwaddr addr,
break;
}
- ret = vbasedev->io->region_write(vbasedev, region->nr, addr, size, &buf);
+ ret = vbasedev->io->region_write(vbasedev, region->nr, addr, size, &buf,
+ region->post_wr);
if (ret != size) {
+ const char *errmsg = ret < 0 ? strerror(-ret) : "short write";
+
error_report("%s(%s:region%d+0x%"HWADDR_PRIx", 0x%"PRIx64
- ",%d) failed: %m",
+ ",%d) failed: %s",
__func__, vbasedev->name, region->nr,
- addr, data, size);
+ addr, data, size, errmsg);
}
trace_vfio_region_write(vbasedev->name, region->nr, addr, data, size);
@@ -220,9 +223,11 @@ uint64_t vfio_region_read(void *opaque,
ret = vbasedev->io->region_read(vbasedev, region->nr, addr, size, &buf);
if (ret != size) {
- error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %m",
+ const char *errmsg = ret < 0 ? strerror(-ret) : "short read";
+
+ error_report("%s(%s:region%d+0x%"HWADDR_PRIx", %d) failed: %s",
__func__, vbasedev->name, region->nr,
- addr, size);
+ addr, size, errmsg);
return (uint64_t)-1;
}
switch (size) {
@@ -364,13 +369,14 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev,
VFIORegion *region,
region->size = info->size;
region->fd_offset = info->offset;
region->nr = index;
+ region->post_wr = false;
+
if (vbasedev->regfds != NULL) {
region->fd = vbasedev->regfds[index];
} else {
region->fd = vbasedev->fd;
}
-
if (region->size) {
region->mem = g_new0(MemoryRegion, 1);
memory_region_init_io(region->mem, obj, &vfio_region_ops,
@@ -827,7 +833,7 @@ static int vfio_io_region_read(VFIODevice *vbasedev,
uint8_t index, off_t off,
}
static int vfio_io_region_write(VFIODevice *vbasedev, uint8_t index, off_t off,
- uint32_t size, void *data)
+ uint32_t size, void *data, bool post)
{
struct vfio_region_info *info = vbasedev->regions[index];
int ret;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index b57059d676..90cf29325f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -51,7 +51,7 @@
(off), (size), (data)))
#define VDEV_CONFIG_WRITE(vbasedev, off, size, data) \
((vbasedev)->io->region_write((vbasedev), VFIO_PCI_CONFIG_REGION_INDEX, \
- (off), (size), (data)))
+ (off), (size), (data), false))
#define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
@@ -1780,6 +1780,9 @@ static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
bar->type = pci_bar & (bar->ioport ? ~PCI_BASE_ADDRESS_IO_MASK :
~PCI_BASE_ADDRESS_MEM_MASK);
bar->size = bar->region.size;
+
+ /* IO regions are sync, memory can be async */
+ bar->region.post_wr = (bar->ioport == 0);
}
static void vfio_bars_prepare(VFIOPCIDevice *vdev)
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index ee6d7a0d0a..da8af45ee9 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -189,3 +189,4 @@ vfio_user_send_write(uint16_t id, int wrote) " id 0x%x
wrote 0x%x"
vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d
minor %d caps: %s"
vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d"
vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) "
index %d flags 0x%x size 0x%"PRIx64
+vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d
offset 0x%"PRIx64" count %d"
diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c
index 60cd9c941c..aa5146db0a 100644
--- a/hw/vfio/user-pci.c
+++ b/hw/vfio/user-pci.c
@@ -40,6 +40,7 @@ struct VFIOUserPCIDevice {
VFIOPCIDevice device;
char *sock_name;
bool send_queued; /* all sends are queued */
+ bool no_post; /* all regions write are sync */
};
/*
@@ -102,6 +103,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error
**errp)
if (udev->send_queued) {
proxy->flags |= VFIO_PROXY_FORCE_QUEUED;
}
+ if (udev->no_post) {
+ proxy->flags |= VFIO_PROXY_NO_POST;
+ }
if (!vfio_user_validate_version(proxy, errp)) {
goto error;
@@ -173,6 +177,7 @@ static void vfio_user_instance_finalize(Object *obj)
static const Property vfio_user_pci_dev_properties[] = {
DEFINE_PROP_STRING("socket", VFIOUserPCIDevice, sock_name),
DEFINE_PROP_BOOL("x-send-queued", VFIOUserPCIDevice, send_queued, false),
+ DEFINE_PROP_BOOL("x-no-posted-writes", VFIOUserPCIDevice, no_post, false),
};
static void vfio_user_pci_dev_class_init(ObjectClass *klass, void *data)
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 6f70a48905..6987435e96 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -139,4 +139,16 @@ typedef struct {
uint64_t offset;
} VFIOUserRegionInfo;
+/*
+ * VFIO_USER_REGION_READ
+ * VFIO_USER_REGION_WRITE
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint64_t offset;
+ uint32_t region;
+ uint32_t count;
+ char data[];
+} VFIOUserRegionRW;
+
#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 44e8da8aa1..118314b363 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -55,6 +55,8 @@ static void vfio_user_cb(void *opaque);
static void vfio_user_request(void *opaque);
static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg);
+static void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds);
static void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds, int rsize);
static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
@@ -626,6 +628,33 @@ static int vfio_user_send_queued(VFIOUserProxy *proxy,
VFIOUserMsg *msg)
return 0;
}
+/*
+ * async send - msg can be queued, but will be freed when sent
+ */
+static void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds)
+{
+ VFIOUserMsg *msg;
+ int ret;
+
+ if (!(hdr->flags & (VFIO_USER_NO_REPLY | VFIO_USER_REPLY))) {
+ error_printf("vfio_user_send_async on sync message\n");
+ return;
+ }
+
+ QEMU_LOCK_GUARD(&proxy->lock);
+
+ msg = vfio_user_getmsg(proxy, hdr, fds);
+ msg->id = hdr->id;
+ msg->rsize = 0;
+ msg->type = VFIO_MSG_ASYNC;
+
+ ret = vfio_user_send_queued(proxy, msg);
+ if (ret < 0) {
+ vfio_user_recycle(proxy, msg);
+ }
+}
+
static void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds, int rsize)
{
@@ -1139,9 +1168,84 @@ static int vfio_user_get_region_info(VFIOUserProxy
*proxy,
trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);
memcpy(info, &msgp->argsz, info->argsz);
+
+ /* read-after-write hazard if guest can directly access region */
+ if (info->flags & VFIO_REGION_INFO_FLAG_MMAP) {
+ WITH_QEMU_LOCK_GUARD(&proxy->lock) {
+ proxy->flags |= VFIO_PROXY_NO_POST;
+ }
+ }
+
return 0;
}
+static int vfio_user_region_read(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data)
+{
+ g_autofree VFIOUserRegionRW *msgp = NULL;
+ int size = sizeof(*msgp) + count;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_READ, sizeof(*msgp), 0);
+ msgp->offset = offset;
+ msgp->region = index;
+ msgp->count = count;
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ vfio_user_send_wait(proxy, &msgp->hdr, NULL, size);
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply;
+ } else if (msgp->count > count) {
+ return -E2BIG;
+ } else {
+ memcpy(data, &msgp->data, msgp->count);
+ }
+
+ return msgp->count;
+}
+
+static int vfio_user_region_write(VFIOUserProxy *proxy, uint8_t index,
+ off_t offset, uint32_t count, void *data,
+ bool post)
+{
+ VFIOUserRegionRW *msgp = NULL;
+ int flags = post ? VFIO_USER_NO_REPLY : 0;
+ int size = sizeof(*msgp) + count;
+ int ret;
+
+ if (count > proxy->max_xfer_size) {
+ return -EINVAL;
+ }
+
+ msgp = g_malloc0(size);
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_REGION_WRITE, size, flags);
+ msgp->offset = offset;
+ msgp->region = index;
+ msgp->count = count;
+ memcpy(&msgp->data, data, count);
+ trace_vfio_user_region_rw(msgp->region, msgp->offset, msgp->count);
+
+ /* async send will free msg after it's sent */
+ if (post && !(proxy->flags & VFIO_PROXY_NO_POST)) {
+ vfio_user_send_async(proxy, &msgp->hdr, NULL);
+ return count;
+ }
+
+ vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0);
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ ret = -msgp->hdr.error_reply;
+ } else {
+ ret = count;
+ }
+
+ g_free(msgp);
+ return ret;
+}
+
/*
* Socket-based io_ops
@@ -1171,6 +1275,22 @@ static int vfio_user_io_get_region_info(VFIODevice
*vbasedev,
return 0;
}
+static int vfio_user_io_region_read(VFIODevice *vbasedev, uint8_t index,
+ off_t off, uint32_t size, void *data)
+{
+ return vfio_user_region_read(vbasedev->proxy, index, off, size, data);
+}
+
+static int vfio_user_io_region_write(VFIODevice *vbasedev, uint8_t index,
+ off_t off, unsigned size, void *data,
+ bool post)
+{
+ return vfio_user_region_write(vbasedev->proxy, index, off, size, data,
+ post);
+}
+
VFIODeviceIO vfio_dev_io_sock = {
.get_region_info = vfio_user_io_get_region_info,
+ .region_read = vfio_user_io_region_read,
+ .region_write = vfio_user_io_region_write,
};
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 18a5a40073..1f99a976d6 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -84,6 +84,7 @@ typedef struct VFIOUserProxy {
/* VFIOProxy flags */
#define VFIO_PROXY_CLIENT 0x1
#define VFIO_PROXY_FORCE_QUEUED 0x4
+#define VFIO_PROXY_NO_POST 0x8
typedef struct VFIODevice VFIODevice;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 50b136b7dc..3a2e3afaaf 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -60,6 +60,7 @@ typedef struct VFIORegion {
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
int fd; /* fd to mmap() region */
+ bool post_wr; /* writes can be posted */
} VFIORegion;
typedef struct VFIOMigration {
@@ -218,7 +219,7 @@ struct VFIODeviceIO {
int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
void *data);
int (*region_write)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
- void *data);
+ void *data, bool post);
};
extern VFIODeviceIO vfio_dev_io_ioctl;
--
2.34.1
- [PATCH 13/26] vfio-user: define socket send functions, (continued)
- [PATCH 13/26] vfio-user: define socket send functions, John Levon, 2025/01/08
- [PATCH 17/26] vfio-user: pci_user_realize PCI setup, John Levon, 2025/01/08
- [PATCH 15/26] vfio-user: get region info, John Levon, 2025/01/08
- [PATCH 20/26] vfio-user: proxy container connect/disconnect, John Levon, 2025/01/08
- [PATCH 22/26] vfio-user: no-mmap DMA support, John Levon, 2025/01/08
- [PATCH 19/26] vfio-user: forward msix BAR accesses to server, John Levon, 2025/01/08
- [PATCH 06/26] vfio: add region cache, John Levon, 2025/01/08
- [PATCH 01/26] vfio/container: pass MemoryRegion to DMA operations, John Levon, 2025/01/08
- [PATCH 24/26] vfio-user: pci reset, John Levon, 2025/01/08
- [PATCH 03/26] vfio/container: support VFIO_DMA_UNMAP_FLAG_ALL, John Levon, 2025/01/08
- [PATCH 16/26] vfio-user: region read/write,
John Levon <=
- [PATCH 18/26] vfio-user: get and set IRQs, John Levon, 2025/01/08
- [PATCH 23/26] vfio-user: dma read/write operations, John Levon, 2025/01/08
- [PATCH 21/26] vfio-user: dma map/unmap operations, John Levon, 2025/01/08
- [PATCH 25/26] vfio-user: add 'x-msg-timeout' option that specifies msg wait times, John Levon, 2025/01/08
- [PATCH 26/26] vfio-user: add coalesced posted writes, John Levon, 2025/01/08