[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 21/26] vfio-user: dma map/unmap operations
From: John Levon
Subject: [PATCH 21/26] vfio-user: dma map/unmap operations
Date: Wed, 8 Jan 2025 11:50:27 +0000
From: John Levon <levon@movementarian.org>
Implement DMA map/unmap for the vfio-user container.
Add ability to do async operations during memory transactions.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/trace-events | 4 ++
hw/vfio/user-container.c | 107 ++++++++++++++++++++++++++++++++++++++-
hw/vfio/user-protocol.h | 32 ++++++++++++
hw/vfio/user.c | 89 ++++++++++++++++++++++++++++----
hw/vfio/user.h | 10 ++++
5 files changed, 230 insertions(+), 12 deletions(-)
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index eceaa0c0fd..e3a7f82550 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -192,3 +192,7 @@ vfio_user_get_region_info(uint32_t index, uint32_t flags,
uint64_t size) " index
vfio_user_region_rw(uint32_t region, uint64_t off, uint32_t count) " region %d
offset 0x%"PRIx64" count %d"
vfio_user_get_irq_info(uint32_t index, uint32_t flags, uint32_t count) " index
%d flags 0x%x count %d"
vfio_user_set_irqs(uint32_t index, uint32_t start, uint32_t count, uint32_t
flags) " index %d start %d count %d flags 0x%x"
+
+# user-container.c
+# DMA map/unmap requests; async_ops is the proxy's async_ops flag at call time.
+# Each event definition must stay on a single line.
+vfio_user_dma_map(uint64_t iova, uint64_t size, uint64_t off, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" off 0x%"PRIx64" flags 0x%x async_ops %d"
+vfio_user_dma_unmap(uint64_t iova, uint64_t size, uint32_t flags, bool async_ops) " iova 0x%"PRIx64" size 0x%"PRIx64" flags 0x%x async_ops %d"
diff --git a/hw/vfio/user-container.c b/hw/vfio/user-container.c
index 99839edeed..77ffec9561 100644
--- a/hw/vfio/user-container.c
+++ b/hw/vfio/user-container.c
@@ -23,18 +23,119 @@
#include "qapi/error.h"
#include "pci.h"
+/*
+ * When DMA space is the physical address space, the region add/del
+ * listeners fire during memory update transactions.  These depend on the
+ * BQL being held, so any resulting map/unmap requests are sent
+ * asynchronously while keeping the BQL.
+ *
+ * Called at the start of a transaction: put the proxy into async mode.
+ */
+static void vfio_user_listener_begin(VFIOContainerBase *bcontainer)
+{
+    VFIOUserContainer *container;
+
+    container = container_of(bcontainer, VFIOUserContainer, bcontainer);
+    container->proxy->async_ops = true;
+}
+
+/*
+ * Called when a memory transaction commits: leave async mode, then wait
+ * for the async requests that were sent during the transaction.
+ */
+static void vfio_user_listener_commit(VFIOContainerBase *bcontainer)
+{
+    VFIOUserContainer *container;
+    VFIOUserProxy *proxy;
+
+    container = container_of(bcontainer, VFIOUserContainer, bcontainer);
+    proxy = container->proxy;
+
+    /* clear the flag first so later requests are synchronous again */
+    proxy->async_ops = false;
+    vfio_user_wait_reqs(proxy);
+}
+
+/*
+ * Send a VFIO_USER_DMA_UNMAP request for [iova, iova + size).
+ *
+ * While the proxy is in async mode the request is queued without waiting
+ * and 0 is returned; otherwise the reply is awaited and an error reply is
+ * returned as a negative error code.  @iotlb is not used here.
+ */
static int vfio_user_dma_unmap(const VFIOContainerBase *bcontainer,
 hwaddr iova, ram_addr_t size,
 IOMMUTLBEntry *iotlb, int flags)
{
- return -ENOTSUP;
+    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+                                                bcontainer);
+    VFIOUserProxy *proxy = container->proxy;
+    /* zeroed, as in vfio_user_dma_map(), so no stale bytes go on the wire */
+    VFIOUserDMAUnmap *msgp = g_malloc0(sizeof(*msgp));
+    int ret = 0;
+
+    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_UNMAP, sizeof(*msgp), 0);
+    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_unmap);
+    msgp->flags = flags;
+    msgp->iova = iova;
+    msgp->size = size;
+    trace_vfio_user_dma_unmap(msgp->iova, msgp->size, msgp->flags,
+                              proxy->async_ops);
+
+    if (proxy->async_ops) {
+        /*
+         * msgp is deliberately not freed here: the request is still in
+         * flight.  NOTE(review): presumably it is released when the nowait
+         * message is recycled - confirm.
+         */
+        vfio_user_send_nowait(proxy, &msgp->hdr, NULL, 0);
+        return 0;
+    }
+
+    vfio_user_send_wait(proxy, &msgp->hdr, NULL, 0);
+    if (msgp->hdr.flags & VFIO_USER_ERROR) {
+        ret = -msgp->hdr.error_reply;
+    }
+
+    /* single exit: the request is freed on the error path as well */
+    g_free(msgp);
+    return ret;
}
+/*
+ * Send a VFIO_USER_DMA_MAP request for [iova, iova + size).
+ *
+ * If the memory region has a backing FD, the FD is passed along and the
+ * message carries the offset of @vaddr within it; otherwise the offset is 0.
+ * In async mode the request is queued and 0 is returned without waiting.
+ */
static int vfio_user_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
 ram_addr_t size, void *vaddr, bool readonly,
 MemoryRegion *mrp)
{
- return -ENOTSUP;
+    VFIOUserContainer *container = container_of(bcontainer, VFIOUserContainer,
+                                                bcontainer);
+    VFIOUserProxy *proxy = container->proxy;
+    int fd = memory_region_get_fd(mrp);
+    bool have_fd = fd != -1;
+    VFIOUserDMAMap *msgp = g_malloc0(sizeof(*msgp));
+    VFIOUserFDs *fds = NULL;
+
+    vfio_user_request_msg(&msgp->hdr, VFIO_USER_DMA_MAP, sizeof(*msgp), 0);
+    msgp->argsz = sizeof(struct vfio_iommu_type1_dma_map);
+    msgp->iova = iova;
+    msgp->size = size;
+
+    /* always readable; writable unless the caller asked for read-only */
+    msgp->flags = VFIO_DMA_MAP_FLAG_READ;
+    if (!readonly) {
+        msgp->flags |= VFIO_DMA_MAP_FLAG_WRITE;
+    }
+
+    /*
+     * vaddr enters as a QEMU process address; turn it into a file offset
+     * for FD-backed areas, or use 0 when there is no FD.
+     */
+    msgp->offset = have_fd
+                   ? qemu_ram_block_host_offset(mrp->ram_block, vaddr)
+                   : 0;
+
+    trace_vfio_user_dma_map(msgp->iova, msgp->size, msgp->offset, msgp->flags,
+                            proxy->async_ops);
+
+    if (!proxy->async_ops) {
+        VFIOUserFDs local_fds = { 1, 0, &fd };
+        int ret = 0;
+
+        /* blocking send: a stack-allocated FD list is fine here */
+        if (have_fd) {
+            fds = &local_fds;
+        }
+        vfio_user_send_wait(proxy, &msgp->hdr, fds, 0);
+        if (msgp->hdr.flags & VFIO_USER_ERROR) {
+            ret = -msgp->hdr.error_reply;
+        }
+        g_free(msgp);
+        return ret;
+    }
+
+    /*
+     * The async_ops case sends without blocking or dropping BQL; the
+     * request is waited for later in vfio_user_wait_reqs().  The FD list
+     * cannot be an auto variable since we don't block.
+     */
+    if (have_fd) {
+        fds = vfio_user_getfds(1);
+        fds->send_fds = 1;
+        fds->fds[0] = fd;
+    }
+    vfio_user_send_nowait(proxy, &msgp->hdr, fds, 0);
+    return 0;
}
static int
@@ -234,6 +335,8 @@ static void vfio_iommu_user_class_init(ObjectClass *klass,
void *data)
VFIOIOMMUClass *vioc = VFIO_IOMMU_CLASS(klass);
vioc->setup = vfio_user_setup;
+ vioc->listener_begin = vfio_user_listener_begin,
+ vioc->listener_commit = vfio_user_listener_commit,
vioc->dma_map = vfio_user_dma_map;
vioc->dma_unmap = vfio_user_dma_unmap;
vioc->attach_device = vfio_user_attach_device;
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 87e43ddc72..9b569156fa 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -115,6 +115,31 @@ typedef struct {
*/
#define VFIO_USER_DEF_MAX_BITMAP (256 * 1024 * 1024)
+/*
+ * VFIO_USER_DMA_MAP
+ * imported from struct vfio_iommu_type1_dma_map
+ *
+ * Request message built by vfio_user_dma_map() in user-container.c.
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz; /* sender uses sizeof(struct vfio_iommu_type1_dma_map) */
+ uint32_t flags; /* VFIO_DMA_MAP_FLAG_READ, plus _WRITE unless read-only */
+ uint64_t offset; /* FD offset; left 0 when no FD accompanies the request */
+ uint64_t iova; /* start of the range being mapped */
+ uint64_t size; /* size of the range being mapped */
+} VFIOUserDMAMap;
+
+/*
+ * VFIO_USER_DMA_UNMAP
+ * imported from struct vfio_iommu_type1_dma_unmap
+ *
+ * Request message built by vfio_user_dma_unmap() in user-container.c.
+ */
+typedef struct {
+ VFIOUserHdr hdr;
+ uint32_t argsz; /* sender uses sizeof(struct vfio_iommu_type1_dma_unmap) */
+ uint32_t flags; /* passed through unchanged from the unmap caller */
+ uint64_t iova; /* start of the range being unmapped */
+ uint64_t size; /* size of the range being unmapped */
+} VFIOUserDMAUnmap;
+
/*
* VFIO_USER_DEVICE_GET_INFO
* imported from struct vfio_device_info
@@ -178,4 +203,11 @@ typedef struct {
char data[];
} VFIOUserRegionRW;
+/*
+ * imported from struct vfio_bitmap
+ *
+ * NOTE(review): nothing in this patch references this type yet; the field
+ * notes below are assumptions based on the names - confirm.
+ */
+typedef struct {
+ uint64_t pgsize; /* presumably the page size each bit covers */
+ uint64_t size; /* presumably the size of data[] */
+ char data[]; /* flexible array member holding the bitmap payload */
+} VFIOUserBitmap;
+
#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 4b1549cf8e..ef644848ed 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -47,7 +47,6 @@ static void vfio_user_shutdown(VFIOUserProxy *proxy);
static int vfio_user_send_qio(VFIOUserProxy *proxy, VFIOUserMsg *msg);
static VFIOUserMsg *vfio_user_getmsg(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds);
-static VFIOUserFDs *vfio_user_getfds(int numfds);
static void vfio_user_recycle(VFIOUserProxy *proxy, VFIOUserMsg *msg);
static void vfio_user_recv(void *opaque);
@@ -60,10 +59,6 @@ static void vfio_user_request(void *opaque);
static int vfio_user_send_queued(VFIOUserProxy *proxy, VFIOUserMsg *msg);
static void vfio_user_send_async(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
VFIOUserFDs *fds);
-static void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
- VFIOUserFDs *fds, int rsize);
-static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
- uint32_t size, uint32_t flags);
static inline void vfio_user_set_error(VFIOUserHdr *hdr, uint32_t err)
{
@@ -155,7 +150,7 @@ static void vfio_user_recycle(VFIOUserProxy *proxy,
VFIOUserMsg *msg)
QTAILQ_INSERT_HEAD(&proxy->free, msg, next);
}
-static VFIOUserFDs *vfio_user_getfds(int numfds)
+VFIOUserFDs *vfio_user_getfds(int numfds)
{
VFIOUserFDs *fds = g_malloc0(sizeof(*fds) + (numfds * sizeof(int)));
@@ -658,8 +653,38 @@ static void vfio_user_send_async(VFIOUserProxy *proxy,
VFIOUserHdr *hdr,
}
}
-static void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
- VFIOUserFDs *fds, int rsize)
+/*
+ * Queue a request without waiting for its reply.  The message is recorded
+ * in proxy->last_nowait so vfio_user_wait_reqs() can wait for it later.
+ *
+ * Requests flagged VFIO_USER_NO_REPLY are rejected with a diagnostic.  If
+ * the message cannot be queued it is recycled; no error is returned.
+ */
+void vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+                           VFIOUserFDs *fds, int rsize)
+{
+    VFIOUserMsg *req;
+
+    if (hdr->flags & VFIO_USER_NO_REPLY) {
+        error_printf("vfio_user_send_nowait on async message\n");
+        return;
+    }
+
+    /* proxy->lock is held for the rest of the function */
+    QEMU_LOCK_GUARD(&proxy->lock);
+
+    req = vfio_user_getmsg(proxy, hdr, fds);
+    req->id = hdr->id;
+    /* a zero rsize means the reply is expected to be hdr->size long */
+    req->rsize = rsize != 0 ? rsize : hdr->size;
+    req->type = VFIO_MSG_NOWAIT;
+
+    if (vfio_user_send_queued(proxy, req) >= 0) {
+        proxy->last_nowait = req;
+    } else {
+        vfio_user_recycle(proxy, req);
+    }
+}
+
+void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize)
{
VFIOUserMsg *msg;
int ret;
@@ -696,6 +721,50 @@ static void vfio_user_send_wait(VFIOUserProxy *proxy,
VFIOUserHdr *hdr,
qemu_mutex_unlock(&proxy->lock);
}
+/*
+ * Wait for the nowait requests sent during a memory region transaction.
+ *
+ * Only the most recently queued nowait message (proxy->last_nowait) is
+ * waited on.  A timeout or an error reply is reported with error_printf();
+ * nothing is returned to the caller.
+ */
+void vfio_user_wait_reqs(VFIOUserProxy *proxy)
+{
+    VFIOUserMsg *msg;
+
+    /*
+     * Any DMA map/unmap requests sent in the middle
+     * of a memory region transaction were sent nowait.
+     * Wait for them here.
+     */
+    qemu_mutex_lock(&proxy->lock);
+    if (proxy->last_nowait != NULL) {
+        /*
+         * Change type to WAIT to wait for reply
+         */
+        msg = proxy->last_nowait;
+        msg->type = VFIO_MSG_WAIT;
+        proxy->last_nowait = NULL;
+        while (!msg->complete) {
+            if (!qemu_cond_timedwait(&msg->cv, &proxy->lock, wait_time)) {
+                VFIOUserMsgQ *list;
+
+                /* give up: take the message off whichever queue holds it */
+                list = msg->pending ? &proxy->pending : &proxy->outgoing;
+                QTAILQ_REMOVE(list, msg, next);
+                /* message now names this function, like the one below */
+                error_printf("vfio_user_wait_reqs - timed out\n");
+                break;
+            }
+        }
+
+        if (msg->hdr->flags & VFIO_USER_ERROR) {
+            error_printf("vfio_user_wait_reqs - error reply on async ");
+            error_printf("request: command %x error %s\n", msg->hdr->command,
+                         strerror(msg->hdr->error_reply));
+        }
+
+        /*
+         * Change type back to NOWAIT to free
+         */
+        msg->type = VFIO_MSG_NOWAIT;
+        vfio_user_recycle(proxy, msg);
+    }
+
+    qemu_mutex_unlock(&proxy->lock);
+}
+
static QLIST_HEAD(, VFIOUserProxy) vfio_user_sockets =
QLIST_HEAD_INITIALIZER(vfio_user_sockets);
@@ -830,8 +899,8 @@ void vfio_user_disconnect(VFIOUserProxy *proxy)
g_free(proxy);
}
-static void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
- uint32_t size, uint32_t flags)
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+ uint32_t size, uint32_t flags)
{
static uint16_t next_id;
diff --git a/hw/vfio/user.h b/hw/vfio/user.h
index 9039e96069..31d2c5abd9 100644
--- a/hw/vfio/user.h
+++ b/hw/vfio/user.h
@@ -75,6 +75,7 @@ typedef struct VFIOUserProxy {
QemuCond close_cv;
AioContext *ctx;
QEMUBH *req_bh;
+ bool async_ops;
/*
* above only changed when BQL is held
@@ -106,4 +107,13 @@ void vfio_user_set_handler(VFIODevice *vbasedev,
bool vfio_user_validate_version(VFIOUserProxy *proxy, Error **errp);
int vfio_user_get_info(VFIOUserProxy *proxy, struct vfio_device_info *info);
+/* Allocate a zeroed VFIOUserFDs with room for numfds file descriptors. */
+VFIOUserFDs *vfio_user_getfds(int numfds);
+/*
+ * Set up a request header; callers pass the command, the message size and
+ * the header flags.  NOTE(review): body not visible here - confirm details.
+ */
+void vfio_user_request_msg(VFIOUserHdr *hdr, uint16_t cmd,
+ uint32_t size, uint32_t flags);
+/* Wait for the last request sent with vfio_user_send_nowait(), if any. */
+void vfio_user_wait_reqs(VFIOUserProxy *proxy);
+/* Queue a request without waiting; vfio_user_wait_reqs() waits for it. */
+void vfio_user_send_nowait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize);
+/*
+ * Send a request and wait; callers then test hdr->flags for VFIO_USER_ERROR.
+ */
+void vfio_user_send_wait(VFIOUserProxy *proxy, VFIOUserHdr *hdr,
+ VFIOUserFDs *fds, int rsize);
+
#endif /* VFIO_USER_H */
--
2.34.1
- [PATCH 20/26] vfio-user: proxy container connect/disconnect, (continued)
- [PATCH 20/26] vfio-user: proxy container connect/disconnect, John Levon, 2025/01/08
- [PATCH 22/26] vfio-user: no-mmap DMA support, John Levon, 2025/01/08
- [PATCH 19/26] vfio-user: forward msix BAR accesses to server, John Levon, 2025/01/08
- [PATCH 06/26] vfio: add region cache, John Levon, 2025/01/08
- [PATCH 01/26] vfio/container: pass MemoryRegion to DMA operations, John Levon, 2025/01/08
- [PATCH 24/26] vfio-user: pci reset, John Levon, 2025/01/08
- [PATCH 03/26] vfio/container: support VFIO_DMA_UNMAP_FLAG_ALL, John Levon, 2025/01/08
- [PATCH 16/26] vfio-user: region read/write, John Levon, 2025/01/08
- [PATCH 18/26] vfio-user: get and set IRQs, John Levon, 2025/01/08
- [PATCH 23/26] vfio-user: dma read/write operations, John Levon, 2025/01/08
- [PATCH 21/26] vfio-user: dma map/unmap operations,
John Levon <=
- [PATCH 25/26] vfio-user: add 'x-msg-timeout' option that specifies msg wait times, John Levon, 2025/01/08
- [PATCH 26/26] vfio-user: add coalesced posted writes, John Levon, 2025/01/08