[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 15/26] vfio-user: get region info
From: John Levon
Subject: [PATCH 15/26] vfio-user: get region info
Date: Wed, 8 Jan 2025 11:50:21 +0000
From: Jagannathan Raman <jag.raman@oracle.com>
Add the VFIO_USER_DEVICE_GET_REGION_INFO message and a per-region FD, to support mmap() of remote device regions.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/ap.c | 2 ++
hw/vfio/ccw.c | 2 ++
hw/vfio/container.c | 7 ++++
hw/vfio/helpers.c | 28 +++++++++++++--
hw/vfio/pci.c | 2 ++
hw/vfio/platform.c | 2 ++
hw/vfio/trace-events | 1 +
hw/vfio/user-pci.c | 2 ++
hw/vfio/user-protocol.h | 14 ++++++++
hw/vfio/user.c | 68 +++++++++++++++++++++++++++++++++++
include/hw/vfio/vfio-common.h | 6 +++-
11 files changed, 130 insertions(+), 4 deletions(-)
diff --git a/hw/vfio/ap.c b/hw/vfio/ap.c
index 1adce1ab40..54b1815f1d 100644
--- a/hw/vfio/ap.c
+++ b/hw/vfio/ap.c
@@ -162,6 +162,8 @@ static void vfio_ap_realize(DeviceState *dev, Error **errp)
return;
}
+ vbasedev->use_regfds = false;
+
if (!vfio_attach_device(vbasedev->name, vbasedev,
&address_space_memory, errp)) {
goto error;
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 8c16648819..085a3fc6e6 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -586,6 +586,8 @@ static void vfio_ccw_realize(DeviceState *dev, Error **errp)
goto out_unrealize;
}
+ vbasedev->use_regfds = false;
+
if (!vfio_attach_device(cdev->mdevid, vbasedev,
&address_space_memory, errp)) {
goto out_attach_dev_err;
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 039241c9c5..e017cd4b08 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -892,10 +892,17 @@ void vfio_put_base_device(VFIODevice *vbasedev)
int i;
for (i = 0; i < vbasedev->num_regions; i++) {
+ if (vbasedev->regfds != NULL && vbasedev->regfds[i] != -1) {
+ close(vbasedev->regfds[i]);
+ }
g_free(vbasedev->regions[i]);
}
g_free(vbasedev->regions);
vbasedev->regions = NULL;
+ if (vbasedev->regfds != NULL) {
+ g_free(vbasedev->regfds);
+ vbasedev->regfds = NULL;
+ }
}
if (!vbasedev->group) {
diff --git a/hw/vfio/helpers.c b/hw/vfio/helpers.c
index 529520c1d6..802d6ae101 100644
--- a/hw/vfio/helpers.c
+++ b/hw/vfio/helpers.c
@@ -364,6 +364,12 @@ int vfio_region_setup(Object *obj, VFIODevice *vbasedev,
VFIORegion *region,
region->size = info->size;
region->fd_offset = info->offset;
region->nr = index;
+ if (vbasedev->regfds != NULL) {
+ region->fd = vbasedev->regfds[index];
+ } else {
+ region->fd = vbasedev->fd;
+ }
+
if (region->size) {
region->mem = g_new0(MemoryRegion, 1);
@@ -442,7 +448,7 @@ int vfio_region_mmap(VFIORegion *region)
region->mmaps[i].mmap = mmap(map_align, region->mmaps[i].size, prot,
MAP_SHARED | MAP_FIXED,
- region->vbasedev->fd,
+ region->fd,
region->fd_offset +
region->mmaps[i].offset);
if (region->mmaps[i].mmap == MAP_FAILED) {
@@ -567,12 +573,16 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
struct vfio_region_info **info)
{
size_t argsz = sizeof(struct vfio_region_info);
+ int fd = -1;
int ret;
/* create region cache */
if (vbasedev->regions == NULL) {
vbasedev->regions = g_new0(struct vfio_region_info *,
vbasedev->num_regions);
+ if (vbasedev->use_regfds) {
+ vbasedev->regfds = g_new0(int, vbasedev->num_regions);
+ }
}
/* check cache */
if (vbasedev->regions[index] != NULL) {
@@ -586,22 +596,33 @@ int vfio_get_region_info(VFIODevice *vbasedev, int index,
retry:
(*info)->argsz = argsz;
- ret = vbasedev->io->get_region_info(vbasedev, *info);
+ ret = vbasedev->io->get_region_info(vbasedev, *info, &fd);
if (ret != 0) {
g_free(*info);
*info = NULL;
+ if (vbasedev->regfds != NULL) {
+ vbasedev->regfds[index] = -1;
+ }
+
return -errno;
}
if ((*info)->argsz > argsz) {
argsz = (*info)->argsz;
*info = g_realloc(*info, argsz);
+ if (fd != -1) {
+ close(fd);
+ fd = -1;
+ }
goto retry;
}
/* fill cache */
vbasedev->regions[index] = *info;
+ if (vbasedev->regfds != NULL) {
+ vbasedev->regfds[index] = fd;
+ }
return 0;
}
@@ -765,10 +786,11 @@ static int vfio_io_device_feature(VFIODevice *vbasedev,
}
static int vfio_io_get_region_info(VFIODevice *vbasedev,
- struct vfio_region_info *info)
+ struct vfio_region_info *info, int *fd)
{
int ret;
+ *fd = -1;
ret = ioctl(vbasedev->fd, VFIO_DEVICE_GET_REGION_INFO, info);
return ret < 0 ? -errno : ret;
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 27f82d6517..b57059d676 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -3048,6 +3048,8 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
name = g_strdup(vbasedev->name);
}
+ vbasedev->use_regfds = false;
+
if (!vfio_attach_device(name, vbasedev,
pci_device_iommu_address_space(pdev), errp)) {
goto error;
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 1194e55807..6e19573b3b 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -575,6 +575,8 @@ static void vfio_platform_realize(DeviceState *dev, Error
**errp)
VFIODevice *vbasedev = &vdev->vbasedev;
int i;
+ vbasedev->use_regfds = false;
+
qemu_mutex_init(&vdev->intp_mutex);
trace_vfio_platform_realize(vbasedev->sysfsdev ?
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 662bc4edfd..ee6d7a0d0a 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -188,3 +188,4 @@ vfio_user_recv_request(uint16_t cmd) " command 0x%x"
vfio_user_send_write(uint16_t id, int wrote) " id 0x%x wrote 0x%x"
vfio_user_version(uint16_t major, uint16_t minor, const char *caps) " major %d
minor %d caps: %s"
vfio_user_get_info(uint32_t nregions, uint32_t nirqs) " #regions %d #irqs %d"
+vfio_user_get_region_info(uint32_t index, uint32_t flags, uint64_t size) "
index %d flags 0x%x size 0x%"PRIx64
diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c
index 62259db473..60cd9c941c 100644
--- a/hw/vfio/user-pci.c
+++ b/hw/vfio/user-pci.c
@@ -111,6 +111,8 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error
**errp)
vbasedev->ops = &vfio_user_pci_ops;
vbasedev->type = VFIO_DEVICE_TYPE_PCI;
vbasedev->dev = DEVICE(vdev);
+ vbasedev->io = &vfio_dev_io_sock;
+ vbasedev->use_regfds = true;
/*
* vfio-user devices are effectively mdevs (don't use a host iommu).
diff --git a/hw/vfio/user-protocol.h b/hw/vfio/user-protocol.h
index 5f9ef1768f..6f70a48905 100644
--- a/hw/vfio/user-protocol.h
+++ b/hw/vfio/user-protocol.h
@@ -125,4 +125,18 @@ typedef struct {
uint32_t num_irqs;
} VFIOUserDeviceInfo;
+/*
+ * VFIO_USER_DEVICE_GET_REGION_INFO
+ * imported from struct vfio_region_info
+ */
+typedef struct {
+ VFIOUserHdr hdr; /* message header; carries flags and error_reply */
+ uint32_t argsz; /* start of the part copied back into vfio_region_info */
+ uint32_t flags;
+ uint32_t index; /* region being queried; filled from info->index */
+ uint32_t cap_offset; /* range-checked by the caller when FLAG_CAPS is set */
+ uint64_t size;
+ uint64_t offset;
+} VFIOUserRegionInfo;
+
#endif /* VFIO_USER_PROTOCOL_H */
diff --git a/hw/vfio/user.c b/hw/vfio/user.c
index 93c7eea649..44e8da8aa1 100644
--- a/hw/vfio/user.c
+++ b/hw/vfio/user.c
@@ -1106,3 +1106,71 @@ int vfio_user_get_info(VFIOUserProxy *proxy, struct
vfio_device_info *info)
return 0;
}
+
+static int vfio_user_get_region_info(VFIOUserProxy *proxy,
+ struct vfio_region_info *info,
+ VFIOUserFDs *fds)
+{
+ g_autofree VFIOUserRegionInfo *msgp = NULL;
+ uint32_t size;
+
+ /* Query region info->index via proxy; data returned can be larger than vfio_region_info */
+ if (info->argsz < sizeof(*info)) {
+ error_printf("vfio_user_get_region_info argsz too small\n");
+ return -E2BIG;
+ }
+ if (fds != NULL && fds->send_fds != 0) {
+ error_printf("vfio_user_get_region_info can't send FDs\n");
+ return -EINVAL;
+ }
+
+ size = info->argsz + sizeof(VFIOUserHdr); /* header + caller-sized payload */
+ msgp = g_malloc0(size); /* zero-filled; released by g_autofree on return */
+
+ vfio_user_request_msg(&msgp->hdr, VFIO_USER_DEVICE_GET_REGION_INFO,
+ sizeof(*msgp), 0);
+ msgp->argsz = info->argsz;
+ msgp->index = info->index;
+
+ vfio_user_send_wait(proxy, &msgp->hdr, fds, size); /* presumably blocks until the reply is in msgp - confirm */
+ if (msgp->hdr.flags & VFIO_USER_ERROR) {
+ return -msgp->hdr.error_reply; /* negated error code from the reply header */
+ }
+ trace_vfio_user_get_region_info(msgp->index, msgp->flags, msgp->size);
+
+ memcpy(info, &msgp->argsz, info->argsz); /* fields after hdr overwrite *info, up to the caller's argsz */
+ return 0;
+}
+
+
+/*
+ * Socket-based io_ops
+ */
+
+static int vfio_user_io_get_region_info(VFIODevice *vbasedev,
+ struct vfio_region_info *info,
+ int *fd)
+{
+ int ret;
+ VFIOUserFDs fds = { 0, 1, fd}; /* presumably: send no FDs, accept one into *fd - confirm field order */
+ /* get_region_info io op: query via the proxy, then validate the reply; returns 0 or a negative errno */
+ ret = vfio_user_get_region_info(vbasedev->proxy, info, &fds);
+ if (ret) {
+ return ret;
+ }
+ /* the region cache holds num_regions entries, so index == num_regions is out of range */
+ if (info->index >= vbasedev->num_regions) {
+ return -EINVAL;
+ }
+ /* cap_offset in valid area */
+ if ((info->flags & VFIO_REGION_INFO_FLAG_CAPS) &&
+ (info->cap_offset < sizeof(*info) || info->cap_offset > info->argsz)) {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+VFIODeviceIO vfio_dev_io_sock = { /* io ops installed by vfio_user_pci_realize() */
+ .get_region_info = vfio_user_io_get_region_info,
+};
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index afc67a3a77..50b136b7dc 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -59,6 +59,7 @@ typedef struct VFIORegion {
uint32_t nr_mmaps;
VFIOMmap *mmaps;
uint8_t nr; /* cache the region number for debug */
+ int fd; /* fd to mmap() region */
} VFIORegion;
typedef struct VFIOMigration {
@@ -146,6 +147,7 @@ typedef struct VFIODevice {
bool ram_block_discard_allowed;
OnOffAuto enable_migration;
bool migration_events;
+ bool use_regfds;
VFIODeviceOps *ops;
VFIODeviceIO *io;
unsigned int num_irqs;
@@ -165,6 +167,7 @@ typedef struct VFIODevice {
QLIST_ENTRY(VFIODevice) hwpt_next;
VFIOUserProxy *proxy;
struct vfio_region_info **regions;
+ int *regfds;
} VFIODevice;
struct VFIODeviceOps {
@@ -209,7 +212,7 @@ struct VFIODeviceOps {
struct VFIODeviceIO {
int (*device_feature)(VFIODevice *vdev, struct vfio_device_feature *);
int (*get_region_info)(VFIODevice *vdev,
- struct vfio_region_info *info);
+ struct vfio_region_info *info, int *fd);
int (*get_irq_info)(VFIODevice *vdev, struct vfio_irq_info *irq);
int (*set_irqs)(VFIODevice *vdev, struct vfio_irq_set *irqs);
int (*region_read)(VFIODevice *vdev, uint8_t nr, off_t off, uint32_t size,
@@ -219,6 +222,7 @@ struct VFIODeviceIO {
};
extern VFIODeviceIO vfio_dev_io_ioctl;
+extern VFIODeviceIO vfio_dev_io_sock;
#endif /* CONFIG_LINUX */
--
2.34.1
- [PATCH 08/26] vfio: add device IO ops vector, (continued)
- [PATCH 08/26] vfio: add device IO ops vector, John Levon, 2025/01/08
- [PATCH 04/26] vfio: add vfio_attach_device_by_iommu_type(), John Levon, 2025/01/08
- [PATCH 05/26] vfio: add vfio_prepare_device(), John Levon, 2025/01/08
- [PATCH 09/26] vfio-user: introduce vfio-user protocol specification, John Levon, 2025/01/08
- [PATCH 11/26] vfio-user: connect vfio proxy to remote server, John Levon, 2025/01/08
- [PATCH 14/26] vfio-user: get device info, John Levon, 2025/01/08
- [PATCH 12/26] vfio-user: define socket receive functions, John Levon, 2025/01/08
- [PATCH 07/26] vfio: add VFIO base abstract class, John Levon, 2025/01/08
- [PATCH 13/26] vfio-user: define socket send functions, John Levon, 2025/01/08
- [PATCH 17/26] vfio-user: pci_user_realize PCI setup, John Levon, 2025/01/08
- [PATCH 15/26] vfio-user: get region info,
John Levon <=
- [PATCH 20/26] vfio-user: proxy container connect/disconnect, John Levon, 2025/01/08
- [PATCH 22/26] vfio-user: no-mmap DMA support, John Levon, 2025/01/08
- [PATCH 19/26] vfio-user: forward msix BAR accesses to server, John Levon, 2025/01/08
- [PATCH 06/26] vfio: add region cache, John Levon, 2025/01/08
- [PATCH 01/26] vfio/container: pass MemoryRegion to DMA operations, John Levon, 2025/01/08
- [PATCH 24/26] vfio-user: pci reset, John Levon, 2025/01/08
- [PATCH 03/26] vfio/container: support VFIO_DMA_UNMAP_FLAG_ALL, John Levon, 2025/01/08
- [PATCH 16/26] vfio-user: region read/write, John Levon, 2025/01/08
- [PATCH 18/26] vfio-user: get and set IRQs, John Levon, 2025/01/08
- [PATCH 23/26] vfio-user: dma read/write operations, John Levon, 2025/01/08