[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 15/23] vfio-user: forward msix BAR accesses to server
From: |
John Johnson |
Subject: |
[PATCH v2 15/23] vfio-user: forward msix BAR accesses to server |
Date: |
Wed, 1 Feb 2023 21:55:51 -0800 |
Server holds device current device pending state
Use irq masking commands in socket case
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
---
hw/vfio/pci.h | 1 +
include/hw/vfio/vfio-common.h | 3 ++
hw/vfio/ccw.c | 1 +
hw/vfio/common.c | 26 ++++++++++++++++++
hw/vfio/pci.c | 23 +++++++++++++++-
hw/vfio/platform.c | 1 +
hw/vfio/user-pci.c | 64 +++++++++++++++++++++++++++++++++++++++++++
7 files changed, 118 insertions(+), 1 deletion(-)
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 4f70664..d3e5d5f 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -113,6 +113,7 @@ typedef struct VFIOMSIXInfo {
uint32_t table_offset;
uint32_t pba_offset;
unsigned long *pending;
+ MemoryRegion *pba_region;
} VFIOMSIXInfo;
/*
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index bbc4b15..2c58d7d 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -143,6 +143,7 @@ typedef struct VFIODevice {
bool ram_block_discard_allowed;
bool enable_migration;
bool use_regfds;
+ bool can_mask_irq;
VFIODeviceOps *ops;
VFIODeviceIO *io;
unsigned int num_irqs;
@@ -239,6 +240,8 @@ void vfio_put_base_device(VFIODevice *vbasedev);
void vfio_disable_irqindex(VFIODevice *vbasedev, int index);
void vfio_unmask_single_irqindex(VFIODevice *vbasedev, int index);
void vfio_mask_single_irqindex(VFIODevice *vbasedev, int index);
+void vfio_unmask_single_irq(VFIODevice *vbasedev, int index, int irq);
+void vfio_mask_single_irq(VFIODevice *vbasedev, int index, int irq);
int vfio_set_irq_signaling(VFIODevice *vbasedev, int index, int subindex,
int action, int fd, Error **errp);
void vfio_region_write(void *opaque, hwaddr addr,
diff --git a/hw/vfio/ccw.c b/hw/vfio/ccw.c
index 00605bd..bf67670 100644
--- a/hw/vfio/ccw.c
+++ b/hw/vfio/ccw.c
@@ -616,6 +616,7 @@ static void vfio_ccw_get_device(VFIOGroup *group,
VFIOCCWDevice *vcdev,
vcdev->vdev.dev = &vcdev->cdev.parent_obj.parent_obj;
vcdev->vdev.io = &vfio_dev_io_ioctl;
vcdev->vdev.use_regfds = false;
+ vcdev->vdev.can_mask_irq = false;
return;
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index de64e53..0c1cb21 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -102,6 +102,32 @@ void vfio_mask_single_irqindex(VFIODevice *vbasedev, int
index)
vbasedev->io->set_irqs(vbasedev, &irq_set);
}
+void vfio_mask_single_irq(VFIODevice *vbasedev, int index, int irq)
+{
+ struct vfio_irq_set irq_set = {
+ .argsz = sizeof(irq_set),
+ .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_MASK,
+ .index = index,
+ .start = irq,
+ .count = 1,
+ };
+
+ vbasedev->io->set_irqs(vbasedev, &irq_set);
+}
+
+void vfio_unmask_single_irq(VFIODevice *vbasedev, int index, int irq)
+{
+ struct vfio_irq_set irq_set = {
+ .argsz = sizeof(irq_set),
+ .flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK,
+ .index = index,
+ .start = irq,
+ .count = 1,
+ };
+
+ vbasedev->io->set_irqs(vbasedev, &irq_set);
+}
+
static inline const char *action_to_str(int action)
{
switch (action) {
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 42e7c82..7b16f8f 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -477,6 +477,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev,
unsigned int nr,
{
VFIOPCIDevice *vdev = VFIO_PCI_BASE(pdev);
VFIOMSIVector *vector;
+ bool new_vec = false;
int ret;
trace_vfio_msix_vector_do_use(vdev->vbasedev.name, nr);
@@ -490,6 +491,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev,
unsigned int nr,
error_report("vfio: Error: event_notifier_init failed");
}
vector->use = true;
+ new_vec = true;
msix_vector_use(pdev, nr);
}
@@ -516,6 +518,7 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev,
unsigned int nr,
kvm_irqchip_commit_route_changes(&vfio_route_change);
vfio_connect_kvm_msi_virq(vector);
}
+ new_vec = true;
}
}
@@ -523,6 +526,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev,
unsigned int nr,
* We don't want to have the host allocate all possible MSI vectors
* for a device if they're not in use, so we shutdown and incrementally
* increase them as needed.
+ * Otherwise, unmask the vector if the vector is already setup (and we can
+ * do so) or send the fd if not.
*/
if (vdev->nr_vectors < nr + 1) {
vdev->nr_vectors = nr + 1;
@@ -533,6 +538,8 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev,
unsigned int nr,
error_report("vfio: failed to enable vectors, %d", ret);
}
}
+ } else if (vdev->vbasedev.can_mask_irq && !new_vec) {
+ vfio_unmask_single_irq(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr);
} else {
Error *err = NULL;
int32_t fd;
@@ -574,6 +581,12 @@ static void vfio_msix_vector_release(PCIDevice *pdev,
unsigned int nr)
trace_vfio_msix_vector_release(vdev->vbasedev.name, nr);
+ /* just mask vector if peer supports it */
+ if (vdev->vbasedev.can_mask_irq) {
+ vfio_mask_single_irq(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, nr);
+ return;
+ }
+
/*
* There are still old guests that mask and unmask vectors on every
* interrupt. If we're using QEMU bypass with a KVM irqfd, leave all of
@@ -644,7 +657,7 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
if (ret) {
error_report("vfio: failed to enable vectors, %d", ret);
}
- } else {
+ } else if (!vdev->vbasedev.can_mask_irq) {
/*
* Some communication channels between VF & PF or PF & fw rely on the
* physical state of the device and expect that enabling MSI-X from the
@@ -660,6 +673,13 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
*/
vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
vfio_msix_vector_release(&vdev->pdev, 0);
+ } else {
+ /*
+ * If we can use irq masking, send an invalid fd on vector 0
+ * to enable MSI-X without any vectors enabled.
+ */
+ vfio_set_irq_signaling(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX, 0,
+ VFIO_IRQ_SET_ACTION_TRIGGER, -1, NULL);
}
trace_vfio_msix_enable(vdev->vbasedev.name);
@@ -3040,6 +3060,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
vbasedev->dev = DEVICE(vdev);
vbasedev->io = &vfio_dev_io_ioctl;
vbasedev->use_regfds = false;
+ vbasedev->can_mask_irq = false;
tmp = g_strdup_printf("%s/iommu_group", vbasedev->sysfsdev);
len = readlink(tmp, group_path, sizeof(group_path));
diff --git a/hw/vfio/platform.c b/hw/vfio/platform.c
index 8ddfcca..3387ec4 100644
--- a/hw/vfio/platform.c
+++ b/hw/vfio/platform.c
@@ -623,6 +623,7 @@ static void vfio_platform_realize(DeviceState *dev, Error
**errp)
vbasedev->ops = &vfio_platform_ops;
vbasedev->io = &vfio_dev_io_ioctl;
vbasedev->use_regfds = false;
+ vbasedev->can_mask_irq = false;
qemu_mutex_init(&vdev->intp_mutex);
diff --git a/hw/vfio/user-pci.c b/hw/vfio/user-pci.c
index 55ffe7f..bc1d01a 100644
--- a/hw/vfio/user-pci.c
+++ b/hw/vfio/user-pci.c
@@ -45,6 +45,62 @@ struct VFIOUserPCIDevice {
};
/*
+ * The server maintains the device's pending interrupts,
+ * via its MSIX table and PBA, so we treat these acceses
+ * like PCI config space and forward them.
+ */
+static uint64_t vfio_user_pba_read(void *opaque, hwaddr addr,
+ unsigned size)
+{
+ VFIOPCIDevice *vdev = opaque;
+ VFIORegion *region = &vdev->bars[vdev->msix->pba_bar].region;
+ uint64_t data;
+
+ /* server copy is what matters */
+ data = vfio_region_read(region, addr + vdev->msix->pba_offset, size);
+ return data;
+}
+
+static void vfio_user_pba_write(void *opaque, hwaddr addr,
+ uint64_t data, unsigned size)
+{
+ /* dropped */
+}
+
+static const MemoryRegionOps vfio_user_pba_ops = {
+ .read = vfio_user_pba_read,
+ .write = vfio_user_pba_write,
+ .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static void vfio_user_msix_setup(VFIOPCIDevice *vdev)
+{
+ MemoryRegion *vfio_reg, *msix_reg, *pba_reg;
+
+ pba_reg = g_new0(MemoryRegion, 1);
+ vdev->msix->pba_region = pba_reg;
+
+ vfio_reg = vdev->bars[vdev->msix->pba_bar].mr;
+ msix_reg = &vdev->pdev.msix_pba_mmio;
+ memory_region_init_io(pba_reg, OBJECT(vdev), &vfio_user_pba_ops, vdev,
+ "VFIO MSIX PBA", int128_get64(msix_reg->size));
+ memory_region_add_subregion_overlap(vfio_reg, vdev->msix->pba_offset,
+ pba_reg, 1);
+}
+
+static void vfio_user_msix_teardown(VFIOPCIDevice *vdev)
+{
+ MemoryRegion *mr, *sub;
+
+ mr = vdev->bars[vdev->msix->pba_bar].mr;
+ sub = vdev->msix->pba_region;
+ memory_region_del_subregion(mr, sub);
+
+ g_free(vdev->msix->pba_region);
+ vdev->msix->pba_region = NULL;
+}
+
+/*
* Incoming request message callback.
*
* Runs off main loop, so BQL held.
@@ -122,6 +178,7 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error
**errp)
vbasedev->dev = DEVICE(vdev);
vbasedev->io = &vfio_dev_io_sock;
vbasedev->use_regfds = true;
+ vbasedev->can_mask_irq = true;
ret = vfio_user_get_device(vbasedev, errp);
if (ret) {
@@ -159,6 +216,9 @@ static void vfio_user_pci_realize(PCIDevice *pdev, Error
**errp)
if (ret) {
goto out_teardown;
}
+ if (vdev->msix != NULL) {
+ vfio_user_msix_setup(vdev);
+ }
ret = vfio_interrupt_setup(vdev, errp);
if (ret) {
@@ -186,6 +246,10 @@ static void vfio_user_instance_finalize(Object *obj)
g_free(vdev->emulated_config_bits);
g_free(vdev->rom);
+ if (vdev->msix != NULL) {
+ vfio_user_msix_teardown(vdev);
+ }
+
vfio_put_device(vdev);
if (vbasedev->proxy != NULL) {
--
1.9.4
- [PATCH v2 06/23] vfio-user: Define type vfio_user_pci_dev_info, (continued)
- [PATCH v2 06/23] vfio-user: Define type vfio_user_pci_dev_info, John Johnson, 2023/02/02
- [PATCH v2 05/23] vfio-user: add device IO ops vector, John Johnson, 2023/02/02
- [PATCH v2 08/23] vfio-user: define socket receive functions, John Johnson, 2023/02/02
- [PATCH v2 07/23] vfio-user: connect vfio proxy to remote server, John Johnson, 2023/02/02
- [PATCH v2 10/23] vfio-user: get device info, John Johnson, 2023/02/02
- [PATCH v2 11/23] vfio-user: get region info, John Johnson, 2023/02/02
- [PATCH v2 09/23] vfio-user: define socket send functions, John Johnson, 2023/02/02
- [PATCH v2 13/23] vfio-user: pci_user_realize PCI setup, John Johnson, 2023/02/02
- [PATCH v2 16/23] vfio-user: proxy container connect/disconnect, John Johnson, 2023/02/02
- [PATCH v2 15/23] vfio-user: forward msix BAR accesses to server,
John Johnson <=
- [PATCH v2 17/23] vfio-user: dma map/unmap operations, John Johnson, 2023/02/02
- [PATCH v2 21/23] vfio-user: pci reset, John Johnson, 2023/02/02
- [PATCH v2 22/23] vfio-user: add 'x-msg-timeout' option that specifies msg wait times, John Johnson, 2023/02/02
- [PATCH v2 12/23] vfio-user: region read/write, John Johnson, 2023/02/02