qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend


From: Cédric Le Goater
Subject: Re: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend
Date: Mon, 13 Nov 2023 12:04:52 +0100
User-agent: Mozilla Thunderbird

On 11/10/23 11:18, Duan, Zhenzhong wrote:


-----Original Message-----
From: Cédric Le Goater <clg@redhat.com>
Sent: Friday, November 10, 2023 5:34 PM
Subject: Re: [PATCH v5 03/20] vfio/iommufd: Implement the iommufd backend

On 11/9/23 12:45, Zhenzhong Duan wrote:
From: Yi Liu <yi.l.liu@intel.com>

Add the iommufd backend. The IOMMUFD container class is implemented
based on the new /dev/iommu user API. This backend obviously depends
on CONFIG_IOMMUFD.

So far, the iommufd backend doesn't support dirty page sync yet due
to missing support in the host kernel.

Co-authored-by: Eric Auger <eric.auger@redhat.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
Signed-off-by: Zhenzhong Duan <zhenzhong.duan@intel.com>
---
v5: Switch to IOAS attach/detach and hide hwpt

   include/hw/vfio/vfio-common.h |  11 +
   hw/vfio/common.c              |  20 +-
   hw/vfio/iommufd.c             | 429 ++++++++++++++++++++++++++++++++++
   hw/vfio/meson.build           |   3 +
   hw/vfio/trace-events          |  10 +
   5 files changed, 469 insertions(+), 4 deletions(-)
   create mode 100644 hw/vfio/iommufd.c

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 24ecc0e7ee..3dac5c167e 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -89,6 +89,14 @@ typedef struct VFIOHostDMAWindow {
       QLIST_ENTRY(VFIOHostDMAWindow) hostwin_next;
   } VFIOHostDMAWindow;

+typedef struct IOMMUFDBackend IOMMUFDBackend;
+
+typedef struct VFIOIOMMUFDContainer {
+    VFIOContainerBase bcontainer;
+    IOMMUFDBackend *be;
+    uint32_t ioas_id;
+} VFIOIOMMUFDContainer;
+
   typedef struct VFIODeviceOps VFIODeviceOps;

   typedef struct VFIODevice {
@@ -116,6 +124,8 @@ typedef struct VFIODevice {
       OnOffAuto pre_copy_dirty_page_tracking;
       bool dirty_pages_supported;
       bool dirty_tracking;
+    int devid;
+    IOMMUFDBackend *iommufd;
   } VFIODevice;

   struct VFIODeviceOps {
@@ -201,6 +211,7 @@ typedef QLIST_HEAD(VFIODeviceList, VFIODevice)
VFIODeviceList;
   extern VFIOGroupList vfio_group_list;
   extern VFIODeviceList vfio_device_list;
   extern const VFIOIOMMUOps vfio_legacy_ops;
+extern const VFIOIOMMUOps vfio_iommufd_ops;
   extern const MemoryListener vfio_memory_listener;
   extern int vfio_kvm_device_fd;

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 572ae7c934..3b7e11158f 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -19,6 +19,7 @@
    */

   #include "qemu/osdep.h"
+#include CONFIG_DEVICES /* CONFIG_IOMMUFD */
   #include <sys/ioctl.h>
   #ifdef CONFIG_KVM
   #include <linux/kvm.h>
@@ -1462,10 +1463,13 @@ VFIOAddressSpace
*vfio_get_address_space(AddressSpace *as)

   void vfio_put_address_space(VFIOAddressSpace *space)
   {
-    if (QLIST_EMPTY(&space->containers)) {
-        QLIST_REMOVE(space, list);
-        g_free(space);
+    if (!QLIST_EMPTY(&space->containers)) {
+        return;

I think this change deserves to be in a separate patch, even if simple.
Is there some relation with iommufd ? This is not clear.

OK, will do. It's unrelated to iommufd, just avoid unnecessary check below.


       }
+
+    QLIST_REMOVE(space, list);
+    g_free(space);
+
       if (QLIST_EMPTY(&vfio_address_spaces)) {
           qemu_unregister_reset(vfio_reset_handler, NULL);
       }
@@ -1498,8 +1502,16 @@ retry:
   int vfio_attach_device(char *name, VFIODevice *vbasedev,
                          AddressSpace *as, Error **errp)
   {
-    const VFIOIOMMUOps *ops = &vfio_legacy_ops;
+    const VFIOIOMMUOps *ops;

+#ifdef CONFIG_IOMMUFD
+    if (vbasedev->iommufd) {
+        ops = &vfio_iommufd_ops;
+    } else
+#endif
+    {
+        ops = &vfio_legacy_ops;
+    }

Simply adding :

  +#ifdef CONFIG_IOMMUFD
  +    if (vbasedev->iommufd) {
  +        ops = &vfio_iommufd_ops;
  +    }
  +#endif

would have the same effect with less change.

Indeed, will do.


That said, it would also be nice to find a way to avoid the use of
CONFIG_IOMMUFD in hw/vfio/common.c. May be with a helper returning
'const VFIOIOMMUOps *'. This is minor. Still, I find some redundancy
with vfio_container_init() and I don't a good alternative yet :)

Sure, will do, guess you mean a helper function in hw/vfio/helpers.c with
CONFIG_IOMMUFD check?

Yes. That was the idea. I took a look and the benefits are minimal.
I am not sure it is worth the effort.

I also tried to minize the use of CONFIG_IOMMUFD in our various
VFIO devices with an intermediate QOM interface. See below.

+
+#define TYPE_IOMMUFD_INTERFACE "iommufd-interface"
+#define IOMMUFD_INTERFACE(obj) \
+     INTERFACE_CHECK(IOMMUFDInterface, (obj), TYPE_IOMMUFD_INTERFACE)
+typedef struct IOMMUFDInterfaceClass IOMMUFDInterfaceClass;
+DECLARE_CLASS_CHECKERS(IOMMUFDInterfaceClass, IOMMUFD_INTERFACE,
+                       TYPE_IOMMUFD_INTERFACE)
+
+typedef struct IOMMUFDInterface IOMMUFDInterface;
+
+struct IOMMUFDInterfaceClass {
+    InterfaceClass parent;
+    void (*set_fd)(Object *obj, const char *str, Error **errp);
+};
+
+#ifdef CONFIG_IOMMUFD
+static void iommufd_interface_set_fd(Object *obj, const char *str, Error 
**errp)
+{
+    IOMMUFDInterfaceClass *iik = IOMMUFD_INTERFACE_GET_CLASS(obj);
+
+    iik->set_fd(obj, str, errp);
+}
+#endif
+
+static void iommufd_interface_class_init(ObjectClass *klass, void *data)
+{
+#ifdef CONFIG_IOMMUFD
+    object_class_property_add_str(klass, "fd", NULL, iommufd_interface_set_fd);
+#endif
+}
+
+static void iommufd_interface_add_property(Object *obj, Object **iommufd)
+{
+#ifdef CONFIG_IOMMUFD
+    object_property_add_link(obj, "iommufd", TYPE_IOMMUFD_BACKEND, iommufd,
+                             NULL, OBJ_PROP_LINK_STRONG);
+#endif
+}
+
+static const TypeInfo iommufd_interface_info = {
+    .name = TYPE_IOMMUFD_INTERFACE,
+    .parent = TYPE_INTERFACE,
+    .class_size = sizeof(IOMMUFDInterfaceClass),
+    .class_init = iommufd_interface_class_init,
+};
/*
  * Functions used whatever the injection method
@@ -649,21 +696,18 @@ static Property vfio_platform_dev_proper
 static void vfio_platform_instance_init(Object *obj)
 {
     VFIOPlatformDevice *vdev = VFIO_PLATFORM_DEVICE(obj);
+    Object *iommufd = OBJECT(vdev->vbasedev.iommufd);
vdev->vbasedev.fd = -1;
+    iommufd_interface_add_property(obj, &iommufd);
 }
static void vfio_platform_class_init(ObjectClass *klass, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(klass);
     SysBusDeviceClass *sbc = SYS_BUS_DEVICE_CLASS(klass);
+    IOMMUFDInterfaceClass *iik = IOMMUFD_INTERFACE_CLASS(klass);
dc->realize = vfio_platform_realize;
     device_class_set_props(dc, vfio_platform_dev_properties);
     dc->vmsd = &vfio_platform_vmstate;
     dc->desc = "VFIO-based platform device assignment";
     sbc->connect_irq_notifier = vfio_start_irqfd_injection;
     set_bit(DEVICE_CATEGORY_MISC, dc->categories);
     /* Supported by TYPE_VIRT_MACHINE */
     dc->user_creatable = true;
+    iik->set_fd = vfio_platform_set_fd;
 }
static const TypeInfo vfio_platform_dev_info = {
@@ -703,11 +745,16 @@ static const TypeInfo vfio_platform_dev_
     .instance_init = vfio_platform_instance_init,
     .class_init = vfio_platform_class_init,
     .class_size = sizeof(VFIOPlatformDeviceClass),
+    .interfaces    = (InterfaceInfo[]) {
+        { TYPE_IOMMUFD_INTERFACE },
+        { }
+    }
 };
Not really satisfying compared to the #ifdef. Let's keep them.






       return ops->attach_device(name, vbasedev, as, errp);
   }

diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
new file mode 100644
index 0000000000..ea4e23f4ec
--- /dev/null
+++ b/hw/vfio/iommufd.c
@@ -0,0 +1,429 @@
+/*
+ * iommufd container backend
+ *
+ * Copyright (C) 2023 Intel Corporation.
+ * Copyright Red Hat, Inc. 2023
+ *
+ * Authors: Yi Liu <yi.l.liu@intel.com>
+ *          Eric Auger <eric.auger@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+#include <linux/vfio.h>
+#include <linux/iommufd.h>
+
+#include "hw/vfio/vfio-common.h"
+#include "qemu/error-report.h"
+#include "trace.h"
+#include "qapi/error.h"
+#include "sysemu/iommufd.h"
+#include "hw/qdev-core.h"
+#include "sysemu/reset.h"
+#include "qemu/cutils.h"
+#include "qemu/chardev_open.h"
+
+static int iommufd_map(VFIOContainerBase *bcontainer, hwaddr iova,
+                       ram_addr_t size, void *vaddr, bool readonly)
+{
+    VFIOIOMMUFDContainer *container =
+        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+
+    return iommufd_backend_map_dma(container->be,
+                                   container->ioas_id,
+                                   iova, size, vaddr, readonly);
+}
+
+static int iommufd_unmap(VFIOContainerBase *bcontainer,
+                         hwaddr iova, ram_addr_t size,
+                         IOMMUTLBEntry *iotlb)
+{
+    VFIOIOMMUFDContainer *container =
+        container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+
+    /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
+    return iommufd_backend_unmap_dma(container->be,
+                                     container->ioas_id, iova, size);
+}
+
+static void iommufd_cdev_kvm_device_add(VFIODevice *vbasedev)
+{
+    Error *err = NULL;
+
+    if (vfio_kvm_device_add_fd(vbasedev->fd, &err)) {
+        error_report_err(err);

We should propagate this error to the callers instead.

This is to follow legacy backend where the error doesn't treated as
a serious issue.


+    }
+}
+
+static void iommufd_cdev_kvm_device_del(VFIODevice *vbasedev)
+{
+    Error *err = NULL;
+
+    if (vfio_kvm_device_del_fd(vbasedev->fd, &err)) {
+        error_report_err(err);

Possibly this one also but It might be more complex. Let's keep it that
way.

+    }
+}
+
+static int iommufd_connect_and_bind(VFIODevice *vbasedev, Error **errp)
+{
+    IOMMUFDBackend *iommufd = vbasedev->iommufd;
+    struct vfio_device_bind_iommufd bind = {
+        .argsz = sizeof(bind),
+        .flags = 0,
+    };
+    int ret;
+
+    ret = iommufd_backend_connect(iommufd, errp);
+    if (ret) {
+        return ret;
+    }
+
+    /*
+     * Add device to kvm-vfio to be prepared for the tracking
+     * in KVM. Especially for some emulated devices, it requires
+     * to have kvm information in the device open.
+     */
+    iommufd_cdev_kvm_device_add(vbasedev);

We shoud return a possible error.

This is to follow legacy backend where this error is reported and ignored.
Do we need a difference for iommufd BE?

I don't know ! You tell me :) It is always better to raise the
exception and let the upper layers decide on the action to take.



+
+    /* Bind device to iommufd */
+    bind.iommufd = iommufd->fd;
+    ret = ioctl(vbasedev->fd, VFIO_DEVICE_BIND_IOMMUFD, &bind);
+    if (ret) {
+        error_setg_errno(errp, errno, "error bind device fd=%d to iommufd=%d",
+                         vbasedev->fd, bind.iommufd);
+        goto err_bind;
+    }
+
+    vbasedev->devid = bind.out_devid;
+    trace_iommufd_connect_and_bind(bind.iommufd, vbasedev->name,
vbasedev->fd,
+                                   vbasedev->devid);
+    return ret;
+err_bind:
+    iommufd_cdev_kvm_device_del(vbasedev);
+    iommufd_backend_disconnect(iommufd);

These two calls look like iommufd_unbind_and_disconnect() below.

Yes, they are same as iommufd doesn't support explicit device unbind.
But it looks strange to call iommufd_unbind_and_disconnect in
iommufd_connect_and_bind.

This is just a little redudant. This is minor.


Thanks,

C.





+    return ret;
+}
+
+static void iommufd_unbind_and_disconnect(VFIODevice *vbasedev)
+{
+    /* Unbind is automatically conducted when device fd is closed */
+    iommufd_cdev_kvm_device_del(vbasedev);
+    iommufd_backend_disconnect(vbasedev->iommufd);
+}
+
+static int iommufd_cdev_getfd(const char *sysfs_path, Error **errp)
+{
+    long int ret = -ENOTTY;
+    char *path, *vfio_dev_path = NULL, *vfio_path = NULL;
+    DIR *dir = NULL;
+    struct dirent *dent;
+    gchar *contents;
+    struct stat st;
+    gsize length;
+    int major, minor;
+    dev_t vfio_devt;
+
+    path = g_strdup_printf("%s/vfio-dev", sysfs_path);
+    if (stat(path, &st) < 0) {
+        error_setg_errno(errp, errno, "no such host device");
+        goto out_free_path;
+    }
+
+    dir = opendir(path);
+    if (!dir) {
+        error_setg_errno(errp, errno, "couldn't open dirrectory %s", path);


directory

Good catch, will fix.

Thanks
Zhenzhong





reply via email to

[Prev in Thread] Current Thread [Next in Thread]