[PATCH] vfio: Support P2P access in confidential VM


From: Wencheng Yang
Subject: [PATCH] vfio: Support P2P access in confidential VM
Date: Thu, 16 Jan 2025 17:53:55 +0800

On confidential VM platforms such as AMD SEV, P2P DMA does not work.
The underlying reason is that the IOMMU driver sets the encryption bit
in the IOMMU page table entries. That is reasonable when a PTE maps an
IOVA to system memory; however, if the PTE maps an IOVA to a device's
MMIO BAR space, setting the encryption bit makes the IOMMU translate
the IOVA to an incorrect bus address rather than to the MMIO BAR
address.

To fix the issue, the key point is to let the IOMMU driver know
whether the target physical address is system memory or device MMIO.
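
As an illustration only (not part of this patch): with the
VFIO_DMA_MAP_FLAG_MMIO bit added to linux-headers below, a VFIO
userspace could request such a mapping roughly as sketched here. The
container fd, the mmap()ed BAR pointer, the IOVA and the size are
placeholder names, and the new flag is only visible with the updated
header.

    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/vfio.h>   /* needs the header change below */

    /* Map a peer device's BAR (already mmap()ed at bar_vaddr) into the
     * container's IOVA space, telling the IOMMU driver that the target
     * is device MMIO rather than encrypted system RAM. */
    static int map_peer_bar(int container_fd, void *bar_vaddr,
                            uint64_t iova, uint64_t size)
    {
        struct vfio_iommu_type1_dma_map map = {
            .argsz = sizeof(map),
            .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
                     VFIO_DMA_MAP_FLAG_MMIO,   /* proposed flag */
            .vaddr = (uintptr_t)bar_vaddr,
            .iova  = iova,
            .size  = size,
        };

        return ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
    }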

VFIO mmap()s the device's MMIO BAR into a virtual address range, and
the @ram_device member of MemoryRegion marks that region as backing
MMIO. This patch passes that information down through the VFIO DMA
mapping API so that the IOMMU driver can do the right thing.
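
For reference, a minimal sketch (assumed names, not part of this patch)
of how a BAR ends up as a ram_device MemoryRegion, which is the
property this patch turns into VFIO_MRF_RAMDEV / VFIO_DMA_MAP_FLAG_MMIO;
it is roughly what hw/vfio does today when it mmap()s a BAR region:

    #include "qemu/osdep.h"
    #include "exec/memory.h"

    /* "owner", "bar_ptr" (returned by mmap()) and "bar_size" are
     * placeholders for illustration. */
    static void init_bar_region(MemoryRegion *bar_mr, Object *owner,
                                void *bar_ptr, uint64_t bar_size)
    {
        memory_region_init_ram_device_ptr(bar_mr, owner, "peer-bar",
                                          bar_size, bar_ptr);
        /*
         * memory_region_is_ram_device(bar_mr) is now true; once the
         * region is mapped into an address space, the flattened
         * MemoryRegionSection carries ram_device = true and the VFIO
         * listener maps it with the MMIO flag instead of treating it
         * as encrypted guest RAM.
         */
    }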

Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com>
---
 hw/vfio/common.c                      | 67 +++++++++++++++++----------
 hw/vfio/container-base.c              |  4 +-
 hw/vfio/container.c                   |  6 ++-
 hw/vfio/iommufd.c                     |  4 +-
 hw/virtio/vhost-vdpa.c                |  6 +--
 include/exec/memory.h                 |  7 ++-
 include/hw/vfio/vfio-common.h         |  4 ++
 include/hw/vfio/vfio-container-base.h |  4 +-
 linux-headers/linux/vfio.h            |  1 +
 system/memory.c                       | 11 +++--
 10 files changed, 74 insertions(+), 40 deletions(-)

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index f7499a9b74..2660a42f9e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -247,31 +247,42 @@ static bool vfio_listener_skipped_section(MemoryRegionSection *section)
 
 /* Called with rcu_read_lock held.  */
 static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-                               ram_addr_t *ram_addr, bool *read_only,
+                               ram_addr_t *ram_addr, uint32_t *flag,
                                Error **errp)
 {
     bool ret, mr_has_discard_manager;
+    uint32_t mr_flag = 0;
 
-    ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
+    ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, &mr_flag,
                                &mr_has_discard_manager, errp);
-    if (ret && mr_has_discard_manager) {
-        /*
-         * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
-         * pages will remain pinned inside vfio until unmapped, resulting in a
-         * higher memory consumption than expected. If memory would get
-         * populated again later, there would be an inconsistency between pages
-         * pinned by vfio and pages seen by QEMU. This is the case until
-         * unmapped from the IOMMU (e.g., during device reset).
-         *
-         * With malicious guests, we really only care about pinning more memory
-         * than expected. RLIMIT_MEMLOCK set for the user/process can never be
-         * exceeded and can be used to mitigate this problem.
-         */
-        warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
-                         " RAM (e.g., virtio-mem) works, however, malicious"
-                         " guests can trigger pinning of more memory than"
-                         " intended via an IOMMU. It's possible to mitigate "
-                         " by setting/adjusting RLIMIT_MEMLOCK.");
+    if (ret) {
+        if (flag) {
+            if (mr_flag & MRF_READONLY)
+                *flag |= VFIO_MRF_READONLY;
+
+            if (mr_flag & MRF_RAMDEV)
+                *flag |= VFIO_MRF_RAMDEV;
+        }
+
+        if (mr_has_discard_manager) {
+            /*
+             * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
+             * pages will remain pinned inside vfio until unmapped, resulting in a
+             * higher memory consumption than expected. If memory would get
+             * populated again later, there would be an inconsistency between pages
+             * pinned by vfio and pages seen by QEMU. This is the case until
+             * unmapped from the IOMMU (e.g., during device reset).
+             *
+             * With malicious guests, we really only care about pinning more memory
+             * than expected. RLIMIT_MEMLOCK set for the user/process can never be
+             * exceeded and can be used to mitigate this problem.
+             */
+            warn_report_once("Using vfio with vIOMMUs and coordinated discarding of"
+                             " RAM (e.g., virtio-mem) works, however, malicious"
+                             " guests can trigger pinning of more memory than"
+                             " intended via an IOMMU. It's possible to mitigate "
+                             " by setting/adjusting RLIMIT_MEMLOCK.");
+        }
     }
     return ret;
 }
@@ -298,9 +309,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     rcu_read_lock();
 
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        bool read_only;
+        uint32_t flag = 0;
 
-        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, &local_err)) {
+        if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &flag, &local_err)) {
             error_report_err(local_err);
             goto out;
         }
@@ -313,7 +324,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
          */
         ret = vfio_container_dma_map(bcontainer, iova,
                                      iotlb->addr_mask + 1, vaddr,
-                                     read_only);
+                                     flag);
         if (ret) {
             error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx", %p) = %d (%s)",
@@ -363,6 +374,7 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                        int128_get64(section->size);
     hwaddr start, next, iova;
     void *vaddr;
+    uint32_t flag = 0;
     int ret;
 
     /*
@@ -377,8 +389,10 @@ static int vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
                section->offset_within_address_space;
         vaddr = memory_region_get_ram_ptr(section->mr) + start;
 
+        flag |= section->readonly ? VFIO_MRF_READONLY : 0;
+        flag |= section->ram_device ? VFIO_MRF_RAMDEV : 0;
         ret = vfio_container_dma_map(bcontainer, iova, next - start,
-                                     vaddr, section->readonly);
+                                     vaddr, flag);
         if (ret) {
             /* Rollback */
             vfio_ram_discard_notify_discard(rdl, section);
@@ -563,6 +577,7 @@ static void vfio_listener_region_add(MemoryListener *listener,
     hwaddr iova, end;
     Int128 llend, llsize;
     void *vaddr;
+    uint32_t flag = 0;
     int ret;
     Error *err = NULL;
 
@@ -661,8 +676,10 @@ static void vfio_listener_region_add(MemoryListener *listener,
         }
     }
 
+    flag |= section->readonly ? VFIO_MRF_READONLY : 0;
+    flag |= section->ram_device ? VFIO_MRF_RAMDEV : 0;
     ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
-                                 vaddr, section->readonly);
+                                 vaddr, flag);
     if (ret) {
         error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
                    "0x%"HWADDR_PRIx", %p) = %d (%s)",
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 749a3fd29d..7cee2ac562 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -17,12 +17,12 @@
 
 int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                            hwaddr iova, ram_addr_t size,
-                           void *vaddr, bool readonly)
+                           void *vaddr, uint32_t flag)
 {
     VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
 
     g_assert(vioc->dma_map);
-    return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
+    return vioc->dma_map(bcontainer, iova, size, vaddr, flag);
 }
 
 int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 4ebb526808..90c32cd16d 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -176,7 +176,7 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
 }
 
 static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
-                               ram_addr_t size, void *vaddr, bool readonly)
+                               ram_addr_t size, void *vaddr, uint32_t flag)
 {
     const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
                                                   bcontainer);
@@ -188,9 +188,11 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
         .size = size,
     };
 
-    if (!readonly) {
+    if (!(flag & VFIO_MRF_READONLY)) {
         map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
     }
+    if (flag & VFIO_MRF_RAMDEV)
+        map.flags |= VFIO_DMA_MAP_FLAG_MMIO;
 
     /*
      * Try the mapping, if it fails with EBUSY, unmap the region and try
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 3490a8f1eb..c773b45b5d 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -28,14 +28,14 @@
 #include "exec/ram_addr.h"
 
 static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
-                            ram_addr_t size, void *vaddr, bool readonly)
+                            ram_addr_t size, void *vaddr, uint32_t flag)
 {
     const VFIOIOMMUFDContainer *container =
         container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
 
     return iommufd_backend_map_dma(container->be,
                                    container->ioas_id,
-                                   iova, size, vaddr, readonly);
+                                   iova, size, vaddr, flag & VFIO_MRF_READONLY);
 }
 
 static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index 3cdaa12ed5..dea733ef8a 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -226,15 +226,15 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
     }
 
     if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
-        bool read_only;
+        uint32_t flag = 0;
 
-        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
+        if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &flag, NULL,
                                   &local_err)) {
             error_report_err(local_err);
             return;
         }
         ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
-                                 iotlb->addr_mask + 1, vaddr, read_only);
+                                 iotlb->addr_mask + 1, vaddr, flag & MRF_READONLY);
         if (ret) {
             error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
                          "0x%" HWADDR_PRIx ", %p) = %d (%m)",
diff --git a/include/exec/memory.h b/include/exec/memory.h
index 9458e2801d..24405af0be 100644
--- a/include/exec/memory.h
+++ b/include/exec/memory.h
@@ -104,10 +104,15 @@ struct MemoryRegionSection {
     hwaddr offset_within_region;
     hwaddr offset_within_address_space;
     bool readonly;
+    bool ram_device;
     bool nonvolatile;
     bool unmergeable;
 };
 
+/* memory region flag */
+#define MRF_READONLY 0x1
+#define MRF_RAMDEV   0x2
+
 typedef struct IOMMUTLBEntry IOMMUTLBEntry;
 
 /* See address_space_translate: bit 0 is read, bit 1 is write.  */
@@ -742,7 +747,7 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
  * Return: true on success, else false setting @errp with error.
  */
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-                          ram_addr_t *ram_addr, bool *read_only,
+                          ram_addr_t *ram_addr, uint32_t *flag,
                           bool *mr_has_discard_manager, Error **errp);
 
 typedef struct CoalescedMemoryRange CoalescedMemoryRange;
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 0c60be5b15..48018dc751 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -43,6 +43,10 @@ enum {
     VFIO_DEVICE_TYPE_AP = 3,
 };
 
+/* vfio memory region flag */
+#define VFIO_MRF_READONLY 0x1
+#define VFIO_MRF_RAMDEV   0x2
+
 typedef struct VFIOMmap {
     MemoryRegion mem;
     void *mmap;
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 4cff9943ab..bb473e7201 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -73,7 +73,7 @@ typedef struct VFIORamDiscardListener {
 
 int vfio_container_dma_map(VFIOContainerBase *bcontainer,
                            hwaddr iova, ram_addr_t size,
-                           void *vaddr, bool readonly);
+                           void *vaddr, uint32_t flag);
 int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
                              hwaddr iova, ram_addr_t size,
                              IOMMUTLBEntry *iotlb);
@@ -113,7 +113,7 @@ struct VFIOIOMMUClass {
     bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
     int (*dma_map)(const VFIOContainerBase *bcontainer,
                    hwaddr iova, ram_addr_t size,
-                   void *vaddr, bool readonly);
+                   void *vaddr, uint32_t flag);
     int (*dma_unmap)(const VFIOContainerBase *bcontainer,
                      hwaddr iova, ram_addr_t size,
                      IOMMUTLBEntry *iotlb);
diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
index 1b5e254d6a..4a32e70c33 100644
--- a/linux-headers/linux/vfio.h
+++ b/linux-headers/linux/vfio.h
@@ -1560,6 +1560,7 @@ struct vfio_iommu_type1_dma_map {
 #define VFIO_DMA_MAP_FLAG_READ (1 << 0)                /* readable from device */
 #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)       /* writable from device */
 #define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
+#define VFIO_DMA_MAP_FLAG_MMIO (1 << 3)
        __u64   vaddr;                          /* Process virtual address */
        __u64   iova;                           /* IO virtual address */
        __u64   size;                           /* Size of mapping (bytes) */
diff --git a/system/memory.c b/system/memory.c
index b17b5538ff..71c54fc045 100644
--- a/system/memory.c
+++ b/system/memory.c
@@ -223,6 +223,7 @@ struct FlatRange {
     uint8_t dirty_log_mask;
     bool romd_mode;
     bool readonly;
+    bool ram_device;
     bool nonvolatile;
     bool unmergeable;
 };
@@ -240,6 +241,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
         .size = fr->addr.size,
         .offset_within_address_space = int128_get64(fr->addr.start),
         .readonly = fr->readonly,
+        .ram_device = fr->ram_device,
         .nonvolatile = fr->nonvolatile,
         .unmergeable = fr->unmergeable,
     };
@@ -252,6 +254,7 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
         && a->offset_in_region == b->offset_in_region
         && a->romd_mode == b->romd_mode
         && a->readonly == b->readonly
+        && a->ram_device == b->ram_device
         && a->nonvolatile == b->nonvolatile
         && a->unmergeable == b->unmergeable;
 }
@@ -657,6 +660,7 @@ static void render_memory_region(FlatView *view,
     fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
     fr.romd_mode = mr->romd_mode;
     fr.readonly = readonly;
+    fr.ram_device = mr->ram_device;
     fr.nonvolatile = nonvolatile;
     fr.unmergeable = unmergeable;
 
@@ -2184,7 +2188,7 @@ void ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
 
 /* Called with rcu_read_lock held.  */
 bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
-                          ram_addr_t *ram_addr, bool *read_only,
+                          ram_addr_t *ram_addr, uint32_t *flag,
                           bool *mr_has_discard_manager, Error **errp)
 {
     MemoryRegion *mr;
@@ -2246,8 +2250,9 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
         *ram_addr = memory_region_get_ram_addr(mr) + xlat;
     }
 
-    if (read_only) {
-        *read_only = !writable || mr->readonly;
+    if (flag) {
+        *flag |= (!writable || mr->readonly) ? MRF_READONLY : 0;
+        *flag |= mr->ram_device ? MRF_RAMDEV : 0;
     }
 
     return true;
-- 
2.43.0



