[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH] vfio: Support P2P access in confidential VM
From: |
Alex Williamson |
Subject: |
Re: [PATCH] vfio: Support P2P access in confidential VM |
Date: |
Thu, 16 Jan 2025 12:47:40 -0500 |
On Thu, 16 Jan 2025 17:53:55 +0800
Wencheng Yang <east.moutain.yang@gmail.com> wrote:
> On confidential VM platform, for example, AMD-SEV, P2P doesn't work.
> The underlying reason is that IOMMU driver set encryption bit on
> IOMMU page table pte entry, it's reasonalbe if the pte maps iova
> to system memory. However, if the pte maps iova to device's
> mmio bar space, setting encryption bit on pte would cause IOMMU
> translates iova to incorrect bus address, rather than mmio bar
> address.
>
> To fix the issue, the key point is to let IOMMU driver know the
> target phyical address is system memory or device mmio.
>
> VFIO allocates virtual address and maps it to device mmio bar,
> the member @ram_device of MemoryRegion indicates the memory
> region is for mmio. The patch passes the info to VFIO DAM API,
> IOMMU driver would do the correct thing.
>
> Signed-off-by: Wencheng Yang <east.moutain.yang@gmail.com>
> ---
> hw/vfio/common.c | 67 +++++++++++++++++----------
> hw/vfio/container-base.c | 4 +-
> hw/vfio/container.c | 6 ++-
> hw/vfio/iommufd.c | 4 +-
> hw/virtio/vhost-vdpa.c | 6 +--
> include/exec/memory.h | 7 ++-
> include/hw/vfio/vfio-common.h | 4 ++
> include/hw/vfio/vfio-container-base.h | 4 +-
> linux-headers/linux/vfio.h | 1 +
> system/memory.c | 11 +++--
> 10 files changed, 74 insertions(+), 40 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index f7499a9b74..2660a42f9e 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -247,31 +247,42 @@ static bool
> vfio_listener_skipped_section(MemoryRegionSection *section)
>
> /* Called with rcu_read_lock held. */
> static bool vfio_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> - ram_addr_t *ram_addr, bool *read_only,
> + ram_addr_t *ram_addr, uint32_t *flag,
> Error **errp)
> {
> bool ret, mr_has_discard_manager;
> + uint32_t mr_flag = 0;
>
> - ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, read_only,
> + ret = memory_get_xlat_addr(iotlb, vaddr, ram_addr, &mr_flag,
> &mr_has_discard_manager, errp);
> - if (ret && mr_has_discard_manager) {
> - /*
> - * Malicious VMs might trigger discarding of IOMMU-mapped memory. The
> - * pages will remain pinned inside vfio until unmapped, resulting in
> a
> - * higher memory consumption than expected. If memory would get
> - * populated again later, there would be an inconsistency between
> pages
> - * pinned by vfio and pages seen by QEMU. This is the case until
> - * unmapped from the IOMMU (e.g., during device reset).
> - *
> - * With malicious guests, we really only care about pinning more
> memory
> - * than expected. RLIMIT_MEMLOCK set for the user/process can never
> be
> - * exceeded and can be used to mitigate this problem.
> - */
> - warn_report_once("Using vfio with vIOMMUs and coordinated discarding
> of"
> - " RAM (e.g., virtio-mem) works, however, malicious"
> - " guests can trigger pinning of more memory than"
> - " intended via an IOMMU. It's possible to mitigate "
> - " by setting/adjusting RLIMIT_MEMLOCK.");
> + if (ret) {
> + if (flag) {
> + if (mr_flag & MRF_READONLY)
> + *flag |= VFIO_MRF_READONLY;
> +
> + if (mr_flag & MRF_RAMDEV)
> + *flag |= VFIO_MRF_RAMDEV;
> + }
> +
> + if (mr_has_discard_manager) {
> + /*
> + * Malicious VMs might trigger discarding of IOMMU-mapped
> memory. The
> + * pages will remain pinned inside vfio until unmapped,
> resulting in a
> + * higher memory consumption than expected. If memory would get
> + * populated again later, there would be an inconsistency
> between pages
> + * pinned by vfio and pages seen by QEMU. This is the case until
> + * unmapped from the IOMMU (e.g., during device reset).
> + *
> + * With malicious guests, we really only care about pinning more
> memory
> + * than expected. RLIMIT_MEMLOCK set for the user/process can
> never be
> + * exceeded and can be used to mitigate this problem.
> + */
> + warn_report_once("Using vfio with vIOMMUs and coordinated
> discarding of"
> + " RAM (e.g., virtio-mem) works, however,
> malicious"
> + " guests can trigger pinning of more memory
> than"
> + " intended via an IOMMU. It's possible to
> mitigate "
> + " by setting/adjusting RLIMIT_MEMLOCK.");
> + }
> }
> return ret;
> }
> @@ -298,9 +309,9 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n,
> IOMMUTLBEntry *iotlb)
> rcu_read_lock();
>
> if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> - bool read_only;
> + uint32_t flag = 0;
>
> - if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &read_only,
> &local_err)) {
> + if (!vfio_get_xlat_addr(iotlb, &vaddr, NULL, &flag, &local_err)) {
> error_report_err(local_err);
> goto out;
> }
> @@ -313,7 +324,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n,
> IOMMUTLBEntry *iotlb)
> */
> ret = vfio_container_dma_map(bcontainer, iova,
> iotlb->addr_mask + 1, vaddr,
> - read_only);
> + flag);
> if (ret) {
> error_report("vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
> "0x%"HWADDR_PRIx", %p) = %d (%s)",
> @@ -363,6 +374,7 @@ static int
> vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
> int128_get64(section->size);
> hwaddr start, next, iova;
> void *vaddr;
> + uint32_t flag = 0;
> int ret;
>
> /*
> @@ -377,8 +389,10 @@ static int
> vfio_ram_discard_notify_populate(RamDiscardListener *rdl,
> section->offset_within_address_space;
> vaddr = memory_region_get_ram_ptr(section->mr) + start;
>
> + flag |= section->readonly? VFIO_MRF_READONLY: 0;
> + flag |= section->ram_device? VFIO_MRF_RAMDEV: 0;
> ret = vfio_container_dma_map(bcontainer, iova, next - start,
> - vaddr, section->readonly);
> + vaddr, flag);
> if (ret) {
> /* Rollback */
> vfio_ram_discard_notify_discard(rdl, section);
> @@ -563,6 +577,7 @@ static void vfio_listener_region_add(MemoryListener
> *listener,
> hwaddr iova, end;
> Int128 llend, llsize;
> void *vaddr;
> + uint32_t flag = 0;
> int ret;
> Error *err = NULL;
>
> @@ -661,8 +676,10 @@ static void vfio_listener_region_add(MemoryListener
> *listener,
> }
> }
>
> + flag |= section->readonly? VFIO_MRF_READONLY: 0;
> + flag |= section->ram_device? VFIO_MRF_RAMDEV: 0;
> ret = vfio_container_dma_map(bcontainer, iova, int128_get64(llsize),
> - vaddr, section->readonly);
> + vaddr, flag);
> if (ret) {
> error_setg(&err, "vfio_container_dma_map(%p, 0x%"HWADDR_PRIx", "
> "0x%"HWADDR_PRIx", %p) = %d (%s)",
> diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
> index 749a3fd29d..7cee2ac562 100644
> --- a/hw/vfio/container-base.c
> +++ b/hw/vfio/container-base.c
> @@ -17,12 +17,12 @@
>
> int vfio_container_dma_map(VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - void *vaddr, bool readonly)
> + void *vaddr, uint32_t flag)
> {
> VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
>
> g_assert(vioc->dma_map);
> - return vioc->dma_map(bcontainer, iova, size, vaddr, readonly);
> + return vioc->dma_map(bcontainer, iova, size, vaddr, flag);
> }
>
> int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 4ebb526808..90c32cd16d 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -176,7 +176,7 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase
> *bcontainer,
> }
>
> static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr
> iova,
> - ram_addr_t size, void *vaddr, bool readonly)
> + ram_addr_t size, void *vaddr, uint32_t flag)
> {
> const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
> bcontainer);
> @@ -188,9 +188,11 @@ static int vfio_legacy_dma_map(const VFIOContainerBase
> *bcontainer, hwaddr iova,
> .size = size,
> };
>
> - if (!readonly) {
> + if (!(flag & VFIO_MRF_READONLY)) {
> map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
> }
> + if (flag & VFIO_MRF_RAMDEV)
> + map.flags |= VFIO_DMA_MAP_FLAG_MMIO;
>
> /*
> * Try the mapping, if it fails with EBUSY, unmap the region and try
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 3490a8f1eb..c773b45b5d 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -28,14 +28,14 @@
> #include "exec/ram_addr.h"
>
> static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> - ram_addr_t size, void *vaddr, bool readonly)
> + ram_addr_t size, void *vaddr, uint32_t flag)
> {
> const VFIOIOMMUFDContainer *container =
> container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>
> return iommufd_backend_map_dma(container->be,
> container->ioas_id,
> - iova, size, vaddr, readonly);
> + iova, size, vaddr, flag &
> VFIO_MRF_READONLY);
> }
>
> static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
> diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
> index 3cdaa12ed5..dea733ef8a 100644
> --- a/hw/virtio/vhost-vdpa.c
> +++ b/hw/virtio/vhost-vdpa.c
> @@ -226,15 +226,15 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier
> *n, IOMMUTLBEntry *iotlb)
> }
>
> if ((iotlb->perm & IOMMU_RW) != IOMMU_NONE) {
> - bool read_only;
> + uint32_t flag;
>
> - if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL,
> + if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &flag, NULL,
> &local_err)) {
> error_report_err(local_err);
> return;
> }
> ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova,
> - iotlb->addr_mask + 1, vaddr, read_only);
> + iotlb->addr_mask + 1, vaddr, flag &
> MRF_READONLY);
> if (ret) {
> error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", "
> "0x%" HWADDR_PRIx ", %p) = %d (%m)",
> diff --git a/include/exec/memory.h b/include/exec/memory.h
> index 9458e2801d..24405af0be 100644
> --- a/include/exec/memory.h
> +++ b/include/exec/memory.h
> @@ -104,10 +104,15 @@ struct MemoryRegionSection {
> hwaddr offset_within_region;
> hwaddr offset_within_address_space;
> bool readonly;
> + bool ram_device;
> bool nonvolatile;
> bool unmergeable;
> };
>
> +/* memory region flag */
> +#define MRF_READONLY 0x1
> +#define MRF_RAMDEV 0x2
> +
> typedef struct IOMMUTLBEntry IOMMUTLBEntry;
>
> /* See address_space_translate: bit 0 is read, bit 1 is write. */
> @@ -742,7 +747,7 @@ void
> ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
> * Return: true on success, else false setting @errp with error.
> */
> bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> - ram_addr_t *ram_addr, bool *read_only,
> + ram_addr_t *ram_addr, uint32_t *flag,
> bool *mr_has_discard_manager, Error **errp);
>
> typedef struct CoalescedMemoryRange CoalescedMemoryRange;
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 0c60be5b15..48018dc751 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -43,6 +43,10 @@ enum {
> VFIO_DEVICE_TYPE_AP = 3,
> };
>
> +/* vfio memory region flag */
> +#define VFIO_MRF_READONLY 0x1
> +#define VFIO_MRF_RAMDEV 0x2
> +
> typedef struct VFIOMmap {
> MemoryRegion mem;
> void *mmap;
> diff --git a/include/hw/vfio/vfio-container-base.h
> b/include/hw/vfio/vfio-container-base.h
> index 4cff9943ab..bb473e7201 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -73,7 +73,7 @@ typedef struct VFIORamDiscardListener {
>
> int vfio_container_dma_map(VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - void *vaddr, bool readonly);
> + void *vaddr, uint32_t flag);
> int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> IOMMUTLBEntry *iotlb);
> @@ -113,7 +113,7 @@ struct VFIOIOMMUClass {
> bool (*setup)(VFIOContainerBase *bcontainer, Error **errp);
> int (*dma_map)(const VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - void *vaddr, bool readonly);
> + void *vaddr, uint32_t flag);
> int (*dma_unmap)(const VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> IOMMUTLBEntry *iotlb);
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 1b5e254d6a..4a32e70c33 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -1560,6 +1560,7 @@ struct vfio_iommu_type1_dma_map {
> #define VFIO_DMA_MAP_FLAG_READ (1 << 0) /* readable from device
> */
> #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1) /* writable from device */
> #define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
> +#define VFIO_DMA_MAP_FLAG_MMIO (1 << 3)
Where's the kernel patch that implements the MMIO map flag. That needs
to come first.
I also don't understand why we're creating multiple read-only and
ramdev flags to distill back into vfio mapping flags. Thanks,
Alex
> __u64 vaddr; /* Process virtual address */
> __u64 iova; /* IO virtual address */
> __u64 size; /* Size of mapping (bytes) */
> diff --git a/system/memory.c b/system/memory.c
> index b17b5538ff..71c54fc045 100644
> --- a/system/memory.c
> +++ b/system/memory.c
> @@ -223,6 +223,7 @@ struct FlatRange {
> uint8_t dirty_log_mask;
> bool romd_mode;
> bool readonly;
> + bool ram_device;
> bool nonvolatile;
> bool unmergeable;
> };
> @@ -240,6 +241,7 @@ section_from_flat_range(FlatRange *fr, FlatView *fv)
> .size = fr->addr.size,
> .offset_within_address_space = int128_get64(fr->addr.start),
> .readonly = fr->readonly,
> + .ram_device = fr->ram_device,
> .nonvolatile = fr->nonvolatile,
> .unmergeable = fr->unmergeable,
> };
> @@ -252,6 +254,7 @@ static bool flatrange_equal(FlatRange *a, FlatRange *b)
> && a->offset_in_region == b->offset_in_region
> && a->romd_mode == b->romd_mode
> && a->readonly == b->readonly
> + && a->ram_device == b->ram_device
> && a->nonvolatile == b->nonvolatile
> && a->unmergeable == b->unmergeable;
> }
> @@ -657,6 +660,7 @@ static void render_memory_region(FlatView *view,
> fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr);
> fr.romd_mode = mr->romd_mode;
> fr.readonly = readonly;
> + fr.ram_device = mr->ram_device;
> fr.nonvolatile = nonvolatile;
> fr.unmergeable = unmergeable;
>
> @@ -2184,7 +2188,7 @@ void
> ram_discard_manager_unregister_listener(RamDiscardManager *rdm,
>
> /* Called with rcu_read_lock held. */
> bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void **vaddr,
> - ram_addr_t *ram_addr, bool *read_only,
> + ram_addr_t *ram_addr, uint32_t *flag,
> bool *mr_has_discard_manager, Error **errp)
> {
> MemoryRegion *mr;
> @@ -2246,8 +2250,9 @@ bool memory_get_xlat_addr(IOMMUTLBEntry *iotlb, void
> **vaddr,
> *ram_addr = memory_region_get_ram_addr(mr) + xlat;
> }
>
> - if (read_only) {
> - *read_only = !writable || mr->readonly;
> + if (flag) {
> + *flag |= (!writable || mr->readonly)? MRF_READONLY: 0;
> + *flag |= mr->ram_device? MRF_RAMDEV: 0;
> }
>
> return true;