[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v3 07/17] vfio/migration: Allow migration without VFIO IOMMU
From: |
Alex Williamson |
Subject: |
Re: [PATCH v3 07/17] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support |
Date: |
Tue, 15 Nov 2022 16:36:37 -0700 |
On Thu, 3 Nov 2022 18:16:10 +0200
Avihai Horon <avihaih@nvidia.com> wrote:
> Currently, if IOMMU of a VFIO container doesn't support dirty page
> tracking, migration is blocked. This is because a DMA-able VFIO device
> can dirty RAM pages without updating QEMU about it, thus breaking the
> migration.
>
> However, this doesn't mean that migration can't be done at all.
> In such case, allow migration and let QEMU VFIO code mark the entire
> bitmap dirty.
>
> This guarantees that all pages that might have gotten dirty are reported
> back, and thus guarantees a valid migration even without VFIO IOMMU
> dirty tracking support.
>
> The motivation for this patch is the future introduction of iommufd [1].
> iommufd will directly implement the /dev/vfio/vfio container IOCTLs by
> mapping them into its internal ops, allowing the usage of these IOCTLs
> over iommufd. However, VFIO IOMMU dirty tracking will not be supported
> by this VFIO compatibility API.
>
> This patch will allow migration by hosts that use the VFIO compatibility
> API and prevent migration regressions caused by the lack of VFIO IOMMU
> dirty tracking support.
>
> [1] https://lore.kernel.org/kvm/0-v2-f9436d0bde78+4bb-iommufd_jgg@nvidia.com/
>
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> ---
> hw/vfio/common.c | 84 +++++++++++++++++++++++++++++++++++++--------
> hw/vfio/migration.c | 3 +-
> 2 files changed, 70 insertions(+), 17 deletions(-)
This duplicates quite a bit of code, I think we can integrate this into
a common flow quite a bit more. See below, only compile tested. Thanks,
Alex
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6b5d8c0bf694..4117b40fd9b0 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -397,17 +397,33 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
IOMMUTLBEntry *iotlb)
{
struct vfio_iommu_type1_dma_unmap *unmap;
- struct vfio_bitmap *bitmap;
+ struct vfio_bitmap *vbitmap;
+ unsigned long *bitmap;
+ uint64_t bitmap_size;
uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
int ret;
- unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*vbitmap));
- unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
+ unmap->argsz = sizeof(*unmap);
unmap->iova = iova;
unmap->size = size;
- unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
- bitmap = (struct vfio_bitmap *)&unmap->data;
+
+ bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ bitmap = g_try_malloc0(bitmap_size);
+ if (!bitmap) {
+ ret = -ENOMEM;
+ goto unmap_exit;
+ }
+
+ if (!container->dirty_pages_supported) {
+ bitmap_set(bitmap, 0, pages);
+ goto do_unmap;
+ }
+
+ unmap->argsz += sizeof(*vbitmap);
+ unmap->flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
/*
* cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
@@ -415,33 +431,28 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
* to qemu_real_host_page_size.
*/
- bitmap->pgsize = qemu_real_host_page_size();
- bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
- BITS_PER_BYTE;
+ vbitmap = (struct vfio_bitmap *)&unmap->data;
+ vbitmap->data = (__u64 *)bitmap;
+ vbitmap->pgsize = qemu_real_host_page_size();
+ vbitmap->size = bitmap_size;
- if (bitmap->size > container->max_dirty_bitmap_size) {
- error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
- (uint64_t)bitmap->size);
+ if (bitmap_size > container->max_dirty_bitmap_size) {
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, bitmap_size);
ret = -E2BIG;
goto unmap_exit;
}
- bitmap->data = g_try_malloc0(bitmap->size);
- if (!bitmap->data) {
- ret = -ENOMEM;
- goto unmap_exit;
- }
-
+do_unmap:
ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
if (!ret) {
- cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
- iotlb->translated_addr, pages);
+ cpu_physical_memory_set_dirty_lebitmap(bitmap, iotlb->translated_addr,
+ pages);
} else {
error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
}
- g_free(bitmap->data);
unmap_exit:
+ g_free(bitmap);
g_free(unmap);
return ret;
}
@@ -460,8 +471,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
.size = size,
};
- if (iotlb && container->dirty_pages_supported &&
- vfio_devices_all_running_and_saving(container)) {
+ if (iotlb && vfio_devices_all_running_and_saving(container)) {
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
@@ -1257,6 +1267,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer
*container, bool start)
.argsz = sizeof(dirty),
};
+ if (!container->dirty_pages_supported) {
+ return;
+ }
+
if (start) {
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
} else {
@@ -1287,11 +1301,26 @@ static void
vfio_listener_log_global_stop(MemoryListener *listener)
static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
uint64_t size, ram_addr_t ram_addr)
{
- struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap = NULL;
struct vfio_iommu_type1_dirty_bitmap_get *range;
+ unsigned long *bitmap;
+ uint64_t bitmap_size;
uint64_t pages;
int ret;
+ pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
+ bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ bitmap = g_try_malloc0(bitmap_size);
+ if (!bitmap) {
+ return -ENOMEM;
+ }
+
+ if (!container->dirty_pages_supported) {
+ bitmap_set(bitmap, 0, pages);
+ goto set_dirty;
+ }
+
dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1306,15 +1335,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer
*container, uint64_t iova,
* to qemu_real_host_page_size.
*/
range->bitmap.pgsize = qemu_real_host_page_size();
-
- pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size();
- range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
- BITS_PER_BYTE;
- range->bitmap.data = g_try_malloc0(range->bitmap.size);
- if (!range->bitmap.data) {
- ret = -ENOMEM;
- goto err_out;
- }
+ range->bitmap.size = bitmap_size;
+ range->bitmap.data = (__u64 *)bitmap;
ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
if (ret) {
@@ -1324,13 +1346,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer
*container, uint64_t iova,
goto err_out;
}
- cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
- ram_addr, pages);
+set_dirty:
+ cpu_physical_memory_set_dirty_lebitmap(bitmap, ram_addr, pages);
- trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
- range->bitmap.size, ram_addr);
+ trace_vfio_get_dirty_bitmap(container->fd, iova, size,
+ bitmap_size, ram_addr);
err_out:
- g_free(range->bitmap.data);
+ g_free(bitmap);
g_free(dbitmap);
return ret;
@@ -1465,8 +1487,7 @@ static void vfio_listener_log_sync(MemoryListener
*listener,
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- if (vfio_listener_skipped_section(section) ||
- !container->dirty_pages_supported) {
+ if (vfio_listener_skipped_section(section)) {
return;
}
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index f5e72c7ac198..99ffb7578290 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -857,11 +857,10 @@ int64_t vfio_mig_bytes_transferred(void)
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
{
- VFIOContainer *container = vbasedev->group->container;
struct vfio_region_info *info = NULL;
int ret = -ENOTSUP;
- if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+ if (!vbasedev->enable_migration) {
goto add_blocker;
}