From: Avihai Horon
Subject: [PATCH v4 04/14] vfio/migration: Allow migration without VFIO IOMMU dirty tracking support
Date: Wed, 30 Nov 2022 11:44:04 +0200
Currently, if the IOMMU of a VFIO container doesn't support dirty page
tracking, migration is blocked. This is because a DMA-capable VFIO
device can dirty RAM pages without updating QEMU about it, thus
breaking migration.
However, this doesn't mean that migration can't be done at all.
In such a case, allow migration and let the QEMU VFIO code mark the
entire bitmap dirty.
This ensures that all pages that might have been dirtied are reported
back, and thus guarantees a valid migration even without VFIO IOMMU
dirty tracking support.
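Concretely, the fallback amounts to reporting every page of the
affected range as dirty. Below is a minimal sketch of that idea using
QEMU's bitmap helpers; the helper name is hypothetical, and the real
changes live in vfio_dma_unmap_bitmap() and vfio_get_dirty_bitmap() in
the diff that follows:

#include "qemu/osdep.h"
#include "qemu/bitmap.h"
#include "exec/ram_addr.h"

/*
 * Hypothetical helper: with no VFIO IOMMU dirty tracking, assume every
 * page in the range may have been dirtied by the device and report it.
 */
static int mark_range_fully_dirty(uint64_t size, ram_addr_t ram_addr)
{
    uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
    /* Bitmap size in bytes, rounded up to a multiple of 64 bits. */
    uint64_t bitmap_size = ROUND_UP(pages, sizeof(uint64_t) * BITS_PER_BYTE) /
                           BITS_PER_BYTE;
    unsigned long *bitmap = g_try_malloc0(bitmap_size);

    if (!bitmap) {
        return -ENOMEM;
    }

    bitmap_set(bitmap, 0, pages);   /* mark every page as dirty */
    cpu_physical_memory_set_dirty_lebitmap(bitmap, ram_addr, pages);

    g_free(bitmap);
    return 0;
}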
The motivation for this patch is the future introduction of iommufd [1].
iommufd will directly implement the /dev/vfio/vfio container IOCTLs by
mapping them into its internal ops, allowing the usage of these IOCTLs
over iommufd. However, VFIO IOMMU dirty tracking will not be supported
by this VFIO compatibility API.
This patch will allow migration on hosts that use the VFIO
compatibility API and prevent migration regressions caused by the lack
of VFIO IOMMU dirty tracking support.
[1] https://lore.kernel.org/kvm/0-v2-f9436d0bde78+4bb-iommufd_jgg@nvidia.com/
Signed-off-by: Avihai Horon <avihaih@nvidia.com>
---
hw/vfio/common.c | 100 ++++++++++++++++++++++++++------------------
hw/vfio/migration.c | 3 +-
2 files changed, 61 insertions(+), 42 deletions(-)
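As background for the hunks below: the page count is computed at host
page granularity, and the dirty bitmap is rounded up to a multiple of
64 bits. For example, assuming a 4 KiB host page size, a 1 GiB range
covers 262144 pages, so the bitmap is ROUND_UP(262144, 64) / 8 = 32768
bytes; when the IOMMU can't track dirty pages, all 262144 bits are
simply set before calling cpu_physical_memory_set_dirty_lebitmap().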
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 130e5d1dc7..67104e2fc2 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -397,51 +397,61 @@ static int vfio_dma_unmap_bitmap(VFIOContainer *container,
IOMMUTLBEntry *iotlb)
{
struct vfio_iommu_type1_dma_unmap *unmap;
- struct vfio_bitmap *bitmap;
+ struct vfio_bitmap *vbitmap;
+ unsigned long *bitmap;
+ uint64_t bitmap_size;
uint64_t pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
int ret;
- unmap = g_malloc0(sizeof(*unmap) + sizeof(*bitmap));
+ unmap = g_malloc0(sizeof(*unmap) + sizeof(*vbitmap));
- unmap->argsz = sizeof(*unmap) + sizeof(*bitmap);
+ unmap->argsz = sizeof(*unmap);
unmap->iova = iova;
unmap->size = size;
- unmap->flags |= VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
- bitmap = (struct vfio_bitmap *)&unmap->data;
+ bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ bitmap = g_try_malloc0(bitmap_size);
+ if (!bitmap) {
+ ret = -ENOMEM;
+ goto unmap_exit;
+ }
+
+ if (!container->dirty_pages_supported) {
+ bitmap_set(bitmap, 0, pages);
+ goto do_unmap;
+ }
+
+ unmap->argsz += sizeof(*vbitmap);
+ unmap->flags = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP;
+
+ vbitmap = (struct vfio_bitmap *)&unmap->data;
+ vbitmap->data = (__u64 *)bitmap;
/*
* cpu_physical_memory_set_dirty_lebitmap() supports pages in bitmap of
* qemu_real_host_page_size to mark those dirty. Hence set bitmap_pgsize
* to qemu_real_host_page_size.
*/
+ vbitmap->pgsize = qemu_real_host_page_size();
+ vbitmap->size = bitmap_size;
- bitmap->pgsize = qemu_real_host_page_size();
- bitmap->size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
- BITS_PER_BYTE;
-
- if (bitmap->size > container->max_dirty_bitmap_size) {
- error_report("UNMAP: Size of bitmap too big 0x%"PRIx64,
- (uint64_t)bitmap->size);
+ if (bitmap_size > container->max_dirty_bitmap_size) {
+ error_report("UNMAP: Size of bitmap too big 0x%"PRIx64, bitmap_size);
ret = -E2BIG;
goto unmap_exit;
}
- bitmap->data = g_try_malloc0(bitmap->size);
- if (!bitmap->data) {
- ret = -ENOMEM;
- goto unmap_exit;
- }
-
+do_unmap:
ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
if (!ret) {
- cpu_physical_memory_set_dirty_lebitmap((unsigned long *)bitmap->data,
- iotlb->translated_addr, pages);
+ cpu_physical_memory_set_dirty_lebitmap(bitmap, iotlb->translated_addr,
+ pages);
} else {
error_report("VFIO_UNMAP_DMA with DIRTY_BITMAP : %m");
}
- g_free(bitmap->data);
unmap_exit:
+ g_free(bitmap);
g_free(unmap);
return ret;
}
@@ -460,8 +470,7 @@ static int vfio_dma_unmap(VFIOContainer *container,
.size = size,
};
- if (iotlb && container->dirty_pages_supported &&
- vfio_devices_all_running_and_saving(container)) {
+ if (iotlb && vfio_devices_all_running_and_saving(container)) {
return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
}
@@ -1201,6 +1210,10 @@ static void vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
.argsz = sizeof(dirty),
};
+ if (!container->dirty_pages_supported) {
+ return;
+ }
+
if (start) {
dirty.flags = VFIO_IOMMU_DIRTY_PAGES_FLAG_START;
} else {
@@ -1231,11 +1244,26 @@ static void vfio_listener_log_global_stop(MemoryListener *listener)
static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
uint64_t size, ram_addr_t ram_addr)
{
- struct vfio_iommu_type1_dirty_bitmap *dbitmap;
+ struct vfio_iommu_type1_dirty_bitmap *dbitmap = NULL;
struct vfio_iommu_type1_dirty_bitmap_get *range;
+ unsigned long *bitmap;
+ uint64_t bitmap_size;
uint64_t pages;
int ret;
+ pages = REAL_HOST_PAGE_ALIGN(size) / qemu_real_host_page_size();
+ bitmap_size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
+ BITS_PER_BYTE;
+ bitmap = g_try_malloc0(bitmap_size);
+ if (!bitmap) {
+ return -ENOMEM;
+ }
+
+ if (!container->dirty_pages_supported) {
+ bitmap_set(bitmap, 0, pages);
+ goto set_dirty;
+ }
+
dbitmap = g_malloc0(sizeof(*dbitmap) + sizeof(*range));
dbitmap->argsz = sizeof(*dbitmap) + sizeof(*range);
@@ -1250,15 +1278,8 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
* to qemu_real_host_page_size.
*/
range->bitmap.pgsize = qemu_real_host_page_size();
-
- pages = REAL_HOST_PAGE_ALIGN(range->size) / qemu_real_host_page_size();
- range->bitmap.size = ROUND_UP(pages, sizeof(__u64) * BITS_PER_BYTE) /
- BITS_PER_BYTE;
- range->bitmap.data = g_try_malloc0(range->bitmap.size);
- if (!range->bitmap.data) {
- ret = -ENOMEM;
- goto err_out;
- }
+ range->bitmap.size = bitmap_size;
+ range->bitmap.data = (__u64 *)bitmap;
ret = ioctl(container->fd, VFIO_IOMMU_DIRTY_PAGES, dbitmap);
if (ret) {
@@ -1268,13 +1289,13 @@ static int vfio_get_dirty_bitmap(VFIOContainer *container, uint64_t iova,
goto err_out;
}
- cpu_physical_memory_set_dirty_lebitmap((unsigned long *)range->bitmap.data,
- ram_addr, pages);
+set_dirty:
+ cpu_physical_memory_set_dirty_lebitmap(bitmap, ram_addr, pages);
- trace_vfio_get_dirty_bitmap(container->fd, range->iova, range->size,
- range->bitmap.size, ram_addr);
+ trace_vfio_get_dirty_bitmap(container->fd, iova, size, bitmap_size,
+ ram_addr);
err_out:
- g_free(range->bitmap.data);
+ g_free(bitmap);
g_free(dbitmap);
return ret;
@@ -1409,8 +1430,7 @@ static void vfio_listener_log_sync(MemoryListener *listener,
{
VFIOContainer *container = container_of(listener, VFIOContainer, listener);
- if (vfio_listener_skipped_section(section) ||
- !container->dirty_pages_supported) {
+ if (vfio_listener_skipped_section(section)) {
return;
}
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 09fe7c1de2..552c2313b2 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -860,11 +860,10 @@ int64_t vfio_mig_bytes_transferred(void)
int vfio_migration_probe(VFIODevice *vbasedev, Error **errp)
{
- VFIOContainer *container = vbasedev->group->container;
struct vfio_region_info *info = NULL;
int ret = -ENOTSUP;
- if (!vbasedev->enable_migration || !container->dirty_pages_supported) {
+ if (!vbasedev->enable_migration) {
goto add_blocker;
}
--
2.26.3