---
docs/devel/vfio-migration.rst | 10 +++++++++
include/hw/vfio/vfio-common.h | 2 ++
hw/vfio/migration.c | 42 ++++++++++++++++++++++++++++++++++-
3 files changed, 53 insertions(+), 1 deletion(-)
diff --git a/docs/devel/vfio-migration.rst b/docs/devel/vfio-migration.rst
index e896b2a673..e75793b76a 100644
--- a/docs/devel/vfio-migration.rst
+++ b/docs/devel/vfio-migration.rst
@@ -16,6 +16,13 @@ helps to reduce the total downtime of the VM. VFIO devices
opt-in to pre-copy
support by reporting the VFIO_MIGRATION_PRE_COPY flag in the
VFIO_DEVICE_FEATURE_MIGRATION ioctl.
+When pre-copy is supported, it's possible to further reduce downtime by
+enabling "switchover-ack" migration capability.
+VFIO migration uAPI defines "initial bytes" as part of its pre-copy data stream
+and recommends that the initial bytes are sent and loaded in the destination
+before stopping the source VM. Enabling this migration capability will
+guarantee that and thus, can potentially reduce downtime even further.
+
Note that currently VFIO migration is supported only for a single device. This
is due to VFIO migration's lack of P2P support. However, P2P support is
planned
to be added later on.
@@ -45,6 +52,9 @@ VFIO implements the device hooks for the iterative approach
as follows:
* A ``save_live_iterate`` function that reads the VFIO device's data from the
vendor driver during iterative pre-copy phase.
+* A ``switchover_ack_needed`` function that checks if the VFIO device uses
+ "switchover-ack" migration capability when this capability is enabled.
+
* A ``save_state`` function to save the device config space if it is present.
* A ``save_live_complete_precopy`` function that sets the VFIO device in
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 5ce7a01d56..b4a7eb0f9a 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -69,6 +69,8 @@ typedef struct VFIOMigration {
uint64_t precopy_init_size;
uint64_t precopy_dirty_size;
uint64_t mig_flags;
+ bool switchover_ack_needed;
+ bool initial_data_sent;
} VFIOMigration;
typedef struct VFIOAddressSpace {
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 418efed019..09c669c1d8 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -18,6 +18,7 @@
#include "sysemu/runstate.h"
#include "hw/vfio/vfio-common.h"
#include "migration/migration.h"
+#include "migration/savevm.h"
#include "migration/vmstate.h"
#include "migration/qemu-file.h"
#include "migration/register.h"
@@ -45,6 +46,7 @@
#define VFIO_MIG_FLAG_DEV_CONFIG_STATE (0xffffffffef100002ULL)
#define VFIO_MIG_FLAG_DEV_SETUP_STATE (0xffffffffef100003ULL)
#define VFIO_MIG_FLAG_DEV_DATA_STATE (0xffffffffef100004ULL)
+#define VFIO_MIG_FLAG_DEV_INIT_DATA_SENT (0xffffffffef100005ULL)
/*
* This is an arbitrary size based on migration of mlx5 devices, where
typically
@@ -380,6 +382,8 @@ static void vfio_save_cleanup(void *opaque)
g_free(migration->data_buffer);
migration->data_buffer = NULL;
+ migration->switchover_ack_needed = false;
+ migration->initial_data_sent = false;
vfio_migration_cleanup(vbasedev);
trace_vfio_save_cleanup(vbasedev->name);
}
@@ -455,10 +459,17 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
if (data_size < 0) {
return data_size;
}
- qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
vfio_update_estimated_pending_data(migration, data_size);
+ if (migration->switchover_ack_needed && !migration->precopy_init_size &&
+ !migration->initial_data_sent) {
+ qemu_put_be64(f, VFIO_MIG_FLAG_DEV_INIT_DATA_SENT);
+ migration->initial_data_sent = true;
+ } else {
+ qemu_put_be64(f, VFIO_MIG_FLAG_END_OF_STATE);
+ }
+
trace_vfio_save_iterate(vbasedev->name);
/*
@@ -576,6 +587,24 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int
version_id)
}
break;
}
+ case VFIO_MIG_FLAG_DEV_INIT_DATA_SENT:
+ {
+ if (!vbasedev->migration->switchover_ack_needed) {
+ error_report("%s: Received INIT_DATA_SENT but switchover ack "
+ "is not needed",
+ vbasedev->name);
+ return -EINVAL;
+ }
+
+ ret = qemu_loadvm_approve_switchover();
+ if (ret) {
+ error_report(
+ "%s: qemu_loadvm_approve_switchover failed, err=%d (%s)",
+ vbasedev->name, ret, strerror(-ret));
+ }
+
+ return ret;
+ }
default:
error_report("%s: Unknown tag 0x%"PRIx64, vbasedev->name, data);
return -EINVAL;
@@ -590,6 +619,16 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int
version_id)
return ret;
}
+static bool vfio_switchover_ack_needed(void *opaque)
+{
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+
+ migration->switchover_ack_needed = vfio_precopy_supported(vbasedev);
+
+ return migration->switchover_ack_needed;
+}
+
static const SaveVMHandlers savevm_vfio_handlers = {
.save_setup = vfio_save_setup,
.save_cleanup = vfio_save_cleanup,
@@ -602,6 +641,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.load_setup = vfio_load_setup,
.load_cleanup = vfio_load_cleanup,
.load_state = vfio_load_state,
+ .switchover_ack_needed = vfio_switchover_ack_needed,
};
/* ---------------------------------------------------------------------- */