[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 28/33] vfio/migration: Multifd device state transfer support - load thread
From: |
Maciej S. Szmigiero |
Subject: |
[PATCH v4 28/33] vfio/migration: Multifd device state transfer support - load thread |
Date: |
Thu, 30 Jan 2025 11:08:49 +0100 |
From: "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>
Since it's important to finish loading the device state transferred via the
main migration channel (via the save_live_iterate SaveVMHandler) before
starting to load the data asynchronously transferred via multifd, the thread
doing the actual loading of the multifd-transferred data is only started
from the switchover_start SaveVMHandler.
The switchover_start handler is called when the MIG_CMD_SWITCHOVER_START
sub-command of QEMU_VM_COMMAND is received via the main migration channel.
This sub-command is only sent after all save_live_iterate data have already
been posted, so it is safe to commence loading of the multifd-transferred
device state upon receiving it - loading of save_live_iterate data happens
synchronously in the main migration thread (much like the processing of
MIG_CMD_SWITCHOVER_START), so by the time MIG_CMD_SWITCHOVER_START is
processed all the preceding data must have already been loaded.
Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
---
hw/vfio/migration.c | 229 +++++++++++++++++++++++++++++++++++++++++++
hw/vfio/trace-events | 5 +
2 files changed, 234 insertions(+)
diff --git a/hw/vfio/migration.c b/hw/vfio/migration.c
index 0c0caec1bd64..ab5b097f59c9 100644
--- a/hw/vfio/migration.c
+++ b/hw/vfio/migration.c
@@ -301,8 +301,16 @@ typedef struct VFIOStateBuffer {
} VFIOStateBuffer;
typedef struct VFIOMultifd {
+ QemuThread load_bufs_thread;
+ bool load_bufs_thread_running;
+ bool load_bufs_thread_want_exit;
+
+ bool load_bufs_iter_done;
+ QemuCond load_bufs_iter_done_cond;
+
VFIOStateBuffers load_bufs;
QemuCond load_bufs_buffer_ready_cond;
+ QemuCond load_bufs_thread_finished_cond;
QemuMutex load_bufs_mutex; /* Lock order: this lock -> BQL */
uint32_t load_buf_idx;
uint32_t load_buf_idx_last;
@@ -449,6 +457,171 @@ static bool vfio_load_state_buffer(void *opaque, char
*data, size_t data_size,
return true;
}
+static VFIOStateBuffer *vfio_load_state_buffer_get(VFIOMultifd *multifd)
+{ /* Returns the buffer at multifd->load_buf_idx, or NULL if it cannot be used yet */
+ VFIOStateBuffer *lb;
+ guint bufs_len;
+
+ bufs_len = vfio_state_buffers_size_get(&multifd->load_bufs);
+ if (multifd->load_buf_idx >= bufs_len) { /* load_buf_idx is not below the reported size */
+ assert(multifd->load_buf_idx == bufs_len); /* the index may equal the size but never exceed it */
+ return NULL;
+ }
+
+ lb = vfio_state_buffers_at(&multifd->load_bufs,
+ multifd->load_buf_idx);
+ if (!lb->is_present) { /* entry exists but is not flagged as present */
+ return NULL;
+ }
+
+ return lb;
+}
+
+static int vfio_load_bufs_thread_load_config(VFIODevice *vbasedev)
+{ /* Called by vfio_load_bufs_thread() after the buffer loop; nonzero is reported as an error */
+ return -EINVAL; /* always fails here; NOTE(review): looks like a stub for a later patch - confirm */
+}
+
+static bool vfio_load_state_buffer_write(VFIODevice *vbasedev,
+ VFIOStateBuffer *lb,
+ Error **errp)
+{ /* Writes lb's data to migration->data_fd; false + errp set on write() failure. Entered with load_bufs_mutex held. */
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ g_autofree char *buf = NULL;
+ char *buf_cur;
+ size_t buf_len;
+
+ if (!lb->len) { /* zero-length buffer: nothing to write, counts as success */
+ return true;
+ }
+
+ trace_vfio_load_state_device_buffer_load_start(vbasedev->name,
+ multifd->load_buf_idx);
+
+ /* lb might become re-allocated when we drop the lock */
+ buf = g_steal_pointer(&lb->data); /* take the data pointer out of lb; g_autofree frees it on return */
+ buf_cur = buf;
+ buf_len = lb->len;
+ while (buf_len > 0) { /* loop so that short writes are continued */
+ ssize_t wr_ret;
+ int errno_save;
+
+ /*
+ * Loading data to the device takes a while,
+ * drop the lock during this process.
+ */
+ qemu_mutex_unlock(&multifd->load_bufs_mutex);
+ wr_ret = write(migration->data_fd, buf_cur, buf_len);
+ errno_save = errno; /* captured right after write(), before re-taking the lock */
+ qemu_mutex_lock(&multifd->load_bufs_mutex);
+
+ if (wr_ret < 0) {
+ error_setg(errp,
+ "writing state buffer %" PRIu32 " failed: %d",
+ multifd->load_buf_idx, errno_save);
+ return false;
+ }
+
+ assert(wr_ret <= buf_len);
+ buf_len -= wr_ret; /* bytes still to be written */
+ buf_cur += wr_ret; /* skip past the bytes already written */
+ }
+
+ trace_vfio_load_state_device_buffer_load_end(vbasedev->name,
+ multifd->load_buf_idx);
+
+ return true;
+}
+
+static bool vfio_load_bufs_thread_want_abort(VFIOMultifd *multifd,
+ bool *should_quit)
+{ /* True when load_bufs_thread_want_exit is set or *should_quit reads as true */
+ return multifd->load_bufs_thread_want_exit || qatomic_read(should_quit);
+}
+
+static bool vfio_load_bufs_thread(void *opaque, bool *should_quit, Error
**errp)
+{ /* Load thread body: writes buffers in load_buf_idx order, then loads config; false + errp on failure or abort */
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+ bool ret = true;
+ int config_ret;
+
+ assert(multifd);
+ QEMU_LOCK_GUARD(&multifd->load_bufs_mutex); /* mutex taken here; the cond waits and the write helper release it temporarily */
+
+ assert(multifd->load_bufs_thread_running); /* set by vfio_switchover_start() before this thread is started */
+
+ while (!vfio_load_bufs_thread_want_abort(multifd, should_quit)) {
+ VFIOStateBuffer *lb;
+
+ assert(multifd->load_buf_idx <= multifd->load_buf_idx_last);
+
+ lb = vfio_load_state_buffer_get(multifd);
+ if (!lb) { /* buffer at load_buf_idx not available yet: wait to be signalled */
+ trace_vfio_load_state_device_buffer_starved(vbasedev->name,
+ multifd->load_buf_idx);
+ qemu_cond_wait(&multifd->load_bufs_buffer_ready_cond,
+ &multifd->load_bufs_mutex);
+ continue; /* re-check the abort condition and buffer availability */
+ }
+
+ if (multifd->load_buf_idx == multifd->load_buf_idx_last) { /* buffer at load_buf_idx_last is not written by this loop */
+ break;
+ }
+
+ if (multifd->load_buf_idx == 0) {
+ trace_vfio_load_state_device_buffer_start(vbasedev->name);
+ }
+
+ if (!vfio_load_state_buffer_write(vbasedev, lb, errp)) { /* errp already set by the helper */
+ ret = false;
+ goto ret_signal;
+ }
+
+ assert(multifd->load_buf_queued_pending_buffers > 0);
+ multifd->load_buf_queued_pending_buffers--; /* one queued buffer has been consumed */
+
+ if (multifd->load_buf_idx == multifd->load_buf_idx_last - 1) {
+ trace_vfio_load_state_device_buffer_end(vbasedev->name);
+ }
+
+ multifd->load_buf_idx++;
+ }
+
+ if (vfio_load_bufs_thread_want_abort(multifd, should_quit)) { /* the loop may have ended due to an abort request */
+ error_setg(errp, "operation cancelled");
+ ret = false;
+ goto ret_signal;
+ }
+
+ if (vfio_load_config_after_iter(vbasedev)) { /* NOTE(review): helper is defined outside this hunk - confirm what it checks */
+ while (!multifd->load_bufs_iter_done) { /* flag is not set anywhere in this hunk; wait for it */
+ qemu_cond_wait(&multifd->load_bufs_iter_done_cond,
+ &multifd->load_bufs_mutex);
+
+ if (vfio_load_bufs_thread_want_abort(multifd, should_quit)) { /* cleanup also signals this cond to request exit */
+ error_setg(errp, "operation cancelled");
+ ret = false;
+ goto ret_signal;
+ }
+ }
+ }
+
+ config_ret = vfio_load_bufs_thread_load_config(vbasedev);
+ if (config_ret) {
+ error_setg(errp, "load config state failed: %d", config_ret);
+ ret = false;
+ }
+
+ret_signal: /* common exit path for both success and failure */
+ multifd->load_bufs_thread_running = false;
+ qemu_cond_signal(&multifd->load_bufs_thread_finished_cond); /* wakes vfio_load_cleanup_load_bufs_thread() if it is waiting */
+
+ return ret;
+}
+
static int vfio_save_device_config_state(QEMUFile *f, void *opaque,
Error **errp)
{
@@ -517,11 +690,40 @@ static VFIOMultifd *vfio_multifd_new(void)
multifd->load_buf_queued_pending_buffers = 0;
qemu_cond_init(&multifd->load_bufs_buffer_ready_cond);
+ multifd->load_bufs_iter_done = false;
+ qemu_cond_init(&multifd->load_bufs_iter_done_cond);
+
+ multifd->load_bufs_thread_running = false;
+ multifd->load_bufs_thread_want_exit = false;
+ qemu_cond_init(&multifd->load_bufs_thread_finished_cond);
+
return multifd;
}
+static void vfio_load_cleanup_load_bufs_thread(VFIOMultifd *multifd)
+{ /* Requests the load thread to exit and waits until load_bufs_thread_running is cleared */
+ /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+ bql_unlock();
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+ while (multifd->load_bufs_thread_running) { /* body is skipped entirely if the thread is not running */
+ multifd->load_bufs_thread_want_exit = true; /* read by vfio_load_bufs_thread_want_abort() */
+
+ qemu_cond_signal(&multifd->load_bufs_buffer_ready_cond); /* wake the thread if it waits for a buffer */
+ qemu_cond_signal(&multifd->load_bufs_iter_done_cond); /* wake the thread if it waits for iter_done */
+ qemu_cond_wait(&multifd->load_bufs_thread_finished_cond,
+ &multifd->load_bufs_mutex);
+ }
+ }
+ bql_lock(); /* re-take BQL before returning to the caller */
+}
+
static void vfio_multifd_free(VFIOMultifd *multifd)
{
+ vfio_load_cleanup_load_bufs_thread(multifd);
+
+ qemu_cond_destroy(&multifd->load_bufs_thread_finished_cond);
+ qemu_cond_destroy(&multifd->load_bufs_iter_done_cond);
+ vfio_state_buffers_destroy(&multifd->load_bufs);
qemu_cond_destroy(&multifd->load_bufs_buffer_ready_cond);
qemu_mutex_destroy(&multifd->load_bufs_mutex);
@@ -1042,6 +1244,32 @@ static bool vfio_switchover_ack_needed(void *opaque)
return vfio_precopy_supported(vbasedev);
}
+static int vfio_switchover_start(void *opaque)
+{ /* switchover_start handler: marks the load thread as running and starts it; always returns 0 */
+ VFIODevice *vbasedev = opaque;
+ VFIOMigration *migration = vbasedev->migration;
+ VFIOMultifd *multifd = migration->multifd;
+
+ if (!migration->multifd_transfer) {
+ /* Load thread is only used for multifd transfer */
+ return 0;
+ }
+
+ assert(multifd);
+
+ /* The lock order is load_bufs_mutex -> BQL so unlock BQL here first */
+ bql_unlock();
+ WITH_QEMU_LOCK_GUARD(&multifd->load_bufs_mutex) {
+ assert(!multifd->load_bufs_thread_running); /* the thread must not already be marked running */
+ multifd->load_bufs_thread_running = true; /* set before the start call; the thread asserts it on entry */
+ }
+ bql_lock();
+
+ qemu_loadvm_start_load_thread(vfio_load_bufs_thread, vbasedev); /* vbasedev becomes the thread function's opaque argument */
+
+ return 0;
+}
+
static const SaveVMHandlers savevm_vfio_handlers = {
.save_prepare = vfio_save_prepare,
.save_setup = vfio_save_setup,
@@ -1057,6 +1285,7 @@ static const SaveVMHandlers savevm_vfio_handlers = {
.load_state = vfio_load_state,
.load_state_buffer = vfio_load_state_buffer,
.switchover_ack_needed = vfio_switchover_ack_needed,
+ .switchover_start = vfio_switchover_start,
};
/* ---------------------------------------------------------------------- */
diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
index 042a3dc54a33..418b378ebd29 100644
--- a/hw/vfio/trace-events
+++ b/hw/vfio/trace-events
@@ -154,6 +154,11 @@ vfio_load_device_config_state_end(const char *name) " (%s)"
vfio_load_state(const char *name, uint64_t data) " (%s) data 0x%"PRIx64
vfio_load_state_device_data(const char *name, uint64_t data_size, int ret) "
(%s) size %"PRIu64" ret %d"
vfio_load_state_device_buffer_incoming(const char *name, uint32_t idx) " (%s)
idx %"PRIu32
+vfio_load_state_device_buffer_start(const char *name) " (%s)"
+vfio_load_state_device_buffer_starved(const char *name, uint32_t idx) " (%s)
idx %"PRIu32
+vfio_load_state_device_buffer_load_start(const char *name, uint32_t idx) "
(%s) idx %"PRIu32
+vfio_load_state_device_buffer_load_end(const char *name, uint32_t idx) " (%s)
idx %"PRIu32
+vfio_load_state_device_buffer_end(const char *name) " (%s)"
vfio_migration_realize(const char *name) " (%s)"
vfio_migration_set_device_state(const char *name, const char *state) " (%s)
state %s"
vfio_migration_set_state(const char *name, const char *new_state, const char
*recover_state) " (%s) new state %s, recover state %s"
- [PATCH v4 20/33] vfio/migration: Add x-migration-load-config-after-iter VFIO property, (continued)
- [PATCH v4 20/33] vfio/migration: Add x-migration-load-config-after-iter VFIO property, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 21/33] vfio/migration: Add load_device_config_state_start trace event, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 22/33] vfio/migration: Convert bytes_transferred counter to atomic, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 23/33] vfio/migration: Multifd device state transfer support - basic types, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 24/33] vfio/migration: Multifd device state transfer support - VFIOStateBuffer(s), Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 25/33] vfio/migration: Multifd device state transfer - add support checking function, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 26/33] vfio/migration: Multifd device state transfer support - receive init/cleanup, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 27/33] vfio/migration: Multifd device state transfer support - received buffers queuing, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 28/33] vfio/migration: Multifd device state transfer support - load thread,
Maciej S. Szmigiero <=
- [PATCH v4 29/33] vfio/migration: Multifd device state transfer support - config loading support, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 30/33] migration/qemu-file: Define g_autoptr() cleanup function for QEMUFile, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 31/33] vfio/migration: Multifd device state transfer support - send side, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 32/33] vfio/migration: Add x-migration-multifd-transfer VFIO property, Maciej S. Szmigiero, 2025/01/30
- [PATCH v4 33/33] hw/core/machine: Add compat for x-migration-multifd-transfer VFIO property, Maciej S. Szmigiero, 2025/01/30
- Re: [PATCH v4 00/33] Multifd 🔀 device state transfer support with VFIO consumer, Fabiano Rosas, 2025/01/30