Currently, it is possible that a live migration never finishes when the dirty
page rate is high compared to the scan/transfer rate. The exact values for
MAX_MEMORY_ITERATIONS and MAX_TOTAL_MEMORY_TRANSFER_FACTOR are debatable, but
there should be *some* limit to force the final iteration of a live migration
that does not converge.
---
arch_init.c | 10 +++++++++-
1 files changed, 9 insertions(+), 1 deletions(-)
diff --git a/arch_init.c b/arch_init.c
index 4486925..57fcb1e 100644
--- a/arch_init.c
+++ b/arch_init.c
@@ -89,6 +89,9 @@ const uint32_t arch_type = QEMU_ARCH;
#define RAM_SAVE_FLAG_EOS 0x10
#define RAM_SAVE_FLAG_CONTINUE 0x20
+#define MAX_MEMORY_ITERATIONS 10
+#define MAX_TOTAL_MEMORY_TRANSFER_FACTOR 3
+
static int is_dup_page(uint8_t *page, uint8_t ch)
{
uint32_t val = ch << 24 | ch << 16 | ch << 8 | ch;
@@ -107,6 +110,8 @@ static int is_dup_page(uint8_t *page, uint8_t ch)
static RAMBlock *last_block;
static ram_addr_t last_offset;
+static int numberFullMemoryIterations = 0;
+
static int ram_save_block(QEMUFile *f)
{
RAMBlock *block = last_block;
@@ -158,7 +163,10 @@ static int ram_save_block(QEMUFile *f)
offset = 0;
block = QLIST_NEXT(block, next);
if (!block)
+ {
+ numberFullMemoryIterations++;
block = QLIST_FIRST(&ram_list.blocks);
+ }
}
current_addr = block->offset + offset;
@@ -295,7 +303,7 @@ int ram_save_live(Monitor *mon, QEMUFile *f, int stage,
void *opaque)
expected_time = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth;
- return (stage == 2) && (expected_time <= migrate_max_downtime());
+ return (stage == 2) && ((expected_time <= migrate_max_downtime() ||
(numberFullMemoryIterations == MAX_MEMORY_ITERATIONS) || (bytes_transferred >
(MAX_TOTAL_MEMORY_TRANSFER_FACTOR * ram_bytes_total()))));
}
static inline void *host_from_stream_offset(QEMUFile *f,