[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[RFC PATCH] pcie: Defer hot unplug until migration is complete
From: |
Yury Kotov |
Subject: |
[RFC PATCH] pcie: Defer hot unplug until migration is complete |
Date: |
Mon, 13 Jan 2020 17:01:22 +0300 |
Devices hot-plug during migration is not allowed and disabled in
corresponding QMP-commands (device_add, device_del).
But guest still can unplug device by powering it off
(Example: echo 0 > /sys/bus/pci/slots/XXX/power).
Fix it by deferring unplugging until the migration is complete.
Signed-off-by: Yury Kotov <address@hidden>
---
hw/pci-bridge/gen_pcie_root_port.c | 7 ++++
hw/pci-bridge/ioh3420.c | 7 ++++
hw/pci-bridge/xio3130_downstream.c | 7 ++++
hw/pci/pcie.c | 54 +++++++++++++++++++++++-------
hw/pci/pcie_port.c | 47 ++++++++++++++++++++++++++
include/hw/pci/pcie.h | 1 +
include/hw/pci/pcie_port.h | 20 +++++++++++
7 files changed, 130 insertions(+), 13 deletions(-)
diff --git a/hw/pci-bridge/gen_pcie_root_port.c
b/hw/pci-bridge/gen_pcie_root_port.c
index 9eaefebca8..5b3c202341 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -100,6 +100,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
}
}
+static const VMStateDescription vmstate_rp_deffered_unplug =
+ VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
+
static const VMStateDescription vmstate_rp_dev = {
.name = "pcie-root-port",
.priority = MIG_PRI_PCI_BUS,
@@ -114,6 +117,10 @@ static const VMStateDescription vmstate_rp_dev = {
GenPCIERootPort,
gen_rp_test_migrate_msix),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_rp_deffered_unplug,
+ NULL
}
};
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index f1e16135a3..2399a9a87f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -82,6 +82,9 @@ static void ioh3420_interrupts_uninit(PCIDevice *d)
msi_uninit(d);
}
+static const VMStateDescription vmstate_ioh3420_deffered_unplug =
+ VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
+
static const VMStateDescription vmstate_ioh3420 = {
.name = "ioh-3240-express-root-port",
.priority = MIG_PRI_PCI_BUS,
@@ -93,6 +96,10 @@ static const VMStateDescription vmstate_ioh3420 = {
VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_ioh3420_deffered_unplug,
+ NULL
}
};
diff --git a/hw/pci-bridge/xio3130_downstream.c
b/hw/pci-bridge/xio3130_downstream.c
index a9f084b863..a5b4fe08ee 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -139,6 +139,9 @@ static Property xio3130_downstream_props[] = {
DEFINE_PROP_END_OF_LIST()
};
+static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
+ VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
+
static const VMStateDescription vmstate_xio3130_downstream = {
.name = "xio3130-express-downstream-port",
.priority = MIG_PRI_PCI_BUS,
@@ -150,6 +153,10 @@ static const VMStateDescription vmstate_xio3130_downstream
= {
VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
VMSTATE_END_OF_LIST()
+ },
+ .subsections = (const VMStateDescription * []) {
+ &vmstate_xio3130_downstream_deffered_unplug,
+ NULL
}
};
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 08718188bb..29f0e5c05b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -28,6 +28,8 @@
#include "hw/pci/pcie_regs.h"
#include "hw/pci/pcie_port.h"
#include "qemu/range.h"
+#include "sysemu/sysemu.h"
+#include "migration/misc.h"
//#define DEBUG_PCIE
#ifdef DEBUG_PCIE
@@ -575,6 +577,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
/* Downstream ports enforce device number 0. */
+ PCIESlot *slot = PCIE_SLOT(dev);
bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
uint16_t pic;
@@ -588,6 +591,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
+ slot->unplug_is_deferred = false;
}
pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
@@ -608,13 +612,42 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl,
uint16_t *slt_sta)
*slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
}
+static void pcie_cap_slot_unplug(PCIDevice *dev)
+{
+ uint32_t pos = dev->exp.exp_cap;
+ uint8_t *exp_cap = dev->config + pos;
+ PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+
+ pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+ pcie_unplug_device, NULL);
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
+ if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+ pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+ PCI_EXP_LNKSTA_DLLLA);
+ }
+ pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
+ hotplug_event_notify(dev);
+}
+
+void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
+{
+ PCIESlot *slot = PCIE_SLOT(dev);
+
+ if (migration_is_idle() && slot->unplug_is_deferred) {
+ pcie_cap_slot_unplug(dev);
+ slot->unplug_is_deferred = false;
+ }
+}
+
void pcie_cap_slot_write_config(PCIDevice *dev,
uint16_t old_slt_ctl, uint16_t old_slt_sta,
uint32_t addr, uint32_t val, int len)
{
+ PCIESlot *slot = PCIE_SLOT(dev);
uint32_t pos = dev->exp.exp_cap;
uint8_t *exp_cap = dev->config + pos;
uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+ bool may_unplug;
if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
/*
@@ -660,22 +693,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
* this is a work around for guests that overwrite
* control of powered off slots before powering them on.
*/
- if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
- (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
+ may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
+ (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
+ if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
(!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
(old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
- PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
- pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
- pcie_unplug_device, NULL);
-
- pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
- PCI_EXP_SLTSTA_PDS);
- if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
- pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
- PCI_EXP_LNKSTA_DLLLA);
+ slot->unplug_is_deferred = !migration_is_idle();
+ if (!slot->unplug_is_deferred) {
+ pcie_cap_slot_unplug(dev);
}
- pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
- PCI_EXP_SLTSTA_PDC);
+ } else if (!may_unplug) {
+ slot->unplug_is_deferred = false;
}
hotplug_event_notify(dev);
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index c19a9be592..bd5fbf6827 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -23,6 +23,9 @@
#include "hw/qdev-properties.h"
#include "qemu/module.h"
#include "hw/hotplug.h"
+#include "sysemu/runstate.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
void pcie_port_init_reg(PCIDevice *d)
{
@@ -150,6 +153,48 @@ static Property pcie_slot_props[] = {
DEFINE_PROP_END_OF_LIST()
};
+bool vmstate_deffered_unplug_needed(void *opaque)
+{
+ PCIESlot *slot = opaque;
+
+ return slot->unplug_is_deferred;
+}
+
+static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
+{
+ PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
+
+ pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
+}
+
+static void pcie_slot_vm_state_change(void *opaque, int running, RunState
state)
+{
+ PCIESlot *slot = opaque;
+
+ pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
+}
+
+static void pcie_slot_init(Object *obj)
+{
+ PCIESlot *slot = PCIE_SLOT(obj);
+
+ slot->unplug_is_deferred = false;
+ slot->migration_notifier = (Notifier) {
+ .notify = pcie_slot_migration_notifier_cb
+ };
+ add_migration_state_change_notifier(&slot->migration_notifier);
+ slot->vmstate_change =
+ qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
+}
+
+static void pcie_slot_finalize(Object *obj)
+{
+ PCIESlot *slot = PCIE_SLOT(obj);
+
+ remove_migration_state_change_notifier(&slot->migration_notifier);
+ qemu_del_vm_change_state_handler(slot->vmstate_change);
+}
+
static void pcie_slot_class_init(ObjectClass *oc, void *data)
{
DeviceClass *dc = DEVICE_CLASS(oc);
@@ -166,6 +211,8 @@ static const TypeInfo pcie_slot_type_info = {
.name = TYPE_PCIE_SLOT,
.parent = TYPE_PCIE_PORT,
.instance_size = sizeof(PCIESlot),
+ .instance_init = pcie_slot_init,
+ .instance_finalize = pcie_slot_finalize,
.abstract = true,
.class_init = pcie_slot_class_init,
.interfaces = (InterfaceInfo[]) {
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 7064875835..128f26199e 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -110,6 +110,7 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl,
uint16_t *slt_sta);
void pcie_cap_slot_write_config(PCIDevice *dev,
uint16_t old_slt_ctl, uint16_t old_slt_sta,
uint32_t addr, uint32_t val, int len);
+void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
int pcie_cap_slot_post_load(void *opaque, int version_id);
void pcie_cap_slot_push_attention_button(PCIDevice *dev);
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 7515430087..32e45f0c89 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -23,6 +23,9 @@
#include "hw/pci/pci_bridge.h"
#include "hw/pci/pci_bus.h"
+#include "qemu/notify.h"
+#include "sysemu/sysemu.h"
+#include "migration/vmstate.h"
#define TYPE_PCIE_PORT "pcie-port"
#define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
@@ -44,6 +47,10 @@ void pcie_port_init_reg(PCIDevice *d);
struct PCIESlot {
/*< private >*/
PCIEPort parent_obj;
+ bool unplug_is_deferred;
+ Notifier migration_notifier;
+ VMChangeStateEntry *vmstate_change;
+
/*< public >*/
/* pci express switch port with slot */
@@ -58,6 +65,19 @@ struct PCIESlot {
QLIST_ENTRY(PCIESlot) next;
};
+bool vmstate_deffered_unplug_needed(void *opaque);
+
+#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) { \
+ .name = parent_section_name "/deffered-unplug", \
+ .version_id = 1, \
+ .minimum_version_id = 1, \
+ .needed = vmstate_deffered_unplug_needed, \
+ .fields = (VMStateField[]) { \
+ VMSTATE_BOOL(unplug_is_deferred, PCIESlot), \
+ VMSTATE_END_OF_LIST() \
+ } \
+}
+
void pcie_chassis_create(uint8_t chassis_number);
PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
int pcie_chassis_add_slot(struct PCIESlot *slot);
--
2.24.1
- [RFC PATCH] pcie: Defer hot unplug until migration is complete,
Yury Kotov <=