qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[RFC PATCH] pcie: Defer hot unplug until migration is complete


From: Yury Kotov
Subject: [RFC PATCH] pcie: Defer hot unplug until migration is complete
Date: Mon, 13 Jan 2020 17:01:22 +0300

Devices hot-plug during migration is not allowed and disabled in
corresponding QMP-commands (device_add, device_del).
But guest still can unplug device by powering it off
(Example: echo 0 > /sys/bus/pci/slots/XXX/power).

Fix it by deferring unplugging until the migration is complete.

Signed-off-by: Yury Kotov <address@hidden>
---
 hw/pci-bridge/gen_pcie_root_port.c |  7 ++++
 hw/pci-bridge/ioh3420.c            |  7 ++++
 hw/pci-bridge/xio3130_downstream.c |  7 ++++
 hw/pci/pcie.c                      | 54 +++++++++++++++++++++++-------
 hw/pci/pcie_port.c                 | 47 ++++++++++++++++++++++++++
 include/hw/pci/pcie.h              |  1 +
 include/hw/pci/pcie_port.h         | 20 +++++++++++
 7 files changed, 130 insertions(+), 13 deletions(-)

diff --git a/hw/pci-bridge/gen_pcie_root_port.c 
b/hw/pci-bridge/gen_pcie_root_port.c
index 9eaefebca8..5b3c202341 100644
--- a/hw/pci-bridge/gen_pcie_root_port.c
+++ b/hw/pci-bridge/gen_pcie_root_port.c
@@ -100,6 +100,9 @@ static void gen_rp_realize(DeviceState *dev, Error **errp)
     }
 }
 
+static const VMStateDescription vmstate_rp_deffered_unplug =
+    VMSTATE_DEFFERED_UNPLUG("pcie-root-port");
+
 static const VMStateDescription vmstate_rp_dev = {
     .name = "pcie-root-port",
     .priority = MIG_PRI_PCI_BUS,
@@ -114,6 +117,10 @@ static const VMStateDescription vmstate_rp_dev = {
                           GenPCIERootPort,
                           gen_rp_test_migrate_msix),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_rp_deffered_unplug,
+        NULL
     }
 };
 
diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c
index f1e16135a3..2399a9a87f 100644
--- a/hw/pci-bridge/ioh3420.c
+++ b/hw/pci-bridge/ioh3420.c
@@ -82,6 +82,9 @@ static void ioh3420_interrupts_uninit(PCIDevice *d)
     msi_uninit(d);
 }
 
+static const VMStateDescription vmstate_ioh3420_deffered_unplug =
+    VMSTATE_DEFFERED_UNPLUG("ioh-3240-express-root-port");
+
 static const VMStateDescription vmstate_ioh3420 = {
     .name = "ioh-3240-express-root-port",
     .priority = MIG_PRI_PCI_BUS,
@@ -93,6 +96,10 @@ static const VMStateDescription vmstate_ioh3420 = {
         VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
                        PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_ioh3420_deffered_unplug,
+        NULL
     }
 };
 
diff --git a/hw/pci-bridge/xio3130_downstream.c 
b/hw/pci-bridge/xio3130_downstream.c
index a9f084b863..a5b4fe08ee 100644
--- a/hw/pci-bridge/xio3130_downstream.c
+++ b/hw/pci-bridge/xio3130_downstream.c
@@ -139,6 +139,9 @@ static Property xio3130_downstream_props[] = {
     DEFINE_PROP_END_OF_LIST()
 };
 
+static const VMStateDescription vmstate_xio3130_downstream_deffered_unplug =
+    VMSTATE_DEFFERED_UNPLUG("xio3130-express-downstream-port");
+
 static const VMStateDescription vmstate_xio3130_downstream = {
     .name = "xio3130-express-downstream-port",
     .priority = MIG_PRI_PCI_BUS,
@@ -150,6 +153,10 @@ static const VMStateDescription vmstate_xio3130_downstream 
= {
         VMSTATE_STRUCT(parent_obj.parent_obj.parent_obj.exp.aer_log,
                        PCIESlot, 0, vmstate_pcie_aer_log, PCIEAERLog),
         VMSTATE_END_OF_LIST()
+    },
+    .subsections = (const VMStateDescription * []) {
+        &vmstate_xio3130_downstream_deffered_unplug,
+        NULL
     }
 };
 
diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c
index 08718188bb..29f0e5c05b 100644
--- a/hw/pci/pcie.c
+++ b/hw/pci/pcie.c
@@ -28,6 +28,8 @@
 #include "hw/pci/pcie_regs.h"
 #include "hw/pci/pcie_port.h"
 #include "qemu/range.h"
+#include "sysemu/sysemu.h"
+#include "migration/misc.h"
 
 //#define DEBUG_PCIE
 #ifdef DEBUG_PCIE
@@ -575,6 +577,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
 
     if (dev->cap_present & QEMU_PCIE_SLTCAP_PCP) {
         /* Downstream ports enforce device number 0. */
+        PCIESlot *slot = PCIE_SLOT(dev);
         bool populated = pci_bridge_get_sec_bus(PCI_BRIDGE(dev))->devices[0];
         uint16_t pic;
 
@@ -588,6 +591,7 @@ void pcie_cap_slot_reset(PCIDevice *dev)
 
         pic = populated ? PCI_EXP_SLTCTL_PIC_ON : PCI_EXP_SLTCTL_PIC_OFF;
         pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTCTL, pic);
+        slot->unplug_is_deferred = false;
     }
 
     pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
@@ -608,13 +612,42 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, 
uint16_t *slt_sta)
     *slt_sta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
 }
 
+static void pcie_cap_slot_unplug(PCIDevice *dev)
+{
+    uint32_t pos = dev->exp.exp_cap;
+    uint8_t *exp_cap = dev->config + pos;
+    PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
+
+    pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
+                        pcie_unplug_device, NULL);
+    pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDS);
+    if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
+        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
+                                     PCI_EXP_LNKSTA_DLLLA);
+    }
+    pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA, PCI_EXP_SLTSTA_PDC);
+    hotplug_event_notify(dev);
+}
+
+void pcie_cap_slot_deferred_unplug(PCIDevice *dev)
+{
+    PCIESlot *slot = PCIE_SLOT(dev);
+
+    if (migration_is_idle() && slot->unplug_is_deferred) {
+        pcie_cap_slot_unplug(dev);
+        slot->unplug_is_deferred = false;
+    }
+}
+
 void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint16_t old_slt_ctl, uint16_t old_slt_sta,
                                 uint32_t addr, uint32_t val, int len)
 {
+    PCIESlot *slot = PCIE_SLOT(dev);
     uint32_t pos = dev->exp.exp_cap;
     uint8_t *exp_cap = dev->config + pos;
     uint16_t sltsta = pci_get_word(exp_cap + PCI_EXP_SLTSTA);
+    bool may_unplug;
 
     if (ranges_overlap(addr, len, pos + PCI_EXP_SLTSTA, 2)) {
         /*
@@ -660,22 +693,17 @@ void pcie_cap_slot_write_config(PCIDevice *dev,
      * this is a work around for guests that overwrite
      * control of powered off slots before powering them on.
      */
-    if ((sltsta & PCI_EXP_SLTSTA_PDS) && (val & PCI_EXP_SLTCTL_PCC) &&
-        (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF &&
+    may_unplug = (val & PCI_EXP_SLTCTL_PCC) &&
+                 (val & PCI_EXP_SLTCTL_PIC_OFF) == PCI_EXP_SLTCTL_PIC_OFF;
+    if (may_unplug && (sltsta & PCI_EXP_SLTSTA_PDS) &&
         (!(old_slt_ctl & PCI_EXP_SLTCTL_PCC) ||
         (old_slt_ctl & PCI_EXP_SLTCTL_PIC_OFF) != PCI_EXP_SLTCTL_PIC_OFF)) {
-        PCIBus *sec_bus = pci_bridge_get_sec_bus(PCI_BRIDGE(dev));
-        pci_for_each_device(sec_bus, pci_bus_num(sec_bus),
-                            pcie_unplug_device, NULL);
-
-        pci_word_test_and_clear_mask(exp_cap + PCI_EXP_SLTSTA,
-                                     PCI_EXP_SLTSTA_PDS);
-        if (dev->cap_present & QEMU_PCIE_LNKSTA_DLLLA) {
-            pci_word_test_and_clear_mask(exp_cap + PCI_EXP_LNKSTA,
-                                         PCI_EXP_LNKSTA_DLLLA);
+        slot->unplug_is_deferred = !migration_is_idle();
+        if (!slot->unplug_is_deferred) {
+            pcie_cap_slot_unplug(dev);
         }
-        pci_word_test_and_set_mask(exp_cap + PCI_EXP_SLTSTA,
-                                       PCI_EXP_SLTSTA_PDC);
+    } else if (!may_unplug) {
+        slot->unplug_is_deferred = false;
     }
 
     hotplug_event_notify(dev);
diff --git a/hw/pci/pcie_port.c b/hw/pci/pcie_port.c
index c19a9be592..bd5fbf6827 100644
--- a/hw/pci/pcie_port.c
+++ b/hw/pci/pcie_port.c
@@ -23,6 +23,9 @@
 #include "hw/qdev-properties.h"
 #include "qemu/module.h"
 #include "hw/hotplug.h"
+#include "sysemu/runstate.h"
+#include "migration/migration.h"
+#include "migration/misc.h"
 
 void pcie_port_init_reg(PCIDevice *d)
 {
@@ -150,6 +153,48 @@ static Property pcie_slot_props[] = {
     DEFINE_PROP_END_OF_LIST()
 };
 
+bool vmstate_deffered_unplug_needed(void *opaque)
+{
+    PCIESlot *slot = opaque;
+
+    return slot->unplug_is_deferred;
+}
+
+static void pcie_slot_migration_notifier_cb(Notifier *notifier, void *data)
+{
+    PCIESlot *slot = container_of(notifier, PCIESlot, migration_notifier);
+
+    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
+}
+
+static void pcie_slot_vm_state_change(void *opaque, int running, RunState 
state)
+{
+    PCIESlot *slot = opaque;
+
+    pcie_cap_slot_deferred_unplug(PCI_DEVICE(slot));
+}
+
+static void pcie_slot_init(Object *obj)
+{
+    PCIESlot *slot = PCIE_SLOT(obj);
+
+    slot->unplug_is_deferred = false;
+    slot->migration_notifier = (Notifier) {
+        .notify = pcie_slot_migration_notifier_cb
+    };
+    add_migration_state_change_notifier(&slot->migration_notifier);
+    slot->vmstate_change =
+        qemu_add_vm_change_state_handler(pcie_slot_vm_state_change, slot);
+}
+
+static void pcie_slot_finalize(Object *obj)
+{
+    PCIESlot *slot = PCIE_SLOT(obj);
+
+    remove_migration_state_change_notifier(&slot->migration_notifier);
+    qemu_del_vm_change_state_handler(slot->vmstate_change);
+}
+
 static void pcie_slot_class_init(ObjectClass *oc, void *data)
 {
     DeviceClass *dc = DEVICE_CLASS(oc);
@@ -166,6 +211,8 @@ static const TypeInfo pcie_slot_type_info = {
     .name = TYPE_PCIE_SLOT,
     .parent = TYPE_PCIE_PORT,
     .instance_size = sizeof(PCIESlot),
+    .instance_init = pcie_slot_init,
+    .instance_finalize = pcie_slot_finalize,
     .abstract = true,
     .class_init = pcie_slot_class_init,
     .interfaces = (InterfaceInfo[]) {
diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h
index 7064875835..128f26199e 100644
--- a/include/hw/pci/pcie.h
+++ b/include/hw/pci/pcie.h
@@ -110,6 +110,7 @@ void pcie_cap_slot_get(PCIDevice *dev, uint16_t *slt_ctl, 
uint16_t *slt_sta);
 void pcie_cap_slot_write_config(PCIDevice *dev,
                                 uint16_t old_slt_ctl, uint16_t old_slt_sta,
                                 uint32_t addr, uint32_t val, int len);
+void pcie_cap_slot_deferred_unplug(PCIDevice *dev);
 int pcie_cap_slot_post_load(void *opaque, int version_id);
 void pcie_cap_slot_push_attention_button(PCIDevice *dev);
 
diff --git a/include/hw/pci/pcie_port.h b/include/hw/pci/pcie_port.h
index 7515430087..32e45f0c89 100644
--- a/include/hw/pci/pcie_port.h
+++ b/include/hw/pci/pcie_port.h
@@ -23,6 +23,9 @@
 
 #include "hw/pci/pci_bridge.h"
 #include "hw/pci/pci_bus.h"
+#include "qemu/notify.h"
+#include "sysemu/sysemu.h"
+#include "migration/vmstate.h"
 
 #define TYPE_PCIE_PORT "pcie-port"
 #define PCIE_PORT(obj) OBJECT_CHECK(PCIEPort, (obj), TYPE_PCIE_PORT)
@@ -44,6 +47,10 @@ void pcie_port_init_reg(PCIDevice *d);
 struct PCIESlot {
     /*< private >*/
     PCIEPort    parent_obj;
+    bool        unplug_is_deferred;
+    Notifier    migration_notifier;
+    VMChangeStateEntry *vmstate_change;
+
     /*< public >*/
 
     /* pci express switch port with slot */
@@ -58,6 +65,19 @@ struct PCIESlot {
     QLIST_ENTRY(PCIESlot) next;
 };
 
+bool vmstate_deffered_unplug_needed(void *opaque);
+
+#define VMSTATE_DEFFERED_UNPLUG(parent_section_name) {            \
+    .name = parent_section_name "/deffered-unplug",               \
+    .version_id = 1,                                              \
+    .minimum_version_id = 1,                                      \
+    .needed = vmstate_deffered_unplug_needed,                     \
+    .fields = (VMStateField[]) {                                  \
+        VMSTATE_BOOL(unplug_is_deferred, PCIESlot),               \
+        VMSTATE_END_OF_LIST()                                     \
+    }                                                             \
+}
+
 void pcie_chassis_create(uint8_t chassis_number);
 PCIESlot *pcie_chassis_find_slot(uint8_t chassis, uint16_t slot);
 int pcie_chassis_add_slot(struct PCIESlot *slot);
-- 
2.24.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]