On Thu, Feb 11, 2021 at 07:52:46PM -0300, Daniel Henrique Barboza wrote:
Handling errors in memory hotunplug in the pSeries machine is more complex
than any other device type, because there are all the complications that other
devices has, and more.
diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
index ecce8abf14..4bcded4a1a 100644
--- a/hw/ppc/spapr.c
+++ b/hw/ppc/spapr.c
@@ -3575,6 +3575,36 @@ static SpaprDimmState
*spapr_recover_pending_dimm_state(SpaprMachineState *ms,
return spapr_pending_dimm_unplugs_add(ms, avail_lmbs, dimm);
}
+void spapr_clear_pending_dimm_unplug_state(SpaprMachineState *spapr,
+ PCDIMMDevice *dimm)
+{
+ SpaprDimmState *ds = spapr_pending_dimm_unplugs_find(spapr, dimm);
+ SpaprDrc *drc;
+ uint32_t nr_lmbs;
+ uint64_t size, addr_start, addr;
+ int i;
+
+ if (ds) {
+ spapr_pending_dimm_unplugs_remove(spapr, ds);
+ }
Hrm... how would !ds arise? Could this just be an assert?
+
+ size = memory_device_get_region_size(MEMORY_DEVICE(dimm), &error_abort);
+ nr_lmbs = size / SPAPR_MEMORY_BLOCK_SIZE;
+
+ addr_start = object_property_get_uint(OBJECT(dimm), PC_DIMM_ADDR_PROP,
+ &error_abort);
+
+ addr = addr_start;
+ for (i = 0; i < nr_lmbs; i++) {
+ drc = spapr_drc_by_id(TYPE_SPAPR_DRC_LMB,
+ addr / SPAPR_MEMORY_BLOCK_SIZE);
+ g_assert(drc);
+
+ drc->unplug_requested = false;
+ addr += SPAPR_MEMORY_BLOCK_SIZE;
+ }
+}
+
/* Callback to be called during DRC release. */
void spapr_lmb_release(DeviceState *dev)
{
diff --git a/hw/ppc/spapr_drc.c b/hw/ppc/spapr_drc.c
index c143bfb6d3..eae941233a 100644
--- a/hw/ppc/spapr_drc.c
+++ b/hw/ppc/spapr_drc.c
@@ -1230,6 +1230,20 @@ static void rtas_ibm_configure_connector(PowerPCCPU *cpu,
drck = SPAPR_DR_CONNECTOR_GET_CLASS(drc);
+ /*
+ * This indicates that the kernel is reconfiguring a LMB due to
+ * a failed hotunplug. Clear the pending unplug state for the whole
+ * DIMM.
+ */
+ if (spapr_drc_type(drc) == SPAPR_DR_CONNECTOR_TYPE_LMB &&
+ drc->unplug_requested) {
+
+ /* This really shouldn't happen in this point, but ... */
+ g_assert(drc->dev);
I'm a little worried that a buggy or malicious guest could trigger
this assert.
+
+ spapr_clear_pending_dimm_unplug_state(spapr, PC_DIMM(drc->dev));
+ }
+
if (!drc->fdt) {
void *fdt;
int fdt_size;
diff --git a/include/hw/ppc/spapr.h b/include/hw/ppc/spapr.h
index ccbeeca1de..5bcc8f3bb8 100644
--- a/include/hw/ppc/spapr.h
+++ b/include/hw/ppc/spapr.h
@@ -847,6 +847,8 @@ int spapr_hpt_shift_for_ramsize(uint64_t ramsize);
int spapr_reallocate_hpt(SpaprMachineState *spapr, int shift, Error **errp);
void spapr_clear_pending_events(SpaprMachineState *spapr);
void spapr_clear_pending_hotplug_events(SpaprMachineState *spapr);
+void spapr_clear_pending_dimm_unplug_state(SpaprMachineState *spapr,
+ PCDIMMDevice *dimm);
int spapr_max_server_number(SpaprMachineState *spapr);
void spapr_store_hpte(PowerPCCPU *cpu, hwaddr ptex,
uint64_t pte0, uint64_t pte1);