qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 5/5] vfio/igd: handle x-igd-opregion in vfio_probe_igd_con


From: Tomita Moeko
Subject: Re: [PATCH v2 5/5] vfio/igd: handle x-igd-opregion in vfio_probe_igd_config_quirk()
Date: Fri, 31 Jan 2025 02:33:03 +0800
User-agent: Mozilla Thunderbird

On 1/25/25 15:42, Tomita Moeko wrote:
> On 1/25/25 05:13, Alex Williamson wrote:
>> On Sat, 25 Jan 2025 03:12:45 +0800
>> Tomita Moeko <tomitamoeko@gmail.com> wrote:
>>
>>> Both enable opregion option (x-igd-opregion) and legacy mode require
>>> setting up OpRegion copy for IGD devices. Move x-igd-opregion handler
>>> in vfio_realize() to vfio_probe_igd_config_quirk() to elimate duplicate
>>> code. Finally we moved all the IGD-related code into igd.c.
>>>
>>> Signed-off-by: Tomita Moeko <tomitamoeko@gmail.com>
>>> ---
>>>  hw/vfio/igd.c        | 143 +++++++++++++++++++++++++++++++++----------
>>>  hw/vfio/pci-quirks.c |  50 ---------------
>>>  hw/vfio/pci.c        |  25 --------
>>>  hw/vfio/pci.h        |   4 --
>>>  4 files changed, 110 insertions(+), 112 deletions(-)
>>>
>>> diff --git a/hw/vfio/igd.c b/hw/vfio/igd.c
>>> index 6e06dd774a..015beacf5f 100644
>>> --- a/hw/vfio/igd.c
>>> +++ b/hw/vfio/igd.c
>>> @@ -106,6 +106,7 @@ static int igd_gen(VFIOPCIDevice *vdev)
>>>      return -1;
>>>  }
>>>  
>>> +#define IGD_ASLS 0xfc /* ASL Storage Register */
>>>  #define IGD_GMCH 0x50 /* Graphics Control Register */
>>>  #define IGD_BDSM 0x5c /* Base Data of Stolen Memory */
>>>  #define IGD_BDSM_GEN11 0xc0 /* Base Data of Stolen Memory of gen 11 and 
>>> later */
>>> @@ -138,6 +139,55 @@ static uint64_t igd_stolen_memory_size(int gen, 
>>> uint32_t gmch)
>>>      return 0;
>>>  }
>>>  
>>> +/*
>>> + * The OpRegion includes the Video BIOS Table, which seems important for
>>> + * telling the driver what sort of outputs it has.  Without this, the 
>>> device
>>> + * may work in the guest, but we may not get output.  This also requires 
>>> BIOS
>>> + * support to reserve and populate a section of guest memory sufficient for
>>> + * the table and to write the base address of that memory to the ASLS 
>>> register
>>> + * of the IGD device.
>>> + */
>>> +static bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>>> +                                       struct vfio_region_info *info,
>>> +                                       Error **errp)
>>> +{
>>> +    int ret;
>>> +
>>> +    vdev->igd_opregion = g_malloc0(info->size);
>>> +    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
>>> +                info->size, info->offset);
>>> +    if (ret != info->size) {
>>> +        error_setg(errp, "failed to read IGD OpRegion");
>>> +        g_free(vdev->igd_opregion);
>>> +        vdev->igd_opregion = NULL;
>>> +        return false;
>>> +    }
>>> +
>>> +    /*
>>> +     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is 
>>> to
>>> +     * allocate 32bit reserved memory for, copy these contents into, and 
>>> write
>>> +     * the reserved memory base address to the device ASLS register at 
>>> 0xFC.
>>> +     * Alignment of this reserved region seems flexible, but using a 4k 
>>> page
>>> +     * alignment seems to work well.  This interface assumes a single IGD
>>> +     * device, which may be at VM address 00:02.0 in legacy mode or another
>>> +     * address in UPT mode.
>>> +     *
>>> +     * NB, there may be future use cases discovered where the VM should 
>>> have
>>> +     * direct interaction with the host OpRegion, in which case the write 
>>> to
>>> +     * the ASLS register would trigger MemoryRegion setup to enable that.
>>> +     */
>>> +    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
>>> +                    vdev->igd_opregion, info->size);
>>> +
>>> +    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
>>> +
>>> +    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
>>> +    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
>>> +    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
>>> +
>>> +    return true;
>>> +}
>>> +
>>>  /*
>>>   * The rather short list of registers that we copy from the host devices.
>>>   * The LPC/ISA bridge values are definitely needed to support the vBIOS, 
>>> the
>>> @@ -339,29 +389,83 @@ void vfio_probe_igd_bar0_quirk(VFIOPCIDevice *vdev, 
>>> int nr)
>>>      QLIST_INSERT_HEAD(&vdev->bars[nr].quirks, bdsm_quirk, next);
>>>  }
>>>  
>>> +static bool vfio_igd_try_enable_opregion(VFIOPCIDevice *vdev, Error **errp)
>>> +{
>>> +    g_autofree struct vfio_region_info *opregion = NULL;
>>> +    int ret;
>>> +
>>> +    /*
>>> +     * Hotplugging is not supprted for both opregion access and legacy 
>>> mode.
>>> +     * For legacy mode, we also need to mark the ROM failed.
>>> +     */
>>
>> The explanation was a little better in the removed comment.
>>
>>> +    if (vdev->pdev.qdev.hotplugged) {
>>> +        vdev->rom_read_failed = true;
>>> +        error_setg(errp,
>>> +                   "IGD OpRegion is not supported on hotplugged device");
>>
>> As was the error log.
>>
>>> +        return false;
>>> +    }
>>> +
>>> +    ret = vfio_get_dev_region_info(&vdev->vbasedev,
>>> +                    VFIO_REGION_TYPE_PCI_VENDOR_TYPE | PCI_VENDOR_ID_INTEL,
>>> +                    VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
>>> +    if (ret) {
>>> +        error_setg_errno(errp, -ret,
>>> +                         "device does not supports IGD OpRegion feature");
>>> +        return false;
>>> +    }
>>> +
>>> +    if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
>>> +        return false;
>>> +    }
>>> +
>>> +    return true;
>>> +}
>>> +
>>>  bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
>>> -                                 Error **errp G_GNUC_UNUSED)
>>> +                                 Error **errp)
>>>  {
>>>      g_autofree struct vfio_region_info *rom = NULL;
>>> -    g_autofree struct vfio_region_info *opregion = NULL;
>>>      g_autofree struct vfio_region_info *host = NULL;
>>>      g_autofree struct vfio_region_info *lpc = NULL;
>>> +    PCIBus *bus;
>>>      PCIDevice *lpc_bridge;
>>>      int ret, gen;
>>> +    bool legacy_mode, enable_opregion;
>>>      uint64_t gms_size;
>>>      uint64_t *bdsm_size;
>>>      uint32_t gmch;
>>>      Error *err = NULL;
>>>  
>>> +    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
>>> +        !vfio_is_vga(vdev)) {
>>> +        return true;
>>> +    }
>>> +
>>>      /*
>>>       * This must be an Intel VGA device at address 00:02.0 for us to even
>>>       * consider enabling legacy mode.  The vBIOS has dependencies on the
>>>       * PCI bus address.
>>>       */
>>> -    if (!vfio_pci_is(vdev, PCI_VENDOR_ID_INTEL, PCI_ANY_ID) ||
>>> -        !vfio_is_vga(vdev) ||
>>> -        &vdev->pdev != pci_find_device(pci_device_root_bus(&vdev->pdev),
>>> -                                       0, PCI_DEVFN(0x2, 0))) {
>>> +    bus = pci_device_root_bus(&vdev->pdev);
>>> +    legacy_mode = (&vdev->pdev == pci_find_device(bus, 0, PCI_DEVFN(0x2, 
>>> 0)));
>>> +    enable_opregion = (vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION);
>>> +
>>> +    if (!enable_opregion && !legacy_mode) {
>>> +        return true;
>>> +    }
>>> +
>>> +    if (!vfio_igd_try_enable_opregion(vdev, &err)) {
>>> +        if (enable_opregion) {
>>> +            error_propagate(errp, err);
>>> +            return false;
>>> +        } else if (legacy_mode) {
>>> +            error_append_hint(&err, "IGD legacy mode disabled\n");
>>> +            error_report_err(err);
>>> +            return true;
>>> +        }
>>> +    }
>>> +
>>> +    if (!legacy_mode) {
>>>          return true;
>>>      }
>>>  
>>> @@ -404,30 +508,10 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
>>>          return true;
>>>      }
>>>  
>>> -    /*
>>> -     * Ignore the hotplug corner case, mark the ROM failed, we can't
>>> -     * create the devices we need for legacy mode in the hotplug scenario.
>>> -     */
>>> -    if (vdev->pdev.qdev.hotplugged) {
>>> -        error_report("IGD device %s hotplugged, ROM disabled, "
>>> -                     "legacy mode disabled", vdev->vbasedev.name);
>>> -        vdev->rom_read_failed = true;
>>> -        return true;
>>> -    }
>>> -
>>>      /*
>>>       * Check whether we have all the vfio device specific regions to
>>>       * support legacy mode (added in Linux v4.6).  If not, bail.
>>>       */
>>> And we're disassociating opregion setup from this useful comment.
>>
>> What are we actually accomplishing here?  What specific code
>> duplication is eliminated?
> 
> This patch is designed for moving the opregion quirk in vfio_realize()
> to igd.c, for better isolation of the igd-specific code. Legacy mode
> also need to initialize opregion as x-igd-opregion=on option. These
> code are almost the same, except legacy mode continues on error, while
> x-igd-opregion fails.
> 
> I am going to clearify that in the commit message of v3.
> 
>> Why is it important to move all this code to igd.c?

x-igd-opreqion quirk is currently located in pci-quirks.c, which is not
controlled by CONFIG_VFIO_IGD, moving it to igd.c prevents building
that unnecessary code in certain binaries, for example, non x86 builds. 

>> It's really difficult to untangle this refactor, I think it could be
>> done more iteratively, if it's really even beneficial.  Thanks,
>>
>> Alex
> 
> Agreed. Actually I'd like to totally remove the "legacy mode" and "UPT
> mode" concept in future, my proposal is:
> * Emulate and initialize ASLS and BDSM register unconditionally. These
>   registers holds HPA, keeping the old value to guest is not a good
>   idea
> * Make the host bridge and LPC bridge ID quirks optional like OpRegion.
>   Recent Linux kernel and Windows driver seems not relying on it. This
>   enables IGD passthrough on Q35 machines, but probably without UEFI
>   GOP or VBIOS support, as it is observed they require specific LPC
>   bridge DID to work.
> * Remove the requirement of IGD device class being VGA controller, this
>   was previous discussed in my kernel change [1]
> * Update the document
> 
> It would time consuming to implement all them, coding is not difficult,
> but I have to verify my change on diffrent platforms. And they are out
> of this patchset's scope I think. I personally perfers doing it in a
> future patchset.
> 
> [1] https://lore.kernel.org/all/20250123163416.7653-1-tomitamoeko@gmail.com/
> 
> Thanks,
> Moeko

Please let me know if you have any thoughts or suggestions, in case
you missed the previous mail.

Best Regards,
Moeko

>>> -    ret = vfio_get_dev_region_info(&vdev->vbasedev,
>>> -                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | 
>>> PCI_VENDOR_ID_INTEL,
>>> -                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
>>> -    if (ret) {
>>> -        error_report("IGD device %s does not support OpRegion access,"
>>> -                     "legacy mode disabled", vdev->vbasedev.name);
>>> -        return true;
>>> -    }
>>> -
>>>      ret = vfio_get_dev_region_info(&vdev->vbasedev,
>>>                          VFIO_REGION_TYPE_PCI_VENDOR_TYPE | 
>>> PCI_VENDOR_ID_INTEL,
>>>                          VFIO_REGION_SUBTYPE_INTEL_IGD_HOST_CFG, &host);
>>> @@ -476,13 +560,6 @@ bool vfio_probe_igd_config_quirk(VFIOPCIDevice *vdev,
>>>          return true;
>>>      }
>>>  
>>> -    /* Setup OpRegion access */
>>> -    if (!vfio_pci_igd_opregion_init(vdev, opregion, &err)) {
>>> -        error_append_hint(&err, "IGD legacy mode disabled\n");
>>> -        error_reportf_err(err, VFIO_MSG_PREFIX, vdev->vbasedev.name);
>>> -        return true;
>>> -    }
>>> -
>>>      /*
>>>       * Allow user to override dsm size using x-igd-gms option, in 
>>> multiples of
>>>       * 32MiB. This option should only be used when the desired size cannot 
>>> be
>>> diff --git a/hw/vfio/pci-quirks.c b/hw/vfio/pci-quirks.c
>>> index b8379cb512..f2b37910f0 100644
>>> --- a/hw/vfio/pci-quirks.c
>>> +++ b/hw/vfio/pci-quirks.c
>>> @@ -1114,56 +1114,6 @@ static void 
>>> vfio_probe_rtl8168_bar2_quirk(VFIOPCIDevice *vdev, int nr)
>>>      trace_vfio_quirk_rtl8168_probe(vdev->vbasedev.name);
>>>  }
>>>  
>>> -#define IGD_ASLS 0xfc /* ASL Storage Register */
>>> -
>>> -/*
>>> - * The OpRegion includes the Video BIOS Table, which seems important for
>>> - * telling the driver what sort of outputs it has.  Without this, the 
>>> device
>>> - * may work in the guest, but we may not get output.  This also requires 
>>> BIOS
>>> - * support to reserve and populate a section of guest memory sufficient for
>>> - * the table and to write the base address of that memory to the ASLS 
>>> register
>>> - * of the IGD device.
>>> - */
>>> -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>>> -                                struct vfio_region_info *info, Error 
>>> **errp)
>>> -{
>>> -    int ret;
>>> -
>>> -    vdev->igd_opregion = g_malloc0(info->size);
>>> -    ret = pread(vdev->vbasedev.fd, vdev->igd_opregion,
>>> -                info->size, info->offset);
>>> -    if (ret != info->size) {
>>> -        error_setg(errp, "failed to read IGD OpRegion");
>>> -        g_free(vdev->igd_opregion);
>>> -        vdev->igd_opregion = NULL;
>>> -        return false;
>>> -    }
>>> -
>>> -    /*
>>> -     * Provide fw_cfg with a copy of the OpRegion which the VM firmware is 
>>> to
>>> -     * allocate 32bit reserved memory for, copy these contents into, and 
>>> write
>>> -     * the reserved memory base address to the device ASLS register at 
>>> 0xFC.
>>> -     * Alignment of this reserved region seems flexible, but using a 4k 
>>> page
>>> -     * alignment seems to work well.  This interface assumes a single IGD
>>> -     * device, which may be at VM address 00:02.0 in legacy mode or another
>>> -     * address in UPT mode.
>>> -     *
>>> -     * NB, there may be future use cases discovered where the VM should 
>>> have
>>> -     * direct interaction with the host OpRegion, in which case the write 
>>> to
>>> -     * the ASLS register would trigger MemoryRegion setup to enable that.
>>> -     */
>>> -    fw_cfg_add_file(fw_cfg_find(), "etc/igd-opregion",
>>> -                    vdev->igd_opregion, info->size);
>>> -
>>> -    trace_vfio_pci_igd_opregion_enabled(vdev->vbasedev.name);
>>> -
>>> -    pci_set_long(vdev->pdev.config + IGD_ASLS, 0);
>>> -    pci_set_long(vdev->pdev.wmask + IGD_ASLS, ~0);
>>> -    pci_set_long(vdev->emulated_config_bits + IGD_ASLS, ~0);
>>> -
>>> -    return true;
>>> -}
>>> -
>>>  /*
>>>   * Common quirk probe entry points.
>>>   */
>>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>>> index e624ae56c4..1b90c78c5a 100644
>>> --- a/hw/vfio/pci.c
>>> +++ b/hw/vfio/pci.c
>>> @@ -3136,31 +3136,6 @@ static void vfio_realize(PCIDevice *pdev, Error 
>>> **errp)
>>>          vfio_bar_quirk_setup(vdev, i);
>>>      }
>>>  
>>> -    if (!vdev->igd_opregion &&
>>> -        vdev->features & VFIO_FEATURE_ENABLE_IGD_OPREGION) {
>>> -        g_autofree struct vfio_region_info *opregion = NULL;
>>> -
>>> -        if (vdev->pdev.qdev.hotplugged) {
>>> -            error_setg(errp,
>>> -                       "cannot support IGD OpRegion feature on hotplugged "
>>> -                       "device");
>>> -            goto out_unset_idev;
>>> -        }
>>> -
>>> -        ret = vfio_get_dev_region_info(vbasedev,
>>> -                        VFIO_REGION_TYPE_PCI_VENDOR_TYPE | 
>>> PCI_VENDOR_ID_INTEL,
>>> -                        VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, &opregion);
>>> -        if (ret) {
>>> -            error_setg_errno(errp, -ret,
>>> -                             "does not support requested IGD OpRegion 
>>> feature");
>>> -            goto out_unset_idev;
>>> -        }
>>> -
>>> -        if (!vfio_pci_igd_opregion_init(vdev, opregion, errp)) {
>>> -            goto out_unset_idev;
>>> -        }
>>> -    }
>>> -
>>>      /* QEMU emulates all of MSI & MSIX */
>>>      if (pdev->cap_present & QEMU_PCI_CAP_MSIX) {
>>>          memset(vdev->emulated_config_bits + pdev->msix_cap, 0xff,
>>> diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
>>> index 5c64de0270..b9e920a6b4 100644
>>> --- a/hw/vfio/pci.h
>>> +++ b/hw/vfio/pci.h
>>> @@ -229,10 +229,6 @@ int vfio_pci_get_pci_hot_reset_info(VFIOPCIDevice 
>>> *vdev,
>>>  
>>>  bool vfio_populate_vga(VFIOPCIDevice *vdev, Error **errp);
>>>  
>>> -bool vfio_pci_igd_opregion_init(VFIOPCIDevice *vdev,
>>> -                                struct vfio_region_info *info,
>>> -                                Error **errp);
>>> -
>>>  void vfio_display_reset(VFIOPCIDevice *vdev);
>>>  bool vfio_display_probe(VFIOPCIDevice *vdev, Error **errp);
>>>  void vfio_display_finalize(VFIOPCIDevice *vdev);
>>
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]