qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] hw/i386/pc: Fix level interrupt sharing for Xen event channel GSI


From: Michael S. Tsirkin
Subject: Re: [PATCH] hw/i386/pc: Fix level interrupt sharing for Xen event channel GSI
Date: Tue, 7 Jan 2025 11:07:36 -0500

On Thu, Dec 19, 2024 at 05:24:11PM +0100, David Woodhouse wrote:
> From: David Woodhouse <dwmw@amazon.co.uk>
> 
> The system GSIs are not designed for sharing. One device might assert a
> shared interrupt with qemu_set_irq() and another might deassert it, and
> the level from the first device is lost.
> 
> This could be solved by using a multiplexer which functions as an OR
> gate, much like the PCI code already implements for pci_set_irq() for
> muxing the INTx lines.
> 
> Alternatively, it could be solved by having a 'resample' callback which
> is invoked when the interrupt is acked at the interrupt controller, and
> causes the devices to re-trigger the interrupt if it should still be
> pending. This is the model that VFIO in Linux uses, with a 'resampler'
> eventfd that actually unmasks the interrupt on the hardware device and
> thus triggers a new interrupt from it if needed. QEMU currently doesn't
> use that VFIO interface correctly, and just bashes on the resampler for
> every MMIO access to the device "just in case".
> 
> This does neither of those. The Xen event channel GSI support *already*
> has hooks into the PC gsi_handler() code, for routing GSIs to PIRQs. So
> we can implement the logical OR of the external input (from PCI INTx,
> serial etc.) with the Xen event channel GSI by allowing that existing
> hook to modify the 'level' being asserted.
> 
> Closes: https://gitlab.com/qemu-project/qemu/-/issues/2731
> Reported-by: Thomas Huth <thuth@redhat.com>
> Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>

These are Xen things, so feel free to merge.

Acked-by: Michael S. Tsirkin <mst@redhat.com>

> ---
>  hw/i386/kvm/xen_evtchn.c | 48 +++++++++++++++++++++++++++++++---------
>  hw/i386/kvm/xen_evtchn.h |  2 +-
>  hw/i386/x86-common.c     | 32 ++++++++++++++++++---------
>  3 files changed, 60 insertions(+), 22 deletions(-)
> 
> diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
> index 07bd0c9ab8..62b906e4ef 100644
> --- a/hw/i386/kvm/xen_evtchn.c
> +++ b/hw/i386/kvm/xen_evtchn.c
> @@ -140,6 +140,8 @@ struct XenEvtchnState {
>  
>      uint64_t callback_param;
>      bool evtchn_in_kernel;
> +    bool setting_callback_gsi;
> +    int extern_gsi_level;
>      uint32_t callback_gsi;
>  
>      QEMUBH *gsi_bh;
> @@ -431,7 +433,16 @@ void xen_evtchn_set_callback_level(int level)
>      }
>  
>      if (s->callback_gsi && s->callback_gsi < s->nr_callback_gsis) {
> -        qemu_set_irq(s->callback_gsis[s->callback_gsi], level);
> +        /*
> +         * Ugly, but since we hold the BQL we can set this flag so that
> +         * xen_evtchn_set_gsi() can tell the difference between this code
> +         * setting the GSI, and an external device (PCI INTx) doing so.
> +         */
> +        s->setting_callback_gsi = true;
> +        /* Do not deassert the line if an external device is asserting it. */
> +        qemu_set_irq(s->callback_gsis[s->callback_gsi],
> +                     level || s->extern_gsi_level);
> +        s->setting_callback_gsi = false;
>          if (level) {
>              /* Ensure the vCPU polls for deassertion */
>              kvm_xen_set_callback_asserted();
> @@ -1596,7 +1607,7 @@ static int allocate_pirq(XenEvtchnState *s, int type, int gsi)
>      return pirq;
>  }
>  
> -bool xen_evtchn_set_gsi(int gsi, int level)
> +bool xen_evtchn_set_gsi(int gsi, int *level)
>  {
>      XenEvtchnState *s = xen_evtchn_singleton;
>      int pirq;
> @@ -1608,16 +1619,33 @@ bool xen_evtchn_set_gsi(int gsi, int level)
>      }
>  
>      /*
> -     * Check that that it *isn't* the event channel GSI, and thus
> -     * that we are not recursing and it's safe to take s->port_lock.
> -     *
> -     * Locking aside, it's perfectly sane to bail out early for that
> -     * special case, as it would make no sense for the event channel
> -     * GSI to be routed back to event channels, when the delivery
> -     * method is to raise the GSI... that recursion wouldn't *just*
> -     * be a locking issue.
> +     * For the callback_gsi we need to implement a logical OR of the event
> +     * channel GSI and the external input (e.g. from PCI INTx), because
> +     * QEMU itself doesn't support shared level interrupts via demux or
> +     * resamplers.
>       */
>      if (gsi && gsi == s->callback_gsi) {
> +        /* Remember the external state of the GSI pin (e.g. from PCI INTx) */
> +        if (!s->setting_callback_gsi) {
> +            s->extern_gsi_level = *level;
> +
> +            /*
> +             * Don't allow the external device to deassert the line if the
> +             * event channel GSI should still be asserted.
> +             */
> +            if (!s->extern_gsi_level) {
> +                struct vcpu_info *vi = kvm_xen_get_vcpu_info_hva(0);
> +                if (vi && vi->evtchn_upcall_pending) {
> +                    *level = 1;
> +                }
> +            }
> +        }
> +
> +        /*
> +         * The event channel GSI cannot be routed to PIRQ, as that would make
> +         * no sense. It could also deadlock on s->port_lock, if we proceed.
> +         * So bail out now.
> +         */
>          return false;
>      }
>  
> diff --git a/hw/i386/kvm/xen_evtchn.h b/hw/i386/kvm/xen_evtchn.h
> index b740acfc0d..0521ebc092 100644
> --- a/hw/i386/kvm/xen_evtchn.h
> +++ b/hw/i386/kvm/xen_evtchn.h
> @@ -23,7 +23,7 @@ void xen_evtchn_set_callback_level(int level);
>  
>  int xen_evtchn_set_port(uint16_t port);
>  
> -bool xen_evtchn_set_gsi(int gsi, int level);
> +bool xen_evtchn_set_gsi(int gsi, int *level);
>  void xen_evtchn_snoop_msi(PCIDevice *dev, bool is_msix, unsigned int vector,
>                            uint64_t addr, uint32_t data, bool is_masked);
>  void xen_evtchn_remove_pci_device(PCIDevice *dev);
> diff --git a/hw/i386/x86-common.c b/hw/i386/x86-common.c
> index 3f78182692..e061580f67 100644
> --- a/hw/i386/x86-common.c
> +++ b/hw/i386/x86-common.c
> @@ -450,8 +450,27 @@ static long get_file_size(FILE *f)
>  void gsi_handler(void *opaque, int n, int level)
>  {
>      GSIState *s = opaque;
> +    bool bypass_ioapic = false;
>  
>      trace_x86_gsi_interrupt(n, level);
> +
> +#ifdef CONFIG_XEN_EMU
> +        /*
> +         * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
> +         * routing actually works properly under Xen). And then to
> +         * *either* the PIRQ handling or the I/OAPIC depending on
> +         * whether the former wants it.
> +         *
> +         * Additionally, this hook allows the Xen event channel GSI to
> +         * work around QEMU's lack of support for shared level interrupts,
> +         * by keeping track of the externally driven state of the pin and
> +         * implementing a logical OR with the state of the evtchn GSI.
> +         */
> +    if (xen_mode == XEN_EMULATE) {
> +        bypass_ioapic = xen_evtchn_set_gsi(n, &level);
> +    }
> +#endif
> +
>      switch (n) {
>      case 0 ... ISA_NUM_IRQS - 1:
>          if (s->i8259_irq[n]) {
> @@ -460,18 +479,9 @@ void gsi_handler(void *opaque, int n, int level)
>          }
>          /* fall through */
>      case ISA_NUM_IRQS ... IOAPIC_NUM_PINS - 1:
> -#ifdef CONFIG_XEN_EMU
> -        /*
> -         * Xen delivers the GSI to the Legacy PIC (not that Legacy PIC
> -         * routing actually works properly under Xen). And then to
> -         * *either* the PIRQ handling or the I/OAPIC depending on
> -         * whether the former wants it.
> -         */
> -        if (xen_mode == XEN_EMULATE && xen_evtchn_set_gsi(n, level)) {
> -            break;
> +        if (!bypass_ioapic) {
> +            qemu_set_irq(s->ioapic_irq[n], level);
>          }
> -#endif
> -        qemu_set_irq(s->ioapic_irq[n], level);
>          break;
>      case IO_APIC_SECONDARY_IRQBASE
>          ... IO_APIC_SECONDARY_IRQBASE + IOAPIC_NUM_PINS - 1:
> -- 
> 2.47.0
> 
> 





reply via email to

[Prev in Thread] Current Thread [Next in Thread]