grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 3/3] pxenet: process transmit interrupts when out of resource


From: Andrei Borzenkov
Subject: Re: [PATCH 3/3] pxenet: process transmit interrupts when out of resources
Date: Sat, 12 Mar 2016 09:20:49 +0300
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.6.0

11.03.2016 19:28, Josef Bacik пишет:
> We were hitting a problem in production where our bios based machines would
> sometimes timeout while pulling down either the kernel/initrd.  It turned out
> that sometimes we'd run out of transmit buffers and would then error out while
> sending packets.  This is because we don't actually have an interrupt handler 
> in
> PXE, we just poll the card when we want to receive.  This works most of the
> time, but sometimes we end up with too many transmit interrupts pending and 
> then
> we can't add new packets to the transmit buffers.
> 
> So rework the whole ISR logic to be able to be called from both transmit and
> receive.  If we get OUT_OF_RESOURCES while trying to transmit then we can go
> through and process the interrupts and hope that leaves us with space to retry
> the transmit.  Unfortunately this puts us in a place where we can trip over a
> RECEIVE interrupt, so we have to process that interrupt and leave a netbuff
> behind for the next call into recv.
> 
> With this patch we are now able to properly provision boxes suffering from 
> this
> problem.  Thanks,
> 
> Signed-off-by: Josef Bacik <address@hidden>
> ---
>  grub-core/net/drivers/i386/pc/pxe.c | 155 
> +++++++++++++++++++++++++-----------
>  1 file changed, 108 insertions(+), 47 deletions(-)
> 
> diff --git a/grub-core/net/drivers/i386/pc/pxe.c 
> b/grub-core/net/drivers/i386/pc/pxe.c
> index 3f4152d..57445b7 100644
> --- a/grub-core/net/drivers/i386/pc/pxe.c
> +++ b/grub-core/net/drivers/i386/pc/pxe.c
> @@ -166,16 +166,11 @@ grub_pxe_scan (void)
>    return bangpxe;
>  }
>  
> -static struct grub_net_buff *
> -grub_pxe_recv (struct grub_net_card *dev __attribute__ ((unused)))
> -{
> -  struct grub_pxe_undi_isr *isr;
> -  static int in_progress = 0;
> -  grub_uint8_t *ptr, *end;
> -  struct grub_net_buff *buf;
> -
> -  isr = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> +static int in_progress = 0;
>  

You need to reset it in ->open or ->close.

> +static void
> +grub_pxe_process_isr (struct grub_pxe_undi_isr *isr)
> +{
>    if (!in_progress)
>      {
>        grub_memset (isr, 0, sizeof (*isr));
> @@ -186,13 +181,14 @@ grub_pxe_recv (struct grub_net_card *dev __attribute__ 
> ((unused)))
>        breaks on intel cards.
>         */
>        if (isr->status)
> -     {
> -       in_progress = 0;
> -       return NULL;
> +        {
> +       grub_dprintf("net", "problem pulling isr %d\n", (int)isr->status);

"pxe" would be better for dedicated debugging, "net" is too generic. We
can also set debug="pxe net" if needed. Also PXE spec lists status
values as hex, would be better to print it this way too.

> +       return;
>       }
>        grub_memset (isr, 0, sizeof (*isr));
>        isr->func_flag = GRUB_PXE_ISR_IN_PROCESS;
>        grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> +      in_progress = 1;
>      }
>    else
>      {
> @@ -200,18 +196,13 @@ grub_pxe_recv (struct grub_net_card *dev __attribute__ 
> ((unused)))
>        isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
>        grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
>      }
> +}
>  
> -  while (isr->func_flag != GRUB_PXE_ISR_OUT_RECEIVE)
> -    {
> -      if (isr->status || isr->func_flag == GRUB_PXE_ISR_OUT_DONE)
> -     {
> -       in_progress = 0;
> -       return NULL;
> -     }
> -      grub_memset (isr, 0, sizeof (*isr));
> -      isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> -      grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> -    }
> +static struct grub_net_buff *
> +grub_pxe_make_net_buff (struct grub_pxe_undi_isr *isr)
> +{
> +  struct grub_net_buff *buf;
> +  grub_uint8_t *ptr, *end;
>  
>    buf = grub_netbuff_alloc (isr->frame_len + 2);
>    if (!buf)
> @@ -230,48 +221,118 @@ grub_pxe_recv (struct grub_net_card *dev __attribute__ 
> ((unused)))
>    ptr += isr->buffer_len;
>    while (ptr < end)
>      {
> -      grub_memset (isr, 0, sizeof (*isr));
> -      isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> -      grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> +      grub_pxe_process_isr (isr);
>        if (isr->status || isr->func_flag != GRUB_PXE_ISR_OUT_RECEIVE)
>       {
> -       in_progress = 1;
> +       grub_dprintf("net", "half processed packet\n");
>         grub_netbuff_free (buf);
>         return NULL;
>       }
> -
>        grub_memcpy (ptr, LINEAR (isr->buffer), isr->buffer_len);
>        ptr += isr->buffer_len;
>      }
> -  in_progress = 1;
> -
>    return buf;
>  }
>  
> +static struct grub_net_buff *
> +grub_pxe_recv (struct grub_net_card *dev)
> +{
> +  struct grub_pxe_undi_isr *isr;
> +
> +  if (dev->data)
> +    {
> +      struct grub_net_buff *buf = dev->data;
> +      grub_dprintf("net", "Pulling existing receive packet\n");
> +      dev->data = NULL;
> +      return buf;
> +    }
> +
> +  isr = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> +  grub_pxe_process_isr (isr);
> +  while (isr->func_flag != GRUB_PXE_ISR_OUT_RECEIVE)
> +    {
> +      if (isr->status || isr->func_flag == GRUB_PXE_ISR_OUT_DONE)
> +     {
> +       if (isr->status)
> +         grub_dprintf("net", "error pulling next %d\n", (int)isr->status);
> +       in_progress = 0;
> +       return 0;
> +     }
> +      grub_memset (isr, 0, sizeof (*isr));
> +      isr->func_flag = GRUB_PXE_ISR_IN_GET_NEXT;
> +      grub_pxe_call (GRUB_PXENV_UNDI_ISR, isr, pxe_rm_entry);
> +    }
> +  return grub_pxe_make_net_buff (isr);
> +}
> +
>  static grub_err_t 
> -grub_pxe_send (struct grub_net_card *dev __attribute__ ((unused)),
> -            struct grub_net_buff *pack)
> +grub_pxe_send (struct grub_net_card *dev, struct grub_net_buff *pack)
>  {
>    struct grub_pxe_undi_transmit *trans;
>    struct grub_pxe_undi_tbd *tbd;
>    char *buf;
>  
> -  trans = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> -  grub_memset (trans, 0, sizeof (*trans));
> -  tbd = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 128);
> -  grub_memset (tbd, 0, sizeof (*tbd));
> -  buf = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 256);
> -  grub_memcpy (buf, pack->data, pack->tail - pack->data);
> -
> -  trans->tbd = SEGOFS ((grub_addr_t) tbd);
> -  trans->protocol = 0;
> -  tbd->len = pack->tail - pack->data;
> -  tbd->buf = SEGOFS ((grub_addr_t) buf);
> -
> -  grub_pxe_call (GRUB_PXENV_UNDI_TRANSMIT, trans, pxe_rm_entry);
> -  if (trans->status)
> -    return grub_error (GRUB_ERR_IO, N_("couldn't send network packet"));
> +  while (1)
> +    {
> +      int made_progress = 0;
> +
> +      trans = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> +      grub_memset (trans, 0, sizeof (*trans));
> +      tbd = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 128);
> +      grub_memset (tbd, 0, sizeof (*tbd));
> +      buf = (void *) (GRUB_MEMORY_MACHINE_SCRATCH_ADDR + 256);
> +      grub_memcpy (buf, pack->data, pack->tail - pack->data);
> +
> +      trans->tbd = SEGOFS ((grub_addr_t) tbd);
> +      trans->protocol = 0;
> +      tbd->len = pack->tail - pack->data;
> +      tbd->buf = SEGOFS ((grub_addr_t) buf);
> +
> +      grub_pxe_call (GRUB_PXENV_UNDI_TRANSMIT, trans, pxe_rm_entry);
> +      if (!trans->status)
> +     break;

This looks like the only terminating condition. What if hardware is
stuck and cannot make progress anymore and continues to return error here?

> +      if (trans->status == GRUB_PXENV_STATUS_OUT_OF_RESOURCES)
> +     {
> +       struct grub_pxe_undi_isr *isr;
> +
> +       grub_dprintf("net", "Out of transmit buffers, processing "
> +                    "interrupts\n");
> +       /* Process any outstanding transmit interrupts. */
> +       isr = (void *) GRUB_MEMORY_MACHINE_SCRATCH_ADDR;
> +       do
> +         {
> +           grub_pxe_process_isr (isr);
> +           if (isr->status)
> +             {
> +               grub_dprintf("net", "process interrupts errored %d,"
> +                            "made_progress %d\n", (int)isr->status, 
> made_progress);
> +               if (made_progress)
> +                 break;
> +               else
> +                 goto out_err;
> +             }
> +           if (isr->func_flag == GRUB_PXE_ISR_OUT_DONE)
> +             in_progress = 0;
> +           made_progress = 1;
> +         } while (isr->func_flag == GRUB_PXE_ISR_OUT_TRANSMIT);
> +
> +       /* If we had a receive interrupt in the queue we need to copy this
> +          buffer out otherwise we'll lose it. */
> +       if (isr->func_flag == GRUB_PXE_ISR_OUT_RECEIVE)

It probably should check isr->status, we come here also if we got an
error previously. Such packets are ignored everywhere else.

> +         {
> +           if (dev->data)
> +             grub_dprintf("net", "dropping packet, already have a "
> +                          "pending packet.\n");
> +           else
> +             dev->data = grub_pxe_make_net_buff (isr);
> +         }
> +     }
> +      else
> +        goto out_err;
> +    }
>    return 0;
> +out_err:
> +  return grub_error (GRUB_ERR_IO, N_("couldn't send network packet"));
>  }
>  
>  static void
> 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]