qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH V3 12/22] vfio-pci: cpr part 1


From: Steven Sistare
Subject: Re: [PATCH V3 12/22] vfio-pci: cpr part 1
Date: Mon, 24 May 2021 14:29:28 -0400
User-agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Thunderbird/78.10.2

On 5/21/2021 6:24 PM, Alex Williamson wrote:> On Fri,  7 May 2021 05:25:10 -0700
> Steve Sistare <steven.sistare@oracle.com> wrote:
> 
>> Enable vfio-pci devices to be saved and restored across an exec restart
>> of qemu.
>>
>> At vfio creation time, save the value of vfio container, group, and device
>> descriptors in the environment.
>>
>> In cprsave, suspend the use of virtual addresses in DMA mappings with
>> VFIO_DMA_UNMAP_FLAG_VADDR, because guest ram will be remapped at a
>> different VA after exec.  DMA to already-mapped pages continues.  Save
>> the msi message area as part of vfio-pci vmstate, save the interrupt and
>> notifier eventfd's in the environment, and clear the close-on-exec flag
>> for the vfio descriptors.  The flag is not cleared earlier because the
>> descriptors should not persist across miscellaneous fork and exec calls
>> that may be performed during normal operation.
>>
>> On qemu restart, vfio_realize() finds the descriptor env vars, uses
>> the descriptors, and notes that the device is being reused.  Device and
>> iommu state is already configured, so operations in vfio_realize that
>> would modify the configuration are skipped for a reused device, including
>> vfio ioctl's and writes to PCI configuration space.  The result is that
>> vfio_realize constructs qemu data structures that reflect the current
>> state of the device.  However, the reconstruction is not complete until
>> cprload is called. cprload loads the msi data and finds eventfds in the
>> environment.  It rebuilds vector data structures and attaches the
>> interrupts to the new KVM instance.  cprload then walks the flattened
>> ranges of the vfio_address_spaces and calls VFIO_DMA_MAP_FLAG_VADDR to
>> inform the kernel of the new VA's.  Lastly, it starts the VM and suppresses
>> vfio device reset.
>>
>> This functionality is delivered by 2 patches for clarity.  Part 2 adds
>> eventfd and vector support.
>>
>> Signed-off-by: Steve Sistare <steven.sistare@oracle.com>
>> ---
>>  hw/pci/msi.c                  |   4 ++
>>  hw/pci/pci.c                  |   4 ++
>>  hw/vfio/common.c              |  59 ++++++++++++++++++-
>>  hw/vfio/cpr.c                 | 131 
>> ++++++++++++++++++++++++++++++++++++++++++
>>  hw/vfio/meson.build           |   1 +
>>  hw/vfio/pci.c                 |  65 +++++++++++++++++++--
>>  hw/vfio/trace-events          |   1 +
>>  include/hw/pci/pci.h          |   1 +
>>  include/hw/vfio/vfio-common.h |   5 ++
>>  linux-headers/linux/vfio.h    |  27 +++++++++
>>  migration/cpr.c               |   7 +++
>>  11 files changed, 298 insertions(+), 7 deletions(-)
>>  create mode 100644 hw/vfio/cpr.c
>>
>> diff --git a/hw/pci/msi.c b/hw/pci/msi.c
>> index 47d2b0f..39de6a7 100644
>> --- a/hw/pci/msi.c
>> +++ b/hw/pci/msi.c
>> @@ -225,6 +225,10 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
>>      dev->msi_cap = config_offset;
>>      dev->cap_present |= QEMU_PCI_CAP_MSI;
>>  
>> +    if (dev->reused) {
>> +        return 0;
>> +    }
>> +
>>      pci_set_word(dev->config + msi_flags_off(dev), flags);
>>      pci_set_word(dev->wmask + msi_flags_off(dev),
>>                   PCI_MSI_FLAGS_QSIZE | PCI_MSI_FLAGS_ENABLE);
>> diff --git a/hw/pci/pci.c b/hw/pci/pci.c
>> index e08d981..27019ca 100644
>> --- a/hw/pci/pci.c
>> +++ b/hw/pci/pci.c
>> @@ -308,6 +308,10 @@ static void pci_do_device_reset(PCIDevice *dev)
>>  {
>>      int r;
>>  
>> +    if (dev->reused) {
>> +        return;
>> +    }
>> +
>>      pci_device_deassert_intx(dev);
>>      assert(dev->irq_state == 0);
>>  
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 9220e64..00d07b2 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -31,6 +31,7 @@
>>  #include "exec/memory.h"
>>  #include "exec/ram_addr.h"
>>  #include "hw/hw.h"
>> +#include "qemu/env.h"
>>  #include "qemu/error-report.h"
>>  #include "qemu/main-loop.h"
>>  #include "qemu/range.h"
>> @@ -440,6 +441,10 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>          return vfio_dma_unmap_bitmap(container, iova, size, iotlb);
>>      }
>>  
>> +    if (container->reused) {
>> +        return 0;
>> +    }
>> +
>>      while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>>          /*
>>           * The type1 backend has an off-by-one bug in the kernel 
>> (71a7d3d78e3c
>> @@ -463,6 +468,11 @@ static int vfio_dma_unmap(VFIOContainer *container,
>>          return -errno;
>>      }
>>  
>> +    if (unmap.size != size) {
>> +        warn_report("VFIO_UNMAP_DMA(0x%lx, 0x%lx) only unmaps 0x%llx",
>> +                     iova, size, unmap.size);
>> +    }
>> +
>>      return 0;
>>  }
>>  
>> @@ -477,6 +487,10 @@ static int vfio_dma_map(VFIOContainer *container, 
>> hwaddr iova,
>>          .size = size,
>>      };
>>  
>> +    if (container->reused) {
>> +        return 0;
>> +    }
>> +
>>      if (!readonly) {
>>          map.flags |= VFIO_DMA_MAP_FLAG_WRITE;
>>      }
>> @@ -1603,6 +1617,10 @@ static int vfio_init_container(VFIOContainer 
>> *container, int group_fd,
>>      if (iommu_type < 0) {
>>          return iommu_type;
>>      }
>> +    if (container->reused) {
>> +        container->iommu_type = iommu_type;
>> +        return 0;
>> +    }
>>  
>>      ret = ioctl(group_fd, VFIO_GROUP_SET_CONTAINER, &container->fd);
>>      if (ret) {
>> @@ -1703,6 +1721,8 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as,
>>  {
>>      VFIOContainer *container;
>>      int ret, fd;
>> +    bool reused;
>> +    char name[40];
>>      VFIOAddressSpace *space;
>>  
>>      space = vfio_get_address_space(as);
>> @@ -1739,16 +1759,29 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as,
>>          return ret;
>>      }
>>  
>> +    snprintf(name, sizeof(name), "vfio_container_%d", group->groupid);
> 
> For more clarity, maybe "vfio_container_for_group_%d"?

OK, that is clearer.

>> +    fd = getenv_fd(name);
>> +    reused = (fd >= 0);
>> +
>>      QLIST_FOREACH(container, &space->containers, next) {
>> +        if (fd >= 0 && container->fd == fd) {
> 
> Test @reused rather than @fd?  > I'm not sure the first half of this test
> is even needed though, <0 should never match container->fd, right?

OK, I will drop the first test.

>> +            group->container = container;
>> +            QLIST_INSERT_HEAD(&container->group_list, group, 
>> container_next);
>> +            return 0;
>> +        }
> 
> This looks unnecessarily sensitive to the order of containers in the
> list, if the fd doesn't match above we try to set a new container below?
> It seems like you only want to create a new container object if none of
> the existing ones match.
> 
> There's also a lot of duplication that seems like it could be combined
> 
> if (container->fd == fd || (!reused && !ioctl(...)) {

OK, I will rewrite to avoid depending on creation order, and de-dup:

    QLIST_FOREACH(container, &space->containers, next) {
        if (container->fd == fd ||
            !ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
            break;
        }
    }

    if (container) {
        group->container = container;
        QLIST_INSERT_HEAD(&container->group_list, group, container_next);
        if (!reused) {
            vfio_kvm_device_add_group(group);
            setenv_fd(name, container->fd);
        }
        return 0;
    }


>>          if (!ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &container->fd)) {
>>              group->container = container;
>>              QLIST_INSERT_HEAD(&container->group_list, group, 
>> container_next);
>>              vfio_kvm_device_add_group(group);
> 
> Why is this kvm device setup missing in the reuse case?

vfio_kvm_device_add_group only calls ioctls, and they were already called when 
the device 
was initially created.

> if (!reused) {
>> +            setenv_fd(name, container->fd);
> }
> 
>>              return 0;
>>          }
>>      }
>>  
>> -    fd = qemu_open_old("/dev/vfio/vfio", O_RDWR);
>> +    if (fd < 0) {
> 
> if (!reused)?

OK.

>> +        fd = qemu_open_old("/dev/vfio/vfio", O_RDWR);
>> +    }
>> +
>>      if (fd < 0) {
>>          error_setg_errno(errp, errno, "failed to open /dev/vfio/vfio");
>>          ret = -errno;
>> @@ -1766,6 +1799,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as,
>>      container = g_malloc0(sizeof(*container));
>>      container->space = space;
>>      container->fd = fd;
>> +    container->reused = reused;
>>      container->error = NULL;
>>      container->dirty_pages_supported = false;
>>      QLIST_INIT(&container->giommu_list);
>> @@ -1893,6 +1927,7 @@ static int vfio_connect_container(VFIOGroup *group, 
>> AddressSpace *as,
>>      }
>>  
>>      container->initialized = true;
>> +    setenv_fd(name, fd);
> 
> Maybe we don't need the test around the previous setenv_fd if we can
> overwrite existing env values, which would seem to be the case for a
> restart here.

Yes, setenv_fd overwrites, so I omitted the test for clarity, at a cost of a 
few cycles.

>>      return 0;
>>  listener_release_exit:
>> @@ -1920,6 +1955,7 @@ static void vfio_disconnect_container(VFIOGroup *group)
>>  
>>      QLIST_REMOVE(group, container_next);
>>      group->container = NULL;
>> +    unsetenv_fdv("vfio_container_%d", group->groupid);
>>  
>>      /*
>>       * Explicitly release the listener first before unset container,
>> @@ -1978,7 +2014,12 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace 
>> *as, Error **errp)
>>      group = g_malloc0(sizeof(*group));
>>  
>>      snprintf(path, sizeof(path), "/dev/vfio/%d", groupid);
>> -    group->fd = qemu_open_old(path, O_RDWR);
>> +
>> +    group->fd = getenv_fd(path);
>> +    if (group->fd < 0) {
>> +        group->fd = qemu_open_old(path, O_RDWR);
>> +    }
>> +
>>      if (group->fd < 0) {
>>          error_setg_errno(errp, errno, "failed to open %s", path);
>>          goto free_group_exit;
>> @@ -2012,6 +2053,8 @@ VFIOGroup *vfio_get_group(int groupid, AddressSpace 
>> *as, Error **errp)
>>  
>>      QLIST_INSERT_HEAD(&vfio_group_list, group, next);
>>  
>> +    setenv_fd(path, group->fd);
>> +
>>      return group;
>>  
>>  close_fd_exit:
>> @@ -2036,6 +2079,7 @@ void vfio_put_group(VFIOGroup *group)
>>      vfio_disconnect_container(group);
>>      QLIST_REMOVE(group, next);
>>      trace_vfio_put_group(group->fd);
>> +    unsetenv_fdv("/dev/vfio/%d", group->groupid);
>>      close(group->fd);
>>      g_free(group);
>>  
>> @@ -2049,8 +2093,14 @@ int vfio_get_device(VFIOGroup *group, const char 
>> *name,
>>  {
>>      struct vfio_device_info dev_info = { .argsz = sizeof(dev_info) };
>>      int ret, fd;
>> +    bool reused;
>> +
>> +    fd = getenv_fd(name);
>> +    reused = (fd >= 0);
>> +    if (fd < 0) {
> 
> if (!reused) ?

OK.

>> +        fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
>> +    }
>>  
>> -    fd = ioctl(group->fd, VFIO_GROUP_GET_DEVICE_FD, name);
>>      if (fd < 0) {
>>          error_setg_errno(errp, errno, "error getting device from group %d",
>>                           group->groupid);
>> @@ -2095,6 +2145,8 @@ int vfio_get_device(VFIOGroup *group, const char *name,
>>      vbasedev->num_irqs = dev_info.num_irqs;
>>      vbasedev->num_regions = dev_info.num_regions;
>>      vbasedev->flags = dev_info.flags;
>> +    vbasedev->reused = reused;
>> +    setenv_fd(name, fd);
>>  
>>      trace_vfio_get_device(name, dev_info.flags, dev_info.num_regions,
>>                            dev_info.num_irqs);
>> @@ -2111,6 +2163,7 @@ void vfio_put_base_device(VFIODevice *vbasedev)
>>      QLIST_REMOVE(vbasedev, next);
>>      vbasedev->group = NULL;
>>      trace_vfio_put_base_device(vbasedev->fd);
>> +    unsetenv_fd(vbasedev->name);
>>      close(vbasedev->fd);
>>  }
>>  
>> diff --git a/hw/vfio/cpr.c b/hw/vfio/cpr.c
>> new file mode 100644
>> index 0000000..c5ad9f2
>> --- /dev/null
>> +++ b/hw/vfio/cpr.c
>> @@ -0,0 +1,131 @@
>> +/*
>> + * Copyright (c) 2021 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2.
>> + * See the COPYING file in the top-level directory.
>> + */
>> +
>> +#include "qemu/osdep.h"
>> +#include <sys/ioctl.h>
>> +#include <linux/vfio.h>
>> +#include "hw/vfio/vfio-common.h"
>> +#include "sysemu/kvm.h"
>> +#include "qapi/error.h"
>> +#include "trace.h"
>> +
>> +static int
>> +vfio_dma_unmap_vaddr_all(VFIOContainer *container, Error **errp)
>> +{
>> +    struct vfio_iommu_type1_dma_unmap unmap = {
>> +        .argsz = sizeof(unmap),
>> +        .flags = VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL,
>> +        .iova = 0,
>> +        .size = 0,
>> +    };
>> +    if (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
>> +        error_setg_errno(errp, errno, "vfio_dma_unmap_vaddr_all");
>> +        return -errno;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int vfio_dma_map_vaddr(VFIOContainer *container, hwaddr iova,
>> +                              ram_addr_t size, void *vaddr,
>> +                              Error **errp)
>> +{
>> +    struct vfio_iommu_type1_dma_map map = {
>> +        .argsz = sizeof(map),
>> +        .flags = VFIO_DMA_MAP_FLAG_VADDR,
>> +        .vaddr = (__u64)(uintptr_t)vaddr,
>> +        .iova = iova,
>> +        .size = size,
>> +    };
>> +    if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map)) {
>> +        error_setg_errno(errp, errno,
>> +                         "vfio_dma_map_vaddr(iova %lu, size %ld, va %p)",
>> +                         iova, size, vaddr);
>> +        return -errno;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int
>> +vfio_region_remap(MemoryRegionSection *section, void *handle, Error **errp)
>> +{
>> +    MemoryRegion *mr = section->mr;
>> +    VFIOContainer *container = handle;
>> +    const char *name = memory_region_name(mr);
>> +    ram_addr_t size = int128_get64(section->size);
>> +    hwaddr offset, iova, roundup;
>> +    void *vaddr;
>> +
>> +    if (vfio_listener_skipped_section(section) || 
>> memory_region_is_iommu(mr)) {
>> +        return 0;
>> +    }
>> +
>> +    offset = section->offset_within_address_space;
>> +    iova = TARGET_PAGE_ALIGN(offset);
>> +    roundup = iova - offset;
>> +    size = (size - roundup) & TARGET_PAGE_MASK;
>> +    vaddr = memory_region_get_ram_ptr(mr) +
>> +            section->offset_within_region + roundup;
>> +
>> +    trace_vfio_region_remap(name, container->fd, iova, iova + size - 1, 
>> vaddr);
>> +    return vfio_dma_map_vaddr(container, iova, size, vaddr, errp);
>> +}
>> +
>> +bool vfio_cpr_capable(VFIOContainer *container, Error **errp)
>> +{
>> +    if (!ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UPDATE_VADDR) ||
>> +        !ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL)) {
>> +        error_setg(errp, "VFIO container does not support VFIO_UPDATE_VADDR 
>> "
>> +                         "or VFIO_UNMAP_ALL");
>> +        return false;
>> +    } else {
>> +        return true;
>> +    }
>> +}
>> +
>> +int vfio_cprsave(Error **errp)
>> +{
>> +    VFIOAddressSpace *space;
>> +    VFIOContainer *container;
>> +
>> +    QLIST_FOREACH(space, &vfio_address_spaces, list) {
>> +        QLIST_FOREACH(container, &space->containers, next) {
>> +            if (!vfio_cpr_capable(container, errp)) {
>> +                return 1;
>> +            }
>> +            if (vfio_dma_unmap_vaddr_all(container, errp)) {
>> +                return 1;
>> +            }
>> +        }
>> +    }
> 
> Seems like you'd want to test that all containers are capable before
> unmapping any vaddrs.  I also hope we'll find an unwind somewhere that
> remaps vaddrs should any fail.

That is verified earlier if one runs qemu with the -only-cpr-capable option:

  vfio_get_iommu_type()
      for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
          if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
              if (only_cpr_capable && !vfio_cpr_capable(container, errp)) {
                  error_prepend(errp, "only-cpr-capable is specified: ");
                  return -EINVAL;

But I will check here as well in case only-cpr-capable was not specified.

I will add code to unwind and remap.

>> +    return 0;
>> +}
>> +
>> +int vfio_cprload(Error **errp)
>> +{
>> +    VFIOAddressSpace *space;
>> +    VFIOContainer *container;
>> +    VFIOGroup *group;
>> +    VFIODevice *vbasedev;
>> +
>> +    QLIST_FOREACH(space, &vfio_address_spaces, list) {
>> +        QLIST_FOREACH(container, &space->containers, next) {
>> +            if (!vfio_cpr_capable(container, errp)) {
>> +                return 1;
>> +            }
>> +            container->reused = false;
>> +            if (as_flat_walk(space->as, vfio_region_remap, container, 
>> errp)) {
>> +                return 1;
>> +            }
>> +        }
>> +    }
> 
> What state are we in if any of these fail?

The guest has not resumed, but we cannot recover.  Since we verified 
vfio_cpr_capable in 
cprsave, vfio_cprload should never fail, sans bugs.

>> +    QLIST_FOREACH(group, &vfio_group_list, next) {
>> +        QLIST_FOREACH(vbasedev, &group->device_list, next) {
>> +            vbasedev->reused = false;
>> +        }
>> +    }
>> +    return 0;
>> +}
>> diff --git a/hw/vfio/meson.build b/hw/vfio/meson.build
>> index da9af29..e247b2b 100644
>> --- a/hw/vfio/meson.build
>> +++ b/hw/vfio/meson.build
>> @@ -5,6 +5,7 @@ vfio_ss.add(files(
>>    'migration.c',
>>  ))
>>  vfio_ss.add(when: 'CONFIG_VFIO_PCI', if_true: files(
>> +  'cpr.c',
>>    'display.c',
>>    'pci-quirks.c',
>>    'pci.c',
>> diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
>> index 7a4fb6c..f7ac9f03 100644
>> --- a/hw/vfio/pci.c
>> +++ b/hw/vfio/pci.c
>> @@ -29,6 +29,8 @@
>>  #include "hw/qdev-properties.h"
>>  #include "hw/qdev-properties-system.h"
>>  #include "migration/vmstate.h"
>> +#include "migration/cpr.h"
>> +#include "qemu/env.h"
>>  #include "qemu/error-report.h"
>>  #include "qemu/main-loop.h"
>>  #include "qemu/module.h"
>> @@ -1612,6 +1614,14 @@ static void vfio_mmap_set_enabled(VFIOPCIDevice 
>> *vdev, bool enabled)
>>      }
>>  }
>>  
>> +static void vfio_config_sync(VFIOPCIDevice *vdev, uint32_t offset, size_t 
>> len)
>> +{
>> +    if (pread(vdev->vbasedev.fd, vdev->pdev.config + offset, len,
>> +          vdev->config_offset + offset) != len) {
>> +        error_report("vfio_config_sync pread failed");
>> +    }
>> +}
>> +
>>  static void vfio_bar_prepare(VFIOPCIDevice *vdev, int nr)
>>  {
>>      VFIOBAR *bar = &vdev->bars[nr];
>> @@ -1652,6 +1662,7 @@ static void vfio_bars_prepare(VFIOPCIDevice *vdev)
>>  static void vfio_bar_register(VFIOPCIDevice *vdev, int nr)
>>  {
>>      VFIOBAR *bar = &vdev->bars[nr];
>> +    PCIDevice *pdev = &vdev->pdev;
>>      char *name;
>>  
>>      if (!bar->size) {
>> @@ -1672,7 +1683,10 @@ static void vfio_bar_register(VFIOPCIDevice *vdev, 
>> int nr)
>>          }
>>      }
>>  
>> -    pci_register_bar(&vdev->pdev, nr, bar->type, bar->mr);
>> +    pci_register_bar(pdev, nr, bar->type, bar->mr);
>> +    if (pdev->reused) {
>> +        vfio_config_sync(vdev, pci_bar(pdev, nr), 8);
> 
> Assuming 64-bit BARs?  This might be the first case where we actually
> rely on the kernel BAR values, IIRC we usually use QEMU's emulation.

No asssumptions.  vfio_config_sync() preads a piece of config space using a 
single 
system call, copying directly to the qemu buffer, not looking at words or 
calling any
action functions.

>> +    }
>>  }
>>  
>>  static void vfio_bars_register(VFIOPCIDevice *vdev)
>> @@ -2884,6 +2898,7 @@ static void vfio_realize(PCIDevice *pdev, Error **errp)
>>          vfio_put_group(group);
>>          goto error;
>>      }
>> +    pdev->reused = vdev->vbasedev.reused;
>>  
>>      vfio_populate_device(vdev, &err);
>>      if (err) {
>> @@ -3046,9 +3061,11 @@ static void vfio_realize(PCIDevice *pdev, Error 
>> **errp)
>>                                               vfio_intx_routing_notifier);
>>          vdev->irqchip_change_notifier.notify = vfio_irqchip_change;
>>          kvm_irqchip_add_change_notifier(&vdev->irqchip_change_notifier);
>> -        ret = vfio_intx_enable(vdev, errp);
>> -        if (ret) {
>> -            goto out_deregister;
>> +        if (!pdev->reused) {
>> +            ret = vfio_intx_enable(vdev, errp);
>> +            if (ret) {
>> +                goto out_deregister;
>> +            }
>>          }
>>      }
>>  
>> @@ -3098,6 +3115,11 @@ static void vfio_realize(PCIDevice *pdev, Error 
>> **errp)
>>      vfio_register_req_notifier(vdev);
>>      vfio_setup_resetfn_quirk(vdev);
>>  
>> +    vfio_config_sync(vdev, pdev->msix_cap + PCI_MSIX_FLAGS, 2);
>> +    if (pdev->reused) {
>> +        pci_update_mappings(pdev);
>> +    }
>> +
> 
> Are the msix flag sync and mapping update related?  They seem
> independent to me.  A blank line and comment would be helpful.

OK.

> I expect we'd need to call msix_enabled() somewhere for the msix flag
> sync to be effective.

Yes, vfio_pci_post_load in cpr part 2 calls msix_enabled.

> Is there an assumption here of msi-x only support or is it not needed
> for msi or intx?

The code supports msi-x and msi.  However, I should only be sync'ing 
PCI_MSIX_FLAGS
if pdev->cap_present & QEMU_PCI_CAP_MSIX.  And, I am missing a sync for 
PCI_MSI_FLAGS.
I'll fix that.

> [...]
> 
> I didn't find that unwind I was hoping for or anywhere that the msix
> flags come into play.  Thanks,

Let me know f I have not adequately answered those questions.

- Steve



reply via email to

[Prev in Thread] Current Thread [Next in Thread]