qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH 2/3] spapr vfio: added support


From: Alex Williamson
Subject: Re: [Qemu-ppc] [PATCH 2/3] spapr vfio: added support
Date: Tue, 19 Feb 2013 14:49:17 -0700

On Tue, 2013-02-19 at 18:43 +1100, Alexey Kardashevskiy wrote:
> The patch adds the following functionality:
> 
> 1. Implements VFIO-IOMMU host kernel driver support;
> 
> 2. Implements interface between SPAPR TCE and VFIO via
> sPAPRVFIOData's map/unmap hooks;
> 
> 3. Implements PHB scan for devices within the same IOMMU group.
> 
> To use VFIO on spapr platform, the "spapr-pci-host-bridge" device needs
> to be created with mandatory "index" and "iommu" properties such as:
> 
> -device spapr-pci-host-bridge,busname=USB,iommu=4,index=5
> 
> where:
> "index" - PHB number which is used to build all other PHB properties
> such as MMIO window, BUID, etc;
> "iommu" - IOMMU ID which represents a Partitionable Endpoint.
> 
> Optional parameters are:
> "forceaddr" - forces QEMU to assign device:function from the host address;
> "multifunction" - enables multifunction what might make sense if the user
> wants to use the configuration from the host in the guest such as
> NEC USB PCI adapter which is visible as a single device with 3 PCI
> functions, without this switch QEMU will create 3 device with 1 function
> on each;

This is a confusing naming conflict with the generic PCI
multifunction=on option.

> "scan" - disables scan and lets the user put to QEMU only some devices
> from PE;

The value passed to scan seems to be more than true/false.  Does it also
imply a depth?

> "busname" - name of the bus, it used to connect vfio-pci devices with
> a PHB when scan is disabled.

Doesn't PCI just use "id" for this?  I'm not sure we need another way to
name a bus.

> If scan is disabled, no PCI device is automatically added and the user
> has to add them manuall as in the example below which adds PHB and
> 3 PCI devices::
> 
>  -device spapr-pci-host-bridge,busname=USB,iommu=4,scan=0,index=5 \
>  -device vfio-pci,host=4:0:1.0,addr=1.0,bus=USB,multifunction=true \
>  -device vfio-pci,host=4:0:1.1,addr=1.1 \
>  -device vfio-pci,host=4:0:1.2,addr=1.2

Functions 1 & 2 require bus= as well, right?  Otherwise they'd end up on
bus 0?

I'd be a bit concerned about the namespace and overlaps of the
parameters you're adding to spapr-pci-host-bridge.  For instance, scan
invokes vfio, but you don't really know that from the option.  forceaddr
seems to imply multifunction, but either only means anything with
scan != 0.

> Cc: David Gibson <address@hidden>
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
>  hw/spapr.h                 |    4 ++
>  hw/spapr_iommu.c           |  111 ++++++++++++++++++++++++++++++++++++++
>  hw/spapr_iommu_vfio.h      |   34 ++++++++++++
>  hw/spapr_pci.c             |  129 
> +++++++++++++++++++++++++++++++++++++++++---
>  hw/spapr_pci.h             |    6 +++
>  hw/vfio_pci.c              |   62 +++++++++++++++++++++
>  linux-headers/linux/vfio.h |   27 ++++++++++
>  trace-events               |    6 ++-
>  8 files changed, 370 insertions(+), 9 deletions(-)
>  create mode 100644 hw/spapr_iommu_vfio.h

This should be at least 3 patches.  One that updates linux-headers via
scripts/update-linux-headers.sh (all of it, not piecemeal updates), one
that adds spapr backing, and one that enables vfio support.

> 
> diff --git a/hw/spapr.h b/hw/spapr.h
> index bc0cd27..0ecfae2 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -3,6 +3,7 @@
>  
>  #include "dma.h"
>  #include "hw/xics.h"
> +#include "hw/spapr_iommu_vfio.h"
>  
>  struct VIOsPAPRBus;
>  struct sPAPRPHBState;
> @@ -406,4 +407,7 @@ int spapr_dma_dt(void *fdt, int node_off, const char 
> *propname,
>  int spapr_tcet_dma_dt(void *fdt, int node_off, const char *propname,
>                        DMAContext *dma);
>  
> +DMAContext *spapr_vfio_init_dma(uint32_t liobn, int iommu_id,
> +                                sPAPRVFIOData *data);
> +
>  #endif /* !defined (__HW_SPAPR_H__) */
> diff --git a/hw/spapr_iommu.c b/hw/spapr_iommu.c
> index 94630c1..462f593 100644
> --- a/hw/spapr_iommu.c
> +++ b/hw/spapr_iommu.c
> @@ -22,8 +22,10 @@
>  #include "kvm_ppc.h"
>  #include "dma.h"
>  #include "exec-memory.h"
> +#include "trace.h"
>  
>  #include "hw/spapr.h"
> +#include "hw/spapr_iommu_vfio.h"
>  
>  #include <libfdt.h>
>  
> @@ -234,6 +236,101 @@ static target_ulong put_tce_emu(sPAPRTCETable *tcet, 
> target_ulong ioba,
>      return H_SUCCESS;
>  }
>  
> +typedef struct sPAPRVFIOTable {
> +    DMAContext dma;
> +    sPAPRVFIOData *data;
> +    uint32_t liobn;
> +    QLIST_ENTRY(sPAPRVFIOTable) list;
> +} sPAPRVFIOTable;
> +
> +QLIST_HEAD(vfio_tce_tables, sPAPRVFIOTable) vfio_tce_tables;
> +
> +DMAContext *spapr_vfio_init_dma(uint32_t liobn, int iommu_id,
> +                                sPAPRVFIOData *data)
> +{
> +    sPAPRVFIOTable *t;
> +
> +    if (kvmppc_create_spapr_tce_iommu(liobn, iommu_id))
> +        return NULL;
> +
> +    t = g_malloc0(sizeof(*t));
> +    t->data = data;
> +    t->liobn = liobn;
> +
> +    QLIST_INSERT_HEAD(&vfio_tce_tables, t, list);
> +
> +    return &t->dma;
> +}
> +
> +static int put_tce_vfio(uint32_t liobn, target_ulong ioba, target_ulong 
> *tces,
> +                        target_ulong tce_value, target_ulong npages)
> +{
> +    int i, ret;
> +    bool found = false;
> +    __u64 size = SPAPR_TCE_PAGE_SIZE;
> +    sPAPRVFIOTable *t;
> +
> +    QLIST_FOREACH(t, &vfio_tce_tables, list) {
> +        if (t->liobn == liobn) {
> +            found = true;
> +            break;
> +        }
> +    }
> +    if (!found) {
> +        return H_CONTINUE; /* positive non-zero value */
> +    }
> +
> +    for (i = 0; i < npages; ++i, ioba += SPAPR_TCE_PAGE_SIZE) {
> +        target_ulong tce = tces ? tces[i] : tce_value;
> +
> +        if (tce & SPAPR_TCE_PAGE_MASK) {
> +            struct vfio_iommu_type1_dma_map param = {
> +                .argsz = sizeof(param),
> +                .iova = ioba,
> +                .vaddr = (__u64)(uintptr_t)
> +                        qemu_get_ram_ptr(tce & ~SPAPR_TCE_PAGE_MASK),
> +                .flags = 0,
> +                .size = size
> +            };
> +
> +            switch (tce & SPAPR_TCE_PAGE_MASK) {
> +            case SPAPR_TCE_RO:
> +                param.flags = VFIO_DMA_MAP_FLAG_READ;
> +                break;
> +            case SPAPR_TCE_WO:
> +                param.flags = VFIO_DMA_MAP_FLAG_WRITE;
> +                break;
> +            case SPAPR_TCE_RW:
> +                param.flags = VFIO_DMA_MAP_FLAG_READ | 
> VFIO_DMA_MAP_FLAG_WRITE;
> +                break;
> +            }
> +
> +            ret = t->data->map(t->data, &param);
> +            trace_spapr_iommu("vfio map", liobn, ioba, tce, ret);
> +            if (ret < 0) {
> +                perror("spapr_tce map");
> +                return H_PARAMETER;
> +            }
> +        } else {
> +            struct vfio_iommu_type1_dma_unmap param = {
> +                .argsz = sizeof(param),
> +                .iova = ioba,
> +                .flags = 0,
> +                .size = size
> +            };
> +
> +            ret = t->data->unmap(t->data, &param);
> +            trace_spapr_iommu("vfio unmap", liobn, ioba, 0, ret);
> +            if (ret < 0) {
> +                perror("spapr_tce unmap");
> +                return H_PARAMETER;
> +            }
> +        }
> +    }
> +
> +    return H_SUCCESS;
> +}
> +
>  static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
>                                         sPAPREnvironment *spapr,
>                                         target_ulong opcode, target_ulong 
> *args)
> @@ -260,6 +357,11 @@ static target_ulong h_put_tce_indirect(PowerPCCPU *cpu,
>          }
>          return ret;
>      }
> +    ret = put_tce_vfio(liobn, ioba, tces, -1, npages);
> +    if (ret != H_CONTINUE) {
> +        return ret;
> +    }
> +
>  #ifdef DEBUG_TCE
>      fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
>              "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
> @@ -294,6 +396,10 @@ static target_ulong h_stuff_tce(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>          }
>          return ret;
>      }
> +    ret = put_tce_vfio(liobn, ioba, NULL, tce_value, npages);
> +    if (ret != H_CONTINUE) {
> +        return ret;
> +    }
>  #ifdef DEBUG_TCE
>      fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
>              "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
> @@ -310,6 +416,7 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>      target_ulong ioba = args[1];
>      target_ulong tce = args[2];
>      sPAPRTCETable *tcet = spapr_tce_find_by_liobn(liobn);
> +    int ret;
>  
>      if (liobn & 0xFFFFFFFF00000000ULL) {
>          hcall_dprintf("spapr_vio_put_tce on out-of-boundsw LIOBN "
> @@ -322,6 +429,10 @@ static target_ulong h_put_tce(PowerPCCPU *cpu, 
> sPAPREnvironment *spapr,
>      if (tcet) {
>          return put_tce_emu(tcet, ioba, tce);
>      }
> +    ret = put_tce_vfio(liobn, ioba, &tce, -1, 1);
> +    if (ret != H_CONTINUE) {
> +        return ret;
> +    }
>  #ifdef DEBUG_TCE
>      fprintf(stderr, "%s on liobn=" TARGET_FMT_lx /*%s*/
>              "  ioba 0x" TARGET_FMT_lx "  TCE 0x" TARGET_FMT_lx "\n",
> diff --git a/hw/spapr_iommu_vfio.h b/hw/spapr_iommu_vfio.h
> new file mode 100644
> index 0000000..9c2fff3
> --- /dev/null
> +++ b/hw/spapr_iommu_vfio.h
> @@ -0,0 +1,34 @@
> +/*
> + * Definitions for VFIO IOMMU implementation for SPAPR TCE.
> + *
> + * Copyright (c) 2012 Alexey Kardashevskiy <address@hidden>
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library; if not, see 
> <http://www.gnu.org/licenses/>.
> + */
> +
> +#if !defined(__HW_SPAPR_IOMMU_VFIO_H__)
> +#define __HW_SPAPR_IOMMU_VFIO_H__
> +
> +#include <linux/vfio.h>
> +
> +typedef struct sPAPRVFIOData sPAPRVFIOData;
> +typedef struct sPAPRVFIOData {
> +    struct vfio_iommu_spapr_tce_info info;
> +    int (*map)(sPAPRVFIOData *data, struct vfio_iommu_type1_dma_map *par);
> +    int (*unmap)(sPAPRVFIOData *data, struct vfio_iommu_type1_dma_unmap 
> *par);
> +} sPAPRVFIOData;
> +
> +void spapr_register_vfio_container(int groupid, sPAPRVFIOData *data);
> +
> +#endif
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> index a6885c4..2631332 100644
> --- a/hw/spapr_pci.c
> +++ b/hw/spapr_pci.c
> @@ -22,6 +22,9 @@
>   * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>   * THE SOFTWARE.
>   */
> +#include <sys/types.h>
> +#include <dirent.h>
> +
>  #include "hw.h"
>  #include "pci.h"
>  #include "msi.h"
> @@ -514,6 +517,94 @@ static DMAContext *spapr_pci_dma_context_fn(PCIBus *bus, 
> void *opaque,
>      return phb->dma;
>  }
>  
> +void spapr_register_vfio_container(int groupid, sPAPRVFIOData *data)
> +{
> +    sPAPRPHBState *phb;
> +
> +    QLIST_FOREACH(phb, &spapr->phbs, list) {
> +        if (phb->iommugroupid == groupid) {
> +            phb->vfio_data = data;
> +            phb->dma_window_start = phb->vfio_data->info.dma32_window_start;
> +            phb->dma_window_size = phb->vfio_data->info.dma32_window_size;
> +            phb->dma = spapr_vfio_init_dma(phb->dma_liobn, groupid,
> +                                           phb->vfio_data);
> +            return;
> +        }
> +    }
> +}
> +
> +static int spapr_pci_scan_vfio(sPAPRPHBState *sphb)
> +{
> +    PCIHostState *phb = PCI_HOST_BRIDGE(sphb);
> +    char iommupath[256];
> +    DIR *dirp;
> +    struct dirent *entry;
> +
> +    if (!sphb->scan) {
> +        trace_spapr_pci("autoscan disabled for ", sphb->dtbusname);
> +        return 0;
> +    }
> +
> +    snprintf(iommupath, sizeof(iommupath),
> +             "/sys/kernel/iommu_groups/%d/devices/", sphb->iommugroupid);
> +    dirp = opendir(iommupath);
> +    if (!dirp) {
> +        fprintf(stderr, "failed to scan group=%d\n", sphb->iommugroupid);
> +        return -1;
> +    }
> +
> +    while ((entry = readdir(dirp)) != NULL) {
> +        char *tmp;
> +        FILE *deviceclassfile;
> +        unsigned deviceclass = 0, domainid, busid, devid, fnid;
> +        char addr[32];
> +        DeviceState *dev;
> +
> +        if (sscanf(entry->d_name, "%X:%X:%X.%x",
> +                   &domainid, &busid, &devid, &fnid) != 4) {
> +            continue;
> +        }
> +
> +        tmp = g_strdup_printf("%s%s/class", iommupath, entry->d_name);
> +        trace_spapr_pci("Reading device class from ", tmp);
> +
> +        deviceclassfile = fopen(tmp, "r");
> +        if (deviceclassfile) {
> +            fscanf(deviceclassfile, "%x", &deviceclass);
> +            fclose(deviceclassfile);
> +        }
> +        g_free(tmp);
> +
> +        if (!deviceclass) {
> +            continue;
> +        }
> +        if ((sphb->scan < 2) &&
> +            ((deviceclass >> 16) == (PCI_CLASS_BRIDGE_OTHER >> 8))) {
> +            /* Skip _any_ bridge */
> +            continue;
> +        }
> +        trace_spapr_pci("Creating device from ", entry->d_name);
> +
> +        dev = qdev_create(&phb->bus->qbus, "vfio-pci");
> +        if (!dev) {
> +            fprintf(stderr, "failed to create vfio-pci\n");
> +            continue;
> +        }
> +        qdev_prop_parse(dev, "host", entry->d_name);
> +        if (sphb->force_addr) {
> +            snprintf(addr, sizeof(addr), "%x.%x", devid, fnid);
> +            qdev_prop_parse(dev, "addr", addr);
> +        }
> +        if (sphb->enable_multifunction) {
> +            qdev_prop_set_bit(dev, "multifunction", 1);
> +        }
> +        qdev_init_nofail(dev);

I'm a bit concerned about what happens if scan >= 2 and you do add a
bridge.  Does that work?

> +    }
> +    closedir(dirp);
> +
> +    return 0;
> +}
> +
>  static int spapr_phb_init(SysBusDevice *s)
>  {
>      sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
> @@ -627,13 +718,6 @@ static int spapr_phb_init(SysBusDevice *s)
>                             PCI_DEVFN(0, 0), PCI_NUM_PINS);
>      phb->bus = bus;
>  
> -    sphb->dma_window_start = 0;
> -    sphb->dma_window_size = 0x40000000;
> -    sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn, 
> sphb->dma_window_size);
> -    if (!sphb->dma) {
> -        fprintf(stderr, "Unable to create TCE table for %s\n", 
> sphb->dtbusname);
> -        return -1;
> -    }
>      pci_setup_iommu(bus, spapr_pci_dma_context_fn, sphb);
>  
>      QLIST_INSERT_HEAD(&spapr->phbs, sphb, list);
> @@ -650,6 +734,25 @@ static int spapr_phb_init(SysBusDevice *s)
>          sphb->lsi_table[i].irq = irq;
>      }
>  
> +    if (sphb->iommugroupid >= 0) {
> +        if (spapr_pci_scan_vfio(sphb) < 0) {
> +            return -1;
> +        }
> +        /* dma_window_xxxx will be initialized from
> +           spapr_register_vfio_container() when VFIO will create the very 
> first
> +           device in the group */
> +        return 0;
> +    }
> +
> +    sphb->dma_window_start = 0;
> +    sphb->dma_window_size = 0x40000000;
> +    sphb->dma = spapr_tce_new_dma_context(sphb->dma_liobn,
> +                                          sphb->dma_window_size);
> +    if (!sphb->dma) {
> +        fprintf(stderr, "Unable to create TCE table for %s\n", 
> sphb->dtbusname);
> +        return -1;
> +    }
> +
>      return 0;
>  }
>  
> @@ -659,7 +762,9 @@ static void spapr_phb_reset(DeviceState *qdev)
>      sPAPRPHBState *sphb = SPAPR_PCI_HOST_BRIDGE(s);
>  
>      /* Reset the IOMMU state */
> -    spapr_tce_reset(sphb->dma);
> +    if (sphb->iommugroupid == -1) {
> +        spapr_tce_reset(sphb->dma);
> +    }
>  }
>  
>  static Property spapr_phb_properties[] = {
> @@ -674,6 +779,10 @@ static Property spapr_phb_properties[] = {
>      DEFINE_PROP_HEX64("io_win_size", sPAPRPHBState, io_win_size,
>                        SPAPR_PCI_IO_WIN_SIZE),
>      DEFINE_PROP_HEX64("msi_win_addr", sPAPRPHBState, msi_win_addr, -1),
> +    DEFINE_PROP_INT32("iommu", sPAPRPHBState, iommugroupid, -1),
> +    DEFINE_PROP_UINT8("scan", sPAPRPHBState, scan, 1),
> +    DEFINE_PROP_UINT8("mf", sPAPRPHBState, enable_multifunction, 0),

Oops, you said this was "multifunction" in the commit log.

> +    DEFINE_PROP_UINT8("forceaddr", sPAPRPHBState, force_addr, 0),
>      DEFINE_PROP_END_OF_LIST(),
>  };
>  
> @@ -846,6 +955,10 @@ int spapr_populate_pci_dt(sPAPRPHBState *phb,
>      _FDT(fdt_setprop(fdt, bus_off, "interrupt-map", &interrupt_map,
>                       sizeof(interrupt_map)));
>  
> +    if (!phb->dma_window_size) {
> +        fprintf(stderr, "Unexpected error: DMA window is zero, exiting\n");
> +        exit(1);
> +    }
>      spapr_dma_dt(fdt, bus_off, "ibm,dma-window",
>                   phb->dma_liobn, phb->dma_window_start,
>                   phb->dma_window_size);
> diff --git a/hw/spapr_pci.h b/hw/spapr_pci.h
> index b05241d..41a9cb1 100644
> --- a/hw/spapr_pci.h
> +++ b/hw/spapr_pci.h
> @@ -26,6 +26,7 @@
>  #include "hw/pci.h"
>  #include "hw/pci_host.h"
>  #include "hw/xics.h"
> +#include "hw/spapr_iommu_vfio.h"
>  
>  #define SPAPR_MSIX_MAX_DEVS 32
>  
> @@ -62,6 +63,11 @@ typedef struct sPAPRPHBState {
>          uint32_t nvec;
>      } msi_table[SPAPR_MSIX_MAX_DEVS];
>  
> +    struct sPAPRVFIOData *vfio_data;
> +    int32_t iommugroupid;
> +    uint8_t scan; /* 0 don't scan 1 scan only devices 2 scan everything */

Aha, here are the full scan possibilities.  I have doubts that 2 works;
should it be available?

> +    uint8_t enable_multifunction, force_addr;

bool?

> +
>      QLIST_ENTRY(sPAPRPHBState) list;
>  } sPAPRPHBState;
>  
> diff --git a/hw/vfio_pci.c b/hw/vfio_pci.c
> index 7c27834..7862193 100644
> --- a/hw/vfio_pci.c
> +++ b/hw/vfio_pci.c
> @@ -39,6 +39,8 @@
>  #include "qemu-queue.h"
>  #include "range.h"
>  
> +#include "spapr_iommu_vfio.h"
> +
>  /* #define DEBUG_VFIO */
>  #ifdef DEBUG_VFIO
>  #define DPRINTF(fmt, ...) \
> @@ -94,6 +96,7 @@ typedef struct VFIOContainer {
>          /* enable abstraction to support various iommu backends */
>          union {
>              MemoryListener listener; /* Used by type1 iommu */
> +            sPAPRVFIOData spapr; /* Used by SPAPR TCE (POWERPC) iommu */
>          };
>          void (*release)(struct VFIOContainer *);
>      } iommu_data;
> @@ -1193,6 +1196,25 @@ static void vfio_listener_release(VFIOContainer 
> *container)
>  }
>  
>  /*
> + * sPAPR TCE DMA interface
> + */
> +static int spapr_tce_map(sPAPRVFIOData *data,
> +                         struct vfio_iommu_type1_dma_map *param)
> +{
> +    VFIOContainer *container = container_of(data, VFIOContainer,
> +                                            iommu_data.spapr);
> +    return ioctl(container->fd, VFIO_IOMMU_MAP_DMA, param);
> +}
> +
> +static int spapr_tce_unmap(sPAPRVFIOData *data,
> +                           struct vfio_iommu_type1_dma_unmap *param)
> +{
> +    VFIOContainer *container = container_of(data, VFIOContainer,
> +                                            iommu_data.spapr);
> +    return ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, param);
> +}
> +
> +/*
>   * Interrupt setup
>   */
>  static void vfio_disable_interrupts(VFIODevice *vdev)
> @@ -1670,6 +1692,46 @@ static int vfio_connect_container(VFIOGroup *group)
>          container->iommu_data.release = vfio_listener_release;
>  
>          memory_listener_register(&container->iommu_data.listener, 
> &address_space_memory);
> +
> +    } else if (ioctl(fd, VFIO_CHECK_EXTENSION, VFIO_SPAPR_TCE_IOMMU)) {
> +        ret = ioctl(group->fd, VFIO_GROUP_SET_CONTAINER, &fd);
> +        if (ret) {
> +            error_report("vfio: failed to set group container: %s\n",
> +                         strerror(errno));


No \n at the end of error_* strings or Markus will scold me ;)

> +            g_free(container);
> +            close(fd);
> +            return -1;
> +        }
> +
> +        ret = ioctl(fd, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU);
> +        if (ret) {
> +            error_report("vfio: failed to set iommu for container: %s\n",
> +                         strerror(errno));
> +            g_free(container);
> +            close(fd);
> +            return -1;
> +        }
> +
> +        container->iommu_data.spapr.info.argsz =
> +                sizeof(container->iommu_data.spapr.info);
> +        ret = ioctl(fd, VFIO_IOMMU_SPAPR_TCE_GET_INFO,
> +                    &container->iommu_data.spapr.info);
> +        if (ret) {
> +            error_report("vfio: failed to get iommu info for container: 
> %s\n",
> +                         strerror(errno));
> +            g_free(container);
> +            close(fd);
> +            return -1;
> +        }
> +
> +        /*
> +         * At the moment of adding VFIO for SPAPR (server POWERPC), only one
> +         * group per container is supported. This may change later.
> +         */
> +        container->iommu_data.spapr.map = spapr_tce_map;
> +        container->iommu_data.spapr.unmap = spapr_tce_unmap;
> +        spapr_register_vfio_container(group->groupid,
> +                                      &container->iommu_data.spapr);
>      } else {
>          error_report("vfio: No available IOMMU models\n");
>          g_free(container);
> diff --git a/linux-headers/linux/vfio.h b/linux-headers/linux/vfio.h
> index 4758d1b..92dc88b 100644
> --- a/linux-headers/linux/vfio.h
> +++ b/linux-headers/linux/vfio.h
> @@ -22,6 +22,7 @@
>  /* Extensions */
>  
>  #define VFIO_TYPE1_IOMMU             1
> +#define VFIO_SPAPR_TCE_IOMMU         2
>  
>  /*
>   * The IOCTL interface is designed for extensibility by embedding the
> @@ -365,4 +366,30 @@ struct vfio_iommu_type1_dma_unmap {
>  
>  #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14)
>  
> +/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
> +
> +/*
> + * The SPAPR TCE info struct provides the information about the PCI bus
> + * address ranges available for DMA, these values are programmed into
> + * the hardware so the guest has to know that information.
> + *
> + * The IOMMU page size is always 4K.
> + */
> +
> +struct vfio_iommu_spapr_tce_info {
> +    __u32 argsz;
> +    __u32 flags;                /* reserved for future use */
> +    __u32 dma32_window_start;   /* 32 bit window start (bytes) */
> +    __u32 dma32_window_size;    /* 32 bit window size (bytes) */
> +};
> +
> +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO        _IO(VFIO_TYPE, VFIO_BASE + 12)
> +
> +/* Reuse type1 map/unmap structs as they are the same at the moment */
> +typedef struct vfio_iommu_type1_dma_map vfio_iommu_spapr_tce_dma_map;
> +typedef struct vfio_iommu_type1_dma_unmap vfio_iommu_spapr_tce_dma_unmap;
> +
> +/* ***************************************************************** */
> +
> +
>  #endif /* _UAPIVFIO_H */
> diff --git a/trace-events b/trace-events
> index e280fba..388a107 100644
> --- a/trace-events
> +++ b/trace-events
> @@ -1016,6 +1016,7 @@ qxl_render_guest_primary_resized(int32_t width, int32_t 
> height, int32_t stride,
>  qxl_render_update_area_done(void *cookie) "%p"
>  
>  # hw/spapr_pci.c
> +spapr_pci(const char *msg1, const char *msg2) "%s%s"
>  spapr_pci_msi(const char *msg, uint32_t n, uint32_t ca) "%s (device#%d, 
> cfg=%x)"
>  spapr_pci_msi_setup(const char *name, unsigned vector, uint64_t addr) 
> "dev\"%s\" vector %u, addr=%"PRIx64
>  spapr_pci_rtas_ibm_change_msi(unsigned func, unsigned req) "func %u, 
> requested %u"
> @@ -1034,4 +1035,7 @@ xics_masked_pending(void) "set_irq_msi: masked pending"
>  xics_set_irq_lsi(int srcno, int nr) "set_irq_lsi: srcno %d [irq %#x]"
>  xics_ics_write_xive(int nr, int srcno, int server, uint8_t priority) 
> "ics_write_xive: irq %#x [src %d] server %#x prio %#x"
>  xics_ics_reject(int nr, int srcno) "reject irq %#x [src %d]"
> -xics_ics_eoi(int nr) "ics_eoi: irq %#x"
> \ No newline at end of file
> +xics_ics_eoi(int nr) "ics_eoi: irq %#x"
> +
> +# hw/spapr_iommu.c
> +spapr_iommu(const char *op, uint32_t liobn, uint64_t ioba, uint64_t tce, int 
> ret) "%s %x ioba=%"PRIx64" tce=%"PRIx64" ret=%d"






reply via email to

[Prev in Thread] Current Thread [Next in Thread]