qemu-ppc
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-ppc] [PATCH 3/3] pseries: Add partial support for PCI


From: Alexander Graf
Subject: Re: [Qemu-ppc] [PATCH 3/3] pseries: Add partial support for PCI
Date: Sun, 30 Oct 2011 18:06:51 +0100

On 28.10.2011, at 03:56, David Gibson wrote:

> From: Alexey Kardashevskiy <address@hidden>
> 
> This patch adds a PCI bus to the pseries machine.  This instantiates
> the qemu generic PCI bus code, advertises a PCI host bridge in the
> guest's device tree and implements the RTAS methods specified by PAPR
> to access PCI config space.  It also sets up the memory regions we
> need to provide windows into the PCI memory and IO space, and
> advertises those to the guest.
> 
> However, because qemu can't yet emulate an IOMMU, which is mandatory on
> pseries, PCI devices which use DMA (i.e. most of them) will not work with
> this code alone.  Still, this is enough to support the virtio_pci device
> (which probably _should_ use emulated PCI DMA, but is specced to use
> direct hypervisor access to guest physical memory instead).
> 
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> Signed-off-by: David Gibson <address@hidden>
> ---
> Makefile.target |    3 +
> hw/spapr.c      |   36 ++++-
> hw/spapr.h      |    2 +
> hw/spapr_pci.c  |  515 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
> hw/spapr_pci.h  |   59 +++++++
> 5 files changed, 611 insertions(+), 4 deletions(-)
> create mode 100644 hw/spapr_pci.c
> create mode 100644 hw/spapr_pci.h
> 
> diff --git a/Makefile.target b/Makefile.target
> index fe5f6f7..f3eb842 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -248,6 +248,9 @@ obj-ppc-y += ppc_newworld.o
> # IBM pSeries (sPAPR)
> obj-ppc-$(CONFIG_PSERIES) += spapr.o spapr_hcall.o spapr_rtas.o spapr_vio.o
> obj-ppc-$(CONFIG_PSERIES) += xics.o spapr_vty.o spapr_llan.o spapr_vscsi.o
> +ifeq ($(CONFIG_PCI),y)
> +obj-ppc-$(CONFIG_PSERIES) += spapr_pci.o device-hotplug.o pci-hotplug.o
> +endif

You make it conditional here ...

> # PowerPC 4xx boards
> obj-ppc-y += ppc4xx_devs.o ppc4xx_pci.o ppc405_uc.o ppc405_boards.o
> obj-ppc-y += ppc440.o ppc440_bamboo.o
> diff --git a/hw/spapr.c b/hw/spapr.c
> index 933af32..bdaa938 100644
> --- a/hw/spapr.c
> +++ b/hw/spapr.c
> @@ -39,10 +39,12 @@
> 
> #include "hw/spapr.h"
> #include "hw/spapr_vio.h"
> +#include "hw/spapr_pci.h"
> #include "hw/xics.h"
> 
> #include "kvm.h"
> #include "kvm_ppc.h"
> +#include "pci.h"

... but not here. Just throw away the condition above. We don't need to support 
-M pseries without PCI.

> 
> #include "exec-memory.h"
> 
> @@ -62,6 +64,11 @@
> #define MAX_CPUS                256
> #define XICS_IRQS             1024
> 
> +#define SPAPR_PCI_BUID          0x800000020000001ULL
> +#define SPAPR_PCI_MEM_WIN_ADDR  (0x10000000000ULL + 0xA0000000)
> +#define SPAPR_PCI_MEM_WIN_SIZE  0x20000000
> +#define SPAPR_PCI_IO_WIN_ADDR   (0x10000000000ULL + 0x80000000)
> +
> #define PHANDLE_XICP            0x00001111
> 
> sPAPREnvironment *spapr;
> @@ -146,6 +153,14 @@ static void *spapr_create_fdt_skel(const char *cpu_model,
>                        &end_prop, sizeof(end_prop))));
>     _FDT((fdt_property_string(fdt, "qemu,boot-device", boot_device)));
> 
> +    /*
> +     * Because we don't always invoke any firmware, we can't rely on
> +     * that to do BAR allocation.  Long term, we should probably do
> +     * that ourselves, but for now, this setting (plus advertising the
> +     * current BARs as 0) causes sufficiently recent kernels to do the
> +     * BAR assignment themselves */
> +    _FDT((fdt_property_cell(fdt, "linux,pci-probe-only", 0)));
> +
>     _FDT((fdt_end_node(fdt)));
> 
>     /* memory node(s) */
> @@ -308,6 +323,7 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
> {
>     int ret;
>     void *fdt;
> +    sPAPRPHBState *phb;
> 
>     fdt = g_malloc(FDT_MAX_SIZE);
> 
> @@ -320,6 +336,15 @@ static void spapr_finalize_fdt(sPAPREnvironment *spapr,
>         exit(1);
>     }
> 
> +    QLIST_FOREACH(phb, &spapr->phbs, list) {
> +        ret = spapr_populate_pci_devices(phb, PHANDLE_XICP, fdt);
> +    }
> +
> +    if (ret < 0) {
> +        fprintf(stderr, "couldn't setup PCI devices in fdt\n");
> +        exit(1);
> +    }
> +
>     /* RTAS */
>     ret = spapr_rtas_device_tree_setup(fdt, rtas_addr, rtas_size);
>     if (ret < 0) {
> @@ -478,6 +503,12 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>         }
>     }
> 
> +    /* Set up PCI */
> +    spapr_create_phb(spapr, "pci", SPAPR_PCI_BUID,
> +                     SPAPR_PCI_MEM_WIN_ADDR,
> +                     SPAPR_PCI_MEM_WIN_SIZE,
> +                     SPAPR_PCI_IO_WIN_ADDR);
> +
>     for (i = 0; i < nb_nics; i++) {
>         NICInfo *nd = &nd_table[i];
> 
> @@ -488,10 +519,7 @@ static void ppc_spapr_init(ram_addr_t ram_size,
>         if (strcmp(nd->model, "ibmveth") == 0) {
>             spapr_vlan_create(spapr->vio_bus, 0x1000 + i, nd);
>         } else {
> -            fprintf(stderr, "pSeries (sPAPR) platform does not support "
> -                    "NIC model '%s' (only ibmveth is supported)\n",
> -                    nd->model);
> -            exit(1);
> +            pci_nic_init_nofail(&nd_table[i], nd->model, NULL);
>         }
>     }
> 
> diff --git a/hw/spapr.h b/hw/spapr.h
> index 6657c33..5689797 100644
> --- a/hw/spapr.h
> +++ b/hw/spapr.h
> @@ -2,12 +2,14 @@
> #define __HW_SPAPR_H__
> 
> #include "hw/xics.h"
> +#include "spapr_pci.h"
> 
> struct VIOsPAPRBus;
> struct icp_state;
> 
> typedef struct sPAPREnvironment {
>     struct VIOsPAPRBus *vio_bus;
> +    QLIST_HEAD(, sPAPRPHBState) phbs;
>     struct icp_state *icp;
> 
>     target_phys_addr_t ram_limit;
> diff --git a/hw/spapr_pci.c b/hw/spapr_pci.c
> new file mode 100644
> index 0000000..a907747
> --- /dev/null
> +++ b/hw/spapr_pci.c
> @@ -0,0 +1,515 @@
> +/*
> + * QEMU sPAPR PCI host originated from Uninorth PCI host
> + *
> + * Copyright (c) 2011 Alexey Kardashevskiy, IBM Corporation.
> + * Copyright (C) 2011 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this software and associated documentation files (the "Software"), to deal
> + * in the Software without restriction, including without limitation the rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + */
> +#include "hw.h"
> +#include "pci.h"
> +#include "pci_host.h"
> +#include "hw/spapr.h"
> +#include "hw/spapr_pci.h"
> +#include "exec-memory.h"
> +#include <libfdt.h>
> +
> +#include "hw/pci_internals.h"
> +
> +static const uint32_t bars[] = {
> +    PCI_BASE_ADDRESS_0, PCI_BASE_ADDRESS_1,
> +    PCI_BASE_ADDRESS_2, PCI_BASE_ADDRESS_3,
> +    PCI_BASE_ADDRESS_4, PCI_BASE_ADDRESS_5
> +    /*, PCI_ROM_ADDRESS*/
> +};
> +
> +static PCIDevice *find_dev(sPAPREnvironment *spapr,
> +                           uint64_t buid, uint32_t config_addr)
> +{
> +    DeviceState *qdev;
> +    int devfn = (config_addr >> 8) & 0xFF;
> +    sPAPRPHBState *phb;
> +
> +    QLIST_FOREACH(phb, &spapr->phbs, list) {
> +        if (phb->buid != buid) {
> +            continue;
> +        }
> +
> +        QLIST_FOREACH(qdev, &phb->host_state.bus->qbus.children, sibling) {
> +            PCIDevice *dev = (PCIDevice *)qdev;
> +            if (dev->devfn == devfn) {
> +                return dev;
> +            }
> +        }
> +    }
> +
> +    return NULL;
> +}
> +
> +static void rtas_ibm_read_pci_config(sPAPREnvironment *spapr,
> +                                     uint32_t token, uint32_t nargs,
> +                                     target_ulong args,
> +                                     uint32_t nret, target_ulong rets)
> +{
> +    uint32_t val, size, addr;
> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> +    PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0));
> +
> +    if (!dev) {
> +        rtas_st(rets, 0, -1);
> +        return;
> +    }
> +    size = rtas_ld(args, 3);
> +    addr = rtas_ld(args, 0) & 0xFF;
> +    val = pci_default_read_config(dev, addr, size);
> +    rtas_st(rets, 0, 0);
> +    rtas_st(rets, 1, val);
> +}
> +
> +static void rtas_read_pci_config(sPAPREnvironment *spapr,
> +                                 uint32_t token, uint32_t nargs,
> +                                 target_ulong args,
> +                                 uint32_t nret, target_ulong rets)
> +{
> +    uint32_t val, size, addr;
> +    PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0));
> +
> +    if (!dev) {
> +        rtas_st(rets, 0, -1);
> +        return;
> +    }
> +    size = rtas_ld(args, 1);
> +    addr = rtas_ld(args, 0) & 0xFF;
> +    val = pci_default_read_config(dev, addr, size);
> +    rtas_st(rets, 0, 0);
> +    rtas_st(rets, 1, val);
> +}
> +
> +static void rtas_ibm_write_pci_config(sPAPREnvironment *spapr,
> +                                      uint32_t token, uint32_t nargs,
> +                                      target_ulong args,
> +                                      uint32_t nret, target_ulong rets)
> +{
> +    uint32_t val, size, addr;
> +    uint64_t buid = ((uint64_t)rtas_ld(args, 1) << 32) | rtas_ld(args, 2);
> +    PCIDevice *dev = find_dev(spapr, buid, rtas_ld(args, 0));
> +
> +    if (!dev) {
> +        rtas_st(rets, 0, -1);
> +        return;
> +    }
> +    val = rtas_ld(args, 4);
> +    size = rtas_ld(args, 3);
> +    addr = rtas_ld(args, 0) & 0xFF;
> +    pci_default_write_config(dev, addr, val, size);
> +    rtas_st(rets, 0, 0);
> +}
> +
> +static void rtas_write_pci_config(sPAPREnvironment *spapr,
> +                                  uint32_t token, uint32_t nargs,
> +                                  target_ulong args,
> +                                  uint32_t nret, target_ulong rets)
> +{
> +    uint32_t val, size, addr;
> +    PCIDevice *dev = find_dev(spapr, 0, rtas_ld(args, 0));
> +
> +    if (!dev) {
> +        rtas_st(rets, 0, -1);
> +        return;
> +    }
> +    val = rtas_ld(args, 2);
> +    size = rtas_ld(args, 1);
> +    addr = rtas_ld(args, 0) & 0xFF;
> +    pci_default_write_config(dev, addr, val, size);
> +    rtas_st(rets, 0, 0);
> +}
> +
> +static int pci_spapr_map_irq(PCIDevice *pci_dev, int irq_num)
> +{
> +    /*
> +     * Here we need to convert pci_dev + irq_num to some unique value
> +     * which is less than number of IRQs on the specific bus (now it
> +     * is 16).  At the moment irq_num == device_id (number of the
> +     * slot?)
> +     * FIXME: we should swizzle in fn and irq_num
> +     */
> +    return (pci_dev->devfn >> 3) % SPAPR_PCI_NUM_LSI;
> +}
> +
> +static void pci_spapr_set_irq(void *opaque, int irq_num, int level)
> +{
> +    /*
> +     * Here we use the number returned by pci_spapr_map_irq to find a
> +     * corresponding qemu_irq.
> +     */
> +    sPAPRPHBState *phb = opaque;
> +
> +    qemu_set_irq(phb->lsi_table[irq_num].qirq, level);
> +}
> +
> +static int spapr_phb_init(SysBusDevice *s)
> +{
> +    sPAPRPHBState *phb = FROM_SYSBUS(sPAPRPHBState, s);
> +    int i;
> +
> +    /* Initialize the LSI table */
> +    for (i = 0; i < SPAPR_PCI_NUM_LSI; i++) {
> +        qemu_irq qirq;
> +        uint32_t num;
> +
> +        qirq = spapr_allocate_irq(0, &num);
> +        if (!qirq) {
> +            return -1;
> +        }
> +
> +        phb->lsi_table[i].dt_irq = num;
> +        phb->lsi_table[i].qirq = qirq;
> +    }
> +
> +    return 0;
> +}
> +
> +static int spapr_main_pci_host_init(PCIDevice *d)
> +{
> +    return 0;
> +}
> +
> +static PCIDeviceInfo spapr_main_pci_host_info = {
> +    .qdev.name = "spapr-pci-host-bridge",
> +    .qdev.size = sizeof(PCIDevice),
> +    .init      = spapr_main_pci_host_init,
> +};
> +
> +static void spapr_register_devices(void)
> +{
> +    sysbus_register_dev("spapr-pci-host-bridge", sizeof(sPAPRPHBState),
> +                        spapr_phb_init);
> +    pci_qdev_register(&spapr_main_pci_host_info);
> +}
> +
> +device_init(spapr_register_devices)
> +
> +static uint64_t spapr_io_read(void *opaque, target_phys_addr_t addr,
> +                              unsigned size)
> +{
> +    switch (size) {
> +    case 1:
> +        return cpu_inb(addr);
> +    case 2:
> +        return cpu_inw(addr);
> +    case 4:
> +        return cpu_inl(addr);
> +    }
> +    assert(0);
> +}
> +
> +static void spapr_io_write(void *opaque, target_phys_addr_t addr,
> +                           uint64_t data, unsigned size)
> +{
> +    switch (size) {
> +    case 1:
> +        cpu_outb(addr, data);
> +        return;
> +    case 2:
> +        cpu_outw(addr, data);
> +        return;
> +    case 4:
> +        cpu_outl(addr, data);
> +        return;
> +    }
> +    assert(0);
> +}
> +
> +static MemoryRegionOps spapr_io_ops = {
> +    .endianness = DEVICE_LITTLE_ENDIAN,
> +    .read = spapr_io_read,
> +    .write = spapr_io_write
> +};
> +
> +void spapr_create_phb(sPAPREnvironment *spapr,
> +                      const char *busname, uint64_t buid,
> +                      uint64_t mem_win_addr, uint64_t mem_win_size,
> +                      uint64_t io_win_addr)
> +{
> +    DeviceState *dev;
> +    SysBusDevice *s;
> +    sPAPRPHBState *phb;
> +    PCIBus *bus;
> +    char namebuf[strlen(busname)+11];
> +
> +    dev = qdev_create(NULL, "spapr-pci-host-bridge");
> +    qdev_init_nofail(dev);
> +    s = sysbus_from_qdev(dev);
> +    phb = FROM_SYSBUS(sPAPRPHBState, s);
> +
> +    phb->mem_win_addr = mem_win_addr;
> +
> +    sprintf(namebuf, "%s-mem", busname);
> +    memory_region_init(&phb->memspace, namebuf, INT64_MAX);
> +
> +    sprintf(namebuf, "%s-memwindow", busname);
> +    memory_region_init_alias(&phb->memwindow, namebuf, &phb->memspace,
> +                             SPAPR_PCI_MEM_WIN_BUS_OFFSET, mem_win_size);
> +    memory_region_add_subregion(get_system_memory(), mem_win_addr,
> +                                &phb->memwindow);
> +
> +    phb->io_win_addr = io_win_addr;
> +
> +    /* On ppc, we only have MMIO, no specific IO space from the CPU
> +     * perspective.  In theory we ought to be able to embed the PCI IO
> +     * memory region directly in the system memory space.  However,
> +     * if any of the IO BAR subregions use the old_portio mechanism,
> +     * that won't be processed properly unless accessed from the
> +     * system io address space.  This hack to bounce things via
> +     * system_io works around the problem until all the users of
> +     * old_portio are updated */
> +    sprintf(namebuf, "%s-io", busname);
> +    memory_region_init(&phb->iospace, namebuf, SPAPR_PCI_IO_WIN_SIZE);
> +    /* FIXME: fix to support multiple PHBs */
> +    memory_region_add_subregion(get_system_io(), 0, &phb->iospace);
> +
> +    sprintf(namebuf, "%s-iowindow", busname);
> +    memory_region_init_io(&phb->iowindow, &spapr_io_ops, phb,
> +                          namebuf, SPAPR_PCI_IO_WIN_SIZE);
> +    memory_region_add_subregion(get_system_memory(), io_win_addr,
> +                                &phb->iowindow);
> +
> +    phb->host_state.bus = bus = pci_register_bus(&phb->busdev.qdev, busname,
> +                                                 pci_spapr_set_irq,
> +                                                 pci_spapr_map_irq,
> +                                                 phb,
> +                                                 &phb->memspace, &phb->iospace,
> +                                                 PCI_DEVFN(0, 0),
> +                                                 SPAPR_PCI_NUM_LSI);
> +
> +    spapr_rtas_register("read-pci-config", rtas_read_pci_config);
> +    spapr_rtas_register("write-pci-config", rtas_write_pci_config);
> +    spapr_rtas_register("ibm,read-pci-config", rtas_ibm_read_pci_config);
> +    spapr_rtas_register("ibm,write-pci-config", rtas_ibm_write_pci_config);
> +
> +    /*
> +     * This is a workaround to disable PCI devices resetting as we do
> +     * BAR allocation on the QEMU side and reset destroys this
> +     * configuration.
> +     */
> +    bus->qbus.info->reset = NULL;

Eh. What? So you're breaking reset for all PCI devices? How do you reboot?


Alex




reply via email to

[Prev in Thread] Current Thread [Next in Thread]