[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-ppc] [Qemu-devel] [PATCH 14/17] pseries: Support for in-kernel XICS interrupt controller
From: |
Anthony Liguori |
Subject: |
Re: [Qemu-ppc] [Qemu-devel] [PATCH 14/17] pseries: Support for in-kernel XICS interrupt controller |
Date: |
Mon, 08 Jul 2013 13:50:10 -0500 |
User-agent: |
Notmuch/0.15.2+202~g0c4b8aa (http://notmuchmail.org) Emacs/23.3.1 (x86_64-pc-linux-gnu) |
Alexey Kardashevskiy <address@hidden> writes:
> From: David Gibson <address@hidden>
>
> Recent (host) kernels support emulating the PAPR defined "XICS" interrupt
> controller system within KVM. This patch allows qemu to initialize and
> configure the in-kernel XICS, and keep its state in sync with qemu's XICS
> state as necessary.
>
> This should give considerable performance improvements. e.g. on a simple
> IPI ping-pong test between hardware threads, using qemu XICS gives us
> around 5,000 irqs/second, whereas the in-kernel XICS gives us around
> 70,000 irqs/s on the same hardware configuration.
>
> [Mike Qiu <address@hidden>: fixed mistype which caused ics_set_kvm_state() to
> fail]
> Signed-off-by: David Gibson <address@hidden>
> [aik: moved to a separate device]
> Signed-off-by: Alexey Kardashevskiy <address@hidden>
> ---
> default-configs/ppc64-softmmu.mak | 1 +
> hw/intc/Makefile.objs | 1 +
> hw/intc/xics_kvm.c | 445
> +++++++++++++++++++++++++++++++++++++
> hw/ppc/spapr.c | 32 ++-
> include/hw/ppc/xics.h | 13 ++
> 5 files changed, 489 insertions(+), 3 deletions(-)
> create mode 100644 hw/intc/xics_kvm.c
>
> diff --git a/default-configs/ppc64-softmmu.mak
> b/default-configs/ppc64-softmmu.mak
> index 69a9f8d..5b995f9 100644
> --- a/default-configs/ppc64-softmmu.mak
> +++ b/default-configs/ppc64-softmmu.mak
> @@ -48,5 +48,6 @@ CONFIG_OPENPIC_KVM=$(and $(CONFIG_E500),$(CONFIG_KVM))
> # For pSeries
> CONFIG_PCI_HOTPLUG=y
> CONFIG_XICS=$(CONFIG_PSERIES)
> +CONFIG_XICS_KVM=$(and $(CONFIG_PSERIES),$(CONFIG_KVM))
> # For PReP
> CONFIG_MC146818RTC=y
> diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
> index abe8f80..9e77afe 100644
> --- a/hw/intc/Makefile.objs
> +++ b/hw/intc/Makefile.objs
> @@ -23,3 +23,4 @@ obj-$(CONFIG_OPENPIC) += openpic.o
> obj-$(CONFIG_OPENPIC_KVM) += openpic_kvm.o
> obj-$(CONFIG_SH4) += sh_intc.o
> obj-$(CONFIG_XICS) += xics.o
> +obj-$(CONFIG_XICS_KVM) += xics_kvm.o
> diff --git a/hw/intc/xics_kvm.c b/hw/intc/xics_kvm.c
> new file mode 100644
> index 0000000..d5604a7
> --- /dev/null
> +++ b/hw/intc/xics_kvm.c
> @@ -0,0 +1,445 @@
> +/*
> + * QEMU PowerPC pSeries Logical Partition (aka sPAPR) hardware System
> Emulator
> + *
> + * PAPR Virtualized Interrupt System, aka ICS/ICP aka xics, in-kernel
> emulation
> + *
> + * Copyright (c) 2013 David Gibson, IBM Corporation.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> copy
> + * of this software and associated documentation files (the "Software"), to
> deal
> + * in the Software without restriction, including without limitation the
> rights
> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> + * copies of the Software, and to permit persons to whom the Software is
> + * furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> FROM,
> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> + * THE SOFTWARE.
> + *
> + */
> +
> +#include "hw/hw.h"
> +#include "trace.h"
> +#include "hw/ppc/spapr.h"
> +#include "hw/ppc/xics.h"
> +#include "kvm_ppc.h"
> +#include "qemu/config-file.h"
> +
> +#include <sys/ioctl.h>
> +
> +struct icp_state_kvm {
CodingStyle
Regards,
Anthony Liguori
> + struct icp_state parent;
> +
> + uint32_t set_xive_token;
> + uint32_t get_xive_token;
> + uint32_t int_off_token;
> + uint32_t int_on_token;
> + int kernel_xics_fd;
> +};
> +
> +static void icp_get_kvm_state(struct icp_server_state *ss)
> +{
> + uint64_t state;
> + struct kvm_one_reg reg = {
> + .id = KVM_REG_PPC_ICP_STATE,
> + .addr = (uintptr_t)&state,
> + };
> + int ret;
> +
> + if (!ss->cs) {
> + return; /* kernel irqchip not in use */
> + }
> +
> + ret = kvm_vcpu_ioctl(ss->cs, KVM_GET_ONE_REG, &reg);
> + if (ret != 0) {
> + fprintf(stderr, "Unable to retrieve KVM interrupt controller state"
> + " for CPU %d: %s\n", ss->cs->cpu_index, strerror(errno));
> + exit(1);
> + }
> +
> + ss->xirr = state >> KVM_REG_PPC_ICP_XISR_SHIFT;
> + ss->mfrr = (state >> KVM_REG_PPC_ICP_MFRR_SHIFT)
> + & KVM_REG_PPC_ICP_MFRR_MASK;
> + ss->pending_priority = (state >> KVM_REG_PPC_ICP_PPRI_SHIFT)
> + & KVM_REG_PPC_ICP_PPRI_MASK;
> +}
> +
> +static int icp_set_kvm_state(struct icp_server_state *ss)
> +{
> + uint64_t state;
> + struct kvm_one_reg reg = {
> + .id = KVM_REG_PPC_ICP_STATE,
> + .addr = (uintptr_t)&state,
> + };
> + int ret;
> +
> + if (!ss->cs) {
> + return 0; /* kernel irqchip not in use */
> + }
> +
> + state = ((uint64_t)ss->xirr << KVM_REG_PPC_ICP_XISR_SHIFT)
> + | ((uint64_t)ss->mfrr << KVM_REG_PPC_ICP_MFRR_SHIFT)
> + | ((uint64_t)ss->pending_priority << KVM_REG_PPC_ICP_PPRI_SHIFT);
> +
> + ret = kvm_vcpu_ioctl(ss->cs, KVM_SET_ONE_REG, &reg);
> + if (ret != 0) {
> + fprintf(stderr, "Unable to restore KVM interrupt controller state
> (0x%"
> + PRIx64 ") for CPU %d: %s\n", state, ss->cs->cpu_index,
> + strerror(errno));
> + exit(1);
> + return ret;
> + }
> +
> + return 0;
> +}
> +
> +static void ics_get_kvm_state(struct ics_state *ics)
> +{
> + struct icp_state_kvm *icpkvm = XICS_KVM(ics->icp);
> + uint64_t state;
> + struct kvm_device_attr attr = {
> + .flags = 0,
> + .group = KVM_DEV_XICS_GRP_SOURCES,
> + .addr = (uint64_t)(uintptr_t)&state,
> + };
> + int i;
> +
> + for (i = 0; i < ics->nr_irqs; i++) {
> + struct ics_irq_state *irq = &ics->irqs[i];
> + int ret;
> +
> + attr.attr = i + ics->offset;
> +
> + ret = ioctl(icpkvm->kernel_xics_fd, KVM_GET_DEVICE_ATTR, &attr);
> + if (ret != 0) {
> + fprintf(stderr, "Unable to retrieve KVM interrupt controller
> state"
> + " for IRQ %d: %s\n", i + ics->offset, strerror(errno));
> + exit(1);
> + }
> +
> + irq->server = state & KVM_XICS_DESTINATION_MASK;
> + irq->saved_priority = (state >> KVM_XICS_PRIORITY_SHIFT)
> + & KVM_XICS_PRIORITY_MASK;
> + /*
> + * To be consistent with the software emulation in xics.c, we
> + * split out the masked state + priority that we get from the
> + * kernel into 'current priority' (0xff if masked) and
> + * 'saved priority' (if masked, this is the priority the
> + * interrupt had before it was masked). Masking and unmasking
> + * are done with the ibm,int-off and ibm,int-on RTAS calls.
> + */
> + if (state & KVM_XICS_MASKED) {
> + irq->priority = 0xff;
> + } else {
> + irq->priority = irq->saved_priority;
> + }
> +
> + if (state & KVM_XICS_PENDING) {
> + if (state & KVM_XICS_LEVEL_SENSITIVE) {
> + irq->status |= XICS_STATUS_ASSERTED;
> + } else {
> + /*
> + * A pending edge-triggered interrupt (or MSI)
> + * must have been rejected previously when we
> + * first detected it and tried to deliver it,
> + * so mark it as pending and previously rejected
> + * for consistency with how xics.c works.
> + */
> + irq->status |= XICS_STATUS_MASKED_PENDING
> + | XICS_STATUS_REJECTED;
> + }
> + }
> + }
> +}
> +
> +static int ics_set_kvm_state(struct ics_state *ics)
> +{
> + struct icp_state_kvm *icpkvm = XICS_KVM(ics->icp);
> + uint64_t state;
> + struct kvm_device_attr attr = {
> + .flags = 0,
> + .group = KVM_DEV_XICS_GRP_SOURCES,
> + .addr = (uint64_t)(uintptr_t)&state,
> + };
> + int i;
> +
> + for (i = 0; i < ics->nr_irqs; i++) {
> + struct ics_irq_state *irq = &ics->irqs[i];
> + int ret;
> +
> + attr.attr = i + ics->offset;
> +
> + state = irq->server;
> + state |= (uint64_t)(irq->saved_priority & KVM_XICS_PRIORITY_MASK)
> + << KVM_XICS_PRIORITY_SHIFT;
> + if (irq->priority != irq->saved_priority) {
> + assert(irq->priority == 0xff);
> + state |= KVM_XICS_MASKED;
> + }
> +
> + if (ics->islsi[i]) {
> + state |= KVM_XICS_LEVEL_SENSITIVE;
> + if (irq->status & XICS_STATUS_ASSERTED) {
> + state |= KVM_XICS_PENDING;
> + }
> + } else {
> + if (irq->status & XICS_STATUS_MASKED_PENDING) {
> + state |= KVM_XICS_PENDING;
> + }
> + }
> +
> + ret = ioctl(icpkvm->kernel_xics_fd, KVM_SET_DEVICE_ATTR, &attr);
> + if (ret != 0) {
> + fprintf(stderr, "Unable to restore KVM interrupt controller
> state"
> + " for IRQs %d: %s\n", i + ics->offset, strerror(errno));
> + return ret;
> + }
> + }
> +
> + return 0;
> +}
> +
> +static void icp_pre_save(void *opaque)
> +{
> + struct icp_server_state *ss = opaque;
> +
> + icp_get_kvm_state(ss);
> +}
> +
> +static int icp_post_load(void *opaque, int version_id)
> +{
> + struct icp_server_state *ss = opaque;
> +
> + return icp_set_kvm_state(ss);
> +}
> +
> +static void ics_pre_save(void *opaque)
> +{
> + struct ics_state *ics = opaque;
> +
> + ics_get_kvm_state(ics);
> +}
> +
> +static int ics_post_load(void *opaque, int version_id)
> +{
> + struct ics_state *ics = opaque;
> +
> + return ics_set_kvm_state(ics);
> +}
> +
> +static VMStateDescription vmstate_icpkvm_server = {
> + .name = "icpkvm/server",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .minimum_version_id_old = 1,
> + .pre_save = icp_pre_save,
> + .post_load = icp_post_load,
> +};
> +
> +static VMStateDescription vmstate_icskvm = {
> + .name = "icskvm",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .minimum_version_id_old = 1,
> + .pre_save = ics_pre_save,
> + .post_load = ics_post_load,
> +};
> +
> +static void ics_set_irq_kvm(void *opaque, int srcno, int val)
> +{
> + struct ics_state *ics = opaque;
> + struct kvm_irq_level args;
> + int rc;
> +
> + args.irq = srcno + ics->offset;
> + if (!ics->islsi[srcno]) {
> + if (!val) {
> + return;
> + }
> + args.level = KVM_INTERRUPT_SET;
> + } else {
> + args.level = val ? KVM_INTERRUPT_SET_LEVEL : KVM_INTERRUPT_UNSET;
> + }
> + rc = kvm_vm_ioctl(kvm_state, KVM_IRQ_LINE, &args);
> + if (rc < 0) {
> + perror("kvm_irq_line");
> + }
> +}
> +
> +int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> +{
> + CPUState *cs;
> + struct icp_server_state *ss;
> + struct icp_state_kvm *icpkvm = (struct icp_state_kvm *)
> object_dynamic_cast(
> + OBJECT(icp), TYPE_XICS_KVM);
> +
> + if (!icpkvm) {
> + return -1;
> + }
> +
> + cs = CPU(cpu);
> + ss = &icp->ss[cs->cpu_index];
> +
> + assert(cs->cpu_index < icp->nr_servers);
> + if (icpkvm->kernel_xics_fd == -1) {
> + abort();
> + }
> +
> + if (icpkvm->kernel_xics_fd != -1) {
> + int ret;
> + struct kvm_enable_cap xics_enable_cap = {
> + .cap = KVM_CAP_IRQ_XICS,
> + .flags = 0,
> + .args = {icpkvm->kernel_xics_fd, cs->cpu_index, 0, 0},
> + };
> +
> + ss->cs = cs;
> +
> + ret = kvm_vcpu_ioctl(ss->cs, KVM_ENABLE_CAP, &xics_enable_cap);
> + if (ret < 0) {
> + fprintf(stderr, "Unable to connect CPU%d to kernel XICS: %s\n",
> + cs->cpu_index, strerror(errno));
> + exit(1);
> + }
> + }
> + xics_common_cpu_setup(icp, cpu);
> +
> + vmstate_icpkvm_server.fields = vmstate_icp_server.fields;
> + vmstate_register(NULL, cs->cpu_index, &vmstate_icpkvm_server, ss);
> +
> + return 0;
> +}
> +
> +static void rtas_dummy(PowerPCCPU *cpu, sPAPREnvironment *spapr,
> + uint32_t token,
> + uint32_t nargs, target_ulong args,
> + uint32_t nret, target_ulong rets)
> +{
> + fprintf(stderr, "pseries: %s() should never be called for in-kernel
> XICS\n", __func__);
> +}
> +
> +static void xics_kvm_realize(DeviceState *dev, Error **errp)
> +{
> + struct icp_state_kvm *icpkvm = XICS_KVM(dev);
> + QemuOptsList *list = qemu_find_opts("machine");
> + int rc;
> + struct kvm_create_device xics_create_device = {
> + .type = KVM_DEV_TYPE_XICS,
> + .flags = 0,
> + };
> +
> + if (!kvm_enabled()) {
> + error_setg(errp, "KVM must be enabled for in-kernel XICS");
> + goto fail;
> + }
> +
> + if (QTAILQ_EMPTY(&list->head) ||
> + !qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
> + "kernel_irqchip", true) ||
> + !kvm_check_extension(kvm_state, KVM_CAP_IRQ_XICS)) {
> + error_setg(errp, "KVM must be enabled for in-kernel XICS");
> + return;
> + }
> +
> + icpkvm->set_xive_token = spapr_rtas_register("ibm,set-xive", rtas_dummy);
> + icpkvm->get_xive_token = spapr_rtas_register("ibm,get-xive", rtas_dummy);
> + icpkvm->int_off_token = spapr_rtas_register("ibm,int-off", rtas_dummy);
> + icpkvm->int_on_token = spapr_rtas_register("ibm,int-on", rtas_dummy);
> +
> + rc = kvmppc_define_rtas_token(icpkvm->set_xive_token, "ibm,set-xive");
> + if (rc < 0) {
> + error_setg(errp, "kvmppc_define_rtas_token: ibm,set-xive");
> + goto fail;
> + }
> +
> + rc = kvmppc_define_rtas_token(icpkvm->get_xive_token, "ibm,get-xive");
> + if (rc < 0) {
> + error_setg(errp, "kvmppc_define_rtas_token: ibm,get-xive");
> + goto fail;
> + }
> +
> + rc = kvmppc_define_rtas_token(icpkvm->int_on_token, "ibm,int-on");
> + if (rc < 0) {
> + error_setg(errp, "kvmppc_define_rtas_token: ibm,int-on");
> + goto fail;
> + }
> +
> + rc = kvmppc_define_rtas_token(icpkvm->int_off_token, "ibm,int-off");
> + if (rc < 0) {
> + error_setg(errp, "kvmppc_define_rtas_token: ibm,int-off");
> + goto fail;
> + }
> +
> + /* Create the kernel ICP */
> + rc = kvm_vm_ioctl(kvm_state, KVM_CREATE_DEVICE, &xics_create_device);
> + if (rc < 0) {
> + error_setg_errno(errp, -rc, "Error on KVM_CREATE_DEVICE for XICS");
> + goto fail;
> + }
> +
> + icpkvm->kernel_xics_fd = xics_create_device.fd;
> +
> + xics_common_init(&icpkvm->parent, ics_set_irq_kvm);
> +
> + /* We use each the ICS's offset into the global irq number space
> + * as an instance id. This means we can extend to multiple ICS
> + * instances without needing to change the savevm format */
> + vmstate_icskvm.fields = vmstate_ics.fields;
> + vmstate_register(NULL, icpkvm->parent.ics->offset, &vmstate_icskvm,
> + icpkvm->parent.ics);
> +
> + return;
> +
> +fail:
> + kvmppc_define_rtas_token(0, "ibm,set-xive");
> + kvmppc_define_rtas_token(0, "ibm,get-xive");
> + kvmppc_define_rtas_token(0, "ibm,int-on");
> + kvmppc_define_rtas_token(0, "ibm,int-off");
> + return;
> +}
> +
> +static void xics_kvm_reset(DeviceState *d)
> +{
> + struct icp_state_kvm *icpkvm = XICS_KVM(d);
> + struct icp_state *icp = &icpkvm->parent;
> + int i;
> +
> + xics_common_reset(icp);
> +
> + for (i = 0; i < icp->nr_servers; i++) {
> + if (icp->ss[i].cs) {
> + icp_set_kvm_state(&icp->ss[i]);
> + }
> + }
> +
> + ics_set_kvm_state(icp->ics);
> +}
> +
> +static void xics_kvm_class_init(ObjectClass *oc, void *data)
> +{
> + DeviceClass *dc = DEVICE_CLASS(oc);
> +
> + dc->realize = xics_kvm_realize;
> + dc->reset = xics_kvm_reset;
> +}
> +
> +static const TypeInfo xics_kvm_info = {
> + .name = TYPE_XICS_KVM,
> + .parent = TYPE_XICS,
> + .instance_size = sizeof(struct icp_state_kvm),
> + .class_init = xics_kvm_class_init,
> +};
> +
> +static void xics_kvm_register_types(void)
> +{
> + type_register_static(&xics_kvm_info);
> +}
> +
> +type_init(xics_kvm_register_types)
> diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c
> index f989a22..211f434 100644
> --- a/hw/ppc/spapr.c
> +++ b/hw/ppc/spapr.c
> @@ -1001,7 +1001,31 @@ static struct icp_state *xics_system_init(int
> nr_servers, int nr_irqs)
> {
> struct icp_state *icp = NULL;
>
> - icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
> + if (kvm_enabled()) {
> + bool irqchip_allowed = true, irqchip_required = false;
> + QemuOptsList *list = qemu_find_opts("machine");
> +
> + if (!QTAILQ_EMPTY(&list->head)) {
> + irqchip_allowed = qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
> + "kernel_irqchip", true);
> + irqchip_required = qemu_opt_get_bool(QTAILQ_FIRST(&list->head),
> + "kernel_irqchip", false);
> + }
> +
> + if (irqchip_allowed) {
> + icp = try_create_xics(TYPE_XICS_KVM, nr_servers, nr_irqs);
> + }
> +
> + if (irqchip_required && !icp) {
> + perror("iFailed to create in-kernel XICS\n");
> + abort();
> + }
> + }
> +
> + if (!icp) {
> + icp = try_create_xics(TYPE_XICS, nr_servers, nr_irqs);
> + }
> +
> if (!icp) {
> perror("Failed to create XICS\n");
> abort();
> @@ -1102,8 +1126,6 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> }
> env = &cpu->env;
>
> - xics_cpu_setup(spapr->icp, cpu);
> -
> /* Set time-base frequency to 512 MHz */
> cpu_ppc_tb_init(env, TIMEBASE_FREQ);
>
> @@ -1117,6 +1139,10 @@ static void ppc_spapr_init(QEMUMachineInitArgs *args)
> kvmppc_set_papr(cpu);
> }
>
> + if (xics_kvm_cpu_setup(spapr->icp, cpu)) {
> + xics_cpu_setup(spapr->icp, cpu);
> + }
> +
> qemu_register_reset(spapr_cpu_reset, cpu);
> }
>
> diff --git a/include/hw/ppc/xics.h b/include/hw/ppc/xics.h
> index 3f72806..e474c01 100644
> --- a/include/hw/ppc/xics.h
> +++ b/include/hw/ppc/xics.h
> @@ -32,6 +32,9 @@
> #define TYPE_XICS "xics"
> #define XICS(obj) OBJECT_CHECK(struct icp_state, (obj), TYPE_XICS)
>
> +#define TYPE_XICS_KVM "xics-kvm"
> +#define XICS_KVM(obj) OBJECT_CHECK(struct icp_state_kvm, (obj),
> TYPE_XICS_KVM)
> +
> #define XICS_IPI 0x2
> #define XICS_BUID 0x1
> #define XICS_IRQ_BASE (XICS_BUID << 12)
> @@ -53,6 +56,7 @@ struct icp_state {
> };
>
> struct icp_server_state {
> + CPUState *cs;
> uint32_t xirr;
> uint8_t pending_priority;
> uint8_t mfrr;
> @@ -88,6 +92,15 @@ void xics_common_reset(struct icp_state *icp);
>
> void xics_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
>
> +#ifdef CONFIG_KVM
> +int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu);
> +#else
> +static inline int xics_kvm_cpu_setup(struct icp_state *icp, PowerPCCPU *cpu)
> +{
> + return -1;
> +}
> +#endif
> +
> extern const VMStateDescription vmstate_icp_server;
> extern const VMStateDescription vmstate_ics;
>
> --
> 1.7.10.4
- Re: [Qemu-ppc] [Qemu-devel] [PATCH 14/17] pseries: Support for in-kernel XICS interrupt controller,
Anthony Liguori <=