[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [RESEND RFC] hw/arm/smmuv3: add device properties to disable cached
From: Eric Auger
Subject: Re: [RESEND RFC] hw/arm/smmuv3: add device properties to disable cached iotlb
Date: Wed, 4 Aug 2021 18:26:15 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.10.1
Hi Chenxiang,
On 8/4/21 10:49 AM, chenxiang wrote:
> From: Xiang Chen <chenxiang66@hisilicon.com>
>
> It splits invalidations into ^2 range invalidations in the patch
> 6d9cd115b(" hw/arm/smmuv3: Enforce invalidation on a power of two range").
> So for some scenarios such as the size of invalidation is not ^2 range
> invalidation, it costs more time to invalidate.
this ^2 split is not only necessary for internal TLB management but also
for the IOMMU MR notifier calls (which use a mask), i.e. the IOTLB unmap
notifications used for both the vhost and vfio integrations.
So you can disable the internal IOTLB but we can't simply remove the pow
of 2 split. See below.
The internal TLB could be disabled through a property, but I would rather
make it an "x-" experimental property for debug purposes. Until recently
this was indeed helpful for debugging issues related to internal IOTLB
management (RIL support) ;-) I hope this period is over though ;-)
> Currently smmuv3_translate is rarely used (i only see it is used when
> binding msi), so i think maybe we can disable cached iotlb to promote
> efficiency of invalidation. So add device property disable_cached_iotlb
> to disable cached iotlb, and then we can send non-^2 range invalidation
> directly.
> Use tool dma_map_benchmark to have a test on the latency of unmap,
> and we can see it promotes much on unmap when the size of invalidation
> is not ^2 range invalidation (such as g = 7/15/31/511):
>
> t = 1(thread = 1)
> before opt(us) after opt(us)
> g=1(4K size) 0.2/7.6 0.2/7.5
> g=4(8K size) 0.4/7.9 0.4/7.9
> g=7(28K size) 0.6/10.2 0.6/8.2
> g=8(32K size) 0.6/8.3 0.6/8.3
> g=15(60K size) 1.1/12.1 1.1/9.1
> g=16(64K size) 1.1/9.2 1.1/9.1
> g=31(124K size) 2.0/14.8 2.0/10.7
> g=32(128K size) 2.1/14.8 2.1/10.7
> g=511(2044K size) 30.9/65.1 31.1/55.9
> g=512(2048K size) 0.3/32.1 0.3/32.1
> t = 10(thread = 10)
> before opt(us) after opt(us)
> g=1(4K size) 0.2/39.9 0.2/39.1
> g=4(8K size) 0.5/42.6 0.5/42.4
> g=7(28K size) 0.6/66.4 0.6/45.3
> g=8(32K size) 0.7/45.8 0.7/46.1
> g=15(60K size) 1.1/80.5 1.1/49.6
> g=16(64K size) 1.1/49.8 1.1/50.2
> g=31(124K size) 2.0/98.3 2.1/58.0
> g=32(128K size) 2.1/57.7 2.1/58.2
> g=511(2044K size) 35.2/322.2 35.3/236.7
> g=512(2048K size) 0.8/238.2 0.9/240.3
>
> Note: i test it based on VSMMU enabled with the patchset
> ("vSMMUv3/pSMMUv3 2 stage VFIO integration").
>
> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> ---
> hw/arm/smmuv3.c | 77
> ++++++++++++++++++++++++++++++++-----------------
> include/hw/arm/smmuv3.h | 1 +
> 2 files changed, 52 insertions(+), 26 deletions(-)
>
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 01b60be..7ae668f 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -19,6 +19,7 @@
> #include "qemu/osdep.h"
> #include "qemu/bitops.h"
> #include "hw/irq.h"
> +#include "hw/qdev-properties.h"
> #include "hw/sysbus.h"
> #include "migration/vmstate.h"
> #include "hw/qdev-core.h"
> @@ -682,19 +683,21 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion
> *mr, hwaddr addr,
> page_mask = (1ULL << (tt->granule_sz)) - 1;
> aligned_addr = addr & ~page_mask;
>
> - cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr);
> - if (cached_entry) {
> - if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> - status = SMMU_TRANS_ERROR;
> - if (event.record_trans_faults) {
> - event.type = SMMU_EVT_F_PERMISSION;
> - event.u.f_permission.addr = addr;
> - event.u.f_permission.rnw = flag & 0x1;
> + if (s->disable_cached_iotlb) {
> + cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr);
> + if (cached_entry) {
> + if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO))
> {
> + status = SMMU_TRANS_ERROR;
> + if (event.record_trans_faults) {
> + event.type = SMMU_EVT_F_PERMISSION;
> + event.u.f_permission.addr = addr;
> + event.u.f_permission.rnw = flag & 0x1;
> + }
> + } else {
> + status = SMMU_TRANS_SUCCESS;
> }
> - } else {
> - status = SMMU_TRANS_SUCCESS;
> + goto epilogue;
> }
> - goto epilogue;
> }
>
> cached_entry = g_new0(SMMUTLBEntry, 1);
> @@ -742,7 +745,9 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion
> *mr, hwaddr addr,
> }
> status = SMMU_TRANS_ERROR;
> } else {
> - smmu_iotlb_insert(bs, cfg, cached_entry);
> + if (s->disable_cached_iotlb) {
> + smmu_iotlb_insert(bs, cfg, cached_entry);
> + }
> status = SMMU_TRANS_SUCCESS;
> }
>
> @@ -855,8 +860,9 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int
> asid, dma_addr_t iova,
> }
> }
>
> -static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
> +static void smmuv3_s1_range_inval(SMMUv3State *s, Cmd *cmd)
> {
> + SMMUState *bs = ARM_SMMU(s);
> dma_addr_t end, addr = CMD_ADDR(cmd);
> uint8_t type = CMD_TYPE(cmd);
> uint16_t vmid = CMD_VMID(cmd);
> @@ -876,7 +882,9 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
> if (!tg) {
> trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
> smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1);
> - smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
> + if (s->disable_cached_iotlb) {
> + smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
> + }
> return;
> }
>
> @@ -885,17 +893,23 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd
> *cmd)
> num_pages = (num + 1) * BIT_ULL(scale);
> granule = tg * 2 + 10;
>
> - /* Split invalidations into ^2 range invalidations */
> - end = addr + (num_pages << granule) - 1;
> -
> - while (addr != end + 1) {
> - uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
> -
> - num_pages = (mask + 1) >> granule;
> + if (s->disable_cached_iotlb) {
> trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl,
> leaf);
> smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
> - smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
smmuv3_inv_notifiers_iova() also needs to be called with power-of-2
ranges, as it eventually calls memory_region_notify_iommu_one(), which
sets event.entry.addr_mask = num_pages * (1 << granule) - 1;
> - addr += mask + 1;
> + } else {
> + /* Split invalidations into ^2 range invalidations */
> + end = addr + (num_pages << granule) - 1;
> +
> + while (addr != end + 1) {
> + uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
> +
> + num_pages = (mask + 1) >> granule;
> + trace_smmuv3_s1_range_inval(vmid, asid, addr,
> + tg, num_pages, ttl, leaf);
> + smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
> + smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
> + addr += mask + 1;
> + }
> }
> }
>
> @@ -1028,18 +1042,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>
> trace_smmuv3_cmdq_tlbi_nh_asid(asid);
> smmu_inv_notifiers_all(&s->smmu_state);
> - smmu_iotlb_inv_asid(bs, asid);
> + if (s->disable_cached_iotlb) {
> + smmu_iotlb_inv_asid(bs, asid);
> + }
> break;
> }
> case SMMU_CMD_TLBI_NH_ALL:
> case SMMU_CMD_TLBI_NSNH_ALL:
> trace_smmuv3_cmdq_tlbi_nh();
> smmu_inv_notifiers_all(&s->smmu_state);
> - smmu_iotlb_inv_all(bs);
> + if (s->disable_cached_iotlb) {
> + smmu_iotlb_inv_all(bs);
> + }
> break;
> case SMMU_CMD_TLBI_NH_VAA:
> case SMMU_CMD_TLBI_NH_VA:
> - smmuv3_s1_range_inval(bs, &cmd);
> + smmuv3_s1_range_inval(s, &cmd);
> break;
> case SMMU_CMD_TLBI_EL3_ALL:
> case SMMU_CMD_TLBI_EL3_VA:
> @@ -1506,6 +1524,12 @@ static void smmuv3_instance_init(Object *obj)
> /* Nothing much to do here as of now */
> }
>
> +static Property smmuv3_properties[] = {
> + DEFINE_PROP_BOOL("disable_cached_iotlb", SMMUv3State,
> + disable_cached_iotlb, true),
> + DEFINE_PROP_END_OF_LIST(),
> +};
> +
> static void smmuv3_class_init(ObjectClass *klass, void *data)
> {
> DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -1515,6 +1539,7 @@ static void smmuv3_class_init(ObjectClass *klass, void
> *data)
> device_class_set_parent_reset(dc, smmu_reset, &c->parent_reset);
> c->parent_realize = dc->realize;
> dc->realize = smmu_realize;
> + device_class_set_props(dc, smmuv3_properties);
> }
>
> static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index c641e60..c94ab7e 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -62,6 +62,7 @@ struct SMMUv3State {
>
> qemu_irq irq[4];
> QemuMutex mutex;
> + bool disable_cached_iotlb; /* Whether disable/enable cached iotlb */
> };
>
> typedef enum {
Thanks
Eric