qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RESEND RFC] hw/arm/smmuv3: add device properties to disable cached


From: Eric Auger
Subject: Re: [RESEND RFC] hw/arm/smmuv3: add device properties to disable cached iotlb
Date: Wed, 4 Aug 2021 18:26:15 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.10.1

Hi Chenxiang,

On 8/4/21 10:49 AM, chenxiang wrote:
> From: Xiang Chen <chenxiang66@hisilicon.com>
>
> Patch 6d9cd115b ("hw/arm/smmuv3: Enforce invalidation on a power of two
> range") splits invalidations into power-of-2 range invalidations.
> So for scenarios where the invalidation size is not a power-of-2 range,
> invalidation costs more time.
This power-of-2 split is not only necessary for internal TLB management
but also for IOMMU MR notifier calls (which use a mask), i.e., the IOTLB
unmap notifications used for both the vhost and vfio integrations.
So you can disable the internal IOTLB, but we can't simply remove the
power-of-2 split. See below.

The internal TLB could be disabled through a property, but I would rather
make it an "x-" experimental property for debug purposes. Until recently
this was indeed helpful for debugging bugs related to internal IOTLB
management (RIL support) ;-) I hope this period is over though ;-)
> Currently smmuv3_translate() is rarely used (I only see it used when
> binding MSIs), so I think maybe we can disable the cached IOTLB to
> improve the efficiency of invalidation. So add a device property
> disable_cached_iotlb to disable the cached IOTLB, and then we can send
> non-power-of-2 range invalidations directly.
> Using the tool dma_map_benchmark to test unmap latency, we can see that
> it improves unmap significantly when the invalidation size is not a
> power-of-2 range (such as g = 7/15/31/511):
>
> t = 1(thread = 1)
>                       before opt(us)   after opt(us)
> g=1(4K size)  0.2/7.6         0.2/7.5
> g=4(8K size)  0.4/7.9         0.4/7.9
> g=7(28K size) 0.6/10.2                0.6/8.2
> g=8(32K size) 0.6/8.3         0.6/8.3
> g=15(60K size)        1.1/12.1                1.1/9.1
> g=16(64K size)        1.1/9.2         1.1/9.1
> g=31(124K size)       2.0/14.8                2.0/10.7
> g=32(128K size)       2.1/14.8                2.1/10.7
> g=511(2044K size)     30.9/65.1               31.1/55.9
> g=512(2048K size) 0.3/32.1            0.3/32.1
> t = 10(thread = 10)
>                       before opt(us)   after opt(us)
> g=1(4K size)  0.2/39.9                0.2/39.1
> g=4(8K size)  0.5/42.6                0.5/42.4
> g=7(28K size) 0.6/66.4                0.6/45.3
> g=8(32K size) 0.7/45.8                0.7/46.1
> g=15(60K size)        1.1/80.5                1.1/49.6
> g=16(64K size)        1.1/49.8                1.1/50.2
> g=31(124K size)       2.0/98.3                2.1/58.0
> g=32(128K size)       2.1/57.7                2.1/58.2
> g=511(2044K size)     35.2/322.2              35.3/236.7
> g=512(2048K size) 0.8/238.2           0.9/240.3
>
> Note: i test it based on VSMMU enabled with the patchset
> ("vSMMUv3/pSMMUv3 2 stage VFIO integration").
>
> Signed-off-by: Xiang Chen <chenxiang66@hisilicon.com>
> ---
>  hw/arm/smmuv3.c         | 77 
> ++++++++++++++++++++++++++++++++-----------------
>  include/hw/arm/smmuv3.h |  1 +
>  2 files changed, 52 insertions(+), 26 deletions(-)
>
> diff --git a/hw/arm/smmuv3.c b/hw/arm/smmuv3.c
> index 01b60be..7ae668f 100644
> --- a/hw/arm/smmuv3.c
> +++ b/hw/arm/smmuv3.c
> @@ -19,6 +19,7 @@
>  #include "qemu/osdep.h"
>  #include "qemu/bitops.h"
>  #include "hw/irq.h"
> +#include "hw/qdev-properties.h"
>  #include "hw/sysbus.h"
>  #include "migration/vmstate.h"
>  #include "hw/qdev-core.h"
> @@ -682,19 +683,21 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> *mr, hwaddr addr,
>      page_mask = (1ULL << (tt->granule_sz)) - 1;
>      aligned_addr = addr & ~page_mask;
>  
> -    cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr);
> -    if (cached_entry) {
> -        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> -            status = SMMU_TRANS_ERROR;
> -            if (event.record_trans_faults) {
> -                event.type = SMMU_EVT_F_PERMISSION;
> -                event.u.f_permission.addr = addr;
> -                event.u.f_permission.rnw = flag & 0x1;
> +    if (s->disable_cached_iotlb) {
> +        cached_entry = smmu_iotlb_lookup(bs, cfg, tt, aligned_addr);
> +        if (cached_entry) {
> +            if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) 
> {
> +                status = SMMU_TRANS_ERROR;
> +                if (event.record_trans_faults) {
> +                    event.type = SMMU_EVT_F_PERMISSION;
> +                    event.u.f_permission.addr = addr;
> +                    event.u.f_permission.rnw = flag & 0x1;
> +                }
> +            } else {
> +                status = SMMU_TRANS_SUCCESS;
>              }
> -        } else {
> -            status = SMMU_TRANS_SUCCESS;
> +            goto epilogue;
>          }
> -        goto epilogue;
>      }
>  
>      cached_entry = g_new0(SMMUTLBEntry, 1);
> @@ -742,7 +745,9 @@ static IOMMUTLBEntry smmuv3_translate(IOMMUMemoryRegion 
> *mr, hwaddr addr,
>          }
>          status = SMMU_TRANS_ERROR;
>      } else {
> -        smmu_iotlb_insert(bs, cfg, cached_entry);
> +        if (s->disable_cached_iotlb) {
> +            smmu_iotlb_insert(bs, cfg, cached_entry);
> +        }
>          status = SMMU_TRANS_SUCCESS;
>      }
>  
> @@ -855,8 +860,9 @@ static void smmuv3_inv_notifiers_iova(SMMUState *s, int 
> asid, dma_addr_t iova,
>      }
>  }
>  
> -static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
> +static void smmuv3_s1_range_inval(SMMUv3State *s, Cmd *cmd)
>  {
> +    SMMUState *bs = ARM_SMMU(s);
>      dma_addr_t end, addr = CMD_ADDR(cmd);
>      uint8_t type = CMD_TYPE(cmd);
>      uint16_t vmid = CMD_VMID(cmd);
> @@ -876,7 +882,9 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd *cmd)
>      if (!tg) {
>          trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, 1, ttl, leaf);
>          smmuv3_inv_notifiers_iova(s, asid, addr, tg, 1);
> -        smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
> +        if (s->disable_cached_iotlb) {
> +            smmu_iotlb_inv_iova(s, asid, addr, tg, 1, ttl);
> +        }
>          return;
>      }
>  
> @@ -885,17 +893,23 @@ static void smmuv3_s1_range_inval(SMMUState *s, Cmd 
> *cmd)
>      num_pages = (num + 1) * BIT_ULL(scale);
>      granule = tg * 2 + 10;
>  
> -    /* Split invalidations into ^2 range invalidations */
> -    end = addr + (num_pages << granule) - 1;
> -
> -    while (addr != end + 1) {
> -        uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
> -
> -        num_pages = (mask + 1) >> granule;
> +    if (s->disable_cached_iotlb) {
>          trace_smmuv3_s1_range_inval(vmid, asid, addr, tg, num_pages, ttl, 
> leaf);
>          smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
> -        smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);

smmuv3_inv_notifiers_iova() also needs to be called with power-of-2
ranges, as it eventually calls memory_region_notify_iommu_one(), which
sets event.entry.addr_mask = num_pages * (1 << granule) - 1;

> -        addr += mask + 1;
> +    } else {
> +        /* Split invalidations into ^2 range invalidations */
> +        end = addr + (num_pages << granule) - 1;
> +
> +        while (addr != end + 1) {
> +            uint64_t mask = dma_aligned_pow2_mask(addr, end, 64);
> +
> +            num_pages = (mask + 1) >> granule;
> +            trace_smmuv3_s1_range_inval(vmid, asid, addr,
> +                                        tg, num_pages, ttl, leaf);
> +            smmuv3_inv_notifiers_iova(s, asid, addr, tg, num_pages);
> +            smmu_iotlb_inv_iova(s, asid, addr, tg, num_pages, ttl);
> +            addr += mask + 1;
> +        }
>      }
>  }
>  
> @@ -1028,18 +1042,22 @@ static int smmuv3_cmdq_consume(SMMUv3State *s)
>  
>              trace_smmuv3_cmdq_tlbi_nh_asid(asid);
>              smmu_inv_notifiers_all(&s->smmu_state);
> -            smmu_iotlb_inv_asid(bs, asid);
> +            if (s->disable_cached_iotlb) {
> +                smmu_iotlb_inv_asid(bs, asid);
> +            }
>              break;
>          }
>          case SMMU_CMD_TLBI_NH_ALL:
>          case SMMU_CMD_TLBI_NSNH_ALL:
>              trace_smmuv3_cmdq_tlbi_nh();
>              smmu_inv_notifiers_all(&s->smmu_state);
> -            smmu_iotlb_inv_all(bs);
> +            if (s->disable_cached_iotlb) {
> +                smmu_iotlb_inv_all(bs);
> +            }
>              break;
>          case SMMU_CMD_TLBI_NH_VAA:
>          case SMMU_CMD_TLBI_NH_VA:
> -            smmuv3_s1_range_inval(bs, &cmd);
> +            smmuv3_s1_range_inval(s, &cmd);
>              break;
>          case SMMU_CMD_TLBI_EL3_ALL:
>          case SMMU_CMD_TLBI_EL3_VA:
> @@ -1506,6 +1524,12 @@ static void smmuv3_instance_init(Object *obj)
>      /* Nothing much to do here as of now */
>  }
>  
> +static Property smmuv3_properties[] = {
> +    DEFINE_PROP_BOOL("disable_cached_iotlb", SMMUv3State,
> +                     disable_cached_iotlb, true),
> +    DEFINE_PROP_END_OF_LIST(),
> +};
> +
>  static void smmuv3_class_init(ObjectClass *klass, void *data)
>  {
>      DeviceClass *dc = DEVICE_CLASS(klass);
> @@ -1515,6 +1539,7 @@ static void smmuv3_class_init(ObjectClass *klass, void 
> *data)
>      device_class_set_parent_reset(dc, smmu_reset, &c->parent_reset);
>      c->parent_realize = dc->realize;
>      dc->realize = smmu_realize;
> +    device_class_set_props(dc, smmuv3_properties);
>  }
>  
>  static int smmuv3_notify_flag_changed(IOMMUMemoryRegion *iommu,
> diff --git a/include/hw/arm/smmuv3.h b/include/hw/arm/smmuv3.h
> index c641e60..c94ab7e 100644
> --- a/include/hw/arm/smmuv3.h
> +++ b/include/hw/arm/smmuv3.h
> @@ -62,6 +62,7 @@ struct SMMUv3State {
>  
>      qemu_irq     irq[4];
>      QemuMutex mutex;
> +    bool disable_cached_iotlb; /* Whether disable/enable cached iotlb */
>  };
>  
>  typedef enum {
Thanks

Eric




reply via email to

[Prev in Thread] Current Thread [Next in Thread]