Re: [RFC PATCH v2 05/13] hw/arm/smmu-common: Support nested translation


From: Eric Auger
Subject: Re: [RFC PATCH v2 05/13] hw/arm/smmu-common: Support nested translation
Date: Thu, 18 Apr 2024 15:54:01 +0200
User-agent: Mozilla Thunderbird

Hi Mostafa,

On 4/8/24 16:08, Mostafa Saleh wrote:
> When nested translation is requested, do the following:
>
> - Translate stage-1 IPA using stage-2 to a physical address.
> - Translate stage-1 PTW walks using stage-2.
> - Combine both to create a single TLB entry, for that we choose
>   the smallest entry to cache, which means that if the smallest
>   entry comes from stage-2, and stage-2 use different granule,
>   TLB lookup for stage-1 (in nested config) will always miss.
>   Lookup logic is modified for nesting to lookup using stage-2
>   granule if stage-1 granule missed and they are different.
>
> Also, add more visibility in trace points, to make it easier to debug.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
>  hw/arm/smmu-common.c         | 153 ++++++++++++++++++++++++++++-------
>  hw/arm/trace-events          |   6 +-
>  include/hw/arm/smmu-common.h |   3 +-
>  3 files changed, 131 insertions(+), 31 deletions(-)
>
> diff --git a/hw/arm/smmu-common.c b/hw/arm/smmu-common.c
> index 771b9c79a3..2cf27b490b 100644
> --- a/hw/arm/smmu-common.c
> +++ b/hw/arm/smmu-common.c
> @@ -66,8 +66,10 @@ SMMUIOTLBKey smmu_get_iotlb_key(int asid, int vmid, uint64_t iova,
>      return key;
>  }
>  
> -SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> -                                SMMUTransTableInfo *tt, hwaddr iova)
> +static SMMUTLBEntry *smmu_iotlb_lookup_all_levels(SMMUState *bs,
> +                                                  SMMUTransCfg *cfg,
> +                                                  SMMUTransTableInfo *tt,
> +                                                  hwaddr iova)
this helper can be introduced in a separate patch to ease the code review
>  {
>      uint8_t tg = (tt->granule_sz - 10) / 2;
>      uint8_t inputsize = 64 - tt->tsz;
> @@ -88,10 +90,29 @@ SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
>          }
>          level++;
>      }
> +    return entry;
> +}
> +
> +SMMUTLBEntry *smmu_iotlb_lookup(SMMUState *bs, SMMUTransCfg *cfg,
> +                                SMMUTransTableInfo *tt, hwaddr iova)
> +{
> +    SMMUTLBEntry *entry = NULL;
> +
> +    entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
> +    /*
> +     * For nested translation also use the s2 granule, as the TLB will insert
> +     * the smallest of both, so the entry can be cached with the s2 granule.
> +     */
> +    if (!entry && (cfg->stage == SMMU_NESTED) &&
> +        (cfg->s2cfg.granule_sz != tt->granule_sz)) {
> +        tt->granule_sz = cfg->s2cfg.granule_sz;
> +        entry = smmu_iotlb_lookup_all_levels(bs, cfg, tt, iova);
This is also the kind of change that can be introduced and reviewed
separately, without taking any risk as long as NESTED is not supported. In that
new patch you could also document the TLB strategy.
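For instance, that separate patch could carry a comment along these lines
above smmu_iotlb_lookup() (untested wording, only a suggestion to be checked
against the spec):

    /*
     * Nested (combined) entries are inserted with the smallest of the
     * stage-1 and stage-2 address masks, so they may end up cached with
     * the stage-2 granule. A stage-1 lookup that misses with its own
     * granule therefore retries with the stage-2 granule when the two
     * granules differ, so that such combined entries can still be hit.
     */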
> +    }
>  
>      if (entry) {
>          cfg->iotlb_hits++;
>          trace_smmu_iotlb_lookup_hit(cfg->asid, cfg->s2cfg.vmid, iova,
> +                                    entry->entry.addr_mask,
This can be moved to a separate fix; same for the trace point changes.
>                                      cfg->iotlb_hits, cfg->iotlb_misses,
>                                      100 * cfg->iotlb_hits /
>                                      (cfg->iotlb_hits + cfg->iotlb_misses));
> @@ -117,7 +138,7 @@ void smmu_iotlb_insert(SMMUState *bs, SMMUTransCfg *cfg, SMMUTLBEntry *new)
>      *key = smmu_get_iotlb_key(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
>                                tg, new->level);
>      trace_smmu_iotlb_insert(cfg->asid, cfg->s2cfg.vmid, new->entry.iova,
> -                            tg, new->level);
> +                            tg, new->level, new->entry.translated_addr);
>      g_hash_table_insert(bs->iotlb, key, new);
>  }
>  
> @@ -286,6 +307,27 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
>      return NULL;
>  }
>  
> +/* Return the correct table address based on configuration. */
This rather does the S2 translation of a PTE table address, doesn't it? The
comment above should say so.
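For instance (wording only a suggestion):

    /* With nesting, translate an S1 PTE table address through stage 2. */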
> +static inline int translate_table_s1(dma_addr_t *table_addr, SMMUTransCfg *cfg,
> +                                     SMMUPTWEventInfo *info, SMMUState *bs)
> +{
> +    dma_addr_t addr = *table_addr;
> +    SMMUTLBEntry *cached_entry;
> +
> +    if (cfg->stage != SMMU_NESTED) {
> +        return 0;
> +    }
> +
> +    CALL_FUNC_CFG_S2(cfg, cached_entry, smmu_translate,
> +                     bs, cfg, addr, IOMMU_RO, info);
> +
> +    if (cached_entry) {
> +        *table_addr = CACHED_ENTRY_TO_ADDR(cached_entry, addr);
> +        return 0;
> +    }
> +    return -EINVAL;
> +}
> +
>  /**
>   * smmu_ptw_64_s1 - VMSAv8-64 Walk of the page tables for a given IOVA
>   * @cfg: translation config
> @@ -301,7 +343,8 @@ SMMUTransTableInfo *select_tt(SMMUTransCfg *cfg, dma_addr_t iova)
>   */
>  static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>                            dma_addr_t iova, IOMMUAccessFlags perm,
> -                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> +                          SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info,
> +                          SMMUState *bs)
>  {
>      dma_addr_t baseaddr, indexmask;
>      SMMUStage stage = cfg->stage;
> @@ -349,6 +392,10 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>                  goto error;
>              }
>              baseaddr = get_table_pte_address(pte, granule_sz);
> +            /* In case of failure, retain stage-2 fault. */
link to the doc?
> +            if (translate_table_s1(&baseaddr, cfg, info, bs)) {
Let's avoid that call in the S1-only case.

What about the error handling? I am not sure we are covering all the
cases listed in 7.3.12 F_WALK_EABT, for instance.
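For the call site, I mean something like this (untested sketch, reusing this
patch's names):

    if (cfg->stage == SMMU_NESTED) {
        /* On failure, keep the stage-2 fault info filled in by the nested walk. */
        if (translate_table_s1(&baseaddr, cfg, info, bs)) {
            goto error_no_stage;
        }
    }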
> +                goto error_no_stage;
> +            }
>              level++;
>              continue;
>          } else if (is_page_pte(pte, level)) {
> @@ -384,7 +431,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>          tlbe->entry.translated_addr = gpa;
>          tlbe->entry.iova = iova & ~mask;
>          tlbe->entry.addr_mask = mask;
> -        tlbe->entry.perm = PTE_AP_TO_PERM(ap);
> +        tlbe->parent_perm = tlbe->entry.perm = PTE_AP_TO_PERM(ap);
>          tlbe->level = level;
>          tlbe->granule = granule_sz;
>          return 0;
> @@ -393,6 +440,7 @@ static int smmu_ptw_64_s1(SMMUTransCfg *cfg,
>  
>  error:
>      info->stage = SMMU_STAGE_1;
> +error_no_stage:
>      tlbe->entry.perm = IOMMU_NONE;
>      return -EINVAL;
>  }
> @@ -505,7 +553,7 @@ static int smmu_ptw_64_s2(SMMUTransCfg *cfg,
>          tlbe->entry.translated_addr = gpa;
>          tlbe->entry.iova = ipa & ~mask;
>          tlbe->entry.addr_mask = mask;
> -        tlbe->entry.perm = s2ap;
> +        tlbe->parent_perm = tlbe->entry.perm = s2ap;
>          tlbe->level = level;
>          tlbe->granule = granule_sz;
>          return 0;
> @@ -518,6 +566,28 @@ error:
>      return -EINVAL;
>  }
>  
> +/* Combine 2 TLB enteries and return in tlbe. */
Typo: entries.
Also I would suggest "combine".
> +static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
> +                        dma_addr_t iova, SMMUTransCfg *cfg)
> +{
> +        if (cfg->stage == SMMU_NESTED) {
> +            tlbe->entry.addr_mask = MIN(tlbe->entry.addr_mask,
> +                                        tlbe_s2->entry.addr_mask);
> +            tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
> +                                          tlbe->entry.translated_addr);
> +
> +            tlbe->granule = MIN(tlbe->granule, tlbe_s2->granule);
could you add a link to the spec so we can clearly check what this code
is written against?
> +            tlbe->level = MAX(tlbe->level, tlbe_s2->level);
> +            tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
> +            /* parent_perm has s2 perm while perm has s1 perm. */
> +            tlbe->parent_perm = tlbe_s2->entry.perm;
> +            return;
> +        }
> +
> +        /* That was not nested, use the s2. */
should be removed and tested ;-)
> +        memcpy(tlbe, tlbe_s2, sizeof(*tlbe));
You should avoid doing that memcpy when it is not necessary. I would advocate
for separate paths for S2-only and nested.
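i.e. keep combine_tlb() nested-only and have the S2-only path fill tlbe
directly, roughly (untested sketch, reusing this patch's names):

    /* Nested only: fold the S2 attributes into the S1 entry. */
    static void combine_tlb(SMMUTLBEntry *tlbe, SMMUTLBEntry *tlbe_s2,
                            dma_addr_t iova)
    {
        tlbe->entry.addr_mask = MIN(tlbe->entry.addr_mask,
                                    tlbe_s2->entry.addr_mask);
        tlbe->entry.translated_addr = CACHED_ENTRY_TO_ADDR(tlbe_s2,
                                          tlbe->entry.translated_addr);
        tlbe->granule = MIN(tlbe->granule, tlbe_s2->granule);
        tlbe->level = MAX(tlbe->level, tlbe_s2->level);
        tlbe->entry.iova = iova & ~tlbe->entry.addr_mask;
        /* parent_perm holds the S2 perm while perm keeps the S1 perm. */
        tlbe->parent_perm = tlbe_s2->entry.perm;
    }

with the S2-only case walking directly into tlbe in smmu_ptw(), so no copy is
needed.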
> +}
> +
>  /**
>   * smmu_ptw - Walk the page tables for an IOVA, according to @cfg
The argument description above needs to be updated as well.
>   *
> @@ -530,28 +600,59 @@ error:
>   * return 0 on success
>   */
>  int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> -             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info)
> +             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
>  {
> -    if (cfg->stage == SMMU_STAGE_1) {
> -        return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info);
> -    } else if (cfg->stage == SMMU_STAGE_2) {
> -        /*
> -         * If bypassing stage 1(or unimplemented), the input address is passed
> -         * directly to stage 2 as IPA. If the input address of a transaction
> -         * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
> -         * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
> -         */
> -        if (iova >= (1ULL << cfg->oas)) {
> -            info->type = SMMU_PTW_ERR_ADDR_SIZE;
> -            info->stage = SMMU_STAGE_1;
> -            tlbe->entry.perm = IOMMU_NONE;
> -            return -EINVAL;
> +    int ret = 0;
> +    SMMUTLBEntry tlbe_s2;
> +    dma_addr_t ipa = iova;
> +
> +    if (cfg->stage & SMMU_STAGE_1) {
> +        ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
> +        if (ret) {
> +            return ret;
>          }
> +        /* This is the IPA for next stage.*/
But you don't necessarily have S2 enabled, so this is not necessarily
an IPA.
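If you keep a single path, the comment could be hedged, e.g. (wording only a
suggestion):

    /* Output of stage 1; this is the IPA for stage 2 only when S2 is enabled. */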
> +        ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
> +    }
>  
> -        return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
> +    /*
> +     * The address output from the translation causes a stage 1 Address Size
> +     * fault if it exceeds the range of the effective IPA size for the given CD.
> +     * If bypassing stage 1(or unimplemented), the input address is passed
> +     * directly to stage 2 as IPA. If the input address of a transaction
> +     * exceeds the size of the IAS, a stage 1 Address Size fault occurs.
> +     * For AA64, IAS = OAS according to (IHI 0070.E.a) "3.4 Address sizes"
> +     */
> +    if (ipa >= (1ULL << cfg->oas)) {
In case we have S2 only, it would be clearer to use ias instead (even though
the comment above says they are the same).
> +        info->type = SMMU_PTW_ERR_ADDR_SIZE;
> +        info->stage = SMMU_STAGE_1;
What does the stage really mean here? That should be documented in the
struct, I think.
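For instance something along these lines in SMMUPTWEventInfo (only a wording
suggestion; spelling out the exact semantics is precisely what needs to be
decided):

    typedef struct SMMUPTWEventInfo {
        /*
         * Stage the fault/event is reported against. With nesting, a
         * stage-2 fault taken while fetching a stage-1 descriptor is
         * reported as a stage-2 event.
         */
        SMMUStage stage;
        SMMUPTWEventType type;
        dma_addr_t addr; /* fetched address that induced an abort, if any */
    } SMMUPTWEventInfo;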
> +        tlbe->entry.perm = IOMMU_NONE;
> +        return -EINVAL;
>      }
This check is now also applied in the S1-only case. If this is a fix, it
should be brought separately.
Also, the comment above refers to the IPA: does it also hold for S1 only, and
is the check identical in that case?
>  
> -    g_assert_not_reached();
> +    if (cfg->stage & SMMU_STAGE_2) {
> +        ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
> +        if (ret) {
> +            return ret;
> +        }
> +        combine_tlb(tlbe, &tlbe_s2, iova, cfg);
> +    }
I think I would prefer the S1, S2 and nested cases clearly separated, at the
price of some code duplication. I am afraid serializing everything makes the
code less maintainable.
Also, it is important that the S1 case is not altered performance-wise.
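Roughly what I have in mind, just to show the structure (untested sketch,
reusing this patch's names, keeping the existing checks in each branch, and
with combine_tlb() reduced to the nested-only version suggested above):

    int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
                 SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs)
    {
        SMMUTLBEntry tlbe_s2;
        dma_addr_t ipa;
        int ret;

        if (cfg->stage == SMMU_STAGE_1) {
            /* S1 only: unchanged, no extra work on this path */
            return smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
        }

        if (cfg->stage == SMMU_STAGE_2) {
            /* S2 only: the input address is the IPA */
            if (iova >= (1ULL << cfg->oas)) {
                info->type = SMMU_PTW_ERR_ADDR_SIZE;
                info->stage = SMMU_STAGE_1;
                tlbe->entry.perm = IOMMU_NONE;
                return -EINVAL;
            }
            return smmu_ptw_64_s2(cfg, iova, perm, tlbe, info);
        }

        /* SMMU_NESTED */
        ret = smmu_ptw_64_s1(cfg, iova, perm, tlbe, info, bs);
        if (ret) {
            return ret;
        }
        ipa = CACHED_ENTRY_TO_ADDR(tlbe, iova);
        if (ipa >= (1ULL << cfg->oas)) {
            info->type = SMMU_PTW_ERR_ADDR_SIZE;
            info->stage = SMMU_STAGE_1;
            tlbe->entry.perm = IOMMU_NONE;
            return -EINVAL;
        }
        ret = smmu_ptw_64_s2(cfg, ipa, perm, &tlbe_s2, info);
        if (ret) {
            return ret;
        }
        combine_tlb(tlbe, &tlbe_s2, iova);
        return 0;
    }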
> +
> +    return ret;
> +}
> +
> +static int validate_tlb_entry(SMMUTLBEntry *cached_entry, IOMMUAccessFlags flag,
> +                              SMMUPTWEventInfo *info)
> +{
> +        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm &
> +            cached_entry->parent_perm & IOMMU_WO)) {
> +            info->type = SMMU_PTW_ERR_PERMISSION;
> +            info->stage = !(cached_entry->entry.perm & IOMMU_WO) ?
> +                          SMMU_STAGE_1 :
> +                          SMMU_STAGE_2;
> +            return -EINVAL;
> +        }
> +        return 0;
>  }
>  
>  SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
> @@ -595,16 +696,14 @@ SMMUTLBEntry *smmu_translate(SMMUState *bs, SMMUTransCfg *cfg, dma_addr_t addr,
>  
>      cached_entry = smmu_iotlb_lookup(bs, cfg, &tt_combined, aligned_addr);
>      if (cached_entry) {
> -        if ((flag & IOMMU_WO) && !(cached_entry->entry.perm & IOMMU_WO)) {
> -            info->type = SMMU_PTW_ERR_PERMISSION;
> -            info->stage = cfg->stage;
> +        if (validate_tlb_entry(cached_entry, flag, info)) {
>              return NULL;
>          }
>          return cached_entry;
>      }
>  
>      cached_entry = g_new0(SMMUTLBEntry, 1);
> -    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info);
> +    status = smmu_ptw(cfg, aligned_addr, flag, cached_entry, info, bs);
>      if (status) {
>              g_free(cached_entry);
>              return NULL;
> diff --git a/hw/arm/trace-events b/hw/arm/trace-events
> index cc12924a84..5f23f0b963 100644
> --- a/hw/arm/trace-events
> +++ b/hw/arm/trace-events
> @@ -15,9 +15,9 @@ smmu_iotlb_inv_asid(uint16_t asid) "IOTLB invalidate asid=%d"
>  smmu_iotlb_inv_vmid(uint16_t vmid) "IOTLB invalidate vmid=%d"
>  smmu_iotlb_inv_iova(uint16_t asid, uint64_t addr) "IOTLB invalidate asid=%d addr=0x%"PRIx64
>  smmu_inv_notifiers_mr(const char *name) "iommu mr=%s"
> -smmu_iotlb_lookup_hit(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> -smmu_iotlb_lookup_miss(uint16_t asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> -smmu_iotlb_insert(uint16_t asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d"
> +smmu_iotlb_lookup_hit(int asid, uint16_t vmid, uint64_t addr, uint64_t mask, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache HIT asid=%d vmid=%d addr=0x%"PRIx64" mask=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> +smmu_iotlb_lookup_miss(int asid, uint16_t vmid, uint64_t addr, uint32_t hit, uint32_t miss, uint32_t p) "IOTLB cache MISS asid=%d vmid=%d addr=0x%"PRIx64" hit=%d miss=%d hit rate=%d"
> +smmu_iotlb_insert(int asid, uint16_t vmid, uint64_t addr, uint8_t tg, uint8_t level, uint64_t translate_addr) "IOTLB ++ asid=%d vmid=%d addr=0x%"PRIx64" tg=%d level=%d translate_addr=0x%"PRIx64
>  
>  # smmuv3.c
>  smmuv3_read_mmio(uint64_t addr, uint64_t val, unsigned size, uint32_t r) "addr: 0x%"PRIx64" val:0x%"PRIx64" size: 0x%x(%d)"
> diff --git a/include/hw/arm/smmu-common.h b/include/hw/arm/smmu-common.h
> index 2772175115..03ff0f02ba 100644
> --- a/include/hw/arm/smmu-common.h
> +++ b/include/hw/arm/smmu-common.h
> @@ -91,6 +91,7 @@ typedef struct SMMUTLBEntry {
>      IOMMUTLBEntry entry;
>      uint8_t level;
>      uint8_t granule;
> +    IOMMUAccessFlags parent_perm;
>  } SMMUTLBEntry;
>  
>  /* Stage-2 configuration. */
> @@ -198,7 +199,7 @@ static inline uint16_t smmu_get_sid(SMMUDevice *sdev)
>   * pair, according to @cfg translation config
>   */
>  int smmu_ptw(SMMUTransCfg *cfg, dma_addr_t iova, IOMMUAccessFlags perm,
> -             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info);
> +             SMMUTLBEntry *tlbe, SMMUPTWEventInfo *info, SMMUState *bs);
>  
>  
>  /*
To be honest this patch is quite complex to review. I would suggest you
try to split it.

Thanks

Eric



