From: Richard Henderson
Subject: [PATCH v2 31/54] accel/tcg: Always use IntervalTree for code lookups
Date: Thu, 14 Nov 2024 08:01:07 -0800

Because translation is special, we don't need the speed
of the direct-mapped softmmu tlb.  We cache the lookup in
DisasContextBase within the translator loop anyway.

Drop the addr_code comparator from CPUTLBEntry.
Go directly to the IntervalTree for MMU_INST_FETCH.
Derive exec flags from read flags.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
 include/exec/cpu-all.h    |  3 ++
 include/exec/tlb-common.h |  5 ++-
 accel/tcg/cputlb.c        | 76 ++++++++++++++++++++++++---------------
 3 files changed, 52 insertions(+), 32 deletions(-)
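
For readers following along, here is a rough, self-contained sketch of the
lookup shape this patch moves to; it is not QEMU code.  A plain linked list
stands in for the real IntervalTree of mapped pages, and every name here
(ToyPageNode, toy_lookup_code, the TOY_* constants) is made up for
illustration.  The point is simply that an instruction fetch skips the
direct-mapped fast TLB, consults the page-range structure, requires
PAGE_EXEC, and derives its flags by masking the read flags.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

#define TOY_PAGE_BITS       12
#define TOY_PAGE_MASK       (~((uint64_t)(1 << TOY_PAGE_BITS) - 1))

/* Illustrative flag bits only; the real values live in cpu-all.h. */
#define TOY_TLB_MMIO        (1 << 0)
#define TOY_TLB_WATCHPOINT  (1 << 1)
#define TOY_EXEC_FLAGS_MASK TOY_TLB_MMIO    /* filter read flags to exec flags */

#define TOY_PAGE_EXEC       0x4

/* Stand-in for one IntervalTree node: one guest page mapping. */
typedef struct ToyPageNode {
    struct ToyPageNode *next;   /* a list, where QEMU uses an interval tree */
    uint64_t page;              /* guest virtual page address */
    uint64_t read_flags;        /* flags attached to the read comparator */
    int prot;                   /* PAGE_READ/WRITE/EXEC-style bits */
    uintptr_t addend;           /* host minus guest offset for RAM pages */
} ToyPageNode;

/* Code lookup: no fast-TLB probe and no addr_code comparator.  Walk the
 * page structure, require exec permission, and mask the read flags. */
static bool toy_lookup_code(const ToyPageNode *head, uint64_t addr,
                            uint64_t *flags, uintptr_t *haddr)
{
    for (const ToyPageNode *n = head; n; n = n->next) {
        if (n->page == (addr & TOY_PAGE_MASK) && (n->prot & TOY_PAGE_EXEC)) {
            *flags = n->read_flags & TOY_EXEC_FLAGS_MASK;
            *haddr = (uintptr_t)addr + n->addend;
            return true;
        }
    }
    return false;   /* the real code would fall through to tlb_fill_align */
}

int main(void)
{
    static uint8_t fake_ram[2 << TOY_PAGE_BITS];
    ToyPageNode node = {
        .next = NULL,
        .page = 0x40000000,
        .read_flags = TOY_TLB_WATCHPOINT,   /* irrelevant to execution... */
        .prot = TOY_PAGE_EXEC,
        .addend = (uintptr_t)fake_ram - 0x40000000,
    };
    uint64_t flags;
    uintptr_t haddr;

    assert(toy_lookup_code(&node, 0x40000123, &flags, &haddr));
    assert(flags == 0);                     /* ...so the mask filters it out */
    assert(haddr == (uintptr_t)&fake_ram[0x123]);
    return 0;
}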

diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 45e6676938..ad160c328a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -339,6 +339,9 @@ static inline int cpu_mmu_index(CPUState *cs, bool ifetch)
     (TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
     | TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
 
+/* Filter read flags to exec flags. */
+#define TLB_EXEC_FLAGS_MASK  (TLB_MMIO)
+
 /*
  * Flags stored in CPUTLBEntryFull.slow_flags[x].
  * TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x].
diff --git a/include/exec/tlb-common.h b/include/exec/tlb-common.h
index 300f9fae67..feaa471299 100644
--- a/include/exec/tlb-common.h
+++ b/include/exec/tlb-common.h
@@ -26,7 +26,6 @@ typedef union CPUTLBEntry {
     struct {
         uint64_t addr_read;
         uint64_t addr_write;
-        uint64_t addr_code;
         /*
          * Addend to virtual address to get host address.  IO accesses
          * use the corresponding iotlb value.
@@ -35,7 +34,7 @@ typedef union CPUTLBEntry {
     };
     /*
      * Padding to get a power of two size, as well as index
-     * access to addr_{read,write,code}.
+     * access to addr_{read,write}.
      */
     uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)];
 } CPUTLBEntry;
@@ -92,7 +91,7 @@ struct CPUTLBEntryFull {
      * Additional tlb flags for use by the slow path. If non-zero,
      * the corresponding CPUTLBEntry comparator must have TLB_FORCE_SLOW.
      */
-    uint8_t slow_flags[MMU_ACCESS_COUNT];
+    uint8_t slow_flags[2];
 
     /*
      * Allow target-specific additions to this structure.
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 981098a6f2..be2ea1bc70 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -114,8 +114,9 @@ static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
                       MMU_DATA_LOAD * sizeof(uint64_t));
     QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
                       MMU_DATA_STORE * sizeof(uint64_t));
-    QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
-                      MMU_INST_FETCH * sizeof(uint64_t));
+
+    tcg_debug_assert(access_type == MMU_DATA_LOAD ||
+                     access_type == MMU_DATA_STORE);
 
 #if TARGET_LONG_BITS == 32
     /* Use qatomic_read, in case of addr_write; only care about low bits. */
@@ -480,8 +481,7 @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
     mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
 
     return (page == (tlb_entry->addr_read & mask) ||
-            page == (tlb_addr_write(tlb_entry) & mask) ||
-            page == (tlb_entry->addr_code & mask));
+            page == (tlb_addr_write(tlb_entry) & mask));
 }
 
 /* Called with tlb_c.lock held */
@@ -1184,9 +1184,6 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
     /* Now calculate the new entry */
     node->copy.addend = addend - addr_page;
 
-    tlb_set_compare(full, &node->copy, addr_page, read_flags,
-                    MMU_INST_FETCH, prot & PAGE_EXEC);
-
     if (wp_flags & BP_MEM_READ) {
         read_flags |= TLB_WATCHPOINT;
     }
@@ -1308,22 +1305,30 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
     /* Primary lookup in the fast tlb. */
     entry = tlbfast_entry(fast, addr);
     full = &desc->fulltlb[tlbfast_index(fast, addr)];
-    cmp = tlb_read_idx(entry, access_type);
-    if (tlb_hit(cmp, addr)) {
-        goto found;
+    if (access_type != MMU_INST_FETCH) {
+        cmp = tlb_read_idx(entry, access_type);
+        if (tlb_hit(cmp, addr)) {
+            goto found_data;
+        }
     }
 
     /* Secondary lookup in the IntervalTree. */
     node = tlbtree_lookup_addr(desc, addr);
     if (node) {
-        cmp = tlb_read_idx(&node->copy, access_type);
-        if (tlb_hit(cmp, addr)) {
-            /* Install the cached entry. */
-            qemu_spin_lock(&cpu->neg.tlb.c.lock);
-            copy_tlb_helper_locked(entry, &node->copy);
-            qemu_spin_unlock(&cpu->neg.tlb.c.lock);
-            *full = node->full;
-            goto found;
+        if (access_type == MMU_INST_FETCH) {
+            if (node->full.prot & PAGE_EXEC) {
+                goto found_code;
+            }
+        } else {
+            cmp = tlb_read_idx(&node->copy, access_type);
+            if (tlb_hit(cmp, addr)) {
+                /* Install the cached entry. */
+                qemu_spin_lock(&cpu->neg.tlb.c.lock);
+                copy_tlb_helper_locked(entry, &node->copy);
+                qemu_spin_unlock(&cpu->neg.tlb.c.lock);
+                *full = node->full;
+                goto found_data;
+            }
         }
     }
 
@@ -1333,9 +1338,14 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
         tcg_debug_assert(probe);
         return false;
     }
-
     o->did_tlb_fill = true;
 
+    if (access_type == MMU_INST_FETCH) {
+        node = tlbtree_lookup_addr(desc, addr);
+        tcg_debug_assert(node);
+        goto found_code;
+    }
+
     entry = tlbfast_entry(fast, addr);
     full = &desc->fulltlb[tlbfast_index(fast, addr)];
     cmp = tlb_read_idx(entry, access_type);
@@ -1345,14 +1355,29 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
      * called tlb_fill_align, so we know that this entry *is* valid.
      */
     flags &= ~TLB_INVALID_MASK;
+    goto found_data;
+
+ found_data:
+    flags &= cmp;
+    flags |= full->slow_flags[access_type];
+    o->flags = flags;
+    o->full = *full;
+    o->haddr = (void *)((uintptr_t)addr + entry->addend);
     goto done;
 
- found:
-    /* Alignment has not been checked by tlb_fill_align. */
-    {
+ found_code:
+    o->flags = node->copy.addr_read & TLB_EXEC_FLAGS_MASK;
+    o->full = node->full;
+    o->haddr = (void *)((uintptr_t)addr + node->copy.addend);
+    goto done;
+
+ done:
+    if (!o->did_tlb_fill) {
         int a_bits = memop_alignment_bits(memop);
 
         /*
+         * Alignment has not been checked by tlb_fill_align.
+         *
          * The TLB_CHECK_ALIGNED check differs from the normal alignment
          * check, in that this is based on the atomicity of the operation.
          * The intended use case is the ARM memory type field of each PTE,
@@ -1366,13 +1391,6 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
             cpu_unaligned_access(cpu, addr, access_type, i->mmu_idx, i->ra);
         }
     }
-
- done:
-    flags &= cmp;
-    flags |= full->slow_flags[access_type];
-    o->flags = flags;
-    o->full = *full;
-    o->haddr = (void *)((uintptr_t)addr + entry->addend);
     return true;
 }
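
An aside on the tlb_read_idx hunk above: the named comparators and addr_idx[]
are two views of the same union, so an access type can index the comparator
directly; the QEMU_BUILD_BUG_ON lines pin the field offsets to the access-type
values, and with addr_code gone MMU_INST_FETCH no longer has a slot, hence the
new runtime assert.  The toy model below is only an illustration under assumed
names and layout, not QEMU's actual definitions.

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Values as implied by the offset checks above; MMU_INST_FETCH has no slot. */
enum { MMU_DATA_LOAD = 0, MMU_DATA_STORE = 1 };

typedef union ToyTLBEntry {
    struct {
        uint64_t addr_read;
        uint64_t addr_write;
        uintptr_t addend;
    };
    /* Indexed view of the comparators above. */
    uint64_t addr_idx[4];
} ToyTLBEntry;

static uint64_t toy_read_idx(const ToyTLBEntry *entry, int access_type)
{
    /* Mirrors the QEMU_BUILD_BUG_ON checks: the indexed view is only valid
     * if the field offsets line up with the access-type values. */
    _Static_assert(offsetof(ToyTLBEntry, addr_read) ==
                   MMU_DATA_LOAD * sizeof(uint64_t), "addr_read slot");
    _Static_assert(offsetof(ToyTLBEntry, addr_write) ==
                   MMU_DATA_STORE * sizeof(uint64_t), "addr_write slot");
    /* Mirrors the new tcg_debug_assert: instruction fetches never come here. */
    assert(access_type == MMU_DATA_LOAD || access_type == MMU_DATA_STORE);
    return entry->addr_idx[access_type];
}

int main(void)
{
    ToyTLBEntry e = { .addr_read = 0x1000, .addr_write = 0x2000 };
    assert(toy_read_idx(&e, MMU_DATA_LOAD) == 0x1000);
    assert(toy_read_idx(&e, MMU_DATA_STORE) == 0x2000);
    return 0;
}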
 
-- 
2.43.0



