From: Richard Henderson
Subject: [PATCH v2 31/54] accel/tcg: Always use IntervalTree for code lookups
Date: Thu, 14 Nov 2024 08:01:07 -0800
Because translation is special, we don't need the speed
of the direct-mapped softmmu tlb.  We cache the lookup in
DisasContextBase within the translator loop anyway.
Drop the addr_code comparator from CPUTLBEntry.
Go directly to the IntervalTree for MMU_INST_FETCH.
Derive exec flags from read flags.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-all.h | 3 ++
include/exec/tlb-common.h | 5 ++-
accel/tcg/cputlb.c | 76 ++++++++++++++++++++++++---------------
3 files changed, 52 insertions(+), 32 deletions(-)
diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h
index 45e6676938..ad160c328a 100644
--- a/include/exec/cpu-all.h
+++ b/include/exec/cpu-all.h
@@ -339,6 +339,9 @@ static inline int cpu_mmu_index(CPUState *cs, bool ifetch)
(TLB_INVALID_MASK | TLB_NOTDIRTY | TLB_MMIO \
| TLB_FORCE_SLOW | TLB_DISCARD_WRITE)
+/* Filter read flags to exec flags. */
+#define TLB_EXEC_FLAGS_MASK (TLB_MMIO)
+
/*
* Flags stored in CPUTLBEntryFull.slow_flags[x].
* TLB_FORCE_SLOW must be set in CPUTLBEntry.addr_idx[x].
diff --git a/include/exec/tlb-common.h b/include/exec/tlb-common.h
index 300f9fae67..feaa471299 100644
--- a/include/exec/tlb-common.h
+++ b/include/exec/tlb-common.h
@@ -26,7 +26,6 @@ typedef union CPUTLBEntry {
struct {
uint64_t addr_read;
uint64_t addr_write;
- uint64_t addr_code;
/*
* Addend to virtual address to get host address. IO accesses
* use the corresponding iotlb value.
@@ -35,7 +34,7 @@ typedef union CPUTLBEntry {
};
/*
* Padding to get a power of two size, as well as index
- * access to addr_{read,write,code}.
+ * access to addr_{read,write}.
*/
uint64_t addr_idx[(1 << CPU_TLB_ENTRY_BITS) / sizeof(uint64_t)];
} CPUTLBEntry;
@@ -92,7 +91,7 @@ struct CPUTLBEntryFull {
* Additional tlb flags for use by the slow path. If non-zero,
* the corresponding CPUTLBEntry comparator must have TLB_FORCE_SLOW.
*/
- uint8_t slow_flags[MMU_ACCESS_COUNT];
+ uint8_t slow_flags[2];
/*
* Allow target-specific additions to this structure.
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 981098a6f2..be2ea1bc70 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -114,8 +114,9 @@ static inline uint64_t tlb_read_idx(const CPUTLBEntry *entry,
MMU_DATA_LOAD * sizeof(uint64_t));
QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_write) !=
MMU_DATA_STORE * sizeof(uint64_t));
- QEMU_BUILD_BUG_ON(offsetof(CPUTLBEntry, addr_code) !=
- MMU_INST_FETCH * sizeof(uint64_t));
+
+ tcg_debug_assert(access_type == MMU_DATA_LOAD ||
+ access_type == MMU_DATA_STORE);
#if TARGET_LONG_BITS == 32
/* Use qatomic_read, in case of addr_write; only care about low bits. */
@@ -480,8 +481,7 @@ static bool tlb_hit_page_mask_anyprot(CPUTLBEntry *tlb_entry,
mask &= TARGET_PAGE_MASK | TLB_INVALID_MASK;
return (page == (tlb_entry->addr_read & mask) ||
- page == (tlb_addr_write(tlb_entry) & mask) ||
- page == (tlb_entry->addr_code & mask));
+ page == (tlb_addr_write(tlb_entry) & mask));
}
/* Called with tlb_c.lock held */
@@ -1184,9 +1184,6 @@ void tlb_set_page_full(CPUState *cpu, int mmu_idx,
/* Now calculate the new entry */
node->copy.addend = addend - addr_page;
- tlb_set_compare(full, &node->copy, addr_page, read_flags,
- MMU_INST_FETCH, prot & PAGE_EXEC);
-
if (wp_flags & BP_MEM_READ) {
read_flags |= TLB_WATCHPOINT;
}
@@ -1308,22 +1305,30 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
/* Primary lookup in the fast tlb. */
entry = tlbfast_entry(fast, addr);
full = &desc->fulltlb[tlbfast_index(fast, addr)];
- cmp = tlb_read_idx(entry, access_type);
- if (tlb_hit(cmp, addr)) {
- goto found;
+ if (access_type != MMU_INST_FETCH) {
+ cmp = tlb_read_idx(entry, access_type);
+ if (tlb_hit(cmp, addr)) {
+ goto found_data;
+ }
}
/* Secondary lookup in the IntervalTree. */
node = tlbtree_lookup_addr(desc, addr);
if (node) {
- cmp = tlb_read_idx(&node->copy, access_type);
- if (tlb_hit(cmp, addr)) {
- /* Install the cached entry. */
- qemu_spin_lock(&cpu->neg.tlb.c.lock);
- copy_tlb_helper_locked(entry, &node->copy);
- qemu_spin_unlock(&cpu->neg.tlb.c.lock);
- *full = node->full;
- goto found;
+ if (access_type == MMU_INST_FETCH) {
+ if (node->full.prot & PAGE_EXEC) {
+ goto found_code;
+ }
+ } else {
+ cmp = tlb_read_idx(&node->copy, access_type);
+ if (tlb_hit(cmp, addr)) {
+ /* Install the cached entry. */
+ qemu_spin_lock(&cpu->neg.tlb.c.lock);
+ copy_tlb_helper_locked(entry, &node->copy);
+ qemu_spin_unlock(&cpu->neg.tlb.c.lock);
+ *full = node->full;
+ goto found_data;
+ }
}
}
@@ -1333,9 +1338,14 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
tcg_debug_assert(probe);
return false;
}
-
o->did_tlb_fill = true;
+ if (access_type == MMU_INST_FETCH) {
+ node = tlbtree_lookup_addr(desc, addr);
+ tcg_debug_assert(node);
+ goto found_code;
+ }
+
entry = tlbfast_entry(fast, addr);
full = &desc->fulltlb[tlbfast_index(fast, addr)];
cmp = tlb_read_idx(entry, access_type);
@@ -1345,14 +1355,29 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
* called tlb_fill_align, so we know that this entry *is* valid.
*/
flags &= ~TLB_INVALID_MASK;
+ goto found_data;
+
+ found_data:
+ flags &= cmp;
+ flags |= full->slow_flags[access_type];
+ o->flags = flags;
+ o->full = *full;
+ o->haddr = (void *)((uintptr_t)addr + entry->addend);
goto done;
- found:
- /* Alignment has not been checked by tlb_fill_align. */
- {
+ found_code:
+ o->flags = node->copy.addr_read & TLB_EXEC_FLAGS_MASK;
+ o->full = node->full;
+ o->haddr = (void *)((uintptr_t)addr + node->copy.addend);
+ goto done;
+
+ done:
+ if (!o->did_tlb_fill) {
int a_bits = memop_alignment_bits(memop);
/*
+ * Alignment has not been checked by tlb_fill_align.
+ *
* The TLB_CHECK_ALIGNED check differs from the normal alignment
* check, in that this is based on the atomicity of the operation.
* The intended use case is the ARM memory type field of each PTE,
@@ -1366,13 +1391,6 @@ static bool tlb_lookup(CPUState *cpu, TLBLookupOutput *o,
cpu_unaligned_access(cpu, addr, access_type, i->mmu_idx, i->ra);
}
}
-
- done:
- flags &= cmp;
- flags |= full->slow_flags[access_type];
- o->flags = flags;
- o->full = *full;
- o->haddr = (void *)((uintptr_t)addr + entry->addend);
return true;
}
--
2.43.0
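
[Editorial illustration, not part of the patch.] For readers skimming the series, here is a minimal, self-contained sketch of the lookup split the commit message describes: data accesses probe the direct-mapped fast TLB, while instruction fetches skip it and consult the secondary per-page structure, deriving their exec flags from the read flags with a mask. The names (fast_tlb_lookup-style helpers, code_map_lookup, EXEC_FLAGS_MASK, the flag values) are hypothetical, and a linear scan over page nodes stands in for QEMU's IntervalTree; the real code uses CPUTLBEntry, tlbtree_lookup_addr() and TLB_EXEC_FLAGS_MASK as shown in the diff above.

/*
 * Illustrative sketch only -- not QEMU code.  Names and flag values are
 * made up; the interval tree is replaced by a linear scan.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_BITS       12
#define PAGE_MASK       (~(((uint64_t)1 << PAGE_BITS) - 1))
#define FAST_TLB_BITS   8
#define FAST_TLB_SIZE   (1 << FAST_TLB_BITS)

/* Hypothetical flags kept in the low bits of the comparators. */
#define FLAG_MMIO       0x1
#define FLAG_INVALID    0x2
/* Only a subset of the read flags is meaningful for execution. */
#define EXEC_FLAGS_MASK FLAG_MMIO

typedef enum { ACCESS_LOAD, ACCESS_STORE, ACCESS_FETCH } AccessType;

/* Fast, direct-mapped entry: data comparators only, no addr_code. */
typedef struct {
    uint64_t addr_read;     /* page | read flags, FLAG_INVALID when empty */
    uint64_t addr_write;    /* page | write flags, FLAG_INVALID when empty */
    intptr_t addend;        /* guest page -> host page offset */
} FastTLBEntry;

/* Secondary structure: one node per mapped page (IntervalTree in QEMU). */
typedef struct {
    uint64_t page;          /* guest page address */
    uint64_t read_flags;    /* flags as stored in addr_read */
    bool     exec;          /* PAGE_EXEC equivalent */
    intptr_t addend;
} PageNode;

static FastTLBEntry fast_tlb[FAST_TLB_SIZE];
static PageNode     page_map[16];
static int          page_map_len;

/* Stand-in for the IntervalTree lookup: scan the known pages. */
static PageNode *code_map_lookup(uint64_t addr)
{
    uint64_t page = addr & PAGE_MASK;
    for (int i = 0; i < page_map_len; i++) {
        if (page_map[i].page == page) {
            return &page_map[i];
        }
    }
    return NULL;
}

/*
 * Data accesses use the direct-mapped fast TLB; instruction fetches go
 * straight to the page map and filter the read flags down to exec flags.
 */
static bool lookup(uint64_t addr, AccessType type,
                   uint64_t *flags, intptr_t *addend)
{
    if (type == ACCESS_FETCH) {
        PageNode *n = code_map_lookup(addr);
        if (!n || !n->exec) {
            return false;               /* would raise an execute fault */
        }
        *flags = n->read_flags & EXEC_FLAGS_MASK;
        *addend = n->addend;
        return true;
    }

    FastTLBEntry *e = &fast_tlb[(addr >> PAGE_BITS) & (FAST_TLB_SIZE - 1)];
    uint64_t cmp = (type == ACCESS_LOAD) ? e->addr_read : e->addr_write;
    if ((cmp & PAGE_MASK) != (addr & PAGE_MASK) || (cmp & FLAG_INVALID)) {
        return false;                   /* miss: refill from page map, or fault */
    }
    *flags = cmp & ~PAGE_MASK;
    *addend = e->addend;
    return true;
}

int main(void)
{
    /* One executable, MMIO-free page at 0x4000. */
    page_map[page_map_len++] =
        (PageNode){ .page = 0x4000, .read_flags = 0, .exec = true, .addend = 0 };

    uint64_t flags;
    intptr_t addend;
    printf("fetch 0x4010: %s\n",
           lookup(0x4010, ACCESS_FETCH, &flags, &addend) ? "hit" : "miss");
    printf("load  0x4010: %s\n",
           lookup(0x4010, ACCESS_LOAD, &flags, &addend) ? "hit" : "miss (fast tlb cold)");
    return 0;
}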