[PATCH v3 07/17] accel/tcg: Introduce TARGET_TB_PCREL
From: Richard Henderson
Subject: [PATCH v3 07/17] accel/tcg: Introduce TARGET_TB_PCREL
Date: Mon, 22 Aug 2022 16:23:28 -0700
Prepare for targets to be able to produce TBs that can
run in more than one virtual context.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu-defs.h | 3 +++
include/exec/exec-all.h | 41 ++++++++++++++++++++++++++---
include/hw/core/cpu.h | 1 +
accel/tcg/cpu-exec.c | 55 ++++++++++++++++++++++++++++++---------
accel/tcg/translate-all.c | 48 ++++++++++++++++++++++------------
5 files changed, 115 insertions(+), 33 deletions(-)
diff --git a/include/exec/cpu-defs.h b/include/exec/cpu-defs.h
index ba3cd32a1e..87e2bc4e59 100644
--- a/include/exec/cpu-defs.h
+++ b/include/exec/cpu-defs.h
@@ -54,6 +54,9 @@
# error TARGET_PAGE_BITS must be defined in cpu-param.h
# endif
#endif
+#ifndef TARGET_TB_PCREL
+# define TARGET_TB_PCREL 0
+#endif
#define TARGET_LONG_SIZE (TARGET_LONG_BITS / 8)
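With the fallback above, every target gets TARGET_TB_PCREL == 0 unless its
cpu-param.h overrides it before cpu-defs.h is included. A minimal sketch of
opting in, for a hypothetical target "foo" (Arm is actually enabled in patch
17/17 of this series):

    /* target/foo/cpu-param.h -- hypothetical target opting in */
    #define TARGET_TB_PCREL 1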
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index cec3ef1666..b41835bb55 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -459,8 +459,32 @@ struct tb_tc {
};
struct TranslationBlock {
- target_ulong pc; /* simulated PC corresponding to this block (EIP + CS base) */
- target_ulong cs_base; /* CS base for this block */
+#if !TARGET_TB_PCREL
+ /*
+ * Guest PC corresponding to this block. This must be the true
+ * virtual address. Therefore e.g. x86 stores EIP + CS_BASE, and
+ * targets like Arm, MIPS, HP-PA, which reuse low bits for ISA or
+ * privilege, must store those bits elsewhere.
+ *
+ * If TARGET_TB_PCREL, the opcodes for the TranslationBlock are
+ * written such that the TB is associated only with the physical
+ * page and may be run in any virtual address context. In this case,
+ * PC must always be taken from ENV in a target-specific manner.
+ * Unwind information is taken as byte offsets from the "current"
+ * value of the PC, as tracked by the translator.
+ */
+ target_ulong pc;
+#endif
+
+ /*
+ * Target-specific data associated with the TranslationBlock, e.g.:
+ * x86: the original user, the Code Segment virtual base,
+ * arm: an extension of tb->flags,
+ * s390x: instruction data for EXECUTE,
+ * sparc: the next pc of the instruction queue (for delay slots).
+ */
+ target_ulong cs_base;
+
uint32_t flags; /* flags defining in which context the code was generated */
uint32_t cflags; /* compile flags */
@@ -536,13 +560,24 @@ struct TranslationBlock {
/* Hide the read to avoid ifdefs for TARGET_TB_PCREL. */
static inline target_ulong tb_pc(const TranslationBlock *tb)
{
+#if TARGET_TB_PCREL
+ qemu_build_not_reached();
+#else
return tb->pc;
+#endif
}
-/* Similarly, but for logs. */
+/*
+ * Similarly, but for logs. In this case, when the virtual pc
+ * is not available, use the physical address.
+ */
static inline target_ulong tb_pc_log(const TranslationBlock *tb)
{
+#if TARGET_TB_PCREL
+ return tb->page_addr[0];
+#else
return tb->pc;
+#endif
}
/* Hide the qatomic_read to make code a little easier on the eyes */
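To illustrate the intended division of labor between the two accessors,
consider a hypothetical debug helper (not part of this patch): any call to
tb_pc() is a build failure when TARGET_TB_PCREL is set, via
qemu_build_not_reached(), while tb_pc_log() degrades to the physical address
so logging keeps working:

    /* Hypothetical logging helper, a sketch only. */
    static void log_tb_start(const TranslationBlock *tb)
    {
        /*
         * tb_pc(tb) here would trip qemu_build_not_reached() when
         * TARGET_TB_PCREL is set; tb_pc_log() is always safe and
         * falls back to tb->page_addr[0].
         */
        qemu_log("executing TB at " TARGET_FMT_lx "\n", tb_pc_log(tb));
    }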
diff --git a/include/hw/core/cpu.h b/include/hw/core/cpu.h
index 8edef14199..7dcfccf6e2 100644
--- a/include/hw/core/cpu.h
+++ b/include/hw/core/cpu.h
@@ -235,6 +235,7 @@ struct hvf_vcpu_state;
typedef struct {
TranslationBlock *tb;
+ vaddr pc;
} CPUJumpCache;
/* work queue */
diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c
index f146960b7b..3fb7ce05f8 100644
--- a/accel/tcg/cpu-exec.c
+++ b/accel/tcg/cpu-exec.c
@@ -185,7 +185,7 @@ static bool tb_lookup_cmp(const void *p, const void *d)
const TranslationBlock *tb = p;
const struct tb_desc *desc = d;
- if (tb_pc(tb) == desc->pc &&
+ if ((TARGET_TB_PCREL || tb_pc(tb) == desc->pc) &&
tb->page_addr[0] == desc->page_addr0 &&
tb->cs_base == desc->cs_base &&
tb->flags == desc->flags &&
@@ -227,7 +227,8 @@ static TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
return NULL;
}
desc.page_addr0 = phys_pc;
- h = tb_hash_func(phys_pc, pc, flags, cflags, *cpu->trace_dstate);
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : pc),
+ flags, cflags, *cpu->trace_dstate);
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
}
@@ -243,21 +244,42 @@ static inline TranslationBlock *tb_lookup(CPUState *cpu, target_ulong pc,
tcg_debug_assert(!(cflags & CF_INVALID));
hash = tb_jmp_cache_hash_func(pc);
- tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash].tb);
-
- if (likely(tb &&
- tb->pc == pc &&
- tb->cs_base == cs_base &&
- tb->flags == flags &&
- tb->trace_vcpu_dstate == *cpu->trace_dstate &&
- tb_cflags(tb) == cflags)) {
- return tb;
+ if (TARGET_TB_PCREL) {
+ /* Use acquire to ensure current load of pc from tb_jmp_cache[]. */
+ tb = qatomic_load_acquire(&cpu->tb_jmp_cache[hash].tb);
+ } else {
+ /* Use rcu_read to ensure current load of pc from *tb. */
+ tb = qatomic_rcu_read(&cpu->tb_jmp_cache[hash].tb);
}
+ if (likely(tb)) {
+ target_ulong jmp_pc;
+
+ if (TARGET_TB_PCREL) {
+ jmp_pc = cpu->tb_jmp_cache[hash].pc;
+ } else {
+ jmp_pc = tb_pc(tb);
+ }
+ if (jmp_pc == pc &&
+ tb->cs_base == cs_base &&
+ tb->flags == flags &&
+ tb->trace_vcpu_dstate == *cpu->trace_dstate &&
+ tb_cflags(tb) == cflags) {
+ return tb;
+ }
+ }
+
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cflags);
if (tb == NULL) {
return NULL;
}
- qatomic_set(&cpu->tb_jmp_cache[hash].tb, tb);
+
+ if (TARGET_TB_PCREL) {
+ cpu->tb_jmp_cache[hash].pc = pc;
+ /* Use store_release on tb to ensure pc is current. */
+ qatomic_store_release(&cpu->tb_jmp_cache[hash].tb, tb);
+ } else {
+ qatomic_set(&cpu->tb_jmp_cache[hash].tb, tb);
+ }
return tb;
}
@@ -445,6 +467,7 @@ cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit)
if (cc->tcg_ops->synchronize_from_tb) {
cc->tcg_ops->synchronize_from_tb(cpu, last_tb);
} else {
+ assert(!TARGET_TB_PCREL);
assert(cc->set_pc);
cc->set_pc(cpu, tb_pc(last_tb));
}
@@ -988,7 +1011,13 @@ int cpu_exec(CPUState *cpu)
* for the fast lookup
*/
h = tb_jmp_cache_hash_func(pc);
- qatomic_set(&cpu->tb_jmp_cache[h].tb, tb);
+ if (TARGET_TB_PCREL) {
+ cpu->tb_jmp_cache[h].pc = pc;
+ /* Use store_release on tb to ensure pc is current. */
+ qatomic_store_release(&cpu->tb_jmp_cache[h].tb, tb);
+ } else {
+ qatomic_set(&cpu->tb_jmp_cache[h].tb, tb);
+ }
}
#ifndef CONFIG_USER_ONLY
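The CPUJumpCache entry introduced in hw/core/cpu.h now carries a (pc, tb)
pair, and the two fields are kept coherent with a release/acquire pairing
rather than a lock. A condensed sketch of the protocol used at both call
sites above, with hypothetical helper names:

    /* Sketch of the publication protocol; publish()/lookup() are
     * hypothetical names, not part of this patch. */
    static void publish(CPUJumpCache *jc, target_ulong pc,
                        TranslationBlock *tb)
    {
        jc->pc = pc;
        /* Release: the pc store above is visible before tb is. */
        qatomic_store_release(&jc->tb, tb);
    }

    static TranslationBlock *lookup(CPUJumpCache *jc, target_ulong pc)
    {
        /* Acquire: pairs with the release above, so the jc->pc read
         * below observes a value at least as new as tb. */
        TranslationBlock *tb = qatomic_load_acquire(&jc->tb);
        return (tb && jc->pc == pc) ? tb : NULL;
    }

In the !TARGET_TB_PCREL case the pc lives inside the TB itself, so the
cheaper qatomic_rcu_read/qatomic_set pair suffices, as the code above shows.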
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index 1248ee3433..27435b97db 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -298,7 +298,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
if (i == 0) {
- prev = (j == 0 ? tb_pc(tb) : 0);
+ prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
} else {
prev = tcg_ctx->gen_insn_data[i - 1][j];
}
@@ -326,7 +326,7 @@ static int encode_search(TranslationBlock *tb, uint8_t *block)
static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
uintptr_t searched_pc, bool reset_icount)
{
- target_ulong data[TARGET_INSN_START_WORDS] = { tb_pc(tb) };
+ target_ulong data[TARGET_INSN_START_WORDS];
uintptr_t host_pc = (uintptr_t)tb->tc.ptr;
CPUArchState *env = cpu->env_ptr;
const uint8_t *p = tb->tc.ptr + tb->tc.size;
@@ -342,6 +342,11 @@ static int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
return -1;
}
+ memset(data, 0, sizeof(data));
+ if (!TARGET_TB_PCREL) {
+ data[0] = tb_pc(tb);
+ }
+
/* Reconstruct the stored insn data while looking for the point at
which the end of the insn exceeds the searched_pc. */
for (i = 0; i < num_insns; ++i) {
@@ -884,13 +889,13 @@ static bool tb_cmp(const void *ap, const void *bp)
const TranslationBlock *a = ap;
const TranslationBlock *b = bp;
- return tb_pc(a) == tb_pc(b) &&
- a->cs_base == b->cs_base &&
- a->flags == b->flags &&
- (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
- a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
- a->page_addr[0] == b->page_addr[0] &&
- a->page_addr[1] == b->page_addr[1];
+ return ((TARGET_TB_PCREL || tb_pc(a) == tb_pc(b)) &&
+ a->cs_base == b->cs_base &&
+ a->flags == b->flags &&
+ (tb_cflags(a) & ~CF_INVALID) == (tb_cflags(b) & ~CF_INVALID) &&
+ a->trace_vcpu_dstate == b->trace_vcpu_dstate &&
+ a->page_addr[0] == b->page_addr[0] &&
+ a->page_addr[1] == b->page_addr[1]);
}
void tb_htable_init(void)
@@ -1169,8 +1174,8 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
/* remove the TB from the hash list */
phys_pc = tb->page_addr[0];
- h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, orig_cflags,
- tb->trace_vcpu_dstate);
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
+ tb->flags, orig_cflags, tb->trace_vcpu_dstate);
if (!qht_remove(&tb_ctx.htable, tb, h)) {
return;
}
@@ -1186,10 +1191,17 @@ static void do_tb_phys_invalidate(TranslationBlock *tb, bool rm_from_page_list)
}
/* remove the TB from the hash list */
- h = tb_jmp_cache_hash_func(tb->pc);
- CPU_FOREACH(cpu) {
- if (qatomic_read(&cpu->tb_jmp_cache[h].tb) == tb) {
- qatomic_set(&cpu->tb_jmp_cache[h].tb, NULL);
+ if (TARGET_TB_PCREL) {
+ /* Any TB may be at any virtual address */
+ CPU_FOREACH(cpu) {
+ cpu_tb_jmp_cache_clear(cpu);
+ }
+ } else {
+ h = tb_jmp_cache_hash_func(tb_pc(tb));
+ CPU_FOREACH(cpu) {
+ if (qatomic_read(&cpu->tb_jmp_cache[h].tb) == tb) {
+ qatomic_set(&cpu->tb_jmp_cache[h].tb, NULL);
+ }
}
}
@@ -1300,8 +1312,8 @@ tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc,
}
/* add in the hash table */
- h = tb_hash_func(phys_pc, tb_pc(tb), tb->flags, tb->cflags,
- tb->trace_vcpu_dstate);
+ h = tb_hash_func(phys_pc, (TARGET_TB_PCREL ? 0 : tb_pc(tb)),
+ tb->flags, tb->cflags, tb->trace_vcpu_dstate);
qht_insert(&tb_ctx.htable, tb, h, &existing_tb);
/* remove TB from the page(s) if we couldn't insert it */
@@ -1371,7 +1383,9 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
gen_code_buf = tcg_ctx->code_gen_ptr;
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
+#if !TARGET_TB_PCREL
tb->pc = pc;
+#endif
tb->cs_base = cs_base;
tb->flags = flags;
tb->cflags = cflags;
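One invariant worth calling out in the hunks above: the pc component fed to
tb_hash_func() must be computed identically at lookup (tb_htable_lookup),
insert (tb_link_page) and remove (do_tb_phys_invalidate), or the three sites
would address different hash buckets. The patch repeats the same expression
at all three; a hypothetical helper would make the invariant explicit:

    /* Hypothetical helper, not in this patch: the hash seed for the
     * virtual pc, zero when TBs are not tied to one virtual address.
     * The dead tb_pc() call is eliminated at compile time, exactly as
     * in the ternaries the patch uses inline. */
    static inline target_ulong tb_hash_pc(const TranslationBlock *tb)
    {
        return TARGET_TB_PCREL ? 0 : tb_pc(tb);
    }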
--
2.34.1