[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 07/10] target/arm: Implement v8.1M low-overhead-loop instructi
From: |
Peter Maydell |
Subject: |
[PATCH v2 07/10] target/arm: Implement v8.1M low-overhead-loop instructions |
Date: |
Mon, 19 Oct 2020 16:12:58 +0100 |
v8.1M's "low-overhead-loop" extension has three instructions
for looping:
* DLS (start of a do-loop)
* WLS (start of a while-loop)
* LE (end of a loop)
The loop-start instructions are both simple operations to start a
loop whose iteration count (if any) is in LR. The loop-end
instruction handles "decrement iteration count and jump back to loop
start"; it also caches the information about the branch back to the
start of the loop to improve performance of the branch on subsequent
iterations.
As with the branch-future instructions, the architecture permits an
implementation to discard the LO_BRANCH_INFO cache at any time, and
QEMU takes the IMPDEF option to never set it in the first place
(equivalent to discarding it immediately), because for us a "real"
implementation would be unnecessary complexity.
(This implementation only provides the simple looping constructs; the
vector extension MVE (Helium) adds some extra variants to handle
looping across vectors. We'll add those later when we implement
MVE.)
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/t32.decode | 8 ++++
target/arm/translate.c | 93 +++++++++++++++++++++++++++++++++++++++++-
2 files changed, 99 insertions(+), 2 deletions(-)
diff --git a/target/arm/t32.decode b/target/arm/t32.decode
index 3015731a8d0..8152739b52b 100644
--- a/target/arm/t32.decode
+++ b/target/arm/t32.decode
@@ -659,4 +659,12 @@ BL 1111 0. .......... 11.1 ............
@branch24
BF 1111 0 boff:4 10 ----- 1110 - ---------- 1 # BF
BF 1111 0 boff:4 11 ----- 1110 0 0000000000 1 # BFX, BFLX
]
+ [
+ # LE and WLS immediate
+ %lob_imm 1:10 11:1 !function=times_2
+
+ DLS 1111 0 0000 100 rn:4 1110 0000 0000 0001
+ WLS 1111 0 0000 100 rn:4 1100 . .......... 1 imm=%lob_imm
+ LE 1111 0 0000 0 f:1 0 1111 1100 . .......... 1 imm=%lob_imm
+ ]
}
diff --git a/target/arm/translate.c b/target/arm/translate.c
index 01b697083a0..5083f828780 100644
--- a/target/arm/translate.c
+++ b/target/arm/translate.c
@@ -2490,17 +2490,23 @@ static void gen_goto_tb(DisasContext *s, int n,
target_ulong dest)
s->base.is_jmp = DISAS_NORETURN;
}
-static inline void gen_jmp (DisasContext *s, uint32_t dest)
+/* Jump, specifying which TB number to use if we gen_goto_tb() */
+static inline void gen_jmp_tb(DisasContext *s, uint32_t dest, int tbno)
{
if (unlikely(is_singlestepping(s))) {
/* An indirect jump so that we still trigger the debug exception. */
gen_set_pc_im(s, dest);
s->base.is_jmp = DISAS_JUMP;
} else {
- gen_goto_tb(s, 0, dest);
+ gen_goto_tb(s, tbno, dest);
}
}
+static inline void gen_jmp(DisasContext *s, uint32_t dest)
+{
+ gen_jmp_tb(s, dest, 0);
+}
+
static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
{
if (x)
@@ -7953,6 +7959,89 @@ static bool trans_BF(DisasContext *s, arg_BF *a)
return true;
}
+static bool trans_DLS(DisasContext *s, arg_DLS *a)
+{
+ /* M-profile low-overhead loop start */
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+ if (a->rn == 13 || a->rn == 15) {
+ /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
+ return false;
+ }
+
+ /* Not a while loop, no tail predication: just set LR to the count */
+ tmp = load_reg(s, a->rn);
+ store_reg(s, 14, tmp);
+ return true;
+}
+
+static bool trans_WLS(DisasContext *s, arg_WLS *a)
+{
+ /* M-profile low-overhead while-loop start */
+ TCGv_i32 tmp;
+ TCGLabel *nextlabel;
+
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+ if (a->rn == 13 || a->rn == 15) {
+ /* CONSTRAINED UNPREDICTABLE: we choose to UNDEF */
+ return false;
+ }
+ if (s->condexec_mask) {
+ /*
+ * WLS in an IT block is CONSTRAINED UNPREDICTABLE;
+ * we choose to UNDEF, because otherwise our use of
+ * gen_goto_tb(1) would clash with the use of TB exit 1
+ * in the dc->condjmp condition-failed codepath in
+ * arm_tr_tb_stop() and we'd get an assertion.
+ */
+ return false;
+ }
+ nextlabel = gen_new_label();
+ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_R[a->rn], 0, nextlabel);
+ tmp = load_reg(s, a->rn);
+ store_reg(s, 14, tmp);
+ gen_jmp_tb(s, s->base.pc_next, 1);
+
+ gen_set_label(nextlabel);
+ gen_jmp(s, read_pc(s) + a->imm);
+ return true;
+}
+
+static bool trans_LE(DisasContext *s, arg_LE *a)
+{
+ /*
+ * M-profile low-overhead loop end. The architecture permits an
+ * implementation to discard the LO_BRANCH_INFO cache at any time,
+ * and we take the IMPDEF option to never set it in the first place
+ * (equivalent to always discarding it immediately), because for QEMU
+ * a "real" implementation would be complicated and wouldn't execute
+ * any faster.
+ */
+ TCGv_i32 tmp;
+
+ if (!dc_isar_feature(aa32_lob, s)) {
+ return false;
+ }
+
+ if (!a->f) {
+ /* Not loop-forever. If LR <= 1 this is the last loop: do nothing. */
+ arm_gen_condlabel(s);
+ tcg_gen_brcondi_i32(TCG_COND_LEU, cpu_R[14], 1, s->condlabel);
+ /* Decrement LR */
+ tmp = load_reg(s, 14);
+ tcg_gen_addi_i32(tmp, tmp, -1);
+ store_reg(s, 14, tmp);
+ }
+ /* Jump back to the loop start */
+ gen_jmp(s, read_pc(s) - a->imm);
+ return true;
+}
+
static bool op_tbranch(DisasContext *s, arg_tbranch *a, bool half)
{
TCGv_i32 addr, tmp;
--
2.20.1
- [PATCH v2 00/10] target/arm: Various v8.1M minor features, Peter Maydell, 2020/10/19
- [PATCH v2 01/10] decodetree: Fix codegen for non-overlapping group inside overlapping group, Peter Maydell, 2020/10/19
- [PATCH v2 02/10] target/arm: Implement v8.1M NOCP handling, Peter Maydell, 2020/10/19
- [PATCH v2 07/10] target/arm: Implement v8.1M low-overhead-loop instructions,
Peter Maydell <=
- [PATCH v2 08/10] target/arm: Fix has_vfp/has_neon ID reg squashing for M-profile, Peter Maydell, 2020/10/19
- [PATCH v2 03/10] target/arm: Implement v8.1M conditional-select insns, Peter Maydell, 2020/10/19
- [PATCH v2 05/10] target/arm: Don't allow BLX imm for M-profile, Peter Maydell, 2020/10/19
- [PATCH v2 04/10] target/arm: Make the t32 insn[25:23]=111 group non-overlapping, Peter Maydell, 2020/10/19
- [PATCH v2 09/10] target/arm: Allow M-profile CPUs with FP16 to set FPSCR.FP16, Peter Maydell, 2020/10/19
- [PATCH v2 06/10] target/arm: Implement v8.1M branch-future insns (as NOPs), Peter Maydell, 2020/10/19
- [PATCH v2 10/10] target/arm: Implement FPSCR.LTPSIZE for M-profile LOB extension, Peter Maydell, 2020/10/19