[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 51/68] tcg/arm: Add full [US]XT[BH] into {s}extract
From: Richard Henderson
Subject: [PULL 51/68] tcg/arm: Add full [US]XT[BH] into {s}extract
Date: Fri, 17 Jan 2025 10:24:39 -0800
The armv6 UXT[BH] and SXT[BH] opcodes have a 2-bit rotate field,
which supports extractions from ofs = {0,8,16,24}.
Special-case unsigned extraction with ofs = 0, len <= 8 as AND,
which, according to gcc, can be faster.
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target-has.h | 21 ++++++++++++++--
tcg/arm/tcg-target.c.inc | 54 +++++++++++++++++++++++++++++++++++-----
2 files changed, 67 insertions(+), 8 deletions(-)
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
index 316185500d..d9f3311102 100644
--- a/tcg/arm/tcg-target-has.h
+++ b/tcg/arm/tcg-target-has.h
@@ -41,8 +41,8 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
@@ -82,4 +82,21 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_HAS_tst_vec 1
+static inline bool
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+ if (use_armv7_instructions) {
+ return true; /* SBFX or UBFX */
+ }
+ switch (len) {
+ case 8: /* SXTB or UXTB */
+ case 16: /* SXTH or UXTH */
+ return (ofs % 8) == 0;
+ }
+ return false;
+}
+
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
+#define TCG_TARGET_sextract_valid tcg_target_extract_valid
+
#endif
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 9cfb733a14..12dad7307f 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1036,19 +1036,61 @@ static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
TCGReg rn, int ofs, int len)
{
- /* ubfx */
- tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
- | (ofs << 7) | ((len - 1) << 16));
+ /* According to gcc, AND can be faster. */
+ if (ofs == 0 && len <= 8) {
+ tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn,
+ encode_imm_nofail((1 << len) - 1));
+ return;
+ }
+
+ if (use_armv7_instructions) {
+ /* ubfx */
+ tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
+ | (ofs << 7) | ((len - 1) << 16));
+ return;
+ }
+
+ assert(ofs % 8 == 0);
+ switch (len) {
+ case 8:
+ /* uxtb */
+ tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ case 16:
+ /* uxth */
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
TCGReg rn, int ofs, int len)
{
- /* sbfx */
- tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
- | (ofs << 7) | ((len - 1) << 16));
+ if (use_armv7_instructions) {
+ /* sbfx */
+ tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
+ | (ofs << 7) | ((len - 1) << 16));
+ return;
+ }
+
+ assert(ofs % 8 == 0);
+ switch (len) {
+ case 8:
+ /* sxtb */
+ tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ case 16:
+ /* sxth */
+ tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
+
static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
TCGReg rd, TCGReg rn, int32_t offset)
{
--
2.43.0
- [PULL 50/68] tcg/aarch64: Expand extract with offset 0 with andi, (continued)
- [PULL 50/68] tcg/aarch64: Expand extract with offset 0 with andi, Richard Henderson, 2025/01/17
- [PULL 52/68] tcg/loongarch64: Fold the ext{8, 16, 32}[us] cases into {s}extract, Richard Henderson, 2025/01/17
- [PULL 53/68] tcg/mips: Fold the ext{8, 16, 32}[us] cases into {s}extract, Richard Henderson, 2025/01/17
- [PULL 57/68] tcg/s390x: Fold the ext{8, 16, 32}[us] cases into {s}extract, Richard Henderson, 2025/01/17
- [PULL 58/68] tcg/sparc64: Use SRA, SRL for {s}extract_i64, Richard Henderson, 2025/01/17
- [PULL 49/68] tcg/aarch64: Provide TCG_TARGET_{s}extract_valid, Richard Henderson, 2025/01/17
- [PULL 47/68] tcg/i386: Handle all 8-bit extensions for i686, Richard Henderson, 2025/01/17
- [PULL 64/68] tcg/riscv: Use BEXTI for single-bit extractions, Richard Henderson, 2025/01/17
- [PULL 63/68] util/cpuinfo-riscv: Detect Zbs, Richard Henderson, 2025/01/17
- [PULL 62/68] tcg: Remove TCG_TARGET_HAS_deposit_{i32,i64}, Richard Henderson, 2025/01/17
- [PULL 51/68] tcg/arm: Add full [US]XT[BH] into {s}extract, Richard Henderson <=
- [PULL 54/68] tcg/ppc: Fold the ext{8, 16, 32}[us] cases into {s}extract, Richard Henderson, 2025/01/17
- [PULL 56/68] tcg/riscv: Use SRAIW, SRLIW for {s}extract_i64, Richard Henderson, 2025/01/17
- [PULL 60/68] tcg/tci: Remove assertions for deposit and extract, Richard Henderson, 2025/01/17
- [PULL 55/68] tcg/riscv64: Fold the ext{8, 16, 32}[us] cases into {s}extract, Richard Henderson, 2025/01/17
- [PULL 66/68] tcg: Document tb_lookup() and tcg_tb_lookup(), Richard Henderson, 2025/01/17
- [PULL 61/68] tcg: Remove TCG_TARGET_HAS_{s}extract_{i32,i64}, Richard Henderson, 2025/01/17
- [PULL 67/68] accel/tcg: Call tcg_tb_insert() for one-insn TBs, Richard Henderson, 2025/01/17
- [PULL 59/68] tcg/tci: Provide TCG_TARGET_{s}extract_valid, Richard Henderson, 2025/01/17
- [PULL 68/68] softfloat: Constify helpers returning float_status field, Richard Henderson, 2025/01/17
- [PULL 65/68] linux-user: Add missing /proc/cpuinfo fields for sparc, Richard Henderson, 2025/01/17