[Qemu-devel] [RFC 11/14] tcg-aarch64: Improve tcg_out_movi
From: Richard Henderson
Subject: [Qemu-devel] [RFC 11/14] tcg-aarch64: Improve tcg_out_movi
Date: Mon, 12 Aug 2013 11:44:52 -0700

Handle small positive and negative numbers early. Check for logical
immediates. Check whether using MOVN for the first insn (i.e. loading
the inverted value) helps.

Signed-off-by: Richard Henderson <address@hidden>
---
tcg/aarch64/tcg-target.c | 85 +++++++++++++++++++++++++++++++++++++-----------
1 file changed, 66 insertions(+), 19 deletions(-)
diff --git a/tcg/aarch64/tcg-target.c b/tcg/aarch64/tcg-target.c
index 920c63c..02ab278 100644
--- a/tcg/aarch64/tcg-target.c
+++ b/tcg/aarch64/tcg-target.c
@@ -511,32 +511,79 @@ static inline void tcg_out_movr(TCGContext *s, AArch64Ext ext,
     tcg_out_aimm(s, INSN_ADDI, ext, dest, src, 0);
 }
 
+static inline void tcg_out_movwi(TCGContext *s, AArch64Insn insn,
+                                 AArch64Ext ext, TCGReg rd,
+                                 uint16_t value, int shift)
+{
+    tcg_out32(s, insn | ext | shift << 17 | value << 5 | rd);
+}
+
 static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg rd,
                          tcg_target_long value)
 {
-    AArch64Insn insn = INSN_MOVZ;
+    tcg_target_long valid = (type == TCG_TYPE_I32 ? 0xffffffffull : -1ull);
+    AArch64Insn insn;
+    AArch64Ext ext;
+    int i, wantinv, shift;
+
+    value &= valid;
+
+    /* Check small positive values. */
+    if ((value & ~0xffff) == 0) {
+        tcg_out_movwi(s, INSN_MOVZ, E32, rd, value, 0);
+        return;
+    }
+
+    /* Check small negative values. */
+    if ((~value & valid & ~0xffff) == 0) {
+        tcg_out_movwi(s, INSN_MOVN, EXT(type == TCG_TYPE_I64), rd, ~value, 0);
+        return;
+    }
+
+    /* Check for bitfield immediates. */
+    if ((value & ~0xffffffffull) == 0) {
+        i = find_bitmask32(value);
+        ext = E32;
+    } else {
+        i = find_bitmask64(value);
+        ext = E64;
+    }
+    if (i >= 0) {
+        tcg_out32(s, INSN_ORRI | TCG_REG_XZR << 5 | ext
+                  | bitmask_enc[i] << 10 | rd);
+        return;
+    }
 
-    if (type == TCG_TYPE_I32) {
-        value = (uint32_t)value;
+    /* Would it take fewer insns to load the inverse? */
+    wantinv = 0;
+    for (i = 0; i < 64; i += 16) {
+        if (((value >> i) & 0xffff) == 0) {
+            wantinv--;
+        }
+        if (((~value >> i) & 0xffff) == 0) {
+            wantinv++;
+        }
     }
 
-    /* Construct halfwords of the immediate with MOVZ/MOVK with LSL.
-       Count trailing zeros in 16 bit steps, mapping 64 to 0. Emit the
-       first MOVZ with the half-word immediate skipping the zeros, with
-       a shift (LSL) equal to this number. Then all other insns are MOVKs.
-       Zero the processed half-word in the value, continue until empty.
-       We build the final result 16bits at a time with up to 4 instructions,
-       but do not emit instructions for 16bit zero holes. */
-    do {
-        unsigned shift = ctz64(value) & (63 & -16);
-        unsigned half = (value >> shift) & 0xffff;
-        AArch64Ext ext = EXT(shift >= 32);
-
-        tcg_out32(s, insn | ext | shift << 17 | half << 5 | rd);
-
-        insn = INSN_MOVK;
+    if (wantinv > 0) {
+        value = ~value;
+        insn = INSN_MOVN;
+        valid = -1;
+    } else {
+        insn = INSN_MOVZ;
+        valid = 0;
+    }
+
+    /* Perform the first round specially, to handle the inverse. */
+    shift = ctz64(value) & (63 & -16);
+    tcg_out_movwi(s, insn, ext, rd, value >> shift, shift);
+    value &= ~(0xffffUL << shift);
+
+    while (value) {
+        shift = ctz64(value) & (63 & -16);
+        tcg_out_movwi(s, INSN_MOVK, ext, rd, (value ^ valid) >> shift, shift);
         value &= ~(0xffffUL << shift);
-    } while (value);
+    }
 }
 
 static inline void tcg_out_ldst_r(TCGContext *s,
--
1.8.3.1
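
To make the new decomposition easier to follow, here is a minimal standalone
sketch of the same logic. It is editorial and not code from the patch:
sketch_movi64(), the printed mnemonics, and the use of __builtin_ctzll() in
place of QEMU's ctz64() are assumptions made purely for illustration; the
function only prints which move-wide instructions would be chosen for a given
64-bit constant.

/* Editorial sketch mirroring the patch's strategy: handle 16-bit positive
 * and negative values with a single MOVZ/MOVN, otherwise count zero and
 * all-ones halfwords to decide whether to build the inverted value, then
 * patch the remaining halfwords with MOVK.
 */
#include <stdint.h>
#include <stdio.h>

static void sketch_movi64(uint64_t value)
{
    /* Small positive value: one MOVZ. */
    if ((value & ~UINT64_C(0xffff)) == 0) {
        printf("movz x0, #0x%04llx\n", (unsigned long long)value);
        return;
    }
    /* Small negative value: one MOVN of the inverted low halfword. */
    if ((~value & ~UINT64_C(0xffff)) == 0) {
        printf("movn x0, #0x%04llx\n", (unsigned long long)(~value & 0xffff));
        return;
    }

    /* Would it take fewer insns to load the inverse?  Count halfwords that
     * are all zeros (favour MOVZ) versus all ones (favour MOVN). */
    int wantinv = 0;
    for (int i = 0; i < 64; i += 16) {
        if (((value >> i) & 0xffff) == 0) {
            wantinv--;
        }
        if (((~value >> i) & 0xffff) == 0) {
            wantinv++;
        }
    }

    uint64_t work = value;
    uint64_t xor_mask = 0;          /* re-inverts MOVK halfwords if flipped */
    const char *first = "movz";
    if (wantinv > 0) {
        work = ~value;
        xor_mask = ~UINT64_C(0);
        first = "movn";
    }

    /* First insn skips any low all-zero halfwords of the working value. */
    int shift = __builtin_ctzll(work) & ~15;
    printf("%s x0, #0x%04llx, lsl #%d\n", first,
           (unsigned long long)((work >> shift) & 0xffff), shift);
    work &= ~(UINT64_C(0xffff) << shift);

    /* Patch the remaining non-zero halfwords with MOVK, using the original
     * (un-inverted) halfword values when we started with MOVN. */
    while (work) {
        shift = __builtin_ctzll(work) & ~15;
        printf("movk x0, #0x%04llx, lsl #%d\n",
               (unsigned long long)(((work ^ xor_mask) >> shift) & 0xffff),
               shift);
        work &= ~(UINT64_C(0xffff) << shift);
    }
}

int main(void)
{
    /* 0xffffffff0000abcd: the plain path needs movz + 2 movk, the inverted
     * path needs only movn + 1 movk, so the MOVN heuristic fires. */
    sketch_movi64(0xffffffff0000abcdull);
    return 0;
}

Running it on 0xffffffff0000abcd prints a two-instruction movn/movk sequence,
whereas the straightforward movz path would need three instructions; that is
exactly the situation the wantinv counting in the patch is meant to catch.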
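
The bitfield-immediate fast path relies on find_bitmask32()/find_bitmask64()
and bitmask_enc[], which are introduced elsewhere in this series and are not
visible in this hunk. As a rough editorial illustration of what that check
accepts (is_logical_imm64() and rotr_width() are names invented for this
sketch, and it only tests encodability rather than producing the N:immr:imms
fields): an AArch64 logical immediate is a 2/4/8/16/32/64-bit element,
repeated across the register, whose set bits form a single, possibly rotated,
run of ones.

/* Editorial sketch only: a predicate for "is this constant encodable as an
 * AArch64 logical (bitfield) immediate?", i.e. usable by the ORRI fast path.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Rotate right within the low 'width' bits (width is a power of two <= 64). */
static uint64_t rotr_width(uint64_t x, unsigned r, unsigned width)
{
    uint64_t mask = (width == 64) ? ~UINT64_C(0) : (UINT64_C(1) << width) - 1;
    x &= mask;
    r &= width - 1;
    return r ? ((x >> r) | (x << (width - r))) & mask : x;
}

static bool is_logical_imm64(uint64_t v)
{
    /* 0 and all-ones have no logical-immediate encoding. */
    if (v == 0 || v == ~UINT64_C(0)) {
        return false;
    }

    /* Shrink to the smallest element size whose repetition reproduces v. */
    unsigned esize = 64;
    while (esize > 2) {
        unsigned half = esize >> 1;
        uint64_t mask = (UINT64_C(1) << half) - 1;
        if (((v >> half) & mask) != (v & mask)) {
            break;
        }
        esize = half;
        v &= mask;                      /* keep a single element */
    }

    /* The element's set bits must form one circular run: exactly one 1->0
     * transition when stepping through its bits cyclically. */
    uint64_t edges = v & ~rotr_width(v, 1, esize);
    return __builtin_popcountll(edges) == 1;
}

int main(void)
{
    assert(is_logical_imm64(UINT64_C(0x00ff00ff00ff00ff)));  /* repeated run  */
    assert(is_logical_imm64(UINT64_C(0xaaaaaaaaaaaaaaaa)));  /* 2-bit element */
    assert(!is_logical_imm64(UINT64_C(0x0123456789abcdef))); /* not a run     */
    assert(!is_logical_imm64(0));                            /* never encoded */
    return 0;
}

So a constant such as 0x00ff00ff00ff00ff qualifies for the single ORRI above,
while one such as 0x0123456789abcdef falls through to the MOVZ/MOVN/MOVK path.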