From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-next 12/15] tcg-ppc64: Use TCG_REG_TB in tcg_out_movi and tcg_out_mem_long
Date: Mon, 5 Aug 2013 08:28:47 -1000
This results in significant code size reductions when manipulating
pointers into TCG's own data structures. E.g.
-OUT: [size=180]
+OUT: [size=132]
...
-xxx: li r2,16383 # goto_tb
-xxx: rldicr r2,r2,32,31
-xxx: oris r2,r2,39128
-xxx: ori r2,r2,376
-xxx: ldx r30,0,r2
+xxx: addis r30,r30,-544
+xxx: ld r30,-8(r30)
...
-xxx: li r3,16383 # exit_tb
-xxx: rldicr r3,r3,32,31
-xxx: oris r3,r3,39128
-xxx: ori r3,r3,288
+xxx: addis r3,r30,-544
+xxx: addi r3,r3,-96
Signed-off-by: Richard Henderson <address@hidden>
---
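Not part of the patch itself, but as a quick illustration of the displacement split that the new tcg_out_mem_long performs: the hypothetical split_disp helper below only mirrors the l0/l1/extra computation and checks that the pieces rebuild the original displacement; the register selection and instruction emission are omitted.

/* Illustrative sketch only (not part of this patch).  It mirrors how
 * tcg_out_mem_long splits a signed 32-bit displacement into immediates
 * for one or two ADDIS plus a final D-form instruction, including the
 * 0x4000 correction for non-negative displacements whose high half
 * would otherwise come out negative.  */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static void split_disp(int64_t disp)
{
    int64_t orig = disp, extra = 0;

    /* Low 16 bits, sign-extended: the ADDI/load-store immediate.  */
    int64_t l0 = (int16_t)disp;
    /* What is left over for the ADDIS immediate(s).  */
    disp = (disp - l0) >> 16;
    int64_t l1 = (int16_t)disp;

    if (l1 < 0 && orig >= 0) {
        extra = 0x4000;
        l1 = (int16_t)(disp - 0x4000);
    }

    /* ADDIS adds imm * 65536; ADDI adds the sign-extended imm.  */
    int64_t rebuilt = l1 * 65536 + extra * 65536 + l0;
    assert(rebuilt == orig);
    printf("%lld -> addis %lld, addis %lld, addi %lld\n",
           (long long)orig, (long long)l1, (long long)extra, (long long)l0);
}

int main(void)
{
    split_disp(-544 * 65536 - 96);  /* the exit_tb example above */
    split_disp(0x7fffbeef);         /* needs the extra ADDIS */
    split_disp(0x12345678);
    return 0;
}

When the high half would come out negative even though the displacement itself is non-negative, 0x4000 is peeled off into a second ADDIS so both high immediates stay representable.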
tcg/ppc64/tcg-target.c | 164 +++++++++++++++++++++++++++++--------------------
1 file changed, 99 insertions(+), 65 deletions(-)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index e01d8bc..d4e1efc 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -548,6 +548,78 @@ static inline void tcg_out_shri64(TCGContext *s, TCGReg dst, TCGReg src, int c)
tcg_out_rld(s, RLDICL, dst, src, 64 - c, c);
}
+static void tcg_out_mem_long(TCGContext *s, PowerOpcode opi, PowerOpcode opx,
+ TCGReg rt, TCGReg base, tcg_target_long offset)
+{
+ tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
+ TCGReg rs = TCG_REG_R2;
+
+ assert(rt != TCG_REG_R2 && base != TCG_REG_R2);
+
+ switch (opi) {
+ case LD: case LWA:
+ align = 3;
+ /* FALLTHRU */
+ default:
+ if (rt != TCG_REG_R0) {
+ rs = rt;
+ }
+ break;
+ case STD:
+ align = 3;
+ break;
+ case STB: case STH: case STW:
+ break;
+ }
+
+ /* For unaligned, use the indexed form. */
+ if (offset & align) {
+ do_indexed:
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig);
+ tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2));
+ return;
+ }
+
+ if (base == TCG_REG_R0) {
+ /* For absolute addresses, avoid indexed form. First try turning
+ it into an offset from a known base register, then just fold
+ the low 16 bits. */
+ offset -= (tcg_target_long)s->code_buf;
+ if (offset == (int32_t)offset) {
+ orig = offset;
+ base = TCG_REG_TB;
+ } else {
+ offset = (int16_t)orig;
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig - offset);
+ orig = offset;
+ base = TCG_REG_R2;
+ }
+ } else if (offset != (int32_t)offset) {
+ /* For very large offsets off a real base register, use indexed. */
+ goto do_indexed;
+ }
+
+ l0 = (int16_t)offset;
+ offset = (offset - l0) >> 16;
+ l1 = (int16_t)offset;
+
+ if (l1 < 0 && orig >= 0) {
+ extra = 0x4000;
+ l1 = (int16_t)(offset - 0x4000);
+ }
+ if (l1) {
+ tcg_out32(s, ADDIS | TAI(rs, base, l1));
+ base = rs;
+ }
+ if (extra) {
+ tcg_out32(s, ADDIS | TAI(rs, base, extra));
+ base = rs;
+ }
+ if (opi != ADDI || base != rt || l0 != 0) {
+ tcg_out32(s, opi | TAI(rt, base, l0));
+ }
+}
+
static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
{
if (arg == (int16_t) arg) {
@@ -563,23 +635,37 @@ static void tcg_out_movi32(TCGContext *s, TCGReg ret, int32_t arg)
static void tcg_out_movi(TCGContext *s, TCGType type, TCGReg ret,
tcg_target_long arg)
{
+ tcg_target_long tmp;
+
+ /* Two attempts at 1 or 2 insn sequence for 32-bit constant. */
if (type == TCG_TYPE_I32 || arg == (int32_t)arg) {
tcg_out_movi32(s, ret, arg);
- } else if (arg == (uint32_t)arg && !(arg & 0x8000)) {
+ return;
+ }
+ if (arg == (uint32_t)arg && !(arg & 0x8000)) {
tcg_out32(s, ADDI | TAI(ret, 0, arg));
tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
- } else {
- int32_t high = arg >> 32;
- tcg_out_movi32(s, ret, high);
- if (high) {
- tcg_out_shli64(s, ret, ret, 32);
- }
- if (arg & 0xffff0000) {
- tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
- }
- if (arg & 0xffff) {
- tcg_out32(s, ORI | SAI(ret, ret, arg));
- }
+ return;
+ }
+
+ /* See if we can turn an address constant into a TB offset. */
+ tmp = arg - (uintptr_t)s->code_buf;
+ if (tmp == (int32_t)tmp) {
+ tcg_out_mem_long(s, ADDI, ADD, ret, TCG_REG_TB, tmp);
+ return;
+ }
+
+ /* Full 64-bit constant load. */
+ tmp = arg >> 32;
+ tcg_out_movi32(s, ret, tmp);
+ if (tmp) {
+ tcg_out_shli64(s, ret, ret, 32);
+ }
+ if (arg & 0xffff0000) {
+ tcg_out32(s, ORIS | SAI(ret, ret, arg >> 16));
+ }
+ if (arg & 0xffff) {
+ tcg_out32(s, ORI | SAI(ret, ret, arg));
}
}
@@ -746,58 +832,6 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg,
#endif
}
-static void tcg_out_mem_long(TCGContext *s, PowerOpcode opi, PowerOpcode opx,
- TCGReg rt, TCGReg base, tcg_target_long offset)
-{
- tcg_target_long orig = offset, l0, l1, extra = 0, align = 0;
- TCGReg rs = TCG_REG_R2;
-
- assert(rt != TCG_REG_R2 && base != TCG_REG_R2);
-
- switch (opi) {
- case LD: case LWA:
- align = 3;
- /* FALLTHRU */
- default:
- if (rt != TCG_REG_R0) {
- rs = rt;
- }
- break;
- case STD:
- align = 3;
- break;
- case STB: case STH: case STW:
- break;
- }
-
- /* For unaligned, or very large offsets, use the indexed form. */
- if (offset & align || offset != (int32_t)offset) {
- tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_R2, orig);
- tcg_out32(s, opx | TAB(rt, base, TCG_REG_R2));
- return;
- }
-
- l0 = (int16_t)offset;
- offset = (offset - l0) >> 16;
- l1 = (int16_t)offset;
-
- if (l1 < 0 && orig >= 0) {
- extra = 0x4000;
- l1 = (int16_t)(offset - 0x4000);
- }
- if (l1) {
- tcg_out32(s, ADDIS | TAI(rs, base, l1));
- base = rs;
- }
- if (extra) {
- tcg_out32(s, ADDIS | TAI(rs, base, extra));
- base = rs;
- }
- if (opi != ADDI || base != rt || l0 != 0) {
- tcg_out32(s, opi | TAI(rt, base, l0));
- }
-}
-
#if defined (CONFIG_SOFTMMU)
#include "exec/softmmu_defs.h"
--
1.8.3.1