qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types


From: David Hildenbrand
Subject: Re: [PATCH v3 05/16] tcg/s390x: Implement tcg_out_ld/st for vector types
Date: Thu, 6 May 2021 13:55:30 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Thunderbird/78.8.1

On 03.05.21 20:35, Richard Henderson wrote:
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
  tcg/s390x/tcg-target.c.inc | 117 +++++++++++++++++++++++++++++++++----
  1 file changed, 105 insertions(+), 12 deletions(-)

diff --git a/tcg/s390x/tcg-target.c.inc b/tcg/s390x/tcg-target.c.inc
index 52df20a1ed..6ed9a309c1 100644
--- a/tcg/s390x/tcg-target.c.inc
+++ b/tcg/s390x/tcg-target.c.inc
@@ -265,6 +265,12 @@ typedef enum S390Opcode {
      RX_STC      = 0x42,
      RX_STH      = 0x40,
+ VRX_VL = 0xe706,
+    VRX_VLLEZ   = 0xe704,
+    VRX_VST     = 0xe70e,
+    VRX_VSTEF   = 0xe70b,
+    VRX_VSTEG   = 0xe70a,
+
      NOP         = 0x0707,
  } S390Opcode;
@@ -532,6 +538,26 @@ static void tcg_out_insn_RSY(TCGContext *s, S390Opcode op, TCGReg r1,
  #define tcg_out_insn_RX   tcg_out_insn_RS
  #define tcg_out_insn_RXY  tcg_out_insn_RSY
+static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
+{
+    return ((v1 & 16) << (7 - 3))
+         | ((v2 & 16) << (6 - 3))
+         | ((v3 & 16) << (5 - 3))
+         | ((v4 & 16) << (4 - 3));
+}

This a bit confusing (and maybe wrong?).

If we have v1=TCG_REG_V31, corresponding to "32 + 31", we'd get

  (63 & 16) << (7 - 3)
=    16     <<  4
= 256
= 0x100

And with v4=TCG_REG_V31
  (63 & 16) << (4 - 3)
=    16     <<  1
= 32
= 0x20

Which doesn't make any sense to me, because the RXB is bit 36-39 in an 48-bit instruction (here: 0xf00) . But maybe I messed up.


RXB is 4 bit. Can we just make RXB() return these 4 bits and do any shifting in the caller? At least for me that would be easier to grasp.

static int RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    return (v1 - TCG_REG_V0 >= 16) << 3 |
           (v2 - TCG_REG_V0 >= 16) << 2 |
           (v3 - TCG_REG_V0 >= 16) << 1 |
           (v4 - TCG_REG_V0 >= 16);
}

or without comparisons

static uint8_t RXB(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
    return ((v1 & 16) >> (4 - 3)) |
           ((v2 & 16) >> (4 - 2)) |
           ((v3 & 16) >> (4 - 1)) |
           ((v4 & 16) >> (4 - 0));
}

And then maybe add a simple

static uint16_t RXB_INSTR(TCGReg v1, TCGReg v2, TCGReg v3, TCGReg v4)
{
        return RXB(v1, v2, v3, v4) << 8;
}

that shifts the RXB into place if necessary.


+
+static void tcg_out_insn_VRX(TCGContext *s, S390Opcode op, TCGReg v1,
+                             TCGReg b2, TCGReg x2, intptr_t d2, int m3)
+{
+    tcg_debug_assert(v1 >= TCG_REG_V0 && v1 <= TCG_REG_V31);
+    tcg_debug_assert(d2 >= 0 && d2 <= 0xfff);
+    tcg_debug_assert(x2 <= TCG_REG_R15);
+    tcg_debug_assert(b2 <= TCG_REG_R15);
+    tcg_out16(s, (op & 0xff00) | ((v1 & 15) << 4) | x2);
+    tcg_out16(s, (b2 << 12) | d2);
+    tcg_out16(s, (op & 0x00ff) | RXB(v1, 0, 0, 0) | (m3 << 12));
+}
+
  /* Emit an opcode with "type-checking" of the format.  */
  #define tcg_out_insn(S, FMT, OP, ...) \
      glue(tcg_out_insn_,FMT)(S, glue(glue(FMT,_),OP), ## __VA_ARGS__)
@@ -708,25 +734,92 @@ static void tcg_out_mem(TCGContext *s, S390Opcode opc_rx, 
S390Opcode opc_rxy,
      }
  }
+static void tcg_out_vrx_mem(TCGContext *s, S390Opcode opc_vrx,
+                            TCGReg data, TCGReg base, TCGReg index,
+                            tcg_target_long ofs, int m3)
+{
+    if (ofs < 0 || ofs >= 0x1000) {
+        if (ofs >= -0x80000 && ofs < 0x80000) {
+            tcg_out_insn(s, RXY, LAY, TCG_TMP0, base, index, ofs);
+            base = TCG_TMP0;
+            index = TCG_REG_NONE;
+            ofs = 0;
+        } else {
+            tcg_out_movi(s, TCG_TYPE_PTR, TCG_TMP0, ofs);
+            if (index != TCG_REG_NONE) {
+                tcg_out_insn(s, RRE, AGR, TCG_TMP0, index);
+            }
+            index = TCG_TMP0;
+            ofs = 0;
+        }
+    }
+    tcg_out_insn_VRX(s, opc_vrx, data, base, index, ofs, m3);
+}
/* load data without address translation or endianness conversion */
-static inline void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
-                              TCGReg base, intptr_t ofs)
+static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg data,
+                       TCGReg base, intptr_t ofs)
  {
-    if (type == TCG_TYPE_I32) {
-        tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
-    } else {
-        tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, RX_L, RXY_LY, data, base, TCG_REG_NONE, ofs);
+            break;
+        }
+        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_32);
+        break;
+
+    case TCG_TYPE_I64:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, 0, RXY_LG, data, base, TCG_REG_NONE, ofs);
+            break;
+        }
+        /* fallthru */
+
+    case TCG_TYPE_V64:
+        tcg_out_vrx_mem(s, VRX_VLLEZ, data, base, TCG_REG_NONE, ofs, MO_64);
+        break;
+
+    case TCG_TYPE_V128:
+        /* Hint quadword aligned.  */
+        tcg_out_vrx_mem(s, VRX_VL, data, base, TCG_REG_NONE, ofs, 4);
+        break;
+
+    default:
+        g_assert_not_reached();
      }
  }
-static inline void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
-                              TCGReg base, intptr_t ofs)
+static void tcg_out_st(TCGContext *s, TCGType type, TCGReg data,
+                       TCGReg base, intptr_t ofs)
  {
-    if (type == TCG_TYPE_I32) {
-        tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
-    } else {
-        tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+    switch (type) {
+    case TCG_TYPE_I32:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, RX_ST, RXY_STY, data, base, TCG_REG_NONE, ofs);
+        } else {
+            tcg_out_vrx_mem(s, VRX_VSTEF, data, base, TCG_REG_NONE, ofs, 1);
+        }
+        break;
+
+    case TCG_TYPE_I64:
+        if (likely(data < 16)) {
+            tcg_out_mem(s, 0, RXY_STG, data, base, TCG_REG_NONE, ofs);
+            break;
+        }
+        /* fallthru */
+
+    case TCG_TYPE_V64:
+        tcg_out_vrx_mem(s, VRX_VSTEG, data, base, TCG_REG_NONE, ofs, 0);
+        break;
+
+    case TCG_TYPE_V128:
+        /* Hint quadword aligned.  */
+        tcg_out_vrx_mem(s, VRX_VST, data, base, TCG_REG_NONE, ofs, 4);
+        break;
+
+    default:
+        g_assert_not_reached();
      }
  }

Remaining stuff LGTM, although I have little experience with that code

--
Thanks,

David / dhildenb




reply via email to

[Prev in Thread] Current Thread [Next in Thread]