qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v2 34/81] tcg/i386: Fold the ext{8,16,32}[us] cases into {s}extract


From: Philippe Mathieu-Daudé
Subject: Re: [PATCH v2 34/81] tcg/i386: Fold the ext{8,16,32}[us] cases into {s}extract
Date: Wed, 15 Jan 2025 22:56:59 +0100
User-agent: Mozilla Thunderbird

On 7/1/25 09:00, Richard Henderson wrote:
Accept byte and word extensions with the extract opcodes.
This is preparatory to removing the specialized extracts.

Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
  tcg/i386/tcg-target-has.h | 49 +++++++++++++++++++++++++++----
  tcg/tcg-has.h             | 12 +++++---
  tcg/optimize.c            |  8 +++--
  tcg/tcg-op.c              | 12 +++-----
  tcg/i386/tcg-target.c.inc | 62 +++++++++++++++++++++++++++++----------
  5 files changed, 107 insertions(+), 36 deletions(-)

diff --git a/tcg/i386/tcg-target-has.h b/tcg/i386/tcg-target-has.h
index 3ea2eab807..ad69f957a7 100644
--- a/tcg/i386/tcg-target-has.h
+++ b/tcg/i386/tcg-target-has.h
@@ -80,7 +80,7 @@
  #define TCG_TARGET_HAS_ctpop_i64        have_popcnt
  #define TCG_TARGET_HAS_deposit_i64      1
  #define TCG_TARGET_HAS_extract_i64      1
-#define TCG_TARGET_HAS_sextract_i64     0
+#define TCG_TARGET_HAS_sextract_i64     1
  #define TCG_TARGET_HAS_extract2_i64     1
  #define TCG_TARGET_HAS_negsetcond_i64   1
  #define TCG_TARGET_HAS_add2_i64         1
@@ -130,10 +130,47 @@
       (TCG_TARGET_REG_BITS == 32 && (ofs) == 8 && (len) == 8))
  #define TCG_TARGET_deposit_i64_valid    TCG_TARGET_deposit_i32_valid
-/* Check for the possibility of high-byte extraction and, for 64-bit,
-   zero-extending 32-bit right-shift.  */
-#define TCG_TARGET_extract_i32_valid(ofs, len) ((ofs) == 8 && (len) == 8)
-#define TCG_TARGET_extract_i64_valid(ofs, len) \
-    (((ofs) == 8 && (len) == 8) || ((ofs) + (len)) == 32)
+/*
+ * Check for the possibility of low byte/word extraction, high-byte extraction
+ * and zero-extending 32-bit right-shift.
+ *
+ * We cannot sign-extend from high byte to 64-bits without using the
+ * REX prefix that explicitly excludes access to the high-byte registers.
+ */
+static inline bool
+tcg_target_sextract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    switch (ofs) {
+    case 0:
+        switch (len) {
+        case 8:
+        case 16:
+            return true;
+        case 32:
+            return type == TCG_TYPE_I64;
+        }
+        return false;
+    case 8:
+        return len == 8 && type == TCG_TYPE_I32;
+    }
+    return false;
+}
+#define TCG_TARGET_sextract_valid  tcg_target_sextract_valid
+
+static inline bool
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+    if (type == TCG_TYPE_I64 && ofs + len == 32) {
+        return true;
+    }
+    switch (ofs) {
+    case 0:
+        return len == 8 || len == 16;
+    case 8:
+        return len == 8;
+    }
+    return false;
+}
+#define TCG_TARGET_extract_valid  tcg_target_extract_valid
#endif
diff --git a/tcg/tcg-has.h b/tcg/tcg-has.h
index 65b6a0b0cf..8ed35be8c3 100644
--- a/tcg/tcg-has.h
+++ b/tcg/tcg-has.h
@@ -56,11 +56,15 @@
  #ifndef TCG_TARGET_deposit_i64_valid
  #define TCG_TARGET_deposit_i64_valid(ofs, len) 1
  #endif
-#ifndef TCG_TARGET_extract_i32_valid
-#define TCG_TARGET_extract_i32_valid(ofs, len) 1
+#ifndef TCG_TARGET_extract_valid
+#define TCG_TARGET_extract_valid(type, ofs, len) \
+    ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_extract_i32 \
+     : TCG_TARGET_HAS_extract_i64)
  #endif
-#ifndef TCG_TARGET_extract_i64_valid
-#define TCG_TARGET_extract_i64_valid(ofs, len) 1
+#ifndef TCG_TARGET_sextract_valid
+#define TCG_TARGET_sextract_valid(type, ofs, len) \
+    ((type) == TCG_TYPE_I32 ? TCG_TARGET_HAS_sextract_i32 \
+     : TCG_TARGET_HAS_sextract_i64)
  #endif
/* Only one of DIV or DIV2 should be defined. */
diff --git a/tcg/optimize.c b/tcg/optimize.c
index c363c5c04b..cd8ad712c4 100644
--- a/tcg/optimize.c
+++ b/tcg/optimize.c
@@ -2362,8 +2362,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
          xor_opc = INDEX_op_xor_i32;
          shr_opc = INDEX_op_shr_i32;
          neg_opc = INDEX_op_neg_i32;
-        if (TCG_TARGET_extract_i32_valid(sh, 1)) {
+        if (TCG_TARGET_extract_valid(TCG_TYPE_I32, sh, 1)) {
              uext_opc = TCG_TARGET_HAS_extract_i32 ? INDEX_op_extract_i32 : 0;
+        }
+        if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, sh, 1)) {
              sext_opc = TCG_TARGET_HAS_sextract_i32 ? INDEX_op_sextract_i32 : 0;
          }
          break;
@@ -2373,8 +2375,10 @@ static void fold_setcond_tst_pow2(OptContext *ctx, TCGOp *op, bool neg)
          xor_opc = INDEX_op_xor_i64;
          shr_opc = INDEX_op_shr_i64;
          neg_opc = INDEX_op_neg_i64;
-        if (TCG_TARGET_extract_i64_valid(sh, 1)) {
+        if (TCG_TARGET_extract_valid(TCG_TYPE_I64, sh, 1)) {
              uext_opc = TCG_TARGET_HAS_extract_i64 ? INDEX_op_extract_i64 : 0;
+        }
+        if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, sh, 1)) {
              sext_opc = TCG_TARGET_HAS_sextract_i64 ? INDEX_op_sextract_i64 : 0;
          }
          break;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index ab5ccd8dcb..d813a7f44e 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -1014,8 +1014,7 @@ void tcg_gen_extract_i32(TCGv_i32 ret, TCGv_i32 arg,
          return;
      }
-    if (TCG_TARGET_HAS_extract_i32
-        && TCG_TARGET_extract_i32_valid(ofs, len)) {
+    if (TCG_TARGET_extract_valid(TCG_TYPE_I32, ofs, len)) {
          tcg_gen_op4ii_i32(INDEX_op_extract_i32, ret, arg, ofs, len);
          return;
      }
@@ -1077,8 +1076,7 @@ void tcg_gen_sextract_i32(TCGv_i32 ret, TCGv_i32 arg,
          }
      }
-    if (TCG_TARGET_HAS_sextract_i32
-        && TCG_TARGET_extract_i32_valid(ofs, len)) {
+    if (TCG_TARGET_sextract_valid(TCG_TYPE_I32, ofs, len)) {
          tcg_gen_op4ii_i32(INDEX_op_sextract_i32, ret, arg, ofs, len);
          return;
      }
@@ -2811,8 +2809,7 @@ void tcg_gen_extract_i64(TCGv_i64 ret, TCGv_i64 arg,
          goto do_shift_and;
      }
-    if (TCG_TARGET_HAS_extract_i64
-        && TCG_TARGET_extract_i64_valid(ofs, len)) {
+    if (TCG_TARGET_extract_valid(TCG_TYPE_I64, ofs, len)) {
          tcg_gen_op4ii_i64(INDEX_op_extract_i64, ret, arg, ofs, len);
          return;
      }
@@ -2917,8 +2914,7 @@ void tcg_gen_sextract_i64(TCGv_i64 ret, TCGv_i64 arg,
          return;
      }
-    if (TCG_TARGET_HAS_sextract_i64
-        && TCG_TARGET_extract_i64_valid(ofs, len)) {
+    if (TCG_TARGET_sextract_valid(TCG_TYPE_I64, ofs, len)) {
          tcg_gen_op4ii_i64(INDEX_op_sextract_i64, ret, arg, ofs, len);
          return;
      }
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index 047c5da81c..afff56956f 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -3036,6 +3036,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
      case INDEX_op_extract_i64:
          if (a2 + args[3] == 32) {
+            if (a2 == 0) {
+                tcg_out_ext32u(s, a0, a1);
+                break;
+            }
              /* This is a 32-bit zero-extending right shift.  */
              tcg_out_mov(s, TCG_TYPE_I32, a0, a1);
              tcg_out_shifti(s, SHIFT_SHR, a0, a2);
@@ -3043,28 +3047,53 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, TCGType type,
          }
          /* FALLTHRU */
      case INDEX_op_extract_i32:
-        /* On the off-chance that we can use the high-byte registers.
-           Otherwise we emit the same ext16 + shift pattern that we
-           would have gotten from the normal tcg-op.c expansion.  */
-        tcg_debug_assert(a2 == 8 && args[3] == 8);
-        if (a1 < 4 && a0 < 8) {
-            tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
-        } else {
+        if (a2 == 0 && args[3] == 8) {
+            tcg_out_ext8u(s, a0, a1);
+        } else if (a2 == 0 && args[3] == 16) {
              tcg_out_ext16u(s, a0, a1);
-            tcg_out_shifti(s, SHIFT_SHR, a0, 8);
+        } else if (a2 == 8 && args[3] == 8) {
+            /*
+             * On the off-chance that we can use the high-byte registers.
+             * Otherwise we emit the same ext16 + shift pattern that we
+             * would have gotten from the normal tcg-op.c expansion.
+             */
+            if (a1 < 4 && a0 < 8) {
+                tcg_out_modrm(s, OPC_MOVZBL, a0, a1 + 4);
+            } else {
+                tcg_out_ext16u(s, a0, a1);
+                tcg_out_shifti(s, SHIFT_SHR, a0, 8);
+            }
+        } else {
+            g_assert_not_reached();
+        }
+        break;
+
+    case INDEX_op_sextract_i64:
+        if (a2 == 0 && args[3] == 8) {
+            tcg_out_ext8s(s, TCG_TYPE_I64, a0, a1);
+        } else if (a2 == 0 && args[3] == 16) {
+            tcg_out_ext16s(s, TCG_TYPE_I64, a0, a1);
+        } else if (a2 == 0 && args[3] == 32) {
+            tcg_out_ext32s(s, a0, a1);
+        } else {
+            g_assert_not_reached();
          }
          break;
      case INDEX_op_sextract_i32:
-        /* We don't implement sextract_i64, as we cannot sign-extend to
-           64-bits without using the REX prefix that explicitly excludes
-           access to the high-byte registers.  */
-        tcg_debug_assert(a2 == 8 && args[3] == 8);
-        if (a1 < 4 && a0 < 8) {
-            tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
-        } else {
+        if (a2 == 0 && args[3] == 8) {
+            tcg_out_ext8s(s, TCG_TYPE_I32, a0, a1);
+        } else if (a2 == 0 && args[3] == 16) {
              tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
-            tcg_out_shifti(s, SHIFT_SAR, a0, 8);
+        } else if (a2 == 8 && args[3] == 8) {
+            if (a1 < 4 && a0 < 8) {
+                tcg_out_modrm(s, OPC_MOVSBL, a0, a1 + 4);
+            } else {
+                tcg_out_ext16s(s, TCG_TYPE_I32, a0, a1);
+                tcg_out_shifti(s, SHIFT_SAR, a0, 8);
+            }
+        } else {
+            g_assert_not_reached();
          }
          break;
@@ -3747,6 +3776,7 @@ tcg_target_op_def(TCGOpcode op, TCGType type, unsigned flags)
      case INDEX_op_extract_i32:
      case INDEX_op_extract_i64:
      case INDEX_op_sextract_i32:
+    case INDEX_op_sextract_i64:
      case INDEX_op_ctpop_i32:
      case INDEX_op_ctpop_i64:
          return C_O1_I1(r, r);

To the best of my knowledge,
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>




reply via email to

[Prev in Thread] Current Thread [Next in Thread]