[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC PATCH v4 69/75] target/i386: convert pmullw/pmulhw/pmulhuw helpers to gvec style
From: |
Jan Bobek |
Subject: |
[Qemu-devel] [RFC PATCH v4 69/75] target/i386: convert pmullw/pmulhw/pmulhuw helpers to gvec style |
Date: |
Wed, 21 Aug 2019 13:29:45 -0400 |
Make these helpers suitable for use with tcg_gen_gvec_* functions.
Signed-off-by: Jan Bobek <address@hidden>
---
target/i386/ops_sse.h | 42 ++++++++++++++++++++++++++++++------
target/i386/ops_sse_header.h | 6 +++---
target/i386/translate.c | 27 +++++++++++------------
3 files changed, 51 insertions(+), 24 deletions(-)
diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h
index 168e581c0c..6ec116573b 100644
--- a/target/i386/ops_sse.h
+++ b/target/i386/ops_sse.h
@@ -412,20 +412,50 @@ static inline int satsw(int x)
}
}
-#define FMULLW(a, b) ((a) * (b))
#define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16)
-#define FMULHUW(a, b) ((a) * (b) >> 16)
-#define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16)
#define FAVG(a, b) (((a) + (b) + 1) >> 1)
#endif
-SSE_HELPER_W(helper_pmullw, FMULLW)
+/*
+ * Element-wise multiply of the W() lanes of a and b, storing each product
+ * into the matching lane of d (the store truncates to the lane width).
+ *
+ * d, a, b: destination and source registers.
+ * desc:    descriptor decoded with simd_oprsz()/simd_maxsz().
+ */
+void glue(helper_pmullw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+    intptr_t lane = 0;
+
+    /* Lanes are walked with a uint16_t stride until oprsz bytes are done. */
+    while (lane * sizeof(uint16_t) < oprsz) {
+        const uint32_t lhs = a->W(lane);
+        const uint32_t rhs = b->W(lane);
+
+        d->W(lane) = lhs * rhs;
+        ++lane;
+    }
+    /* NOTE(review): presumably clears d between oprsz and maxsz -- confirm. */
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
#if SHIFT == 0
SSE_HELPER_W(helper_pmulhrw, FMULHRW)
#endif
-SSE_HELPER_W(helper_pmulhuw, FMULHUW)
-SSE_HELPER_W(helper_pmulhw, FMULHW)
+
+/*
+ * Element-wise unsigned multiply of the W() lanes of a and b, storing
+ * bits 16 and up of each 32-bit product into the matching lane of d.
+ *
+ * d, a, b: destination and source registers.
+ * desc:    descriptor decoded with simd_oprsz()/simd_maxsz().
+ */
+void glue(helper_pmulhuw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+    intptr_t lane = 0;
+
+    /* Lanes are walked with a uint16_t stride until oprsz bytes are done. */
+    while (lane * sizeof(uint16_t) < oprsz) {
+        const uint32_t lhs = a->W(lane);
+        const uint32_t rhs = b->W(lane);
+        const uint32_t product = lhs * rhs;
+
+        d->W(lane) = product >> 16;
+        ++lane;
+    }
+    /* NOTE(review): presumably clears d between oprsz and maxsz -- confirm. */
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
+
+/*
+ * Element-wise signed multiply of the W() lanes of a and b, storing the
+ * high half (product >> 16) of each result into the matching lane of d.
+ *
+ * d, a, b: destination and source registers.
+ * desc:    descriptor decoded with simd_oprsz()/simd_maxsz().
+ */
+void glue(helper_pmulhw, SUFFIX)(Reg *d, Reg *a, Reg *b, uint32_t desc)
+{
+    const intptr_t oprsz = simd_oprsz(desc);
+    const intptr_t maxsz = simd_maxsz(desc);
+
+    for (intptr_t i = 0; i * sizeof(uint16_t) < oprsz; ++i) {
+        /*
+         * Reinterpret each lane as int16_t before widening, exactly as
+         * the FMULHW macro this helper replaces did.  Casting the raw
+         * lane straight to int32_t would zero-extend it if W() yields
+         * an unsigned 16-bit value (the cast-free FMULHUW suggests so),
+         * giving wrong results for negative inputs and overflowing
+         * int32_t for large ones.  int16_t * int16_t always fits.
+         */
+        const int32_t t = (int16_t)a->W(i) * (int16_t)b->W(i);
+        /* Relies on >> of a negative value being an arithmetic shift. */
+        d->W(i) = t >> 16;
+    }
+    /* NOTE(review): presumably clears d between oprsz and maxsz -- confirm. */
+    glue(clear_high, SUFFIX)(d, oprsz, maxsz);
+}
SSE_HELPER_B(helper_pavgb, FAVG)
SSE_HELPER_W(helper_pavgw, FAVG)
diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h
index 724692a689..7e6411fc82 100644
--- a/target/i386/ops_sse_header.h
+++ b/target/i386/ops_sse_header.h
@@ -58,12 +58,12 @@ DEF_HELPER_3(glue(pslldqi, SUFFIX), void, Reg, Reg, i32)
DEF_HELPER_3(glue(psrldqi, SUFFIX), void, Reg, Reg, i32)
#endif
-DEF_HELPER_3(glue(pmullw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(pmullw, SUFFIX), void, Reg, Reg, Reg, i32)
#if SHIFT == 0
DEF_HELPER_3(glue(pmulhrw, SUFFIX), void, env, Reg, Reg)
#endif
-DEF_HELPER_3(glue(pmulhuw, SUFFIX), void, env, Reg, Reg)
-DEF_HELPER_3(glue(pmulhw, SUFFIX), void, env, Reg, Reg)
+DEF_HELPER_4(glue(pmulhuw, SUFFIX), void, Reg, Reg, Reg, i32)
+DEF_HELPER_4(glue(pmulhw, SUFFIX), void, Reg, Reg, Reg, i32)
DEF_HELPER_3(glue(pavgb, SUFFIX), void, env, Reg, Reg)
DEF_HELPER_3(glue(pavgw, SUFFIX), void, env, Reg, Reg)
diff --git a/target/i386/translate.c b/target/i386/translate.c
index 03f7c6e450..79f8c1ddac 100644
--- a/target/i386/translate.c
+++ b/target/i386/translate.c
@@ -2801,13 +2801,10 @@ static const SSEFunc_0_epp sse_op_table1[256][4] = {
[0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */
[0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */
[0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps },
- [0xd5] = MMX_OP2(pmullw),
[0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL },
[0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */
[0xe0] = MMX_OP2(pavgb),
[0xe3] = MMX_OP2(pavgw),
- [0xe4] = MMX_OP2(pmulhuw),
- [0xe5] = MMX_OP2(pmulhw),
[0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd,
gen_helper_cvtpd2dq },
[0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */
[0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */
@@ -6116,21 +6113,21 @@ DEF_GEN_INSN3_HELPER_EPP(addsubpd, addsubpd, Vdq, Vdq, Wdq)
DEF_GEN_INSN3_HELPER_EPP(vaddsubpd, addsubpd, Vdq, Hdq, Wdq)
DEF_GEN_INSN3_HELPER_EPP(vaddsubpd, addsubpd, Vqq, Hqq, Wqq)
-DEF_GEN_INSN3_HELPER_EPP(pmullw, pmullw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(pmullw, pmullw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpmullw, pmullw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpmullw, pmullw_xmm, Vqq, Hqq, Wqq)
+DEF_GEN_INSN3_GVEC(pmullw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmullw_mmx)
+DEF_GEN_INSN3_GVEC(pmullw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmullw_xmm)
+DEF_GEN_INSN3_GVEC(vpmullw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmullw_xmm)
+DEF_GEN_INSN3_GVEC(vpmullw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmullw_xmm)
DEF_GEN_INSN3_HELPER_EPP(pmulld, pmulld_xmm, Vdq, Vdq, Wdq)
DEF_GEN_INSN3_HELPER_EPP(vpmulld, pmulld_xmm, Vdq, Hdq, Wdq)
DEF_GEN_INSN3_HELPER_EPP(vpmulld, pmulld_xmm, Vqq, Hqq, Wqq)
-DEF_GEN_INSN3_HELPER_EPP(pmulhw, pmulhw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(pmulhw, pmulhw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpmulhw, pmulhw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpmulhw, pmulhw_xmm, Vqq, Hqq, Wqq)
-DEF_GEN_INSN3_HELPER_EPP(pmulhuw, pmulhuw_mmx, Pq, Pq, Qq)
-DEF_GEN_INSN3_HELPER_EPP(pmulhuw, pmulhuw_xmm, Vdq, Vdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpmulhuw, pmulhuw_xmm, Vdq, Hdq, Wdq)
-DEF_GEN_INSN3_HELPER_EPP(vpmulhuw, pmulhuw_xmm, Vqq, Hqq, Wqq)
+DEF_GEN_INSN3_GVEC(pmulhw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmulhw_mmx)
+DEF_GEN_INSN3_GVEC(pmulhw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhw_xmm)
+DEF_GEN_INSN3_GVEC(vpmulhw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhw_xmm)
+DEF_GEN_INSN3_GVEC(vpmulhw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhw_xmm)
+DEF_GEN_INSN3_GVEC(pmulhuw, Pq, Pq, Qq, 3_ool, MM_OPRSZ, MM_MAXSZ, pmulhuw_mmx)
+DEF_GEN_INSN3_GVEC(pmulhuw, Vdq, Vdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw_xmm)
+DEF_GEN_INSN3_GVEC(vpmulhuw, Vdq, Hdq, Wdq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw_xmm)
+DEF_GEN_INSN3_GVEC(vpmulhuw, Vqq, Hqq, Wqq, 3_ool, XMM_OPRSZ, XMM_MAXSZ, pmulhuw_xmm)
DEF_GEN_INSN3_HELPER_EPP(pmuldq, pmuldq_xmm, Vdq, Vdq, Wdq)
DEF_GEN_INSN3_HELPER_EPP(vpmuldq, pmuldq_xmm, Vdq, Hdq, Wdq)
DEF_GEN_INSN3_HELPER_EPP(vpmuldq, pmuldq_xmm, Vqq, Hqq, Wqq)
--
2.20.1
- [Qemu-devel] [RFC PATCH v4 52/75] target/i386: introduce SSE4.1 code generators, (continued)
- [Qemu-devel] [RFC PATCH v4 52/75] target/i386: introduce SSE4.1 code generators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 48/75] target/i386: introduce SSSE3 translators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 44/75] target/i386: introduce SSE2 vector instructions to sse-opcode.inc.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 62/75] target/i386: introduce AVX2 translators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 53/75] target/i386: introduce SSE4.1 vector instructions to sse-opcode.inc.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 59/75] target/i386: introduce AVX translators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 47/75] target/i386: introduce SSE3 vector instructions to sse-opcode.inc.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 51/75] target/i386: introduce SSE4.1 translators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 66/75] target/i386: cleanup leftovers in ops_sse_header.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 67/75] target/i386: introduce aliases for helper-based tcg_gen_gvec_* functions, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 69/75] target/i386: convert pmullw/pmulhw/pmulhuw helpers to gvec style,
Jan Bobek <=
- [Qemu-devel] [RFC PATCH v4 43/75] target/i386: introduce SSE2 code generators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 50/75] target/i386: introduce SSSE3 vector instructions to sse-opcode.inc.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 55/75] target/i386: introduce SSE4.2 vector instructions to sse-opcode.inc.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 57/75] target/i386: introduce AES and PCLMULQDQ code generators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 58/75] target/i386: introduce AES and PCLMULQDQ vector instructions to sse-opcode.inc.h, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 65/75] target/i386: remove obsoleted helpers, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 49/75] target/i386: introduce SSSE3 code generators, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 70/75] target/i386: convert pavgb/pavgw helpers to gvec style, Jan Bobek, 2019/08/21
- [Qemu-devel] [RFC PATCH v4 71/75] target/i386: convert pmuludq/pmaddwd helpers to gvec style, Jan Bobek, 2019/08/21