qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [RFC PATCH v4 53/75] target/i386: introduce SSE4.1 vector i


From: Jan Bobek
Subject: [Qemu-devel] [RFC PATCH v4 53/75] target/i386: introduce SSE4.1 vector instructions to sse-opcode.inc.h
Date: Wed, 21 Aug 2019 13:29:29 -0400

Add all the SSE4.1 vector instruction entries to sse-opcode.inc.h.

Signed-off-by: Jan Bobek <address@hidden>
---
 target/i386/sse-opcode.inc.h | 101 +++++++++++++++++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/target/i386/sse-opcode.inc.h b/target/i386/sse-opcode.inc.h
index d8ea71aa6c..9682cce7ef 100644
--- a/target/i386/sse-opcode.inc.h
+++ b/target/i386/sse-opcode.inc.h
@@ -389,6 +389,58 @@
  * 66 0F 3A 0F /r ib       PALIGNR xmm1, xmm2/m128, imm8
  * NP 0F 38 00 /r          PSHUFB mm1, mm2/m64
  * 66 0F 38 00 /r          PSHUFB xmm1, xmm2/m128
+ *
+ * SSE4.1 Instructions
+ * --------------------
+ * 66 0F 38 40 /r          PMULLD xmm1, xmm2/m128
+ * 66 0F 38 28 /r          PMULDQ xmm1, xmm2/m128
+ * 66 0F 38 3A /r          PMINUW xmm1, xmm2/m128
+ * 66 0F 38 3B /r          PMINUD xmm1, xmm2/m128
+ * 66 0F 38 38 /r          PMINSB xmm1, xmm2/m128
+ * 66 0F 38 39 /r          PMINSD xmm1, xmm2/m128
+ * 66 0F 38 41 /r          PHMINPOSUW xmm1, xmm2/m128
+ * 66 0F 38 3E /r          PMAXUW xmm1, xmm2/m128
+ * 66 0F 38 3F /r          PMAXUD xmm1, xmm2/m128
+ * 66 0F 38 3C /r          PMAXSB xmm1, xmm2/m128
+ * 66 0F 38 3D /r          PMAXSD xmm1, xmm2/m128
+ * 66 0F 3A 42 /r ib       MPSADBW xmm1, xmm2/m128, imm8
+ * 66 0F 3A 40 /r ib       DPPS xmm1, xmm2/m128, imm8
+ * 66 0F 3A 41 /r ib       DPPD xmm1, xmm2/m128, imm8
+ * 66 0F 3A 08 /r ib       ROUNDPS xmm1, xmm2/m128, imm8
+ * 66 0F 3A 09 /r ib       ROUNDPD xmm1, xmm2/m128, imm8
+ * 66 0F 3A 0A /r ib       ROUNDSS xmm1, xmm2/m32, imm8
+ * 66 0F 3A 0B /r ib       ROUNDSD xmm1, xmm2/m64, imm8
+ * 66 0F 38 29 /r          PCMPEQQ xmm1, xmm2/m128
+ * 66 0F 38 17 /r          PTEST xmm1, xmm2/m128
+ * 66 0F 38 2B /r          PACKUSDW xmm1, xmm2/m128
+ * 66 0F 3A 0C /r ib       BLENDPS xmm1, xmm2/m128, imm8
+ * 66 0F 3A 0D /r ib       BLENDPD xmm1, xmm2/m128, imm8
+ * 66 0F 38 14 /r          BLENDVPS xmm1, xmm2/m128, <XMM0>
+ * 66 0F 38 15 /r          BLENDVPD xmm1, xmm2/m128 , <XMM0>
+ * 66 0F 38 10 /r          PBLENDVB xmm1, xmm2/m128, <XMM0>
+ * 66 0F 3A 0E /r ib       PBLENDW xmm1, xmm2/m128, imm8
+ * 66 0F 3A 21 /r ib       INSERTPS xmm1, xmm2/m32, imm8
+ * 66 0F 3A 20 /r ib       PINSRB xmm1,r32/m8,imm8
+ * 66 0F 3A 22 /r ib       PINSRD xmm1,r/m32,imm8
+ * 66 REX.W 0F 3A 22 /r ib PINSRQ xmm1,r/m64,imm8
+ * 66 0F 3A 17 /r ib       EXTRACTPS reg/m32, xmm1, imm8
+ * 66 0F 3A 14 /r ib       PEXTRB r32/m8,xmm2,imm8
+ * 66 0F 3A 15 /r ib       PEXTRW r32/m16, xmm, imm8
+ * 66 0F 3A 16 /r ib       PEXTRD r/m32,xmm2,imm8
+ * 66 REX.W 0F 3A 16 /r ib PEXTRQ r/m64,xmm2,imm8
+ * 66 0f 38 20 /r          PMOVSXBW xmm1, xmm2/m64
+ * 66 0f 38 21 /r          PMOVSXBD xmm1, xmm2/m32
+ * 66 0f 38 22 /r          PMOVSXBQ xmm1, xmm2/m16
+ * 66 0f 38 23 /r          PMOVSXWD xmm1, xmm2/m64
+ * 66 0f 38 24 /r          PMOVSXWQ xmm1, xmm2/m32
+ * 66 0f 38 25 /r          PMOVSXDQ xmm1, xmm2/m64
+ * 66 0f 38 30 /r          PMOVZXBW xmm1, xmm2/m64
+ * 66 0f 38 31 /r          PMOVZXBD xmm1, xmm2/m32
+ * 66 0f 38 32 /r          PMOVZXBQ xmm1, xmm2/m16
+ * 66 0f 38 33 /r          PMOVZXWD xmm1, xmm2/m64
+ * 66 0f 38 34 /r          PMOVZXWQ xmm1, xmm2/m32
+ * 66 0f 38 35 /r          PMOVZXDQ xmm1, xmm2/m64
+ * 66 0F 38 2A /r          MOVNTDQA xmm1, m128
  */
 
 OPCODE(movd, LEG(NP, 0F, 0, 0x6e), MMX, WR, Pq, Ed)
@@ -501,10 +553,12 @@ OPCODE(addsubps, LEG(F2, 0F, 0, 0xd0), SSE3, WRR, Vdq, 
Vdq, Wdq)
 OPCODE(addsubpd, LEG(66, 0F, 0, 0xd0), SSE3, WRR, Vdq, Vdq, Wdq)
 OPCODE(pmullw, LEG(NP, 0F, 0, 0xd5), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pmullw, LEG(66, 0F, 0, 0xd5), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pmulld, LEG(66, 0F38, 0, 0x40), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(pmulhw, LEG(NP, 0F, 0, 0xe5), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pmulhw, LEG(66, 0F, 0, 0xe5), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(pmulhuw, LEG(NP, 0F, 0, 0xe4), SSE, WRR, Pq, Pq, Qq)
 OPCODE(pmulhuw, LEG(66, 0F, 0, 0xe4), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pmuldq, LEG(66, 0F38, 0, 0x28), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(pmuludq, LEG(NP, 0F, 0, 0xf4), SSE2, WRR, Pq, Pq, Qq)
 OPCODE(pmuludq, LEG(66, 0F, 0, 0xf4), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(pmulhrsw, LEG(NP, 0F38, 0, 0x0b), SSSE3, WRR, Pq, Pq, Qq)
@@ -531,16 +585,25 @@ OPCODE(rsqrtps, LEG(NP, 0F, 0, 0x52), SSE, WR, Vdq, Wdq)
 OPCODE(rsqrtss, LEG(F3, 0F, 0, 0x52), SSE, WR, Vd, Wd)
 OPCODE(pminub, LEG(NP, 0F, 0, 0xda), SSE, WRR, Pq, Pq, Qq)
 OPCODE(pminub, LEG(66, 0F, 0, 0xda), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pminuw, LEG(66, 0F38, 0, 0x3a), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(pminud, LEG(66, 0F38, 0, 0x3b), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(pminsb, LEG(66, 0F38, 0, 0x38), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(pminsw, LEG(NP, 0F, 0, 0xea), SSE, WRR, Pq, Pq, Qq)
 OPCODE(pminsw, LEG(66, 0F, 0, 0xea), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pminsd, LEG(66, 0F38, 0, 0x39), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(minps, LEG(NP, 0F, 0, 0x5d), SSE, WRR, Vdq, Vdq, Wdq)
 OPCODE(minpd, LEG(66, 0F, 0, 0x5d), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(minss, LEG(F3, 0F, 0, 0x5d), SSE, WRR, Vd, Vd, Wd)
 OPCODE(minsd, LEG(F2, 0F, 0, 0x5d), SSE2, WRR, Vq, Vq, Wq)
+OPCODE(phminposuw, LEG(66, 0F38, 0, 0x41), SSE4_1, WR, Vdq, Wdq)
 OPCODE(pmaxub, LEG(NP, 0F, 0, 0xde), SSE, WRR, Pq, Pq, Qq)
 OPCODE(pmaxub, LEG(66, 0F, 0, 0xde), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pmaxuw, LEG(66, 0F38, 0, 0x3e), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(pmaxud, LEG(66, 0F38, 0, 0x3f), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(pmaxsb, LEG(66, 0F38, 0, 0x3c), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(pmaxsw, LEG(NP, 0F, 0, 0xee), SSE, WRR, Pq, Pq, Qq)
 OPCODE(pmaxsw, LEG(66, 0F, 0, 0xee), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pmaxsd, LEG(66, 0F38, 0, 0x3d), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(maxps, LEG(NP, 0F, 0, 0x5f), SSE, WRR, Vdq, Vdq, Wdq)
 OPCODE(maxpd, LEG(66, 0F, 0, 0x5f), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(maxss, LEG(F3, 0F, 0, 0x5f), SSE, WRR, Vd, Vd, Wd)
@@ -551,6 +614,7 @@ OPCODE(pavgw, LEG(NP, 0F, 0, 0xe3), SSE, WRR, Pq, Pq, Qq)
 OPCODE(pavgw, LEG(66, 0F, 0, 0xe3), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(psadbw, LEG(NP, 0F, 0, 0xf6), SSE, WRR, Pq, Pq, Qq)
 OPCODE(psadbw, LEG(66, 0F, 0, 0xf6), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(mpsadbw, LEG(66, 0F3A, 0, 0x42), SSE4_1, WRRR, Vdq, Vdq, Wdq, Ib)
 OPCODE(pabsb, LEG(NP, 0F38, 0, 0x1c), SSSE3, WR, Pq, Qq)
 OPCODE(pabsb, LEG(66, 0F38, 0, 0x1c), SSSE3, WR, Vdq, Wdq)
 OPCODE(pabsw, LEG(NP, 0F38, 0, 0x1d), SSSE3, WR, Pq, Qq)
@@ -563,18 +627,26 @@ OPCODE(psignw, LEG(NP, 0F38, 0, 0x09), SSSE3, WRR, Pq, 
Pq, Qq)
 OPCODE(psignw, LEG(66, 0F38, 0, 0x09), SSSE3, WRR, Vdq, Vdq, Wdq)
 OPCODE(psignd, LEG(NP, 0F38, 0, 0x0a), SSSE3, WRR, Pq, Pq, Qq)
 OPCODE(psignd, LEG(66, 0F38, 0, 0x0a), SSSE3, WRR, Vdq, Vdq, Wdq)
+OPCODE(dpps, LEG(66, 0F3A, 0, 0x40), SSE4_1, WRRR, Vdq, Vdq, Wdq, Ib)
+OPCODE(dppd, LEG(66, 0F3A, 0, 0x41), SSE4_1, WRRR, Vdq, Vdq, Wdq, Ib)
+OPCODE(roundps, LEG(66, 0F3A, 0, 0x08), SSE4_1, WRR, Vdq, Wdq, Ib)
+OPCODE(roundpd, LEG(66, 0F3A, 0, 0x09), SSE4_1, WRR, Vdq, Wdq, Ib)
+OPCODE(roundss, LEG(66, 0F3A, 0, 0x0a), SSE4_1, WRR, Vd, Wd, Ib)
+OPCODE(roundsd, LEG(66, 0F3A, 0, 0x0b), SSE4_1, WRR, Vq, Wq, Ib)
 OPCODE(pcmpeqb, LEG(NP, 0F, 0, 0x74), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pcmpeqb, LEG(66, 0F, 0, 0x74), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(pcmpeqw, LEG(NP, 0F, 0, 0x75), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pcmpeqw, LEG(66, 0F, 0, 0x75), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(pcmpeqd, LEG(NP, 0F, 0, 0x76), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pcmpeqd, LEG(66, 0F, 0, 0x76), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(pcmpeqq, LEG(66, 0F38, 0, 0x29), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(pcmpgtb, LEG(NP, 0F, 0, 0x64), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pcmpgtb, LEG(66, 0F, 0, 0x64), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(pcmpgtw, LEG(NP, 0F, 0, 0x65), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pcmpgtw, LEG(66, 0F, 0, 0x65), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(pcmpgtd, LEG(NP, 0F, 0, 0x66), MMX, WRR, Pq, Pq, Qq)
 OPCODE(pcmpgtd, LEG(66, 0F, 0, 0x66), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(ptest, LEG(66, 0F38, 0, 0x17), SSE4_1, RR, Vdq, Wdq)
 OPCODE(cmpps, LEG(NP, 0F, 0, 0xc2), SSE, WRRR, Vdq, Vdq, Wdq, Ib)
 OPCODE(cmppd, LEG(66, 0F, 0, 0xc2), SSE2, WRRR, Vdq, Vdq, Wdq, Ib)
 OPCODE(cmpss, LEG(F3, 0F, 0, 0xc2), SSE, WRRR, Vd, Vd, Wd, Ib)
@@ -623,6 +695,7 @@ OPCODE(packssdw, LEG(NP, 0F, 0, 0x6b), MMX, WRR, Pq, Pq, Qq)
 OPCODE(packssdw, LEG(66, 0F, 0, 0x6b), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(packuswb, LEG(NP, 0F, 0, 0x67), MMX, WRR, Pq, Pq, Qq)
 OPCODE(packuswb, LEG(66, 0F, 0, 0x67), SSE2, WRR, Vdq, Vdq, Wdq)
+OPCODE(packusdw, LEG(66, 0F38, 0, 0x2b), SSE4_1, WRR, Vdq, Vdq, Wdq)
 OPCODE(punpckhbw, LEG(NP, 0F, 0, 0x68), MMX, WRR, Pq, Pq, Qq)
 OPCODE(punpckhbw, LEG(66, 0F, 0, 0x68), SSE2, WRR, Vdq, Vdq, Wdq)
 OPCODE(punpckhwd, LEG(NP, 0F, 0, 0x69), MMX, WRR, Pq, Pq, Qq)
@@ -649,12 +722,39 @@ OPCODE(pshufhw, LEG(F3, 0F, 0, 0x70), SSE2, WRR, Vdq, 
Wdq, Ib)
 OPCODE(pshufd, LEG(66, 0F, 0, 0x70), SSE2, WRR, Vdq, Wdq, Ib)
 OPCODE(shufps, LEG(NP, 0F, 0, 0xc6), SSE, WRRR, Vdq, Vdq, Wdq, Ib)
 OPCODE(shufpd, LEG(66, 0F, 0, 0xc6), SSE2, WRRR, Vdq, Vdq, Wdq, Ib)
+OPCODE(blendps, LEG(66, 0F3A, 0, 0x0c), SSE4_1, WRRR, Vdq, Vdq, Wdq, Ib)
+OPCODE(blendpd, LEG(66, 0F3A, 0, 0x0d), SSE4_1, WRRR, Vdq, Vdq, Wdq, Ib)
+OPCODE(blendvps, LEG(66, 0F38, 0, 0x14), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(blendvpd, LEG(66, 0F38, 0, 0x15), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(pblendvb, LEG(66, 0F38, 0, 0x10), SSE4_1, WRR, Vdq, Vdq, Wdq)
+OPCODE(pblendw, LEG(66, 0F3A, 0, 0x0e), SSE4_1, WRRR, Vdq, Vdq, Wdq, Ib)
+OPCODE(insertps, LEG(66, 0F3A, 0, 0x21), SSE4_1, WRRR, Vdq, Vdq, Wd, Ib)
+OPCODE(pinsrb, LEG(66, 0F3A, 0, 0x20), SSE4_1, WRRR, Vdq, Vdq, RdMb, Ib)
 OPCODE(pinsrw, LEG(NP, 0F, 0, 0xc4), SSE, WRRR, Pq, Pq, RdMw, Ib)
 OPCODE(pinsrw, LEG(66, 0F, 0, 0xc4), SSE2, WRRR, Vdq, Vdq, RdMw, Ib)
+OPCODE(pinsrd, LEG(66, 0F3A, 0, 0x22), SSE4_1, WRRR, Vdq, Vdq, Ed, Ib)
+OPCODE(pinsrq, LEG(66, 0F3A, 1, 0x22), SSE4_1, WRRR, Vdq, Vdq, Eq, Ib)
+OPCODE(extractps, LEG(66, 0F3A, 0, 0x17), SSE4_1, WRR, Ed, Vdq, Ib)
+OPCODE(pextrb, LEG(66, 0F3A, 0, 0x14), SSE4_1, WRR, RdMb, Vdq, Ib)
+OPCODE(pextrw, LEG(66, 0F3A, 0, 0x15), SSE4_1, WRR, RdMw, Vdq, Ib)
+OPCODE(pextrd, LEG(66, 0F3A, 0, 0x16), SSE4_1, WRR, Ed, Vdq, Ib)
+OPCODE(pextrq, LEG(66, 0F3A, 1, 0x16), SSE4_1, WRR, Eq, Vdq, Ib)
 OPCODE(pextrw, LEG(NP, 0F, 0, 0xc5), SSE, WRR, Gd, Nq, Ib)
 OPCODE(pextrw, LEG(NP, 0F, 1, 0xc5), SSE, WRR, Gq, Nq, Ib)
 OPCODE(pextrw, LEG(66, 0F, 0, 0xc5), SSE2, WRR, Gd, Udq, Ib)
 OPCODE(pextrw, LEG(66, 0F, 1, 0xc5), SSE2, WRR, Gq, Udq, Ib)
+OPCODE(pmovsxbw, LEG(66, 0F38, 0, 0x20), SSE4_1, WR, Vdq, Wq)
+OPCODE(pmovsxbd, LEG(66, 0F38, 0, 0x21), SSE4_1, WR, Vdq, Wd)
+OPCODE(pmovsxbq, LEG(66, 0F38, 0, 0x22), SSE4_1, WR, Vdq, Ww)
+OPCODE(pmovsxwd, LEG(66, 0F38, 0, 0x23), SSE4_1, WR, Vdq, Wq)
+OPCODE(pmovsxwq, LEG(66, 0F38, 0, 0x24), SSE4_1, WR, Vdq, Wd)
+OPCODE(pmovsxdq, LEG(66, 0F38, 0, 0x25), SSE4_1, WR, Vdq, Wq)
+OPCODE(pmovzxbw, LEG(66, 0F38, 0, 0x30), SSE4_1, WR, Vdq, Wq)
+OPCODE(pmovzxbd, LEG(66, 0F38, 0, 0x31), SSE4_1, WR, Vdq, Wd)
+OPCODE(pmovzxbq, LEG(66, 0F38, 0, 0x32), SSE4_1, WR, Vdq, Ww)
+OPCODE(pmovzxwd, LEG(66, 0F38, 0, 0x33), SSE4_1, WR, Vdq, Wq)
+OPCODE(pmovzxwq, LEG(66, 0F38, 0, 0x34), SSE4_1, WR, Vdq, Wd)
+OPCODE(pmovzxdq, LEG(66, 0F38, 0, 0x35), SSE4_1, WR, Vdq, Wq)
 OPCODE(cvtpi2ps, LEG(NP, 0F, 0, 0x2a), SSE, WR, Vdq, Qq)
 OPCODE(cvtsi2ss, LEG(F3, 0F, 0, 0x2a), SSE, WR, Vd, Ed)
 OPCODE(cvtsi2ss, LEG(F3, 0F, 1, 0x2a), SSE, WR, Vd, Eq)
@@ -691,6 +791,7 @@ OPCODE(movnti, LEG(NP, 0F, 0, 0xc3), SSE2, WR, Md, Gd)
 OPCODE(movnti, LEG(NP, 0F, 1, 0xc3), SSE2, WR, Mq, Gq)
 OPCODE(movntq, LEG(NP, 0F, 0, 0xe7), SSE, WR, Mq, Pq)
 OPCODE(movntdq, LEG(66, 0F, 0, 0xe7), SSE2, WR, Mdq, Vdq)
+OPCODE(movntdqa, LEG(66, 0F38, 0, 0x2a), SSE4_1, WR, Vdq, Mdq)
 OPCODE(pause, LEG(F3, NA, 0, 0x90), SSE2, )
 OPCODE(emms, LEG(NP, 0F, 0, 0x77), MMX, )
 
-- 
2.20.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]