[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[dotgnu-pnet-commits] libjit ChangeLog jit/jit-gen-x86-64.h jit/jit-r...
From: |
Klaus Treichel |
Subject: |
[dotgnu-pnet-commits] libjit ChangeLog jit/jit-gen-x86-64.h jit/jit-r... |
Date: |
Sun, 13 Apr 2008 17:55:37 +0000 |
CVSROOT: /cvsroot/dotgnu-pnet
Module name: libjit
Changes by: Klaus Treichel <ktreichel> 08/04/13 17:55:37
Modified files:
. : ChangeLog
jit : jit-gen-x86-64.h jit-rules-x86-64.c
jit-rules-x86-64.ins
Log message:
Add support for more opcodes on x86-64.
CVSWeb URLs:
http://cvs.savannah.gnu.org/viewcvs/libjit/ChangeLog?cvsroot=dotgnu-pnet&r1=1.360&r2=1.361
http://cvs.savannah.gnu.org/viewcvs/libjit/jit/jit-gen-x86-64.h?cvsroot=dotgnu-pnet&r1=1.4&r2=1.5
http://cvs.savannah.gnu.org/viewcvs/libjit/jit/jit-rules-x86-64.c?cvsroot=dotgnu-pnet&r1=1.2&r2=1.3
http://cvs.savannah.gnu.org/viewcvs/libjit/jit/jit-rules-x86-64.ins?cvsroot=dotgnu-pnet&r1=1.3&r2=1.4
Patches:
Index: ChangeLog
===================================================================
RCS file: /cvsroot/dotgnu-pnet/libjit/ChangeLog,v
retrieving revision 1.360
retrieving revision 1.361
diff -u -b -r1.360 -r1.361
--- ChangeLog 13 Apr 2008 16:14:15 -0000 1.360
+++ ChangeLog 13 Apr 2008 17:55:36 -0000 1.361
@@ -10,6 +10,20 @@
* include/jit/jit-walk.h: use _JIT_ARCH_GET_RETURN_ADDRESS and
_JIT_ARCH_GET_CURRENT_RETURN if available.
+ * jit/jit-gen-x86-64.h: Add additional macros for saving and
+ restoring the fpu controlword and the mxcsr register. Add
+ additional SSE conversion macros. Add SSE compare macros.
+ Add macros for the SSE bit operations on packed values.
+ Add macros for SSE sqrt and rounding. Add macros for fpu rounding.
+
+ * jit/jit-rules-x86-64.c: Add the dreg register class and functions
+ to handle rounding and SSE bit opcodes on packed values.
+
+ * jit/jit-rules-x86-64.ins: Add INT_TO_NFLOAT, LONG_TO_NFLOAT,
+ FLOAT32_TO_NFLOAT, FLOAT64_TO_NFLOAT.
+ Rewrite NFLOAT_TO_INT and NFLOAT_TO_LONG to use the new functions
+ in jit-rules-x86-64.c. Add handling of ABS, NEG and float compares.
+
2008-03-31 Klaus Treichel <address@hidden>
* jit/jit-rules-x86.ins: Fix the sign opcode for integers and the
Index: jit/jit-gen-x86-64.h
===================================================================
RCS file: /cvsroot/dotgnu-pnet/libjit/jit/jit-gen-x86-64.h,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -b -r1.4 -r1.5
--- jit/jit-gen-x86-64.h 30 Mar 2008 15:05:13 -0000 1.4
+++ jit/jit-gen-x86-64.h 13 Apr 2008 17:55:36 -0000 1.5
@@ -111,6 +111,28 @@
} X86_64_XMM1_OP;
/*
+ * Logical opcodes used with packed single and double precision values.
+ */
+typedef enum
+{
+ XMM_ANDP = 0x54,
+ XMM_ORP = 0x56,
+ XMM_XORP = 0x57
+} X86_64_XMM_PLOP;
+
+/*
+ * Rounding modes for xmm rounding instructions, the mxcsr register and
+ * the fpu control word.
+ */
+typedef enum
+{
+ X86_ROUND_NEAREST = 0x00, /* Round to the nearest integer
*/
+ X86_ROUND_DOWN = 0x01, /* Round towards negative
infinity */
+ X86_ROUND_UP = 0x02, /* Round towards positive
infinity */
+ X86_ROUND_ZERO = 0x03 /* Round towards zero
(truncate) */
+} X86_64_ROUNDMODE;
+
+/*
 * Helper union for emitting 64 bit immediate values.
*/
typedef union
@@ -3560,6 +3582,59 @@
} while(0)
/*
+ * xmm instructions with a prefix and three opcodes
+ */
+#define x86_64_p1_xmm3_reg_reg_size(inst, p1, opc1, opc2, opc3, r, reg, size) \
+ do { \
+ *(inst)++ = (unsigned char)(p1); \
+ x86_64_rex_emit(inst, (size), (r), 0, (reg)); \
+ *(inst)++ = (unsigned char)(opc1); \
+ *(inst)++ = (unsigned char)(opc2); \
+ *(inst)++ = (unsigned char)(opc3); \
+ x86_64_reg_emit(inst, (r), (reg)); \
+ } while(0)
+
+#define x86_64_p1_xmm3_reg_regp_size(inst, p1, opc1, opc2, opc3, r, regp,
size) \
+ do { \
+ *(inst)++ = (unsigned char)(p1); \
+ x86_64_rex_emit(inst, (size), (r), 0, (regp)); \
+ *(inst)++ = (unsigned char)(opc1); \
+ *(inst)++ = (unsigned char)(opc2); \
+ *(inst)++ = (unsigned char)(opc3); \
+ x86_64_regp_emit(inst, (r), (regp)); \
+ } while(0)
+
+#define x86_64_p1_xmm3_reg_mem_size(inst, p1, opc1, opc2, opc3, r, mem, size) \
+ do { \
+ *(inst)++ = (unsigned char)(p1); \
+ x86_64_rex_emit(inst, (size), (r), 0, 0); \
+ *(inst)++ = (unsigned char)(opc1); \
+ *(inst)++ = (unsigned char)(opc2); \
+ *(inst)++ = (unsigned char)(opc3); \
+ x86_64_mem_emit(inst, (r), (mem)); \
+ } while(0)
+
+#define x86_64_p1_xmm3_reg_membase_size(inst, p1, opc1, opc2, opc3, r,
basereg, disp, size) \
+ do { \
+ *(inst)++ = (unsigned char)(p1); \
+ x86_64_rex_emit(inst, (size), (r), 0, (basereg)); \
+ *(inst)++ = (unsigned char)(opc1); \
+ *(inst)++ = (unsigned char)(opc2); \
+ *(inst)++ = (unsigned char)(opc3); \
+ x86_64_membase_emit(inst, (r), (basereg), (disp)); \
+ } while(0)
+
+#define x86_64_p1_xmm3_reg_memindex_size(inst, p1, opc1, opc2, opc3, r,
basereg, disp, indexreg, shift, size) \
+ do { \
+ *(inst)++ = (unsigned char)(p1); \
+ x86_64_rex_emit(inst, (size), (r), (indexreg), (basereg)); \
+ *(inst)++ = (unsigned char)(opc1); \
+ *(inst)++ = (unsigned char)(opc2); \
+ *(inst)++ = (unsigned char)(opc3); \
+ x86_64_memindex_emit((inst), (r), (basereg), (disp),
(indexreg), (shift)); \
+ } while(0)
+
+/*
* xmm1: Macro for use of the X86_64_XMM1 enum
*/
#define x86_64_xmm1_reg_reg(inst, opc, dreg, sreg, is_double) \
@@ -3588,6 +3663,56 @@
} while(0)
/*
+ * Load and store MXCSR register state
+ */
+
+/*
+ * ldmxcsr: Load MXCSR register
+ */
+#define x86_64_ldmxcsr_regp(inst, sregp) \
+ do { \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0xae, 2, (sregp)); \
+ } while(0)
+
+#define x86_64_ldmxcsr_mem(inst, mem) \
+ do { \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0xae, 2, (mem)); \
+ } while(0)
+
+#define x86_64_ldmxcsr_membase(inst, basereg, disp) \
+ do { \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0xae, 2, (basereg),
(disp)); \
+ } while(0)
+
+#define x86_64_ldmxcsr_memindex(inst, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0xae, 2, (basereg),
(disp), (indexreg), (shift)); \
+ } while(0)
+
+/*
+ * stmxcsr: Store MXCSR register
+ */
+#define x86_64_stmxcsr_regp(inst, sregp) \
+ do { \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0xae, 3, (sregp)); \
+ } while(0)
+
+#define x86_64_stmxcsr_mem(inst, mem) \
+ do { \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0xae, 3, (mem)); \
+ } while(0)
+
+#define x86_64_stmxcsr_membase(inst, basereg, disp) \
+ do { \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0xae, 3, (basereg),
(disp)); \
+ } while(0)
+
+#define x86_64_stmxcsr_memindex(inst, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0xae, 3, (basereg),
(disp), (indexreg), (shift)); \
+ } while(0)
+
+/*
* Move instructions
*/
@@ -3952,6 +4077,66 @@
} while(0)
/*
+ * cvtss2si: Convert float32 to a signed integer using the rounding mode
+ * in the mxcsr register
+ * The size is the size of the integer value (4 or 8)
+ */
+#define x86_64_cvtss2si_reg_reg_size(inst, dreg, sxreg, size) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x2d, (dreg),
(sxreg), (size)); \
+ } while(0)
+
+#define x86_64_cvtss2si_reg_regp_size(inst, dreg, sregp, size) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x2d, (dreg),
(sregp), (size)); \
+ } while(0)
+
+#define x86_64_cvtss2si_reg_mem_size(inst, dreg, mem, size) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x2d, (dreg),
(mem), (size)); \
+ } while(0)
+
+#define x86_64_cvtss2si_reg_membase_size(inst, dreg, basereg, disp, size) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x2d,
(dreg), (basereg), (disp), (size)); \
+ } while(0)
+
+#define x86_64_cvtss2si_reg_memindex_size(inst, dreg, basereg, disp, indexreg,
shift, size) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x2d,
(dreg), (basereg), (disp), (indexreg), (shift), (size)); \
+ } while(0)
+
+/*
+ * cvtsd2si: Convert float64 to a signed integer using the rounding mode
+ * in the mxcsr register
+ * The size is the size of the integer value (4 or 8)
+ */
+#define x86_64_cvtsd2si_reg_reg_size(inst, dreg, sxreg, size) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x2d, (dreg),
(sxreg), (size)); \
+ } while(0)
+
+#define x86_64_cvtsd2si_reg_regp_size(inst, dreg, sregp, size) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x2d, (dreg),
(sregp), (size)); \
+ } while(0)
+
+#define x86_64_cvtsd2si_reg_mem_size(inst, dreg, mem, size) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x2d, (dreg),
(mem), (size)); \
+ } while(0)
+
+#define x86_64_cvtsd2si_reg_membase_size(inst, dreg, basereg, disp, size) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x2d,
(dreg), (basereg), (disp), (size)); \
+ } while(0)
+
+#define x86_64_cvtsd2si_reg_memindex_size(inst, dreg, basereg, disp, indexreg,
shift, size) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x2d,
(dreg), (basereg), (disp), (indexreg), (shift), (size)); \
+ } while(0)
+
+/*
* cvtss2sd: Convert float32 to float64
*/
#define x86_64_cvtss2sd_reg_reg(inst, dreg, sreg) \
@@ -4008,6 +4193,122 @@
} while(0)
/*
+ * Compare opcodes
+ */
+
+/*
+ * comiss: Compare ordered scalar single precision values
+ */
+#define x86_64_comiss_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_xmm2_reg_reg((inst), 0x0f, 0x2f, (dreg), (sreg)); \
+ } while(0)
+
+#define x86_64_comiss_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0x2f, (dreg), (sregp)); \
+ } while(0)
+
+#define x86_64_comiss_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0x2f, (dreg), (mem)); \
+ } while(0)
+
+#define x86_64_comiss_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0x2f, (dreg), (basereg),
(disp)); \
+ } while(0)
+
+#define x86_64_comiss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift)
\
+ do { \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0x2f, (dreg), (basereg),
(disp), (indexreg), (shift)); \
+ } while(0)
+
+/*
+ * comisd: Compare ordered scalar double precision values
+ */
+#define x86_64_comisd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x2f, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_comisd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x2f, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_comisd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x2f, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_comisd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x2f,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_comisd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift)
\
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x2f,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * ucomiss: Compare unordered scalar single precision values
+ */
+#define x86_64_ucomiss_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_xmm2_reg_reg((inst), 0x0f, 0x2e, (dreg), (sreg)); \
+ } while(0)
+
+#define x86_64_ucomiss_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0x2e, (dreg), (sregp)); \
+ } while(0)
+
+#define x86_64_ucomiss_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0x2e, (dreg), (mem)); \
+ } while(0)
+
+#define x86_64_ucomiss_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0x2e, (dreg), (basereg),
(disp)); \
+ } while(0)
+
+#define x86_64_ucomiss_reg_memindex(inst, dreg, basereg, disp, indexreg,
shift) \
+ do { \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0x2e, (dreg), (basereg),
(disp), (indexreg), (shift)); \
+ } while(0)
+
+/*
+ * ucomisd: Compare unordered scalar double precision values
+ */
+#define x86_64_ucomisd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x2e, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_ucomisd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x2e, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_ucomisd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x2e, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_ucomisd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x2e,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_ucomisd_reg_memindex(inst, dreg, basereg, disp, indexreg,
shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x2e,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
* Arithmetic opcodes
*/
@@ -4124,132 +4425,603 @@
} while(0)
/*
- * addsd: Add scalar double precision float values
+ * Macros for the logical operations with packed single precision values.
*/
-#define x86_64_addsd_reg_reg(inst, dreg, sreg) \
+#define x86_64_plops_reg_reg(inst, op, dreg, sreg) \
do { \
- x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x58, (dreg),
(sreg), 0); \
+ x86_64_xmm2_reg_reg((inst), 0x0f, (op), (dreg), (sreg)); \
} while(0)
-#define x86_64_addsd_reg_regp(inst, dreg, sregp) \
+#define x86_64_plops_reg_regp(inst, op, dreg, sregp) \
do { \
- x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x58, (dreg),
(sregp), 0); \
+ x86_64_xmm2_reg_regp((inst), 0x0f, (op), (dreg), (sregp)); \
} while(0)
-#define x86_64_addsd_reg_mem(inst, dreg, mem) \
+#define x86_64_plops_reg_mem(inst, op, dreg, mem) \
do { \
- x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x58, (dreg),
(mem), 0); \
+ x86_64_xmm2_reg_mem((inst), 0x0f, (op), (dreg), (mem)); \
} while(0)
-#define x86_64_addsd_reg_membase(inst, dreg, basereg, disp) \
+#define x86_64_plops_reg_membase(inst, op, dreg, basereg, disp) \
do { \
- x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x58,
(dreg), (basereg), (disp), 0); \
+ x86_64_xmm2_reg_membase((inst), 0x0f, (op), (dreg), (basereg),
(disp)); \
} while(0)
-#define x86_64_addsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+#define x86_64_plops_reg_memindex(inst, op, dreg, basereg, disp, indexreg,
shift) \
do { \
- x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x58,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, (op), (dreg), (basereg),
(disp), (indexreg), (shift)); \
} while(0)
/*
- * subsd: Substract scalar double precision float values
+ * andps: And
*/
-#define x86_64_subsd_reg_reg(inst, dreg, sreg) \
+#define x86_64_andps_reg_reg(inst, dreg, sreg) \
do { \
- x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5c, (dreg),
(sreg), 0); \
+ x86_64_xmm2_reg_reg((inst), 0x0f, 0x54, (dreg), (sreg)); \
} while(0)
-#define x86_64_subsd_reg_regp(inst, dreg, sregp) \
+#define x86_64_andps_reg_regp(inst, dreg, sregp) \
do { \
- x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5c, (dreg),
(sregp), 0); \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0x54, (dreg), (sregp)); \
} while(0)
-#define x86_64_subsd_reg_mem(inst, dreg, mem) \
+#define x86_64_andps_reg_mem(inst, dreg, mem) \
do { \
- x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5c, (dreg),
(mem), 0); \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0x54, (dreg), (mem)); \
} while(0)
-#define x86_64_subsd_reg_membase(inst, dreg, basereg, disp) \
+#define x86_64_andps_reg_membase(inst, dreg, basereg, disp) \
do { \
- x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5c,
(dreg), (basereg), (disp), 0); \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0x54, (dreg), (basereg),
(disp)); \
} while(0)
-#define x86_64_subsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+#define x86_64_andps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
do { \
- x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5c,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0x54, (dreg), (basereg),
(disp), (indexreg), (shift)); \
} while(0)
/*
- * mulsd: Multiply scalar double precision float values
+ * orps: Or
*/
-#define x86_64_mulsd_reg_reg(inst, dreg, sreg) \
+#define x86_64_orps_reg_reg(inst, dreg, sreg) \
do { \
- x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x59, (dreg),
(sreg), 0); \
+ x86_64_xmm2_reg_reg((inst), 0x0f, 0x56, (dreg), (sreg)); \
} while(0)
-#define x86_64_mulsd_reg_regp(inst, dreg, sregp) \
+#define x86_64_orps_reg_regp(inst, dreg, sregp) \
do { \
- x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x59, (dreg),
(sregp), 0); \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0x56, (dreg), (sregp)); \
} while(0)
-#define x86_64_mulsd_reg_mem(inst, dreg, mem) \
+#define x86_64_orps_reg_mem(inst, dreg, mem) \
do { \
- x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x59, (dreg),
(mem), 0); \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0x56, (dreg), (mem)); \
} while(0)
-#define x86_64_mulsd_reg_membase(inst, dreg, basereg, disp) \
+#define x86_64_orps_reg_membase(inst, dreg, basereg, disp) \
do { \
- x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x59,
(dreg), (basereg), (disp), 0); \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0x56, (dreg), (basereg),
(disp)); \
} while(0)
-#define x86_64_mulsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+#define x86_64_orps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
do { \
- x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x59,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0x56, (dreg), (basereg),
(disp), (indexreg), (shift)); \
} while(0)
/*
- * divsd: Divide scalar double precision float values
+ * xorps: Xor
*/
-#define x86_64_divsd_reg_reg(inst, dreg, sreg) \
+#define x86_64_xorps_reg_reg(inst, dreg, sreg) \
do { \
- x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5e, (dreg),
(sreg), 0); \
+ x86_64_xmm2_reg_reg((inst), 0x0f, 0x57, (dreg), (sreg)); \
} while(0)
-#define x86_64_divsd_reg_regp(inst, dreg, sregp) \
+#define x86_64_xorps_reg_regp(inst, dreg, sregp) \
do { \
- x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5e, (dreg),
(sregp), 0); \
+ x86_64_xmm2_reg_regp((inst), 0x0f, 0x57, (dreg), (sregp)); \
} while(0)
-#define x86_64_divsd_reg_mem(inst, dreg, mem) \
+#define x86_64_xorps_reg_mem(inst, dreg, mem) \
do { \
- x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5e, (dreg),
(mem), 0); \
+ x86_64_xmm2_reg_mem((inst), 0x0f, 0x57, (dreg), (mem)); \
} while(0)
-#define x86_64_divsd_reg_membase(inst, dreg, basereg, disp) \
+#define x86_64_xorps_reg_membase(inst, dreg, basereg, disp) \
do { \
- x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5e,
(dreg), (basereg), (disp), 0); \
+ x86_64_xmm2_reg_membase((inst), 0x0f, 0x57, (dreg), (basereg),
(disp)); \
} while(0)
-#define x86_64_divsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+#define x86_64_xorps_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
do { \
- x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5e,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ x86_64_xmm2_reg_memindex((inst), 0x0f, 0x57, (dreg), (basereg),
(disp), (indexreg), (shift)); \
} while(0)
/*
- * fpu instructions
+ * maxss: Maximum value
*/
+#define x86_64_maxss_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5f, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_maxss_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5f, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_maxss_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5f, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_maxss_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5f,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_maxss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5f,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
/*
- * fld
+ * minss: Minimum value
*/
+#define x86_64_minss_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x5d, (dreg),
(sreg), 0); \
+ } while(0)
-#define x86_64_fld_regp_size(inst, sregp, size) \
+#define x86_64_minss_reg_regp(inst, dreg, sregp) \
do { \
- x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \
- switch(size) \
- { \
- case 4: \
- { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x5d, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_minss_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x5d, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_minss_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x5d,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_minss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x5d,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * sqrtss: Square root
+ */
+#define x86_64_sqrtss_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf3, 0x0f, 0x51, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_sqrtss_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf3, 0x0f, 0x51, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_sqrtss_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf3, 0x0f, 0x51, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_sqrtss_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf3, 0x0f, 0x51,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_sqrtss_reg_memindex(inst, dreg, basereg, disp, indexreg, shift)
\
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf3, 0x0f, 0x51,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+
+/*
+ * Macros for the logical operations with packed double precision values.
+ */
+#define x86_64_plopd_reg_reg(inst, op, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, (op), (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_plopd_reg_regp(inst, op, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, (op), (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_plopd_reg_mem(inst, op, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, (op), (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_plopd_reg_membase(inst, op, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, (op),
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_plopd_reg_memindex(inst, op, dreg, basereg, disp, indexreg,
shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, (op), (dreg),
(basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * addsd: Add scalar double precision float values
+ */
+#define x86_64_addsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x58, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_addsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x58, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_addsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x58, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_addsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x58,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_addsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x58,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * subsd: Subtract scalar double precision float values
+ */
+#define x86_64_subsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5c, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_subsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5c, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_subsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5c, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_subsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5c,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_subsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5c,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * mulsd: Multiply scalar double precision float values
+ */
+#define x86_64_mulsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x59, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_mulsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x59, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_mulsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x59, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_mulsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x59,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_mulsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x59,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * divsd: Divide scalar double precision float values
+ */
+#define x86_64_divsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5e, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_divsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5e, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_divsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5e, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_divsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5e,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_divsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5e,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * andpd: And
+ */
+#define x86_64_andpd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x54, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_andpd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x54, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_andpd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x54, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_andpd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x54,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_andpd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x54,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * orpd: Or
+ */
+#define x86_64_orpd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x56, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_orpd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x56, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_orpd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x56, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_orpd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x56,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_orpd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x56,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * xorpd: Xor
+ */
+#define x86_64_xorpd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0x66, 0x0f, 0x57, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_xorpd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0x66, 0x0f, 0x57, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_xorpd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0x66, 0x0f, 0x57, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_xorpd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0x66, 0x0f, 0x57,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_xorpd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0x66, 0x0f, 0x57,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * maxsd: Maximum value
+ */
+#define x86_64_maxsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5f, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_maxsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5f, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_maxsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5f, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_maxsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5f,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_maxsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5f,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * minsd: Minimum value
+ */
+#define x86_64_minsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x5d, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_minsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x5d, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_minsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x5d, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_minsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x5d,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_minsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift) \
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x5d,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * sqrtsd: Square root
+ */
+#define x86_64_sqrtsd_reg_reg(inst, dreg, sreg) \
+ do { \
+ x86_64_p1_xmm2_reg_reg_size((inst), 0xf2, 0x0f, 0x51, (dreg),
(sreg), 0); \
+ } while(0)
+
+#define x86_64_sqrtsd_reg_regp(inst, dreg, sregp) \
+ do { \
+ x86_64_p1_xmm2_reg_regp_size((inst), 0xf2, 0x0f, 0x51, (dreg),
(sregp), 0); \
+ } while(0)
+
+#define x86_64_sqrtsd_reg_mem(inst, dreg, mem) \
+ do { \
+ x86_64_p1_xmm2_reg_mem_size((inst), 0xf2, 0x0f, 0x51, (dreg),
(mem), 0); \
+ } while(0)
+
+#define x86_64_sqrtsd_reg_membase(inst, dreg, basereg, disp) \
+ do { \
+ x86_64_p1_xmm2_reg_membase_size((inst), 0xf2, 0x0f, 0x51,
(dreg), (basereg), (disp), 0); \
+ } while(0)
+
+#define x86_64_sqrtsd_reg_memindex(inst, dreg, basereg, disp, indexreg, shift)
\
+ do { \
+ x86_64_p1_xmm2_reg_memindex_size((inst), 0xf2, 0x0f, 0x51,
(dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ } while(0)
+
+/*
+ * Rounding: Available in SSE 4.1 only
+ */
+
+/*
+ * roundss: Round scalar single precision value
+ */
+#define x86_64_roundss_reg_reg(inst, dreg, sreg, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_reg_size((inst), 0x66, 0x0f, 0x3a, 0x0a,
(dreg), (sreg), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundss_reg_regp(inst, dreg, sregp, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_regp_size((inst), 0x66, 0x0f, 0x3a, 0x0a,
(dreg), (sregp), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundss_reg_mem(inst, dreg, mem, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_mem_size((inst), 0x66, 0x0f, 0x3a, 0x0a,
(dreg), (mem), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundss_reg_membase(inst, dreg, basereg, disp, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_membase_size((inst), 0x66, 0x0f, 0x3a, 0x0a,
(dreg), (basereg), (disp), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundss_reg_memindex(inst, dreg, basereg, disp, indexreg,
shift, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_memindex_size((inst), 0x66, 0x0f, 0x3a,
0x0a, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+/*
+ * roundsd: Round scalar double precision value
+ */
+#define x86_64_roundsd_reg_reg(inst, dreg, sreg, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_reg_size((inst), 0x66, 0x0f, 0x3a, 0x0b,
(dreg), (sreg), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundsd_reg_regp(inst, dreg, sregp, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_regp_size((inst), 0x66, 0x0f, 0x3a, 0x0b,
(dreg), (sregp), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundsd_reg_mem(inst, dreg, mem, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_mem_size((inst), 0x66, 0x0f, 0x3a, 0x0b,
(dreg), (mem), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundsd_reg_membase(inst, dreg, basereg, disp, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_membase_size((inst), 0x66, 0x0f, 0x3a, 0x0b,
(dreg), (basereg), (disp), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+#define x86_64_roundsd_reg_memindex(inst, dreg, basereg, disp, indexreg,
shift, mode) \
+ do { \
+ x86_64_p1_xmm3_reg_memindex_size((inst), 0x66, 0x0f, 0x3a,
0x0b, (dreg), (basereg), (disp), (indexreg), (shift), 0); \
+ x86_imm_emit8((inst), (mode)); \
+ } while(0)
+
+/*
+ * Clear xmm register
+ */
+#define x86_64_clear_xreg(inst, reg) \
+ do { \
+ x86_64_xorps_reg_reg((inst), (reg), (reg)); \
+ } while(0)
+
+/*
+ * fpu instructions
+ */
+
+/*
+ * fld
+ */
+
+#define x86_64_fld_regp_size(inst, sregp, size) \
+ do { \
+ x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \
+ switch(size) \
+ { \
+ case 4: \
+ { \
*(inst)++ = (unsigned char)0xd9; \
x86_64_regp_emit((inst), 0, (sregp)); \
} \
@@ -4349,7 +5121,7 @@
/*
* fild: Load an integer and convert it to long double
*/
-#define x86_fild_mem_size(inst, mem, size) \
+#define x86_64_fild_mem_size(inst, mem, size) \
do { \
switch(size) \
{ \
@@ -4374,7 +5146,7 @@
} \
} while (0)
-#define x86_fild_membase_size(inst, mem, size) \
+#define x86_64_fild_membase_size(inst, basereg, disp, size) \
do { \
x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
switch(size) \
@@ -4486,7 +5258,6 @@
/*
* fstp: store top fpu register to memory and pop it from the fpu stack
*/
-
#define x86_64_fstp_regp_size(inst, sregp, size) \
do { \
x86_64_rex_emit((inst), 0, 0, 0, (sregp)); \
@@ -4591,7 +5362,7 @@
} while(0)
/*
- * Convert long double to integer
+ * fistp: Convert long double to integer
*/
#define x86_64_fistp_mem_size(inst, mem, size) \
do { \
@@ -4618,8 +5389,35 @@
} \
} while(0)
+#define x86_64_fistp_regp_size(inst, dregp, size) \
+ do { \
+ x86_64_rex_emit((inst), 0, 0, 0, (dregp)); \
+ switch((size)) \
+ { \
+ case 2: \
+ { \
+ *(inst)++ = (unsigned char)0xdf; \
+ x86_64_regp_emit((inst), 3, (dregp)); \
+ } \
+ break; \
+ case 4: \
+ { \
+ *(inst)++ = (unsigned char)0xdb; \
+ x86_64_regp_emit((inst), 3, (dregp)); \
+ } \
+ break; \
+ case 8: \
+ { \
+ *(inst)++ = (unsigned char)0xdf; \
+ x86_64_regp_emit((inst), 7, (dregp)); \
+ } \
+ break; \
+ } \
+ } while(0)
+
#define x86_64_fistp_membase_size(inst, basereg, disp, size) \
do { \
+ x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
switch((size)) \
{ \
case 2: \
@@ -4644,13 +5442,114 @@
} while(0)
/*
+ * frndint: Round st(0) to integer according to the rounding mode set in the
fpu control word.
+ */
+#define x86_64_frndint(inst) \
+ do { \
+ *(inst)++ = (unsigned char)0xd9; \
+ *(inst)++ = (unsigned char)0xfc; \
+ } while(0)
+
+/*
+ * fisttp: Convert long double to integer using truncation as rounding mode.
Available in SSE 3 only.
+ */
+#define x86_64_fisttp_regp_size(inst, dregp, size) \
+ do { \
+ x86_64_rex_emit((inst), 0, 0, 0, (dregp)); \
+ switch((size)) \
+ { \
+ case 2: \
+ { \
+ *(inst)++ = (unsigned char)0xdf; \
+ x86_64_regp_emit((inst), 1, (dregp)); \
+ } \
+ break; \
+ case 4: \
+ { \
+ *(inst)++ = (unsigned char)0xdb; \
+ x86_64_regp_emit((inst), 1, (dregp)); \
+ } \
+ break; \
+ case 8: \
+ { \
+ *(inst)++ = (unsigned char)0xdd; \
+ x86_64_regp_emit((inst), 1, (dregp)); \
+ } \
+ break; \
+ } \
+ } while(0)
+
+#define x86_64_fisttp_mem_size(inst, mem, size) \
+ do { \
+ switch((size)) \
+ { \
+ case 2: \
+ { \
+ *(inst)++ = (unsigned char)0xdf; \
+ x86_64_mem_emit((inst), 1, (mem)); \
+ } \
+ break; \
+ case 4: \
+ { \
+ *(inst)++ = (unsigned char)0xdb; \
+ x86_64_mem_emit((inst), 1, (mem)); \
+ } \
+ break; \
+ case 8: \
+ { \
+ *(inst)++ = (unsigned char)0xdd; \
+ x86_64_mem_emit((inst), 1, (mem)); \
+ } \
+ break; \
+ } \
+ } while(0)
+
+#define x86_64_fisttp_membase_size(inst, basereg, disp, size) \
+ do { \
+ x86_64_rex_emit((inst), 0, 0, 0, (basereg)); \
+ switch((size)) \
+ { \
+ case 2: \
+ { \
+ *(inst)++ = (unsigned char)0xdf; \
+ x86_64_membase_emit((inst), 1, (basereg),
(disp)); \
+ } \
+ break; \
+ case 4: \
+ { \
+ *(inst)++ = (unsigned char)0xdb; \
+ x86_64_membase_emit((inst), 1, (basereg),
(disp)); \
+ } \
+ break; \
+ case 8: \
+ { \
+ *(inst)++ = (unsigned char)0xdd; \
+ x86_64_membase_emit((inst), 1, (basereg),
(disp)); \
+ } \
+ break; \
+ } \
+ } while(0)
+
+#define x86_64_fabs(inst) \
+ do { \
+ *(inst)++ = (unsigned char)0xd9; \
+ *(inst)++ = (unsigned char)0xe1; \
+ } while(0)
+
+#define x86_64_fchs(inst) \
+ do { \
+ *(inst)++ = (unsigned char)0xd9; \
+ *(inst)++ = (unsigned char)0xe0; \
+ } while(0)
+
+/*
* Store fpu control word after checking for pending unmasked fpu exceptions
*/
#define x86_64_fnstcw(inst, mem) \
do { \
*(inst)++ = (unsigned char)0xd9; \
x86_64_mem_emit((inst), 7, (mem)); \
- } while (0)
+ } while(0)
#define x86_64_fnstcw_membase(inst, basereg, disp) \
do { \
Index: jit/jit-rules-x86-64.c
===================================================================
RCS file: /cvsroot/dotgnu-pnet/libjit/jit/jit-rules-x86-64.c,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -b -r1.2 -r1.3
--- jit/jit-rules-x86-64.c 24 Mar 2008 12:42:51 -0000 1.2
+++ jit/jit-rules-x86-64.c 13 Apr 2008 17:55:36 -0000 1.3
@@ -104,6 +104,22 @@
#define HAVE_RED_ZONE 1
/*
+ * Some declarations that should be replaced by querying the cpuinfo
+ * if generating code for the current cpu.
+ */
+/*
+#define HAVE_X86_SSE_4_1 0
+#define HAVE_X86_SSE_4 0
+#define HAVE_X86_SSE_3 0
+#define HAVE_X86_FISTTP 0
+*/
+
+#define TODO() \
+do { \
+ fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
+} while(0)
+
+/*
* Setup or teardown the x86 code output process.
*/
#define jit_cache_setup_output(needed) \
@@ -165,6 +181,9 @@
static _jit_regclass_t *x86_64_reg; /* X86_64 general purpose
registers */
static _jit_regclass_t *x86_64_creg; /* X86_64 call clobbered general */
/* purpose registers */
+static _jit_regclass_t *x86_64_dreg; /* general purpose registers that */
+
/* can be used as divisor */
+
/* (all but %rax and %rdx) */
static _jit_regclass_t *x86_64_rreg; /* general purpose registers not used*/
/* for returning values */
static _jit_regclass_t *x86_64_sreg; /* general purpose registers that can*/
@@ -196,6 +215,16 @@
X86_64_REG_R9, X86_64_REG_R10,
X86_64_REG_R11);
+ /* register class for divisors */
+ x86_64_dreg = _jit_regclass_create(
+ "dreg", JIT_REG_WORD | JIT_REG_LONG, 12,
+ X86_64_REG_RCX, X86_64_REG_RBX,
+ X86_64_REG_RSI, X86_64_REG_RDI,
+ X86_64_REG_R8, X86_64_REG_R9,
+ X86_64_REG_R10, X86_64_REG_R11,
+ X86_64_REG_R12, X86_64_REG_R13,
+ X86_64_REG_R14, X86_64_REG_R15);
+
/* register class with all registers not used for returning values */
x86_64_rreg = _jit_regclass_create(
"rreg", JIT_REG_WORD | JIT_REG_LONG, 12,
@@ -341,6 +370,452 @@
}
/*
+ * Do a logical xmm operation with packed float32 values
+ */
+static int
+_jit_plops_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
+ X86_64_XMM_PLOP opc, int reg, void
*packed_value)
+{
+ void *ptr;
+ jit_nint offset;
+ unsigned char *inst;
+
+ inst = *inst_ptr;
+ ptr = _jit_cache_alloc(&(gen->posn), 16);
+ if(!ptr)
+ {
+ return 0;
+ }
+ jit_memcpy(ptr, packed_value, 16);
+
+ /* calculate the offset for membase addressing */
+ offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 8 : 7));
+ if((offset >= jit_min_int) && (offset <= jit_max_int))
+ {
+ /* We can use RIP relative addressing here */
+ x86_64_plops_reg_membase(inst, opc, reg, X86_64_RIP, offset);
+ *inst_ptr = inst;
+ return 1;
+ }
+ /* Check if mem addressing can be used */
+ if(((jit_nint)ptr >= jit_min_int) &&
+ ((jit_nint)ptr <= jit_max_int))
+ {
+ /* We can use absolute addressing */
+ x86_64_plops_reg_mem(inst, opc, reg, (jit_nint)ptr);
+ *inst_ptr = inst;
+ return 1;
+ }
+ /* We have to use an extra general register */
+ TODO();
+ return 0;
+}
+
+/*
+ * Do a logical xmm operation with packed float64 values
+ */
+static int
+_jit_plopd_reg_imm(jit_gencode_t gen, unsigned char **inst_ptr,
+ X86_64_XMM_PLOP opc, int reg, void
*packed_value)
+{
+ void *ptr;
+ jit_nint offset;
+ unsigned char *inst;
+
+ inst = *inst_ptr;
+ ptr = _jit_cache_alloc(&(gen->posn), 16);
+ if(!ptr)
+ {
+ return 0;
+ }
+ jit_memcpy(ptr, packed_value, 16);
+
+ /* calculate the offset for membase addressing */
+ offset = (jit_nint)ptr - ((jit_nint)inst + (reg > 7 ? 9 : 8));
+ if((offset >= jit_min_int) && (offset <= jit_max_int))
+ {
+ /* We can use RIP relative addressing here */
+ x86_64_plopd_reg_membase(inst, opc, reg, X86_64_RIP, offset);
+ *inst_ptr = inst;
+ return 1;
+ }
+ /* Check if mem addressing can be used */
+ if(((jit_nint)ptr >= jit_min_int) &&
+ ((jit_nint)ptr <= jit_max_int))
+ {
+ /* We can use absolute addressing */
+ x86_64_plopd_reg_mem(inst, opc, reg, (jit_nint)ptr);
+ *inst_ptr = inst;
+ return 1;
+ }
+ /* We have to use an extra general register */
+ TODO();
+ return 0;
+}
+
+/*
+ * Helpers for saving and setting roundmode in the fpu control word
+ * and restoring it afterwards.
+ * The rounding mode bits are bit 10 and 11 in the fpu control word.
+ * sp_offset is the start offset of a temporary eight byte block.
+ */
+static unsigned char *
+_x86_64_set_fpu_roundmode(unsigned char *inst, int scratch_reg,
+ int sp_offset,
X86_64_ROUNDMODE mode)
+{
+ int fpcw_save_offset = sp_offset + 4;
+ int fpcw_new_offset = sp_offset;
+ int round_mode = ((int)mode) << 10;
+ int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 10);
+
+ /* store FPU control word */
+ x86_64_fnstcw_membase(inst, X86_64_RSP, fpcw_save_offset);
+ /* load the value into the scratch register */
+ x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP,
fpcw_save_offset, 2);
+ /* Set the rounding mode */
+ if(mode != X86_ROUND_ZERO)
+ {
+ /* Not all bits are set in the mask so we have to clear it
first */
+ x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 2);
+ }
+ x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 2);
+ /* Store the new round mode */
+ x86_64_mov_membase_reg_size(inst, X86_64_RSP, fpcw_new_offset,
scratch_reg, 2);
+ /* Now load the new control word */
+ x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_new_offset);
+
+ return inst;
+}
+
+static unsigned char *
+_x86_64_restore_fpcw(unsigned char *inst, int sp_offset)
+{
+ int fpcw_save_offset = sp_offset + 4;
+
+ /* Now load the saved control word */
+ x86_64_fldcw_membase(inst, X86_64_RSP, fpcw_save_offset);
+
+ return inst;
+}
+
+/*
+ * Helpers for saving and setting roundmode in the mxcsr register and
+ * restoring it afterwards.
+ * The rounding mode bits are bit 13 and 14 in the mxcsr register.
+ * sp_offset is the start offset of a temporary eight byte block.
+ */
+static unsigned char *
+_x86_64_set_xmm_roundmode(unsigned char *inst, int scratch_reg,
+ int sp_offset,
X86_64_ROUNDMODE mode)
+{
+ int mxcsr_save_offset = sp_offset + 4;
+ int mxcsr_new_offset = sp_offset;
+ int round_mode = ((int)mode) << 13;
+ int round_mode_mask = ~(((int)X86_ROUND_ZERO) << 13);
+
+ /* save the mxcsr register */
+ x86_64_stmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
+ /* Load the contents of the mxcsr register into the scratch register */
+ x86_64_mov_reg_membase_size(inst, scratch_reg, X86_64_RSP,
mxcsr_save_offset, 4);
+ /* Set the rounding mode */
+ if(mode != X86_ROUND_ZERO)
+ {
+ /* Not all bits are set in the mask so we have to clear it
first */
+ x86_64_and_reg_imm_size(inst, scratch_reg, round_mode_mask, 4);
+ }
+ x86_64_or_reg_imm_size(inst, scratch_reg, round_mode, 4);
+ /* Store the new round mode */
+ x86_64_mov_membase_reg_size(inst, X86_64_RSP, mxcsr_new_offset,
scratch_reg, 4);
+ /* and load it to the mxcsr register */
+ x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_new_offset);
+
+ return inst;
+}
+
+static unsigned char *
+_x86_64_restore_mxcsr(unsigned char *inst, int sp_offset)
+{
+ int mxcsr_save_offset = sp_offset + 4;
+
+ /* restore the mxcsr register */
+ x86_64_ldmxcsr_membase(inst, X86_64_RSP, mxcsr_save_offset);
+
+ return inst;
+}
+
+/*
+ * perform rounding of scalar single precision values.
+ * We have to use the fpu where sse4.1 is not supported.
+ */
+static unsigned char *
+x86_64_rounds_reg_reg(unsigned char *inst, int dreg, int sreg,
+ int scratch_reg, X86_64_ROUNDMODE
mode)
+{
+#ifdef HAVE_RED_ZONE
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
+#else
+ /* Copy the xmm register to the stack */
+ x86_64_movss_membase_reg(inst, X86_64_RSP, -16, sreg);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
+ /* Load the value to the fpu */
+ x86_64_fld_membase_size(inst, X86_64_RSP, -16, 4);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, -8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
+ x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
+#endif
+#else
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundss_reg_reg(inst, dreg, sreg, mode);
+#else
+ /* allocate space on the stack for two ints and one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
+ /* Copy the xmm register to the stack */
+ x86_64_movss_regp_reg(inst, X86_64_RSP, sreg);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
+ /* Load the value to the fpu */
+ x86_64_fld_regp_size(inst, X86_64_RSP, 4);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, 8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
+ x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
+#endif
+#endif
+ return inst;
+}
+
+static unsigned char *
+x86_64_rounds_reg_membase(unsigned char *inst, int dreg, int offset,
+ int scratch_reg,
X86_64_ROUNDMODE mode)
+{
+#ifdef HAVE_RED_ZONE
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
+#else
+ /* Load the value to the fpu */
+ x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, -8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 4);
+ x86_64_movss_reg_membase(inst, dreg, X86_64_RSP, -16);
+#endif
+#else
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundss_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
+#else
+ /* allocate space on the stack for two ints and one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
+ /* Load the value to the fpu */
+ x86_64_fld_membase_size(inst, X86_64_RBP, offset, 4);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, 8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
+ x86_64_movss_reg_regp(inst, dreg, X86_64_RSP);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
+#endif
+#endif
+ return inst;
+}
+
+/*
+ * perform rounding of scalar double precision values.
+ * We have to use the fpu where sse4.1 is not supported.
+ */
+static unsigned char *
+x86_64_roundd_reg_reg(unsigned char *inst, int dreg, int sreg,
+ int scratch_reg, X86_64_ROUNDMODE
mode)
+{
+#ifdef HAVE_RED_ZONE
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
+#else
+ /* Copy the xmm register to the stack */
+ x86_64_movsd_membase_reg(inst, X86_64_RSP, -16, sreg);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
+ /* Load the value to the fpu */
+ x86_64_fld_membase_size(inst, X86_64_RSP, -16, 8);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, -8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
+ x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
+#endif
+#else
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundsd_reg_reg(inst, dreg, sreg, mode);
+#else
+ /* allocate space on the stack for two ints and one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
+ /* Copy the xmm register to the stack */
+ x86_64_movsd_regp_reg(inst, X86_64_RSP, sreg);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
+ /* Load the value to the fpu */
+ x86_64_fld_regp_size(inst, X86_64_RSP, 8);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, 8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
+ x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
+#endif
+#endif
+ return inst;
+}
+
+static unsigned char *
+x86_64_roundd_reg_membase(unsigned char *inst, int dreg, int offset,
+ int scratch_reg,
X86_64_ROUNDMODE mode)
+{
+#ifdef HAVE_RED_ZONE
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
+#else
+ /* Load the value to the fpu */
+ x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, -8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_membase_size(inst, X86_64_RSP, -16, 8);
+ x86_64_movsd_reg_membase(inst, dreg, X86_64_RSP, -16);
+#endif
+#else
+#ifdef HAVE_X86_SSE_4_1
+ x86_64_roundsd_reg_membase(inst, dreg, X86_64_RBP, offset, mode);
+#else
+ /* allocate space on the stack for two ints and one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
+ /* Load the value to the fpu */
+ x86_64_fld_membase_size(inst, X86_64_RBP, offset, 8);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, mode);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, 8);
+ /* and move st(0) to the destination register */
+ x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
+ x86_64_movsd_reg_regp(inst, dreg, X86_64_RSP);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
+#endif
+#endif
+ return inst;
+}
+
+/*
+ * Round the value in St(0) to integer according to the rounding
+ * mode specified.
+ */
+static unsigned char *
+x86_64_roundnf(unsigned char *inst, int scratch_reg, X86_64_ROUNDMODE mode)
+{
+#ifdef HAVE_RED_ZONE
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, mode);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, -8);
+#else
+ /* allocate space on the stack for two ints and one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 0, mode);
+ /* And round it to integer */
+ x86_64_frndint(inst);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, 0);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+#endif
+ return inst;
+}
+
+/*
+ * Round the value in the fpu register st(0) to integer and
+ * store the value in dreg. St(0) is popped from the fpu stack.
+ */
+static unsigned char *
+x86_64_nfloat_to_int(unsigned char *inst, int dreg, int scratch_reg, int size)
+{
+#ifdef HAVE_RED_ZONE
+#ifdef HAVE_X86_FISTTP
+ /* convert float to int */
+ x86_64_fisttp_membase_size(inst, X86_64_RSP, -8, 4);
+ /* move result to the destination */
+ x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -8, 4);
+#else
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, -8, X86_ROUND_ZERO);
+ /* And round the value in st(0) to integer and store it on the stack */
+ x86_64_fistp_membase_size(inst, X86_64_RSP, -16, size);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, -8);
+ /* and load the integer to the destination register */
+ x86_64_mov_reg_membase_size(inst, dreg, X86_64_RSP, -16, size);
+#endif
+#else
+#ifdef HAVE_X86_FISTTP
+ /* allocate space on the stack for one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+ /* convert float to int */
+ x86_64_fisttp_regp_size(inst, X86_64_RSP, 4);
+ /* move result to the destination */
+ x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, 4);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+#else
+ /* allocate space on the stack for 2 ints and one long value */
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 16, 8);
+ /* Set the fpu round mode */
+ inst = _x86_64_set_fpu_roundmode(inst, scratch_reg, 8, X86_ROUND_ZERO);
+ /* And round the value in st(0) to integer and store it on the stack */
+ x86_64_fistp_regp_size(inst, X86_64_RSP, size);
+ /* restore the fpu control word */
+ inst = _x86_64_restore_fpcw(inst, 8);
+ /* and load the integer to the destination register */
+ x86_64_mov_reg_regp_size(inst, dreg, X86_64_RSP, size);
+ /* restore the stack pointer */
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 16, 8);
+#endif
+#endif
+ return inst;
+}
+
+/*
* Call a function
*/
static unsigned char *
@@ -1049,9 +1524,16 @@
{
int xmm_reg =
_jit_reg_info[reg].cpu_reg;
+ if(float32_value == (jit_float32) 0.0)
+ {
+ x86_64_clear_xreg(inst,
xmm_reg);
+ }
+ else
+ {
_jit_xmm1_reg_imm_size_float32(gen,
&inst, XMM1_MOV,
xmm_reg, &float32_value);
}
+ }
else
{
if(float32_value == (jit_float32) 0.0)
@@ -1069,7 +1551,7 @@
ptr =
_jit_cache_alloc(&(gen->posn), sizeof(jit_float32));
jit_memcpy(ptr, &float32_value,
sizeof(float32_value));
- offset = (jit_nint)ptr -
((jit_nint)inst + 7);
+ offset = (jit_nint)ptr -
((jit_nint)inst + 6);
if((offset >= jit_min_int) &&
(offset <= jit_max_int))
{
/* We can use RIP
relative addressing here */
@@ -1084,7 +1566,7 @@
else
{
/* We have to use an
extra general register */
- /* TODO */
+ TODO();
}
}
}
@@ -1111,9 +1593,16 @@
{
int xmm_reg =
_jit_reg_info[reg].cpu_reg;
+ if(float64_value == (jit_float64) 0.0)
+ {
+ x86_64_clear_xreg(inst,
xmm_reg);
+ }
+ else
+ {
_jit_xmm1_reg_imm_size_float64(gen,
&inst, XMM1_MOV,
xmm_reg, &float64_value);
}
+ }
else
{
if(float64_value == (jit_float64) 0.0)
@@ -1131,7 +1620,7 @@
ptr =
_jit_cache_alloc(&(gen->posn), sizeof(jit_float64));
jit_memcpy(ptr, &float64_value,
sizeof(float64_value));
- offset = (jit_nint)ptr -
((jit_nint)inst + 7);
+ offset = (jit_nint)ptr -
((jit_nint)inst + 6);
if((offset >= jit_min_int) &&
(offset <= jit_max_int))
{
/* We can use RIP
relative addressing here */
@@ -1146,7 +1635,7 @@
else
{
/* We have to use an
extra general register */
- /* TODO */
+ TODO();
}
}
}
@@ -1192,7 +1681,7 @@
else
{
/* We have to use an extra
general register */
- /* TODO */
+ TODO();
}
}
else
@@ -1212,7 +1701,7 @@
ptr =
_jit_cache_alloc(&(gen->posn), sizeof(jit_nfloat));
jit_memcpy(ptr, &nfloat_value,
sizeof(nfloat_value));
- offset = (jit_nint)ptr -
((jit_nint)inst + 7);
+ offset = (jit_nint)ptr -
((jit_nint)inst + 6);
if((offset >= jit_min_int) &&
(offset <= jit_max_int))
{
/* We can use RIP
relative addressing here */
@@ -1241,7 +1730,7 @@
else
{
/* We have to use an
extra general register */
- /* TODO */
+ TODO();
}
}
}
@@ -2315,11 +2804,6 @@
return inst;
}
-#define TODO() \
- do { \
- fprintf(stderr, "TODO at %s, %d\n", __FILE__, (int)__LINE__); \
- } while (0)
-
void
_jit_gen_insn(jit_gencode_t gen, jit_function_t func,
jit_block_t block, jit_insn_t insn)
Index: jit/jit-rules-x86-64.ins
===================================================================
RCS file: /cvsroot/dotgnu-pnet/libjit/jit/jit-rules-x86-64.ins,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -r1.3 -r1.4
--- jit/jit-rules-x86-64.ins 30 Mar 2008 15:05:14 -0000 1.3
+++ jit/jit-rules-x86-64.ins 13 Apr 2008 17:55:36 -0000 1.4
@@ -22,6 +22,7 @@
%regclass reg x86_64_reg
%regclass creg x86_64_creg
+%regclass dreg x86_64_dreg
%regclass rreg x86_64_rreg
%regclass sreg x86_64_sreg
%regclass freg x86_64_freg
@@ -91,62 +92,106 @@
x86_64_mov_reg_reg_size(inst, $1, $2, 4);
}
+JIT_OP_INT_TO_NFLOAT:
+ [=freg, local] -> {
+ x86_64_fild_membase_size(inst, X86_64_RBP, $2, 4);
+ }
+ [=freg, reg] -> {
+#ifdef HAVE_RED_ZONE
+ x86_64_mov_membase_reg_size(inst, X86_64_RSP, -8, $2, 4);
+ x86_64_fild_membase_size(inst, X86_64_RSP, -8, 4);
+#else
+ x86_64_push_reg_size(inst, $2, 8);
+ x86_64_fild_membase_size(inst, X86_64_RSP, 0, 4);
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, sizeof(jit_nint), 8);
+#endif
+ }
+
+JIT_OP_LONG_TO_NFLOAT:
+ [=freg, local] -> {
+ x86_64_fild_membase_size(inst, X86_64_RBP, $2, 8);
+ }
+ [=freg, reg] -> {
+#ifdef HAVE_RED_ZONE
+ x86_64_mov_membase_reg_size(inst, X86_64_RSP, -8, $2, 8);
+ x86_64_fild_membase_size(inst, X86_64_RSP, -8, 8);
+#else
+ x86_64_push_reg_size(inst, $2, 8);
+ x86_64_fild_membase_size(inst, X86_64_RSP, 0, 8);
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, sizeof(jit_nint), 8);
+#endif
+ }
+
JIT_OP_NFLOAT_TO_INT: stack
- [=reg, freg] -> {
- /* allocate space on the stack for 2 shorts and 1 int */
+ [=reg, freg, scratch reg] -> {
+ inst = x86_64_nfloat_to_int(inst, $1, $3, 4);
+ }
+
+JIT_OP_NFLOAT_TO_LONG: stack
+ [=reg, freg, scratch reg] -> {
+ inst = x86_64_nfloat_to_int(inst, $1, $3, 8);
+ }
+
+JIT_OP_FLOAT32_TO_NFLOAT:
+ [=freg, local] -> {
+ x86_64_fld_membase_size(inst, X86_64_RBP, $2, 4);
+ }
+ [=freg, xreg] -> {
+#ifdef HAVE_RED_ZONE
+ x86_64_movss_membase_reg(inst, X86_64_RSP, -8, $2);
+ x86_64_fld_membase_size(inst, X86_64_RSP, -8, 4);
+#else
x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
- /* store FPU control word */
- x86_64_fnstcw_membase(inst, X86_64_RSP, 0);
- /* set "round toward zero" mode */
- x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 0, 2);
- x86_64_or_reg_imm_size(inst, $1, 0xc00, 2);
- x86_64_mov_membase_reg_size(inst, X86_64_RSP, 2, $1, 2);
- x86_64_fldcw_membase(inst, X86_64_RSP, 2);
- /* convert float to int */
- x86_64_fistp_membase_size(inst, X86_64_RSP, 4, 4);
- /* restore FPU control word */
- x86_64_fldcw_membase(inst, X86_64_RSP, 0);
- /* move result to the destination */
- x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 4, 4);
- /* restore the stack */
+ x86_64_movss_regp_reg(inst, X86_64_RSP, $2);
+ x86_64_fld_regp_size(inst, X86_64_RSP, 4);
x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+#endif
}
-JIT_OP_NFLOAT_TO_LONG: stack
- [=reg, freg] -> {
- /* allocate space on the stack for 2 shorts and 1 long */
- x86_64_sub_reg_imm_size(inst, X86_64_RSP, 12, 8);
- /* store FPU control word */
- x86_64_fnstcw_membase(inst, X86_64_RSP, 0);
- /* set "round toward zero" mode */
- x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 0, 2);
- x86_64_or_reg_imm_size(inst, $1, 0xc00, 2);
- x86_64_mov_membase_reg_size(inst, X86_64_RSP, 2, $1, 2);
- x86_64_fldcw_membase(inst, X86_64_RSP, 2);
- /* convert float to long */
- x86_64_fistp_membase_size(inst, X86_64_RSP, 4, 8);
- /* restore FPU control word */
- x86_64_fldcw_membase(inst, X86_64_RSP, 0);
- /* move result to the destination */
- x86_64_mov_reg_membase_size(inst, $1, X86_64_RSP, 4, 8);
- /* restore the stack */
- x86_64_add_reg_imm_size(inst, X86_64_RSP, 12, 8);
+JIT_OP_FLOAT64_TO_NFLOAT:
+ [=freg, local] -> {
+ x86_64_fld_membase_size(inst, X86_64_RBP, $2, 8);
+ }
+ [=freg, xreg] -> {
+#ifdef HAVE_RED_ZONE
+ x86_64_movsd_membase_reg(inst, X86_64_RSP, -8, $2);
+ x86_64_fld_membase_size(inst, X86_64_RSP, -8, 8);
+#else
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+ x86_64_movsd_regp_reg(inst, X86_64_RSP, $2);
+ x86_64_fld_regp_size(inst, X86_64_RSP, 8);
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+#endif
}
JIT_OP_NFLOAT_TO_FLOAT32: stack
[=xreg, freg] -> {
+#ifdef HAVE_RED_ZONE
/* Avoid modifying the stack pointer by simply using negative */
/* offsets here. */
x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 4);
x86_64_movss_reg_membase(inst, $1, X86_64_RSP, -8);
+#else
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+ x86_64_fstp_regp_size(inst, X86_64_RSP, 4);
+ x86_64_movss_reg_regp(inst, $1, X86_64_RSP);
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+#endif
}
JIT_OP_NFLOAT_TO_FLOAT64: stack
[=xreg, freg] -> {
+#ifdef HAVE_RED_ZONE
/* Avoid modifying the stack pointer by simply using negative */
/* offsets here. */
x86_64_fstp_membase_size(inst, X86_64_RSP, -8, 8);
x86_64_movsd_reg_membase(inst, $1, X86_64_RSP, -8);
+#else
+ x86_64_sub_reg_imm_size(inst, X86_64_RSP, 8, 8);
+ x86_64_fstp_regp_size(inst, X86_64_RSP, 8);
+ x86_64_movsd_reg_regp(inst, $1, X86_64_RSP);
+ x86_64_add_reg_imm_size(inst, X86_64_RSP, 8, 8);
+#endif
}
/*
@@ -894,12 +939,12 @@
x86_64_cmov_reg_reg_size(inst, X86_CC_S, $1, $3, 1, 4);
x86_64_sar_reg_imm_size(inst, $1, shift, 4);
}
- [reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 4);
x86_64_cdq(inst);
x86_64_idiv_reg_size(inst, $3, 4);
}
- [reg("rax"), reg, scratch reg("rdx")] -> {
+ [reg("rax"), dreg, scratch reg("rdx")] -> {
jit_int min_int = jit_min_int;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -937,12 +982,12 @@
}
x86_64_shr_reg_imm_size(inst, $1, shift, 4);
}
- [reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 4);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $3, 4);
}
- [reg("rax"), reg, scratch reg("rdx")] -> {
+ [reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $2, $2, 4);
@@ -974,12 +1019,12 @@
x86_patch(patch, inst);
x86_64_clear_reg(inst, $1);
}
- [=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 4);
x86_64_cdq(inst);
x86_64_idiv_reg_size(inst, $4, 4);
}
- [=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
jit_int min_int = jit_min_int;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -1009,16 +1054,16 @@
[reg, imm, if("$2 == 1")] -> {
x86_64_clear_reg(inst, $1);
}
- [reg, imm, if("(((jit_nuint)$2) & (((jit_nuint)$2) - 1)) == 0")] -> {
+ [reg, imm, if("($2 & ($2 - 1)) == 0")] -> {
/* x & (x - 1) is equal to zero if x is a power of 2 */
x86_64_and_reg_imm_size(inst, $1, $2 - 1, 4);
}
- [=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 4);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $4, 4);
}
- [=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $3, $3, 4);
@@ -1170,12 +1215,12 @@
x86_64_cmov_reg_reg_size(inst, X86_CC_S, $1, $3, 1, 8);
x86_64_sar_reg_imm_size(inst, $1, shift, 8);
}
- [reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 8);
x86_64_cqo(inst);
x86_64_idiv_reg_size(inst, $3, 8);
}
- [reg("rax"), reg, scratch reg("rdx")] -> {
+ [reg("rax"), dreg, scratch reg("rdx")] -> {
jit_long min_long = jit_min_long;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -1214,12 +1259,12 @@
}
x86_64_shr_reg_imm_size(inst, $1, shift, 8);
}
- [reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $3, $2, 8);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $3, 8);
}
- [reg("rax"), reg, scratch reg("rdx")] -> {
+ [reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $2, $2, 8);
@@ -1251,12 +1296,12 @@
x86_patch(patch, inst);
x86_64_clear_reg(inst, $1);
}
- [=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 8);
x86_64_cqo(inst);
x86_64_idiv_reg_size(inst, $4, 8);
}
- [=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
jit_long min_long = jit_min_long;
unsigned char *patch, *patch2;
#ifndef JIT_USE_SIGNALS
@@ -1301,12 +1346,12 @@
x86_64_and_reg_reg_size(inst, $1, $3, 8);
}
}
- [=reg("rdx"), *reg("rax"), imm, scratch reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), imm, scratch dreg, scratch reg("rdx")] -> {
x86_64_mov_reg_imm_size(inst, $4, $3, 8);
x86_64_clear_reg(inst, X86_64_RDX);
x86_64_div_reg_size(inst, $4, 8);
}
- [=reg("rdx"), *reg("rax"), reg, scratch reg("rdx")] -> {
+ [=reg("rdx"), *reg("rax"), dreg, scratch reg("rdx")] -> {
#ifndef JIT_USE_SIGNALS
unsigned char *patch;
x86_64_test_reg_reg_size(inst, $3, $3, 8);
@@ -1367,6 +1412,22 @@
x86_64_divss_reg_membase(inst, $1, X86_64_RBP, $2);
}
+JIT_OP_FABS:
+ [xreg] -> {
+ /* Simply clear the sign */
+ jit_uint values[4] = {0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff};
+
+ _jit_plops_reg_imm(gen, &inst, XMM_ANDP, $1, &(values[0]));
+ }
+
+JIT_OP_FNEG:
+ [xreg] -> {
+ /* Simply toggle the sign */
+ jit_uint values[4] = {0x80000000, 0x80000000, 0x80000000, 0x80000000};
+
+ _jit_plops_reg_imm(gen, &inst, XMM_XORP, $1, &(values[0]));
+ }
+
/*
* double precision float versions
*/
@@ -1415,6 +1476,35 @@
x86_64_divsd_reg_reg(inst, $1, $2);
}
+JIT_OP_DABS:
+ [xreg] -> {
+ /* Simply clear the sign */
+ jit_ulong values[2] = {0x7fffffffffffffff, 0x7fffffffffffffff};
+
+ _jit_plopd_reg_imm(gen, &inst, XMM_ANDP, $1, &(values[0]));
+ }
+
+JIT_OP_DNEG:
+ [xreg] -> {
+ /* Simply toggle the sign */
+ jit_ulong values[2] = {0x8000000000000000, 0x8000000000000000};
+
+ _jit_plopd_reg_imm(gen, &inst, XMM_XORP, $1, &(values[0]));
+ }
+
+/*
+ * native float versions
+ */
+JIT_OP_NFABS: stack
+ [freg] -> {
+ x86_64_fabs(inst);
+ }
+
+JIT_OP_NFNEG: stack
+ [freg] -> {
+ x86_64_fchs(inst);
+ }
+
/*
* Bitwise opcodes.
*/
@@ -1872,13 +1962,133 @@
inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
}
+JIT_OP_BR_FEQ:
+ [xreg, local] -> {
+ x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x74 /* eq */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x74 /* eq */, insn);
+ }
+
+JIT_OP_BR_FNE:
+ [xreg, local] -> {
+ x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x75 /* ne */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x75 /* ne */, insn);
+ }
+
+JIT_OP_BR_FLT:
+ [xreg, local] -> {
+ x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
+ }
+
+JIT_OP_BR_FLE:
+ [xreg, local] -> {
+ x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x76 /* le_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x76 /* le_un */, insn);
+ }
+
+JIT_OP_BR_FGT:
+ [xreg, local] -> {
+ x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
+ }
+
+JIT_OP_BR_FGE:
+ [xreg, local] -> {
+ x86_64_comiss_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
+ }
+
+JIT_OP_BR_DEQ:
+ [xreg, local] -> {
+ x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x74 /* eq */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x74 /* eq */, insn);
+ }
+
+JIT_OP_BR_DNE:
+ [xreg, local] -> {
+ x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x75 /* ne */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x75 /* ne */, insn);
+ }
+
+JIT_OP_BR_DLT:
+ [xreg, local] -> {
+ x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x72 /* lt_un */, insn);
+ }
+
+JIT_OP_BR_DLE:
+ [xreg, local] -> {
+ x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x76 /* le_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x76 /* le_un */, insn);
+ }
+
+JIT_OP_BR_DGT:
+ [xreg, local] -> {
+ x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x77 /* gt_un */, insn);
+ }
+
+JIT_OP_BR_DGE:
+ [xreg, local] -> {
+ x86_64_comisd_reg_membase(inst, $1, X86_64_RBP, $2);
+ inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
+ }
+ [xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $1, $2);
+ inst = output_branch(func, inst, 0x73 /* ge_un */, insn);
+ }
+
/*
* Comparison opcodes.
*/
JIT_OP_IEQ:
[=reg, reg, immzero] -> {
- x86_64_or_reg_reg_size(inst, $2, $2, 4);
+ x86_64_test_reg_reg_size(inst, $2, $2, 4);
inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
}
[=reg, reg, imm] -> {
@@ -1896,7 +2106,7 @@
JIT_OP_INE:
[=reg, reg, immzero] -> {
- x86_64_or_reg_reg_size(inst, $2, $2, 4);
+ x86_64_test_reg_reg_size(inst, $2, $2, 4);
inst = setcc_reg(inst, $1, X86_CC_NE, 0);
}
[=reg, reg, imm] -> {
@@ -2026,7 +2236,7 @@
JIT_OP_LEQ:
[=reg, reg, immzero] -> {
- x86_64_or_reg_reg_size(inst, $2, $2, 8);
+ x86_64_test_reg_reg_size(inst, $2, $2, 8);
inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
}
[=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> {
@@ -2044,7 +2254,7 @@
JIT_OP_LNE:
[=reg, reg, immzero] -> {
- x86_64_or_reg_reg_size(inst, $2, $2, 8);
+ x86_64_test_reg_reg_size(inst, $2, $2, 8);
inst = setcc_reg(inst, $1, X86_CC_NE, 0);
}
[=reg, reg, imm, if("$3 >= (jit_nint)jit_min_int && $3 <= (jit_nint)jit_max_int")] -> {
@@ -2172,6 +2382,232 @@
inst = setcc_reg(inst, $1, X86_CC_GE, 0);
}
+JIT_OP_FEQ:
+ [=reg, xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
+ }
+
+JIT_OP_FNE:
+ [=reg, xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_NE, 0);
+ }
+
+JIT_OP_FLT:
+ [=reg, xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_B, 0);
+ }
+
+JIT_OP_FLE:
+ [=reg, xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_BE, 0);
+ }
+
+JIT_OP_FGT:
+ [=reg, xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_A, 0);
+ }
+
+JIT_OP_FGE:
+ [=reg, xreg, xreg] -> {
+ x86_64_comiss_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_AE, 0);
+ }
+
+JIT_OP_DEQ:
+ [=reg, xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_EQ, 0);
+ }
+
+JIT_OP_DNE:
+ [=reg, xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_NE, 0);
+ }
+
+JIT_OP_DLT:
+ [=reg, xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_B, 0);
+ }
+
+JIT_OP_DLE:
+ [=reg, xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_BE, 0);
+ }
+
+JIT_OP_DGT:
+ [=reg, xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_A, 0);
+ }
+
+JIT_OP_DGE:
+ [=reg, xreg, xreg] -> {
+ x86_64_comisd_reg_reg(inst, $2, $3);
+ inst = setcc_reg(inst, $1, X86_CC_AE, 0);
+ }
+
+JIT_OP_FSQRT:
+ [=xreg, local] -> {
+ x86_64_sqrtss_reg_membase(inst, $1, X86_64_RBP, $2);
+ }
+ [=xreg, xreg] -> {
+ x86_64_sqrtss_reg_reg(inst, $1, $2);
+ }
+
+JIT_OP_DSQRT:
+ [=xreg, local] -> {
+ x86_64_sqrtsd_reg_membase(inst, $1, X86_64_RBP, $2);
+ }
+ [=xreg, xreg] -> {
+ x86_64_sqrtsd_reg_reg(inst, $1, $2);
+ }
+
+/*
+ * Absolute, minimum, maximum, and sign.
+ */
+JIT_OP_IMAX:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 1, 4);
+ }
+
+JIT_OP_IMAX_UN:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 0, 4);
+ }
+
+JIT_OP_IMIN:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 1, 4);
+ }
+
+JIT_OP_IMIN_UN:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 4);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 0, 4);
+ }
+
+JIT_OP_LMAX:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 1, 8);
+ }
+
+JIT_OP_LMAX_UN:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_LT, $1, $2, 0, 8);
+ }
+
+JIT_OP_LMIN:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 1, 8);
+ }
+
+JIT_OP_LMIN_UN:
+ [reg, reg] -> {
+ x86_64_cmp_reg_reg_size(inst, $1, $2, 8);
+ x86_64_cmov_reg_reg_size(inst, X86_CC_GT, $1, $2, 0, 8);
+ }
+
+JIT_OP_FMAX:
+ [xreg, local] -> {
+ x86_64_maxss_reg_membase(inst, $1, X86_64_RBP, $2);
+ }
+ [xreg, xreg] -> {
+ x86_64_maxss_reg_reg(inst, $1, $2);
+ }
+
+JIT_OP_FMIN:
+ [xreg, local] -> {
+ x86_64_minss_reg_membase(inst, $1, X86_64_RBP, $2);
+ }
+ [xreg, xreg] -> {
+ x86_64_minss_reg_reg(inst, $1, $2);
+ }
+
+JIT_OP_DMAX:
+ [xreg, local] -> {
+ x86_64_maxsd_reg_membase(inst, $1, X86_64_RBP, $2);
+ }
+ [xreg, xreg] -> {
+ x86_64_maxsd_reg_reg(inst, $1, $2);
+ }
+
+JIT_OP_DMIN:
+ [xreg, local] -> {
+ x86_64_minsd_reg_membase(inst, $1, X86_64_RBP, $2);
+ }
+ [xreg, xreg] -> {
+ x86_64_minsd_reg_reg(inst, $1, $2);
+ }
+
+/*
+ * Rounding
+ */
+JIT_OP_FFLOOR: more_space
+ [=xreg, local, scratch reg] -> {
inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_DOWN);
+ }
+ [=xreg, xreg, scratch reg] -> {
+ inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_DOWN);
+ }
+
+JIT_OP_DFLOOR: more_space
+ [=xreg, local, scratch reg] -> {
inst = x86_64_roundd_reg_membase(inst, $1, $2, $3, X86_ROUND_DOWN);
+ }
+ [=xreg, xreg, scratch reg] -> {
+ inst = x86_64_roundd_reg_reg(inst, $1, $2, $3, X86_ROUND_DOWN);
+ }
+
+JIT_OP_NFFLOOR: more_space
+ [freg, scratch reg] -> {
+ inst = x86_64_roundnf(inst, $2, X86_ROUND_DOWN);
+ }
+
+JIT_OP_FCEIL: more_space
+ [=xreg, local, scratch reg] -> {
inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_UP);
+ }
+ [=xreg, xreg, scratch reg] -> {
+ inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_UP);
+ }
+
+JIT_OP_DCEIL: more_space
+ [=xreg, local, scratch reg] -> {
inst = x86_64_roundd_reg_membase(inst, $1, $2, $3, X86_ROUND_UP);
+ }
+ [=xreg, xreg, scratch reg] -> {
+ inst = x86_64_roundd_reg_reg(inst, $1, $2, $3, X86_ROUND_UP);
+ }
+
+JIT_OP_NFCEIL: more_space
+ [freg, scratch reg] -> {
+ inst = x86_64_roundnf(inst, $2, X86_ROUND_UP);
+ }
+
+/*
+JIT_OP_FRINT: more_space
+ [=xreg, local, scratch reg] -> {
inst = x86_64_rounds_reg_membase(inst, $1, $2, $3, X86_ROUND_ZERO);
+ }
+ [=xreg, xreg, scratch reg] -> {
+ inst = x86_64_rounds_reg_reg(inst, $1, $2, $3, X86_ROUND_ZERO);
+ }
+*/
+
/*
* Pointer check opcodes.
*/
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [dotgnu-pnet-commits] libjit ChangeLog jit/jit-gen-x86-64.h jit/jit-r...,
Klaus Treichel <=