[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 15/36] tcg: Add guest load/store primitives for TCGv_i128
From: |
Richard Henderson |
Subject: |
[PATCH v4 15/36] tcg: Add guest load/store primitives for TCGv_i128 |
Date: |
Sat, 7 Jan 2023 18:36:58 -0800 |
These are not yet considering atomicity of the 16-byte value;
this is a direct replacement for the current target code which
uses a pair of 8-byte operations.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
include/exec/cpu_ldst.h | 10 +++
include/tcg/tcg-op.h | 2 +
accel/tcg/cputlb.c | 112 +++++++++++++++++++++++++++++++++
accel/tcg/user-exec.c | 66 ++++++++++++++++++++
tcg/tcg-op.c | 134 ++++++++++++++++++++++++++++++++++++++++
5 files changed, 324 insertions(+)
diff --git a/include/exec/cpu_ldst.h b/include/exec/cpu_ldst.h
index d0c7c0d5fe..09b55cc0ee 100644
--- a/include/exec/cpu_ldst.h
+++ b/include/exec/cpu_ldst.h
@@ -220,6 +220,11 @@ uint32_t cpu_ldl_le_mmu(CPUArchState *env, abi_ptr ptr,
uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr ptr,
MemOpIdx oi, uintptr_t ra);
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra);
+
void cpu_stb_mmu(CPUArchState *env, abi_ptr ptr, uint8_t val,
MemOpIdx oi, uintptr_t ra);
void cpu_stw_be_mmu(CPUArchState *env, abi_ptr ptr, uint16_t val,
@@ -235,6 +240,11 @@ void cpu_stl_le_mmu(CPUArchState *env, abi_ptr ptr,
uint32_t val,
void cpu_stq_le_mmu(CPUArchState *env, abi_ptr ptr, uint64_t val,
MemOpIdx oi, uintptr_t ra);
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra);
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra);
+
uint32_t cpu_atomic_cmpxchgb_mmu(CPUArchState *env, target_ulong addr,
uint32_t cmpv, uint32_t newv,
MemOpIdx oi, uintptr_t retaddr);
diff --git a/include/tcg/tcg-op.h b/include/tcg/tcg-op.h
index c4276767d1..e5f5b63c37 100644
--- a/include/tcg/tcg-op.h
+++ b/include/tcg/tcg-op.h
@@ -845,6 +845,8 @@ void tcg_gen_qemu_ld_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i32(TCGv_i32, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_ld_i64(TCGv_i64, TCGv, TCGArg, MemOp);
void tcg_gen_qemu_st_i64(TCGv_i64, TCGv, TCGArg, MemOp);
+void tcg_gen_qemu_ld_i128(TCGv_i128, TCGv, TCGArg, MemOp);
+void tcg_gen_qemu_st_i128(TCGv_i128, TCGv, TCGArg, MemOp);
static inline void tcg_gen_qemu_ld8u(TCGv ret, TCGv addr, int mem_index)
{
diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index 4948729917..79101306a3 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -2187,6 +2187,64 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return cpu_load_helper(env, addr, oi, ra, helper_le_ldq_mmu);
}
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+ uint64_t h, l;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ h = helper_be_ldq_mmu(env, addr, new_oi, ra);
+ l = helper_be_ldq_mmu(env, addr + 8, new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ return int128_make128(l, h);
+}
+
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+ uint64_t h, l;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_LOAD,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ l = helper_le_ldq_mmu(env, addr, new_oi, ra);
+ h = helper_le_ldq_mmu(env, addr + 8, new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+ return int128_make128(l, h);
+}
+
/*
* Store Helpers
*/
@@ -2541,6 +2599,60 @@ void cpu_stq_le_mmu(CPUArchState *env, target_ulong
addr, uint64_t val,
cpu_store_helper(env, addr, val, oi, retaddr, helper_le_stq_mmu);
}
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_BE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ helper_be_stq_mmu(env, addr, int128_gethi(val), new_oi, ra);
+ helper_be_stq_mmu(env, addr + 8, int128_getlo(val), new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr, Int128 val,
+ MemOpIdx oi, uintptr_t ra)
+{
+ MemOp mop = get_memop(oi);
+ int mmu_idx = get_mmuidx(oi);
+ MemOpIdx new_oi;
+ unsigned a_bits;
+
+ tcg_debug_assert((mop & (MO_BSWAP|MO_SSIZE)) == (MO_LE|MO_128));
+ a_bits = get_alignment_bits(mop);
+
+ /* Handle CPU specific unaligned behaviour */
+ if (addr & ((1 << a_bits) - 1)) {
+ cpu_unaligned_access(env_cpu(env), addr, MMU_DATA_STORE,
+ mmu_idx, ra);
+ }
+
+ /* Construct an unaligned 64-bit replacement MemOpIdx. */
+ mop = (mop & ~(MO_SIZE | MO_AMASK)) | MO_64 | MO_UNALN;
+ new_oi = make_memop_idx(mop, mmu_idx);
+
+ helper_le_stq_mmu(env, addr, int128_getlo(val), new_oi, ra);
+ helper_le_stq_mmu(env, addr + 8, int128_gethi(val), new_oi, ra);
+
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
#include "ldst_common.c.inc"
/*
diff --git a/accel/tcg/user-exec.c b/accel/tcg/user-exec.c
index a8eb63ab96..ae67d84638 100644
--- a/accel/tcg/user-exec.c
+++ b/accel/tcg/user-exec.c
@@ -1031,6 +1031,42 @@ uint64_t cpu_ldq_le_mmu(CPUArchState *env, abi_ptr addr,
return ret;
}
+Int128 cpu_ld16_be_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ Int128 ret;
+
+ validate_memop(oi, MO_128 | MO_BE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
+ memcpy(&ret, haddr, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+
+ if (!HOST_BIG_ENDIAN) {
+ ret = bswap128(ret);
+ }
+ return ret;
+}
+
+Int128 cpu_ld16_le_mmu(CPUArchState *env, abi_ptr addr,
+ MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+ Int128 ret;
+
+ validate_memop(oi, MO_128 | MO_LE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_LOAD);
+ memcpy(&ret, haddr, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_R);
+
+ if (HOST_BIG_ENDIAN) {
+ ret = bswap128(ret);
+ }
+ return ret;
+}
+
void cpu_stb_mmu(CPUArchState *env, abi_ptr addr, uint8_t val,
MemOpIdx oi, uintptr_t ra)
{
@@ -1115,6 +1151,36 @@ void cpu_stq_le_mmu(CPUArchState *env, abi_ptr addr,
uint64_t val,
qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
}
+void cpu_st16_be_mmu(CPUArchState *env, abi_ptr addr,
+ Int128 val, MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+
+ validate_memop(oi, MO_128 | MO_BE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
+ if (!HOST_BIG_ENDIAN) {
+ val = bswap128(val);
+ }
+ memcpy(haddr, &val, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
+void cpu_st16_le_mmu(CPUArchState *env, abi_ptr addr,
+ Int128 val, MemOpIdx oi, uintptr_t ra)
+{
+ void *haddr;
+
+ validate_memop(oi, MO_128 | MO_LE);
+ haddr = cpu_mmu_lookup(env, addr, oi, ra, MMU_DATA_STORE);
+ if (HOST_BIG_ENDIAN) {
+ val = bswap128(val);
+ }
+ memcpy(haddr, &val, 16);
+ clear_helper_retaddr();
+ qemu_plugin_vcpu_mem_cb(env_cpu(env), addr, oi, QEMU_PLUGIN_MEM_W);
+}
+
uint32_t cpu_ldub_code(CPUArchState *env, abi_ptr ptr)
{
uint32_t ret;
diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
index 658cee7d6c..55ecedb66f 100644
--- a/tcg/tcg-op.c
+++ b/tcg/tcg-op.c
@@ -3107,6 +3107,140 @@ void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr,
TCGArg idx, MemOp memop)
}
}
+static void canonicalize_memop_i128_as_i64(MemOp ret[2], MemOp orig)
+{
+ MemOp mop_1 = orig, mop_2;
+
+ tcg_debug_assert((orig & MO_SIZE) == MO_128);
+ tcg_debug_assert((orig & MO_SIGN) == 0);
+
+ /* Use a memory ordering implemented by the host. */
+ if (!TCG_TARGET_HAS_MEMORY_BSWAP && (orig & MO_BSWAP)) {
+ mop_1 &= ~MO_BSWAP;
+ }
+
+ /* Reduce the size to 64-bit. */
+ mop_1 = (mop_1 & ~MO_SIZE) | MO_64;
+
+ /* Retain the alignment constraints of the original. */
+ switch (orig & MO_AMASK) {
+ case MO_UNALN:
+ case MO_ALIGN_2:
+ case MO_ALIGN_4:
+ mop_2 = mop_1;
+ break;
+ case MO_ALIGN_8:
+ /* Prefer MO_ALIGN+MO_64 to MO_ALIGN_8+MO_64. */
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
+ mop_2 = mop_1;
+ break;
+ case MO_ALIGN:
+ /* Second has 8-byte alignment; first has 16-byte alignment. */
+ mop_2 = mop_1;
+ mop_1 = (mop_1 & ~MO_AMASK) | MO_ALIGN_16;
+ break;
+ case MO_ALIGN_16:
+ case MO_ALIGN_32:
+ case MO_ALIGN_64:
+ /* Second has 8-byte alignment; first retains original. */
+ mop_2 = (mop_1 & ~MO_AMASK) | MO_ALIGN;
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ ret[0] = mop_1;
+ ret[1] = mop_2;
+}
+
+void tcg_gen_qemu_ld_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ MemOp mop[2];
+ TCGv addr_p8;
+ TCGv_i64 x, y;
+
+ canonicalize_memop_i128_as_i64(mop, memop);
+
+ tcg_gen_req_mo(TCG_MO_LD_LD | TCG_MO_ST_LD);
+ addr = plugin_prep_mem_callbacks(addr);
+
+ /* TODO: respect atomicity of the operation. */
+ /* TODO: allow the tcg backend to see the whole operation. */
+
+ /*
+ * Since there are no global TCGv_i128, there is no visible state
+ * changed if the second load faults. Load directly into the two
+ * subwords.
+ */
+ if ((memop & MO_BSWAP) == MO_LE) {
+ x = TCGV128_LOW(val);
+ y = TCGV128_HIGH(val);
+ } else {
+ x = TCGV128_HIGH(val);
+ y = TCGV128_LOW(val);
+ }
+
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, x, addr, mop[0], idx);
+
+ if ((mop[0] ^ memop) & MO_BSWAP) {
+ tcg_gen_bswap64_i64(x, x);
+ }
+
+ addr_p8 = tcg_temp_new();
+ tcg_gen_addi_tl(addr_p8, addr, 8);
+ gen_ldst_i64(INDEX_op_qemu_ld_i64, y, addr_p8, mop[1], idx);
+ tcg_temp_free(addr_p8);
+
+ if ((mop[0] ^ memop) & MO_BSWAP) {
+ tcg_gen_bswap64_i64(y, y);
+ }
+
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
+ QEMU_PLUGIN_MEM_R);
+}
+
+void tcg_gen_qemu_st_i128(TCGv_i128 val, TCGv addr, TCGArg idx, MemOp memop)
+{
+ MemOp mop[2];
+ TCGv addr_p8;
+ TCGv_i64 x, y;
+
+ canonicalize_memop_i128_as_i64(mop, memop);
+
+ tcg_gen_req_mo(TCG_MO_ST_LD | TCG_MO_ST_ST);
+ addr = plugin_prep_mem_callbacks(addr);
+
+ /* TODO: respect atomicity of the operation. */
+ /* TODO: allow the tcg backend to see the whole operation. */
+
+ if ((memop & MO_BSWAP) == MO_LE) {
+ x = TCGV128_LOW(val);
+ y = TCGV128_HIGH(val);
+ } else {
+ x = TCGV128_HIGH(val);
+ y = TCGV128_LOW(val);
+ }
+
+ addr_p8 = tcg_temp_new();
+ if ((mop[0] ^ memop) & MO_BSWAP) {
+ TCGv_i64 t = tcg_temp_new_i64();
+
+ tcg_gen_bswap64_i64(t, x);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr, mop[0], idx);
+ tcg_gen_bswap64_i64(t, y);
+ tcg_gen_addi_tl(addr_p8, addr, 8);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, t, addr_p8, mop[1], idx);
+ tcg_temp_free_i64(t);
+ } else {
+ gen_ldst_i64(INDEX_op_qemu_st_i64, x, addr, mop[0], idx);
+ tcg_gen_addi_tl(addr_p8, addr, 8);
+ gen_ldst_i64(INDEX_op_qemu_st_i64, y, addr_p8, mop[1], idx);
+ }
+ tcg_temp_free(addr_p8);
+
+ plugin_gen_mem_callbacks(addr, make_memop_idx(memop, idx),
+ QEMU_PLUGIN_MEM_W);
+}
+
static void tcg_gen_ext_i32(TCGv_i32 ret, TCGv_i32 val, MemOp opc)
{
switch (opc & MO_SSIZE) {
--
2.34.1
- Re: [PATCH v4 08/36] include/qemu/int128: Use Int128 structure for TCI, (continued)
- [PATCH v4 09/36] tcg/i386: Add TCG_TARGET_CALL_{RET,ARG}_I128, Richard Henderson, 2023/01/07
- [PATCH v4 10/36] tcg/tci: Fix big-endian return register ordering, Richard Henderson, 2023/01/07
- [PATCH v4 11/36] tcg/tci: Add TCG_TARGET_CALL_{RET,ARG}_I128, Richard Henderson, 2023/01/07
- [PATCH v4 12/36] tcg: Add TCG_TARGET_CALL_{RET,ARG}_I128, Richard Henderson, 2023/01/07
- [PATCH v4 13/36] tcg: Add temp allocation for TCGv_i128, Richard Henderson, 2023/01/07
- [PATCH v4 14/36] tcg: Add basic data movement for TCGv_i128, Richard Henderson, 2023/01/07
- [PATCH v4 15/36] tcg: Add guest load/store primitives for TCGv_i128,
Richard Henderson <=
- [PATCH v4 16/36] tcg: Add tcg_gen_{non}atomic_cmpxchg_i128, Richard Henderson, 2023/01/07
- [PATCH v4 17/36] tcg: Split out tcg_gen_nonatomic_cmpxchg_i{32,64}, Richard Henderson, 2023/01/07
- [PATCH v4 18/36] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for STXP, Richard Henderson, 2023/01/07
- [PATCH v4 19/36] target/arm: Use tcg_gen_atomic_cmpxchg_i128 for CASP, Richard Henderson, 2023/01/07
- [PATCH v4 20/36] target/ppc: Use tcg_gen_atomic_cmpxchg_i128 for STQCX, Richard Henderson, 2023/01/07
- [PATCH v4 21/36] tests/tcg/s390x: Add div.c, Richard Henderson, 2023/01/07
- [PATCH v4 22/36] tests/tcg/s390x: Add clst.c, Richard Henderson, 2023/01/07
- [PATCH v4 23/36] tests/tcg/s390x: Add long-double.c, Richard Henderson, 2023/01/07
- [PATCH v4 24/36] target/s390x: Use a single return for helper_divs32/u32, Richard Henderson, 2023/01/07
- [PATCH v4 25/36] target/s390x: Use a single return for helper_divs64/u64, Richard Henderson, 2023/01/07