[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 1/2] target/arm: Use x86 intrinsics to implement PMULL.P64
From: Ard Biesheuvel
Subject: [PATCH 1/2] target/arm: Use x86 intrinsics to implement PMULL.P64
Date: Thu, 1 Jun 2023 14:33:31 +0200
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
host/include/i386/host/cpuinfo.h | 1 +
target/arm/tcg/vec_helper.c | 26 +++++++++++++++++++-
util/cpuinfo-i386.c | 1 +
3 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/host/include/i386/host/cpuinfo.h b/host/include/i386/host/cpuinfo.h
index 073d0a426f31487d..cf4ced844760d28f 100644
--- a/host/include/i386/host/cpuinfo.h
+++ b/host/include/i386/host/cpuinfo.h
@@ -27,6 +27,7 @@
#define CPUINFO_ATOMIC_VMOVDQA (1u << 16)
#define CPUINFO_ATOMIC_VMOVDQU (1u << 17)
#define CPUINFO_AES (1u << 18)
+#define CPUINFO_PMULL (1u << 19)
/* Initialized with a constructor. */
extern unsigned cpuinfo;
diff --git a/target/arm/tcg/vec_helper.c b/target/arm/tcg/vec_helper.c
index f59d3b26eacf08f8..fb422627588439b3 100644
--- a/target/arm/tcg/vec_helper.c
+++ b/target/arm/tcg/vec_helper.c
@@ -25,6 +25,14 @@
#include "qemu/int128.h"
#include "vec_internal.h"
+#ifdef __x86_64__
+#include "host/cpuinfo.h"
+#include <wmmintrin.h>
+#define TARGET_PMULL __attribute__((__target__("pclmul")))
+#else
+#define TARGET_PMULL
+#endif
+
/*
* Data for expanding active predicate bits to bytes, for byte elements.
*
@@ -2010,12 +2018,28 @@ void HELPER(gvec_pmul_b)(void *vd, void *vn, void *vm, uint32_t desc)
* Because of the lanes are not accessed in strict columns,
* this probably cannot be turned into a generic helper.
*/
-void HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
+void TARGET_PMULL HELPER(gvec_pmull_q)(void *vd, void *vn, void *vm, uint32_t desc)
{
intptr_t i, j, opr_sz = simd_oprsz(desc);
intptr_t hi = simd_data(desc);
uint64_t *d = vd, *n = vn, *m = vm;
+#ifdef __x86_64__
+ if (cpuinfo & CPUINFO_PMULL) {
+ switch (hi) {
+ case 0:
+ *(__m128i *)vd = _mm_clmulepi64_si128(*(__m128i *)vm, *(__m128i *)vn, 0x0);
+ break;
+ case 1:
+ *(__m128i *)vd = _mm_clmulepi64_si128(*(__m128i *)vm, *(__m128i *)vn, 0x11);
+ break;
+ default:
+ g_assert_not_reached();
+ }
+ return;
+ }
+#endif
+
for (i = 0; i < opr_sz / 8; i += 2) {
uint64_t nn = n[i + hi];
uint64_t mm = m[i + hi];
diff --git a/util/cpuinfo-i386.c b/util/cpuinfo-i386.c
index 3043f066c0182dc8..8930e13451201a64 100644
--- a/util/cpuinfo-i386.c
+++ b/util/cpuinfo-i386.c
@@ -40,6 +40,7 @@ unsigned __attribute__((constructor)) cpuinfo_init(void)
info |= (c & bit_MOVBE ? CPUINFO_MOVBE : 0);
info |= (c & bit_POPCNT ? CPUINFO_POPCNT : 0);
info |= (c & bit_AES ? CPUINFO_AES : 0);
+ info |= (c & bit_PCLMULQDQ ? CPUINFO_PMULL : 0);
/* For AVX features, we must check available and usable. */
if ((c & bit_AVX) && (c & bit_OSXSAVE)) {
--
2.39.2