On Sat, 3 Jun 2023 at 04:34, Richard Henderson
<richard.henderson@linaro.org> wrote:
Detect AES in cpuinfo; implement the accel hooks.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
host/include/aarch64/host/aes-round.h | 204 ++++++++++++++++++++++++++
host/include/aarch64/host/cpuinfo.h | 1 +
util/cpuinfo-aarch64.c | 2 +
3 files changed, 207 insertions(+)
create mode 100644 host/include/aarch64/host/aes-round.h
diff --git a/host/include/aarch64/host/aes-round.h
b/host/include/aarch64/host/aes-round.h
new file mode 100644
index 0000000000..27ca823db6
--- /dev/null
+++ b/host/include/aarch64/host/aes-round.h
@@ -0,0 +1,204 @@
+/*
+ * AArch64 specific aes acceleration.
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef HOST_AES_ROUND_H
+#define HOST_AES_ROUND_H
+
+#include "host/cpuinfo.h"
+#include <arm_neon.h>
+
+#ifdef __ARM_FEATURE_AES
+# define HAVE_AES_ACCEL true
+# define ATTR_AES_ACCEL
+#else
+# define HAVE_AES_ACCEL likely(cpuinfo & CPUINFO_AES)
+# define ATTR_AES_ACCEL __attribute__((target("+crypto")))
+#endif
+
+static inline uint8x16_t aes_accel_bswap(uint8x16_t x)
+{
+ /* No arm_neon.h primitive, and the compilers don't share builtins. */
vqtbl1q_u8() perhaps?
+static inline uint8x16_t aes_accel_aesmc(uint8x16_t d)
+{
+ asm(".arch_extension aes\n\t"
+ "aesmc %0.16b, %1.16b" : "=w"(d) : "w"(d));
Most ARM cores fuse aese/aesmc (and likewise aesd/aesimc) into a single
uop (with the associated performance boost) if the pattern is
aese x, y
aesmc x, x
or
aesd x, y
aesimc x, x
So it might make sense to use a "+w" constraint here at least, and use
only a single register (which the compiler will likely do in any case,
but still).
I would assume that the compiler cannot issue these separately based
on the sequences below, but if it can, it may be worth it to emit
the aese/aesmc together in a single asm() block.