[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-arm] [RFC PATCH v2 1/3] target-arm: Use Neon for zero checking
From: |
vijayak |
Subject: |
[Qemu-arm] [RFC PATCH v2 1/3] target-arm: Use Neon for zero checking |
Date: |
Thu, 7 Apr 2016 15:28:05 +0530 |
From: Vijay <address@hidden>
Use Neon instructions to perform zero checking of
a buffer. This helps reduce downtime during
live migration.
Signed-off-by: Vijaya Kumar K <address@hidden>
Signed-off-by: Suresh <address@hidden>
---
util/cutils.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/util/cutils.c b/util/cutils.c
index 43d1afb..bb61c91 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -352,6 +352,80 @@ static void *can_use_buffer_find_nonzero_offset_ifunc(void)
return func;
}
#pragma GCC pop_options
+
+#elif defined __aarch64__
+#include "arm_neon.h"
+/* Neon helpers for the zero check; each vector holds two 64-bit lanes. */
+#define NEON_VECTYPE uint64x2_t
+#define NEON_LOAD_N_ORR(v1, v2) (vld1q_u64(&v1) | vld1q_u64(&v2)) /* v1, v2 must be lvalues: their addresses are loaded */
+#define NEON_ORR(v1, v2) ((v1) | (v2))
+#define NEON_NOT_EQ_ZERO(v1) \
+ ((vgetq_lane_u64(v1, 0) != 0) || (vgetq_lane_u64(v1, 1) != 0))
+/* Number of uint64_t words consumed per loop iteration; also scales the length check. */
+#define BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON 16
+
+/*
+ * Zero page/buffer checking using SIMD(Neon)
+ */
+/* True iff len is a multiple of UNROLL_FACTOR * sizeof(NEON_VECTYPE) and buf is aligned to sizeof(NEON_VECTYPE). */
+static bool
+can_use_buffer_find_nonzero_offset_neon(const void *buf, size_t len)
+{
+ return (len % (BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON
+ * sizeof(NEON_VECTYPE)) == 0
+ && ((uintptr_t) buf) % sizeof(NEON_VECTYPE) == 0);
+}
+/* Scans buf in 16-word chunks; returns the offset of the first chunk whose OR is nonzero, or len if all are zero. */
+static size_t buffer_find_nonzero_offset_neon(const void *buf, size_t len)
+{
+ size_t i;
+ NEON_VECTYPE qword0, qword1, qword2, qword3, qword4, qword5, qword6;
+ uint64_t const *data = buf;
+/* An empty buffer is reported as offset 0 without touching buf. */
+ if (!len) {
+ return 0;
+ }
+/* Size and alignment preconditions are enforced by assertion only. */
+ assert(can_use_buffer_find_nonzero_offset_neon(buf, len));
+ len /= sizeof(unsigned long); /* len is now a word count, not a byte count */
+/* NOTE(review): data is indexed as uint64_t but len is scaled by sizeof(unsigned long); confirm these match on every target ABI. */
+ for (i = 0; i < len; i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR_NEON) {
+ qword0 = NEON_LOAD_N_ORR(data[i], data[i + 2]); /* each load covers two words */
+ qword1 = NEON_LOAD_N_ORR(data[i + 4], data[i + 6]);
+ qword2 = NEON_LOAD_N_ORR(data[i + 8], data[i + 10]);
+ qword3 = NEON_LOAD_N_ORR(data[i + 12], data[i + 14]);
+ qword4 = NEON_ORR(qword0, qword1);
+ qword5 = NEON_ORR(qword2, qword3);
+ qword6 = NEON_ORR(qword4, qword5);
+/* qword6 is the OR of all 16 words of this chunk; any set lane means a nonzero word was seen. */
+ if (NEON_NOT_EQ_ZERO(qword6)) {
+ break;
+ }
+ }
+/* i counts words; scale back by the same factor used to divide len above. */
+ return i * sizeof(unsigned long);
+}
+/* Capability probe for the Neon path; currently a constant, so callers never take their non-Neon branch. */
+static inline bool neon_support(void)
+{
+ /*
+ * Check if neon feature is supported.
+ * By default neon is supported for aarch64.
+ */
+ return true;
+}
+/* Precondition check entry point: uses the Neon variant when neon_support() is true, else the _inner variant. */
+bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+ return neon_support() ? can_use_buffer_find_nonzero_offset_neon(buf, len) :
+ can_use_buffer_find_nonzero_offset_inner(buf, len);
+}
+/* Scan entry point: uses the Neon variant when neon_support() is true, else the _inner variant. */
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+ return neon_support() ? buffer_find_nonzero_offset_neon(buf, len) :
+ buffer_find_nonzero_offset_inner(buf, len);
+}
#else
bool can_use_buffer_find_nonzero_offset(const void *buf, size_t len)
{
--
1.7.9.5