[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 7/7] cutils: Rewrite ppc buffer zero checking
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH 7/7] cutils: Rewrite ppc buffer zero checking |
Date: |
Tue, 23 Aug 2016 21:17:59 -0700 |
GCC versions through 6 do a poor job with the indexed addressing,
and (for ppc64le) issue unnecessary xxswapd insns.
Cc: address@hidden
Cc: David Gibson <address@hidden>
Signed-off-by: Richard Henderson <address@hidden>
---
util/cutils.c | 41 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/util/cutils.c b/util/cutils.c
index fe860e8..30fac02 100644
--- a/util/cutils.c
+++ b/util/cutils.c
@@ -254,8 +254,45 @@ static bool select_accel_int(const void *buf, size_t len)
#undef pixel
#undef bool
#define bool _Bool
-#define DO_ZERO(X) vec_all_eq(X, (__vector unsigned char){ 0 })
-ACCEL_BUFFER_ZERO(buffer_zero_ppc, 128, __vector unsigned char, DO_ZERO)
+
+/*
+ * Return true if every byte examined in BUF is zero, false as soon as a
+ * 128-byte chunk containing a non-zero byte is found.
+ *
+ * Each iteration loads eight 16-byte vectors (128 bytes), ORs them
+ * together and tests the combined value once. The loop is do/while and
+ * always consumes whole 128-byte chunks, so it presumably relies on the
+ * caller passing a LEN that is a non-zero multiple of 128 -- TODO confirm
+ * against select_accel_fn.
+ *
+ * NOTE(review): lvx ignores the low four bits of the effective address,
+ * so BUF is expected to be 16-byte aligned -- confirm the caller
+ * guarantees this.
+ */
+static bool __attribute__((noinline))
+buffer_zero_ppc(const void *buf, size_t len)
+{
+ typedef unsigned char vec __attribute__((vector_size(16)));
+ const vec *p = buf;
+ const vec *end = buf + len;
+ vec t0, t1, t2, t3, zero = (vec){ 0 };
+
+ do {
+ /* Advance first; the chunk to test is then at p[-8] .. p[-1]. */
+ p += 8;
+ /* p now points at the following chunk; hint it into the cache. */
+ __builtin_prefetch(p);
+ /* ??? GCC6 does poorly with power64le; extra xxswap. */
+ /*
+ * Operands: %0-%3 are the vector temporaries, %4 is the base
+ * pointer and %5-%12 are the byte offsets -128 .. -16.
+ * Use lvx (full 16-byte load). lvebx would load only a single
+ * byte per vector and leave the other 15 bytes undefined, which
+ * makes the all-lanes comparison below meaningless.
+ */
+ __asm volatile("lvx %0,%4,%5\n\t"
+ "lvx %1,%4,%6\n\t"
+ "lvx %2,%4,%7\n\t"
+ "lvx %3,%4,%8\n\t"
+ "vor %0,%0,%1\n\t"
+ "vor %1,%2,%3\n\t"
+ "lvx %2,%4,%9\n\t"
+ "lvx %3,%4,%10\n\t"
+ "vor %0,%0,%1\n\t"
+ "vor %1,%2,%3\n\t"
+ "lvx %2,%4,%11\n\t"
+ "lvx %3,%4,%12\n\t"
+ "vor %0,%0,%1\n\t"
+ "vor %1,%2,%3\n\t"
+ "vor %0,%0,%1"
+ : "=v"(t0), "=v"(t1), "=v"(t2), "=v"(t3)
+ : "b"(p), "b"(-8 * 16), "b"(-7 * 16),
+ "b"(-6 * 16), "b"(-5 * 16),
+ "b"(-4 * 16), "b"(-3 * 16),
+ "b"(-2 * 16), "b"(-1 * 16));
+ /* t0 now holds the OR of all eight vectors of this chunk. */
+ if (unlikely(vec_any_ne(t0, zero))) {
+ return false;
+ }
+ } while (p < end);
+ return true;
+}
static bool select_accel_fn(const void *buf, size_t len)
{
--
2.7.4
- [Qemu-devel] [PATCH 0/7] Improve buffer_is_zero, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 4/7] cutils: Add generic prefetch, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 2/7] cutils: Export only buffer_is_zero, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 1/7] cutils: Remove SPLAT macro, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 3/7] cutils: Rearrange buffer_is_zero acceleration, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 5/7] cutils: Rewrite x86 buffer zero checking, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 6/7] cutils: Rewrite aarch64 buffer zero checking, Richard Henderson, 2016/08/24
- [Qemu-devel] [PATCH 7/7] cutils: Rewrite ppc buffer zero checking,
Richard Henderson <=
- Re: [Qemu-devel] [PATCH 0/7] Improve buffer_is_zero, no-reply, 2016/08/24
- Re: [Qemu-devel] [PATCH 0/7] Improve buffer_is_zero, Dr. David Alan Gilbert, 2016/08/24
- Re: [Qemu-devel] [PATCH 0/7] Improve buffer_is_zero, Vijay Kilari, 2016/08/25