[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2] target/arm/arch_dump: Add SVE notes
From: |
Andrew Jones |
Subject: |
Re: [PATCH v2] target/arm/arch_dump: Add SVE notes |
Date: |
Wed, 16 Oct 2019 11:13:14 +0200 |
User-agent: |
NeoMutt/20180716 |
On Thu, Oct 10, 2019 at 01:33:02PM -0400, Richard Henderson wrote:
> On 10/10/19 2:16 AM, Andrew Jones wrote:
> >> It might be best to avoid the ifdef altogether:
> >>
> >> for (i = 0; i < 32; ++i) {
> >> uint64_t *d = (uint64_t *)&buf[sve_zreg_offset(vq, i)];
> >> for (j = 0; j < vq * 2; ++j) {
> >> d[j] = cpu_to_le64(env->vfp.zregs[i].d[j]);
> >> }
> >> }
> >>
> >> The compiler may well transform the inner loop to memcpy for little-endian
> >> host, but even if it doesn't core dumping is hardly performance sensitive.
> >
> > True. I even had something like the above at first, but then
> > overcomplicated it with the #ifdef-ing.
>
> Ah, I wonder if you changed things around with the ifdefs due to the pregs.
> There's no trivial solution for those. It'd be nice to share the bswapping
> subroutine that you add in the SVE KVM patch set, and size the temporary array
> using ARM_MAX_VQ.
How about something like this squashed in?
Thanks,
drew
diff --git a/target/arm/arch_dump.c b/target/arm/arch_dump.c
index b02e398430b9..7c7fd23f4d94 100644
--- a/target/arm/arch_dump.c
+++ b/target/arm/arch_dump.c
@@ -182,6 +182,7 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction f,
struct aarch64_note *note;
ARMCPU *cpu = env_archcpu(env);
uint32_t vq = sve_current_vq(env);
+ uint64_t tmp[ARM_MAX_VQ * 2], *r;
uint32_t fpr;
uint8_t *buf;
int ret, i;
@@ -198,31 +199,14 @@ static int aarch64_write_elf64_sve(WriteCoreDumpFunction
f,
note->sve.flags = cpu_to_dump16(s, 1);
for (i = 0; i < 32; ++i) {
-#ifdef HOST_WORDS_BIGENDIAN
- uint64_t d[vq * 2];
- int j;
-
- for (j = 0; j < vq * 2; ++j) {
- d[j] = bswap64(env->vfp.zregs[i].d[j]);
- }
-#else
- uint64_t *d = &env->vfp.zregs[i].d[0];
-#endif
- memcpy(&buf[sve_zreg_offset(vq, i)], &d[0], vq * 16);
+ r = sve_bswap64(tmp, &env->vfp.zregs[i].d[0], vq * 2);
+ memcpy(&buf[sve_zreg_offset(vq, i)], r, vq * 16);
}
for (i = 0; i < 17; ++i) {
-#ifdef HOST_WORDS_BIGENDIAN
- uint64_t d[DIV_ROUND_UP(vq * 2, 8)];
- int j;
-
- for (j = 0; j < DIV_ROUND_UP(vq * 2, 8); ++j) {
- d[j] = bswap64(env->vfp.pregs[i].p[j]);
- }
-#else
- uint64_t *d = &env->vfp.pregs[i].p[0];
-#endif
- memcpy(&buf[sve_preg_offset(vq, i)], &d[0], vq * 16 / 8);
+ r = sve_bswap64(tmp, r = &env->vfp.pregs[i].p[0],
+ DIV_ROUND_UP(vq * 2, 8));
+ memcpy(&buf[sve_preg_offset(vq, i)], r, vq * 16 / 8);
}
fpr = cpu_to_dump32(s, vfp_get_fpsr(env));
diff --git a/target/arm/cpu.h b/target/arm/cpu.h
index 5b9c3e4cd73d..b3092e5213e6 100644
--- a/target/arm/cpu.h
+++ b/target/arm/cpu.h
@@ -975,6 +975,31 @@ void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq);
void aarch64_sve_change_el(CPUARMState *env, int old_el,
int new_el, bool el0_a64);
void aarch64_add_sve_properties(Object *obj);
+
+/*
+ * SVE registers are encoded in KVM's memory in an endianness-invariant format.
+ * The byte at offset i from the start of the in-memory representation contains
+ * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
+ * lowest offsets are stored in the lowest memory addresses, then that nearly
+ * matches QEMU's representation, which is to use an array of host-endian
+ * uint64_t's, where the lower offsets are at the lower indices. To complete
+ * the translation we just need to byte swap the uint64_t's on big-endian
hosts.
+ */
+static inline uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
+{
+#ifdef HOST_WORDS_BIGENDIAN
+ int i;
+
+ for (i = 0; i < nr; ++i) {
+ dst[i] = bswap64(src[i]);
+ }
+
+ return dst;
+#else
+ return src;
+#endif
+}
+
#else
static inline void aarch64_sve_narrow_vq(CPUARMState *env, unsigned vq) { }
static inline void aarch64_sve_change_el(CPUARMState *env, int o,
diff --git a/target/arm/kvm64.c b/target/arm/kvm64.c
index 876184b8fe4d..e2da756e65ed 100644
--- a/target/arm/kvm64.c
+++ b/target/arm/kvm64.c
@@ -876,30 +876,6 @@ static int kvm_arch_put_fpsimd(CPUState *cs)
return 0;
}
-/*
- * SVE registers are encoded in KVM's memory in an endianness-invariant format.
- * The byte at offset i from the start of the in-memory representation contains
- * the bits [(7 + 8 * i) : (8 * i)] of the register value. As this means the
- * lowest offsets are stored in the lowest memory addresses, then that nearly
- * matches QEMU's representation, which is to use an array of host-endian
- * uint64_t's, where the lower offsets are at the lower indices. To complete
- * the translation we just need to byte swap the uint64_t's on big-endian
hosts.
- */
-static uint64_t *sve_bswap64(uint64_t *dst, uint64_t *src, int nr)
-{
-#ifdef HOST_WORDS_BIGENDIAN
- int i;
-
- for (i = 0; i < nr; ++i) {
- dst[i] = bswap64(src[i]);
- }
-
- return dst;
-#else
- return src;
-#endif
-}
-
/*
* KVM SVE registers come in slices where ZREGs have a slice size of 2048 bits
* and PREGS and the FFR have a slice size of 256 bits. However we simply hard