qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] accel/tcg: Forward probe size on to notdirty_write


From: Jessica Clarke
Subject: [PATCH] accel/tcg: Forward probe size on to notdirty_write
Date: Sat, 4 Nov 2023 03:12:32 +0000

Without this, we just dirty a single byte, and so if the caller writes
more than one byte to the host memory then we won't have invalidated any
translation blocks that start after the first byte and overlap those
writes. In particular, AArch64's DC ZVA implementation uses probe_access
(via probe_write), and so we don't invalidate the entire block, only the
TB overlapping the first byte (and, in the unusual case an unaligned VA
is given to the instruction, we also probe that specific address in
order to get the right VA reported on an exception, so will invalidate a
TB overlapping that address too). Since our IC IVAU implementation is a
no-op for system emulation that relies on the softmmu already having
detected self-modifying code via this mechanism, this means we have
observably wrong behaviour when jumping to code that has been DC ZVA'ed.
In practice this is an unusual thing for software to do, as in reality
the OS will DC ZVA the page and the application will go and write actual
instructions to it that aren't UDF #0, but you can write a test that
clearly shows the faulty behaviour.

For functions other than probe_access it's not clear what size to use
when 0 is passed in. Arguably a size of 0 shouldn't dirty at all, since
if you want to actually write then you should pass in a real size, but I
have conservatively kept the implementation as dirtying the first byte
in that case so as to avoid breaking any assumptions about that
behaviour.

Signed-off-by: Jessica Clarke <jrtc27@jrtc27.com>
---

Source code for the test case:

#include <sys/mman.h>

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/*
 * Make instructions written to [start, end) visible to the instruction
 * stream, using the standard AArch64 self-modifying-code sequence:
 * clean D-cache to PoU, barrier, invalidate I-cache, barrier, ISB.
 */
static void
sync_icache(void *start, void *end)
{
        uintptr_t lo = (uintptr_t)start;
        uintptr_t hi = (uintptr_t)end;
        uint64_t ctr_el0;
        size_t dmin_line, imin_line;
        uintptr_t p;

        __asm__("mrs %0, ctr_el0" : "=r"(ctr_el0));

        /* Fields are Log2 words, i.e. Log2(bytes / 4) */
        dmin_line = 4u << ((ctr_el0 >> 16) & 0xf);
        imin_line = 4u << (ctr_el0 & 0xf);

        /* Clean each D-cache line covering the range to the PoU. */
        p = lo & ~(dmin_line - 1);
        while (p < hi) {
                __asm__ __volatile__("dc cvau, %0" :: "r"(p));
                p += dmin_line;
        }

        __asm__ __volatile__("dsb ish");

        /* Invalidate each I-cache line covering the range. */
        p = lo & ~(imin_line - 1);
        while (p < hi) {
                __asm__ __volatile__("ic ivau, %0" :: "r"(p));
                p += imin_line;
        }

        __asm__ __volatile__("dsb ish");

        __asm__ __volatile__("isb");
}

/*
 * Test case: jump into a block of instructions after zeroing it with
 * DC ZVA.  The architecturally required outcome (given a large enough
 * ZVA block size) is a SIGILL on UDF #0; a buggy QEMU instead runs the
 * stale translated code and prints the diagnostic messages below.
 */
int
main(void)
{
        uint32_t *insns;
        /*
         * Pinned to x0 so the "+r" constraint on the BLR asm observes the
         * called code's return-value register directly.
         */
        register uint64_t output __asm__("x0");

        /* RWX mapping: we write instructions here and then execute them. */
        insns = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_EXEC,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (insns == MAP_FAILED) {
                fprintf(stderr, "mmap failed: %s\n", strerror(errno));
                return (1);
        }

        insns[0] = 0xd5033fdf; /* ISB (to force new TB) */
        insns[1] = 0xd2800540; /* MOV X0, #42 */
        insns[2] = 0xd65f03c0; /* RET */

        sync_icache(insns, insns + 3);

        /* Prime TB cache */
        output = 0;
        /* x30 is clobbered because BLR writes the link register. */
        __asm__ __volatile__("blr %1" : "+r"(output) : "r"(insns) : "x30");

        if (output != 42) {
                fprintf(stderr, "first run gave %lu not 42", output);
                return (1);
        }

        /* Demonstrate that jumping to insns + 1 currently works too */
        output = 0;
        __asm__ __volatile__("blr %1" : "+r"(output) : "r"(insns + 1) : "x30");

        if (output != 42) {
                fprintf(stderr, "second run gave %lu not 42", output);
                return (1);
        }

        /* Zero the whole function with a single DC ZVA data write. */
        __asm__ __volatile("dc zva, %0" :: "r"(insns) : "memory");

        /* Bail out if the implementation's ZVA block is under 12 bytes. */
        if (insns[0] != 0 || insns[1] != 0 || insns[2] != 0) {
                fprintf(stderr, "DC ZVA didn't zero out entire function; "
                    "block size too small?\n");
                return (1);
        }

        sync_icache(insns, insns + 3);

        /* Now the actual test; this should be well-defined */
        output = 0;
        __asm__ __volatile__("blr %1" : "+r"(output) : "r"(insns + 1) : "x30");

        /* Reaching here at all means the stale TB was not invalidated. */
        fprintf(stderr, "Did not trap on UDF #0!\n");
        if (output == 42)
                fprintf(stderr, "Executed stale MOV X0, #42\n");
        else
                fprintf(stderr, "Unexpected failure mode; output is %lu\n",
                    output);

        return (0);
}

Output without this patch (using FreeBSD as the host since it's what I
have easy access to):

root@freebsd-aarch64:~ # ./qemu-tb-dc-zva
Did not trap on UDF #0!
Executed stale MOV X0, #42

Output with this patch:

root@freebsd-aarch64:~ # ./qemu-tb-dc-zva
Illegal instruction (core dumped)

The latter is what I believe to be the only specified behaviour
(assuming the DC ZVA block size is big enough), and is what I observe on
multiple hardware implementations.

 accel/tcg/cputlb.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/accel/tcg/cputlb.c b/accel/tcg/cputlb.c
index b8c5e345b8..a6c15d93ff 100644
--- a/accel/tcg/cputlb.c
+++ b/accel/tcg/cputlb.c
@@ -1487,13 +1487,14 @@ int probe_access_full(CPUArchState *env, vaddr addr, int size,
                       bool nonfault, void **phost, CPUTLBEntryFull **pfull,
                       uintptr_t retaddr)
 {
+    int dirtysize = size == 0 ? 1 : size;
     int flags = probe_access_internal(env_cpu(env), addr, size, access_type,
                                       mmu_idx, nonfault, phost, pfull, retaddr,
                                       true);
 
     /* Handle clean RAM pages.  */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1, *pfull, retaddr);
+        notdirty_write(env_cpu(env), addr, dirtysize, *pfull, retaddr);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -1506,6 +1507,7 @@ int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
 {
     void *discard_phost;
     CPUTLBEntryFull *discard_tlb;
+    int dirtysize = size == 0 ? 1 : size;
 
     /* privately handle users that don't need full results */
     phost = phost ? phost : &discard_phost;
@@ -1516,7 +1518,7 @@ int probe_access_full_mmu(CPUArchState *env, vaddr addr, int size,
 
     /* Handle clean RAM pages.  */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1, *pfull, 0);
+        notdirty_write(env_cpu(env), addr, dirtysize, *pfull, 0);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -1529,6 +1531,7 @@ int probe_access_flags(CPUArchState *env, vaddr addr, int size,
 {
     CPUTLBEntryFull *full;
     int flags;
+    int dirtysize = size == 0 ? 1 : size;
 
     g_assert(-(addr | TARGET_PAGE_MASK) >= size);
 
@@ -1538,7 +1541,7 @@ int probe_access_flags(CPUArchState *env, vaddr addr, int size,
 
     /* Handle clean RAM pages. */
     if (unlikely(flags & TLB_NOTDIRTY)) {
-        notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+        notdirty_write(env_cpu(env), addr, dirtysize, full, retaddr);
         flags &= ~TLB_NOTDIRTY;
     }
 
@@ -1574,7 +1577,7 @@ void *probe_access(CPUArchState *env, vaddr addr, int size,
 
         /* Handle clean RAM pages.  */
         if (flags & TLB_NOTDIRTY) {
-            notdirty_write(env_cpu(env), addr, 1, full, retaddr);
+            notdirty_write(env_cpu(env), addr, size, full, retaddr);
         }
     }
 
-- 
2.34.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]