Richard Henderson <richard.henderson@linaro.org> writes:
Finish weaning user-only away from PageDesc.
Using an interval tree to track page permissions means that
we can represent very large regions efficiently.
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/290
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/967
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1214
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
accel/tcg/internal.h | 4 +-
accel/tcg/tb-maint.c | 20 +-
accel/tcg/user-exec.c | 614 ++++++++++++++++++++++-----------
tests/tcg/multiarch/test-vma.c | 22 ++
4 files changed, 451 insertions(+), 209 deletions(-)
create mode 100644 tests/tcg/multiarch/test-vma.c
diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h
index 250f0daac9..c7e157d1cd 100644
--- a/accel/tcg/internal.h
+++ b/accel/tcg/internal.h
@@ -24,9 +24,7 @@
#endif
typedef struct PageDesc {
-#ifdef CONFIG_USER_ONLY
- unsigned long flags;
-#else
+#ifndef CONFIG_USER_ONLY
QemuSpin lock;
/* list of TBs intersecting this ram page */
uintptr_t first_tb;
diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c
index 14e8e47a6a..694440cb4a 100644
--- a/accel/tcg/tb-maint.c
+++ b/accel/tcg/tb-maint.c
@@ -68,15 +68,23 @@ static void page_flush_tb(void)
<snip>
int page_get_flags(target_ulong address)
{
- PageDesc *p;
+ PageFlagsNode *p = pageflags_find(address, address);
- p = page_find(address >> TARGET_PAGE_BITS);
+ /*
+ * See util/interval-tree.c re lockless lookups: no false positives but
+ * there are false negatives. If we find nothing, retry with the mmap
+ * lock acquired.
+ */
if (!p) {
- return 0;
+ if (have_mmap_lock()) {
+ return 0;
+ }
+ mmap_lock();
+ p = pageflags_find(address, address);
+ mmap_unlock();
+ if (!p) {
+ return 0;
+ }
}
return p->flags;
To avoid the brain twisting following locks and multiple return legs how about
this:
int page_get_flags(target_ulong address)
{
PageFlagsNode *p = pageflags_find(address, address);
/*
* See util/interval-tree.c re lockless lookups: no false positives but
* there are false negatives. If we had the lock and found
* nothing we are done, otherwise retry with the mmap lock acquired.
*/
if (have_mmap_lock()) {
return p ? p->flags : 0;
}
mmap_lock();
p = pageflags_find(address, address);
mmap_unlock();
return p ? p->flags : 0;
}
+/*
+ * Test very large vma allocations.
+ * The qemu out-of-memory condition was within the mmap syscall itself.
+ * If the syscall actually returns with MAP_FAILED, the test succeeded.
+ */
+#include <sys/mman.h>
+
+int main()
+{
+ int n = sizeof(size_t) == 4 ? 32 : 45;
+
+ for (int i = 28; i < n; i++) {
+ size_t l = (size_t)1 << i;
+ void *p = mmap(0, l, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
+ if (p == MAP_FAILED) {
+ break;
+ }
+ munmap(p, l);
+ }
+ return 0;
+}
So is the failure mode here we actually seg or bus out?