[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 5/6] xen: modify page table construction
From: |
Juergen Gross |
Subject: |
Re: [PATCH v2 5/6] xen: modify page table construction |
Date: |
Thu, 11 Feb 2016 15:35:45 +0100 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.5.0 |
On 11/02/16 13:47, Daniel Kiper wrote:
> On Thu, Feb 11, 2016 at 08:53:25AM +0100, Juergen Gross wrote:
>> Modify the page table construction to allow multiple virtual regions
>> to be mapped. This is done as preparation for removing the p2m list
>> from the initial kernel mapping in order to support huge pv domains.
>>
>> This allows a cleaner approach for mapping the relocator page by
>> using this capability.
>>
>> The interface to the assembler level of the relocator has to be changed
>> in order to be able to process multiple page table areas.
>
> I hope that older kernels could be loaded as is and everything works as
> expected.
See Patch 0/6. I have tested old kernels, too.
>
>> Signed-off-by: Juergen Gross <address@hidden>
>> ---
>> grub-core/lib/i386/xen/relocator.S | 47 +++---
>> grub-core/lib/x86_64/xen/relocator.S | 44 +++--
>> grub-core/lib/xen/relocator.c | 22 ++-
>> grub-core/loader/i386/xen.c | 313
>> +++++++++++++++++++++++------------
>> include/grub/xen/relocator.h | 6 +-
>> 5 files changed, 277 insertions(+), 155 deletions(-)
>>
>> diff --git a/grub-core/lib/i386/xen/relocator.S
>> b/grub-core/lib/i386/xen/relocator.S
>> index 694a54c..c23b405 100644
>> --- a/grub-core/lib/i386/xen/relocator.S
>> +++ b/grub-core/lib/i386/xen/relocator.S
>> @@ -50,41 +50,45 @@ VARIABLE(grub_relocator_xen_remapper_map_high)
>> jmp *%ebx
>>
>> LOCAL(cont):
>> - xorl %eax, %eax
>> - movl %eax, %ebp
>> + /* mov imm32, %eax */
>> + .byte 0xb8
>> +VARIABLE(grub_relocator_xen_paging_areas_addr)
>> + .long 0
>> + movl %eax, %ebx
>> 1:
>> -
>> + movl 0(%ebx), %ebp
>> + movl 4(%ebx), %ecx
>
> Oh... No, please use constants not plain numbers and
> explain in comments what is going on. Otherwise it
> is unreadable.
Hmm, yes, some comments wouldn't hurt. :-)
Regarding constants: do you really think I should introduce their
first usage in this file with my patch?
>
>> + testl %ecx, %ecx
>> + jz 3f
>> + addl $8, %ebx
>> + movl %ebx, %esp
>> +
>> +2:
>> + movl %ecx, %edi
>> /* mov imm32, %eax */
>> .byte 0xb8
>> VARIABLE(grub_relocator_xen_mfn_list)
>> .long 0
>> - movl %eax, %edi
>> - movl %ebp, %eax
>> - movl 0(%edi, %eax, 4), %ecx
>> -
>> - /* mov imm32, %ebx */
>> - .byte 0xbb
>> -VARIABLE(grub_relocator_xen_paging_start)
>> - .long 0
>> - shll $12, %eax
>> - addl %eax, %ebx
>> + movl 0(%eax, %ebp, 4), %ecx
>> + movl %ebp, %ebx
>> + shll $12, %ebx
>
> Ditto.
Look at the removed line above: I just switched register usage.
>
>> movl %ecx, %edx
>> shll $12, %ecx
>> shrl $20, %edx
>> orl $5, %ecx
>> movl $2, %esi
>> movl $__HYPERVISOR_update_va_mapping, %eax
>> - int $0x82
>> + int $0x82 /* parameters: eax, ebx, ecx, edx, esi */
>
> Please use more verbose comments.
:-)
>
>>
>> incl %ebp
>> - /* mov imm32, %ecx */
>> - .byte 0xb9
>> -VARIABLE(grub_relocator_xen_paging_size)
>> - .long 0
>> - cmpl %ebp, %ecx
>> + movl %edi, %ecx
>> +
>> + loop 2b
>>
>> - ja 1b
>> + mov %esp, %ebx
>> + jmp 1b
>>
>> +3:
>> /* mov imm32, %ebx */
>> .byte 0xbb
>> VARIABLE(grub_relocator_xen_mmu_op_addr)
>> @@ -102,6 +106,9 @@ VARIABLE(grub_relocator_xen_remap_continue)
>>
>> jmp *%eax
>>
>> +VARIABLE(grub_relocator_xen_paging_areas)
>> + .long 0, 0, 0, 0, 0, 0, 0, 0
>> +
>> VARIABLE(grub_relocator_xen_mmu_op)
>> .space 256
>>
>> diff --git a/grub-core/lib/x86_64/xen/relocator.S
>> b/grub-core/lib/x86_64/xen/relocator.S
>> index 92e9e72..dbb90c7 100644
>> --- a/grub-core/lib/x86_64/xen/relocator.S
>> +++ b/grub-core/lib/x86_64/xen/relocator.S
>
> Is to possible to split this patch to i386 and x86-64 stuff patches?
I don't think so. The C-part is common and assembler sources must both
match.
>
>> @@ -50,31 +50,24 @@ VARIABLE(grub_relocator_xen_remapper_map)
>>
>> LOCAL(cont):
>>
>> - /* mov imm64, %rcx */
>> - .byte 0x48
>> - .byte 0xb9
>> -VARIABLE(grub_relocator_xen_paging_size)
>> - .quad 0
>> -
>> - /* mov imm64, %rax */
>> - .byte 0x48
>> - .byte 0xb8
>> -VARIABLE(grub_relocator_xen_paging_start)
>> - .quad 0
>> -
>> - movq %rax, %r12
>> -
>> /* mov imm64, %rax */
>> .byte 0x48
>> .byte 0xb8
>> VARIABLE(grub_relocator_xen_mfn_list)
>> .quad 0
>>
>> - movq %rax, %rsi
>> + movq %rax, %rbx
>> + leaq EXT_C(grub_relocator_xen_paging_areas) (%rip), %r8
>> +
>> 1:
>> + movq 0(%r8), %r12
>> + movq 8(%r8), %rcx
>
> Ditto.
>
>> + testq %rcx, %rcx
>> + jz 3f
>> +2:
>> movq %r12, %rdi
>> - movq %rsi, %rbx
>> - movq 0(%rsi), %rsi
>> + shlq $12, %rdi
>> + movq (%rbx, %r12, 8), %rsi
>
> Ditto.
>
>> shlq $12, %rsi
>> orq $5, %rsi
>> movq $2, %rdx
>> @@ -83,13 +76,15 @@ VARIABLE(grub_relocator_xen_mfn_list)
>> syscall
>>
>> movq %r9, %rcx
>> - addq $8, %rbx
>> - addq $4096, %r12
>> - movq %rbx, %rsi
>> + incq %r12
>> +
>> + loop 2b
>>
>> - loop 1b
>> + addq $16, %r8
>
> Ditto.
>
>> + jmp 1b
>>
>> - leaq LOCAL(mmu_op) (%rip), %rdi
>> +3:
>> + leaq EXT_C(grub_relocator_xen_mmu_op) (%rip), %rdi
>> movq $3, %rsi
>> movq $0, %rdx
>> movq $0x7FF0, %r10
>
> Ugh... Please fix this too. Probably in separate patch.
I don't mind cleaning the assembler sources by using more comments
and constants. I just don't think this should be part of this series.
>
>> @@ -104,7 +99,10 @@ VARIABLE(grub_relocator_xen_remap_continue)
>>
>> jmp *%rax
>>
>> -LOCAL(mmu_op):
>> +VARIABLE(grub_relocator_xen_paging_areas)
>> + /* array of start, size pairs, size 0 is end marker */
>> + .quad 0, 0, 0, 0, 0, 0, 0, 0
>> +
>> VARIABLE(grub_relocator_xen_mmu_op)
>> .space 256
>>
>> diff --git a/grub-core/lib/xen/relocator.c b/grub-core/lib/xen/relocator.c
>> index 8f427d3..bc29055 100644
>> --- a/grub-core/lib/xen/relocator.c
>> +++ b/grub-core/lib/xen/relocator.c
>> @@ -36,15 +36,15 @@ extern grub_uint8_t grub_relocator_xen_remap_end;
>> extern grub_xen_reg_t grub_relocator_xen_stack;
>> extern grub_xen_reg_t grub_relocator_xen_start_info;
>> extern grub_xen_reg_t grub_relocator_xen_entry_point;
>> -extern grub_xen_reg_t grub_relocator_xen_paging_start;
>> -extern grub_xen_reg_t grub_relocator_xen_paging_size;
>> extern grub_xen_reg_t grub_relocator_xen_remapper_virt;
>> extern grub_xen_reg_t grub_relocator_xen_remapper_virt2;
>> extern grub_xen_reg_t grub_relocator_xen_remapper_map;
>> extern grub_xen_reg_t grub_relocator_xen_mfn_list;
>> +extern grub_xen_reg_t grub_relocator_xen_paging_areas[2 * XEN_MAX_MAPPINGS];
>> extern grub_xen_reg_t grub_relocator_xen_remap_continue;
>> #ifdef __i386__
>> extern grub_xen_reg_t grub_relocator_xen_mmu_op_addr;
>> +extern grub_xen_reg_t grub_relocator_xen_paging_areas_addr;
>> extern grub_xen_reg_t grub_relocator_xen_remapper_map_high;
>> #endif
>> extern mmuext_op_t grub_relocator_xen_mmu_op[3];
>> @@ -61,6 +61,7 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>> {
>> grub_err_t err;
>> void *relst;
>> + int i;
>> grub_relocator_chunk_t ch, ch_tramp;
>> grub_xen_mfn_t *mfn_list =
>> (grub_xen_mfn_t *) grub_xen_start_page_addr->mfn_list;
>> @@ -77,8 +78,11 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>> grub_relocator_xen_stack = state.stack;
>> grub_relocator_xen_start_info = state.start_info;
>> grub_relocator_xen_entry_point = state.entry_point;
>> - grub_relocator_xen_paging_start = state.paging_start << 12;
>> - grub_relocator_xen_paging_size = state.paging_size;
>> + for (i = 0; i < XEN_MAX_MAPPINGS; i++)
>> + {
>> + grub_relocator_xen_paging_areas[2 * i] = state.paging_start[i];
>> + grub_relocator_xen_paging_areas[2 * i + 1] = state.paging_size[i];
>
> Why 2 and 1? Constants?
I guess I should probably define grub_relocator_xen_paging_areas as
an array of struct { grub_xen_reg_t start; grub_xen_reg_t size; };
>
>> + }
>> grub_relocator_xen_remapper_virt = remapper_virt;
>> grub_relocator_xen_remapper_virt2 = remapper_virt;
>> grub_relocator_xen_remap_continue = trampoline_virt;
>> @@ -88,10 +92,12 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>> grub_relocator_xen_remapper_map_high = (mfn_list[remapper_pfn] >> 20);
>> grub_relocator_xen_mmu_op_addr = (char *) &grub_relocator_xen_mmu_op
>> - (char *) &grub_relocator_xen_remap_start + remapper_virt;
>> + grub_relocator_xen_paging_areas_addr =
>> + (char *) &grub_relocator_xen_paging_areas
>> + - (char *) &grub_relocator_xen_remap_start + remapper_virt;
>> #endif
>>
>> - grub_relocator_xen_mfn_list = state.mfn_list
>> - + state.paging_start * sizeof (grub_addr_t);
>> + grub_relocator_xen_mfn_list = state.mfn_list;
>>
>> grub_memset (grub_relocator_xen_mmu_op, 0,
>> sizeof (grub_relocator_xen_mmu_op));
>> @@ -100,9 +106,9 @@ grub_relocator_xen_boot (struct grub_relocator *rel,
>> #else
>> grub_relocator_xen_mmu_op[0].cmd = MMUEXT_PIN_L4_TABLE;
>> #endif
>> - grub_relocator_xen_mmu_op[0].arg1.mfn = mfn_list[state.paging_start];
>> + grub_relocator_xen_mmu_op[0].arg1.mfn = mfn_list[state.paging_start[0]];
>> grub_relocator_xen_mmu_op[1].cmd = MMUEXT_NEW_BASEPTR;
>> - grub_relocator_xen_mmu_op[1].arg1.mfn = mfn_list[state.paging_start];
>> + grub_relocator_xen_mmu_op[1].arg1.mfn = mfn_list[state.paging_start[0]];
>> grub_relocator_xen_mmu_op[2].cmd = MMUEXT_UNPIN_TABLE;
>> grub_relocator_xen_mmu_op[2].arg1.mfn =
>> mfn_list[grub_xen_start_page_addr->pt_base >> 12];
>> diff --git a/grub-core/loader/i386/xen.c b/grub-core/loader/i386/xen.c
>> index 0f41048..5e10420 100644
>> --- a/grub-core/loader/i386/xen.c
>> +++ b/grub-core/loader/i386/xen.c
>> @@ -42,6 +42,29 @@
>>
>> GRUB_MOD_LICENSE ("GPLv3+");
>>
>> +#ifdef __x86_64__
>> +#define NUMBER_OF_LEVELS 4
>> +#define INTERMEDIATE_OR 7
>> +#define VIRT_MASK 0x0000ffffffffffffULL
>> +#else
>> +#define NUMBER_OF_LEVELS 3
>> +#define INTERMEDIATE_OR 3
>> +#define VIRT_MASK 0x00000000ffffffffULL
>
> Long expected constants... Nice!
Just to please you. :-)
They were just moved up some lines as they are used in the new
structure definitions.
Using constants was unavoidable here. ;-)
>
>> +#endif
>> +
>> +struct grub_xen_mapping_lvl {
>> + grub_uint64_t virt_start;
>> + grub_uint64_t virt_end;
>> + grub_uint64_t pfn_start;
>> + grub_uint64_t n_pt_pages;
>> +};
>> +
>> +struct grub_xen_mapping {
>> + grub_uint64_t *where;
>> + struct grub_xen_mapping_lvl area;
>> + struct grub_xen_mapping_lvl lvls[NUMBER_OF_LEVELS];
>> +};
>> +
>> static struct grub_relocator *relocator = NULL;
>> static grub_uint64_t max_addr;
>> static grub_dl_t my_mod;
>> @@ -56,9 +79,10 @@ static struct grub_relocator_xen_state state;
>> static grub_xen_mfn_t *virt_mfn_list;
>> static struct start_info *virt_start_info;
>> static grub_xen_mfn_t console_pfn;
>> -static grub_uint64_t *virt_pgtable;
>> -static grub_uint64_t pgtbl_start;
>> static grub_uint64_t pgtbl_end;
>> +static struct grub_xen_mapping mappings[XEN_MAX_MAPPINGS];
>> +static int n_mappings;
>> +static struct grub_xen_mapping *map_reloc;
>>
>> #define PAGE_SIZE 4096
>> #define MAX_MODULES (PAGE_SIZE / sizeof (struct xen_multiboot_mod_list))
>> @@ -75,100 +99,162 @@ page2offset (grub_uint64_t page)
>> return page << PAGE_SHIFT;
>> }
>>
>> -#ifdef __x86_64__
>> -#define NUMBER_OF_LEVELS 4
>> -#define INTERMEDIATE_OR 7
>> -#else
>> -#define NUMBER_OF_LEVELS 3
>> -#define INTERMEDIATE_OR 3
>> -#endif
>> -
>> -static grub_uint64_t
>> -get_pgtable_size (grub_uint64_t total_pages, grub_uint64_t virt_base)
>> +static grub_err_t
>> +get_pgtable_size (grub_uint64_t from, grub_uint64_t to, grub_uint64_t pfn)
>> {
>> - if (!virt_base)
>> - total_pages++;
>> - grub_uint64_t ret = 0;
>> - grub_uint64_t ll = total_pages;
>> - int i;
>> - for (i = 0; i < NUMBER_OF_LEVELS; i++)
>> - {
>> - ll = (ll + POINTERS_PER_PAGE - 1) >> LOG_POINTERS_PER_PAGE;
>> - /* PAE wants all 4 root directories present. */
>> -#ifdef __i386__
>> - if (i == 1)
>> - ll = 4;
>> -#endif
>> - ret += ll;
>> - }
>> - for (i = 1; i < NUMBER_OF_LEVELS; i++)
>> - if (virt_base >> (PAGE_SHIFT + i * LOG_POINTERS_PER_PAGE))
>> - ret++;
>> - return ret;
>> -}
>> + struct grub_xen_mapping *map, *map_cmp;
>> + grub_uint64_t mask, bits;
>> + int i, m;
>>
>> -static void
>> -generate_page_table (grub_uint64_t *where, grub_uint64_t paging_start,
>> - grub_uint64_t paging_end, grub_uint64_t total_pages,
>> - grub_uint64_t virt_base, grub_xen_mfn_t *mfn_list)
>> -{
>> - if (!virt_base)
>> - paging_end++;
>> + if (n_mappings == XEN_MAX_MAPPINGS)
>> + return grub_error (GRUB_ERR_BUG, "too many mapped areas");
>> +
>> + grub_dprintf ("xen", "get_pgtable_size %d from=%llx, to=%llx, pfn=%llx\n",
>> + n_mappings, (unsigned long long) from, (unsigned long long) to,
>> + (unsigned long long) pfn);
>>
>> - grub_uint64_t lx[NUMBER_OF_LEVELS], lxs[NUMBER_OF_LEVELS];
>> - grub_uint64_t nlx, nls, sz = 0;
>> - int l;
>> + map = mappings + n_mappings;
>> + grub_memset (map, 0, sizeof (*map));
>>
>> - nlx = paging_end;
>> - nls = virt_base >> PAGE_SHIFT;
>> - for (l = 0; l < NUMBER_OF_LEVELS; l++)
>> + map->area.virt_start = from & VIRT_MASK;
>> + map->area.virt_end = (to - 1) & VIRT_MASK;
>> + map->area.n_pt_pages = 0;
>> +
>> + for (i = NUMBER_OF_LEVELS - 1; i >= 0; i--)
>> {
>> - nlx = (nlx + POINTERS_PER_PAGE - 1) >> LOG_POINTERS_PER_PAGE;
>> - /* PAE wants all 4 root directories present. */
>> + map->lvls[i].pfn_start = pfn + map->area.n_pt_pages;
>> + if (i == NUMBER_OF_LEVELS - 1)
>> + {
>> + if (n_mappings == 0)
>> + {
>> + map->lvls[i].virt_start = 0;
>> + map->lvls[i].virt_end = VIRT_MASK;
>> + map->lvls[i].n_pt_pages = 1;
>> + map->area.n_pt_pages++;
>> + }
>> + continue;
>> + }
>> +
>> + bits = PAGE_SHIFT + (i + 1) * LOG_POINTERS_PER_PAGE;
>> + mask = (1ULL << bits) - 1;
>> + map->lvls[i].virt_start = map->area.virt_start & ~mask;
>> + map->lvls[i].virt_end = map->area.virt_end | mask;
>> #ifdef __i386__
>> - if (l == 1)
>> - nlx = 4;
>> + /* PAE wants last root directory present. */
>> + if (i == 1 && to <= 0xc0000000 && n_mappings == 0)
>
> Ditto.
>
>> + map->lvls[i].virt_end = VIRT_MASK;
>> #endif
>> - lx[l] = nlx;
>> - sz += lx[l];
>> - lxs[l] = nls & (POINTERS_PER_PAGE - 1);
>> - if (nls && l != 0)
>> - sz++;
>> - nls >>= LOG_POINTERS_PER_PAGE;
>> + for (m = 0; m < n_mappings; m++)
>> + {
>> + map_cmp = mappings + m;
>> + if (map_cmp->lvls[i].virt_start == map_cmp->lvls[i].virt_end)
>> + continue;
>> + if (map->lvls[i].virt_start >= map_cmp->lvls[i].virt_start &&
>> + map->lvls[i].virt_end <= map_cmp->lvls[i].virt_end)
>> + {
>> + map->lvls[i].virt_start = 0;
>> + map->lvls[i].virt_end = 0;
>> + break;
>> + }
>> + if (map->lvls[i].virt_start >= map_cmp->lvls[i].virt_start &&
>> + map->lvls[i].virt_start <= map_cmp->lvls[i].virt_end)
>> + map->lvls[i].virt_start = map_cmp->lvls[i].virt_end + 1;
>> + if (map->lvls[i].virt_end >= map_cmp->lvls[i].virt_start &&
>> + map->lvls[i].virt_end <= map_cmp->lvls[i].virt_end)
>> + map->lvls[i].virt_end = map_cmp->lvls[i].virt_start - 1;
>> + }
>> + if (map->lvls[i].virt_start < map->lvls[i].virt_end)
>> + map->lvls[i].n_pt_pages =
>> + ((map->lvls[i].virt_end - map->lvls[i].virt_start) >> bits) + 1;
>> + map->area.n_pt_pages += map->lvls[i].n_pt_pages;
>> + grub_dprintf ("xen", "get_pgtable_size level %d: virt %llx-%llx %d
>> pts\n",
>> + i, (unsigned long long) map->lvls[i].virt_start,
>> + (unsigned long long) map->lvls[i].virt_end,
>> + (int) map->lvls[i].n_pt_pages);
>> }
>>
>> - grub_uint64_t lp;
>> - grub_uint64_t j;
>> - grub_uint64_t *pg = (grub_uint64_t *) where;
>> - int pr = 0;
>> + grub_dprintf ("xen", "get_pgtable_size return: %d page tables\n",
>> + (int) map->area.n_pt_pages);
>> +
>> + state.paging_start[n_mappings] = pfn;
>> + state.paging_size[n_mappings] = map->area.n_pt_pages;
>> +
>> + return GRUB_ERR_NONE;
>> +}
>> +
>> +static grub_uint64_t *
>> +get_pg_table_virt (int mapping, int level)
>> +{
>> + grub_uint64_t pfn;
>> + struct grub_xen_mapping *map;
>> +
>> + map = mappings + mapping;
>> + pfn = map->lvls[level].pfn_start - map->lvls[NUMBER_OF_LEVELS -
>> 1].pfn_start;
>> + return map->where + pfn * POINTERS_PER_PAGE;
>> +}
>>
>> - grub_memset (pg, 0, sz * PAGE_SIZE);
>> +static grub_uint64_t
>> +get_pg_table_prot (int level, grub_uint64_t pfn)
>> +{
>> + int m;
>> + grub_uint64_t pfn_s, pfn_e;
>>
>> - lp = paging_start + lx[NUMBER_OF_LEVELS - 1];
>> - for (l = NUMBER_OF_LEVELS - 1; l >= 1; l--)
>> + if (level > 0)
>> + return INTERMEDIATE_OR;
>> + for (m = 0; m < n_mappings; m++)
>> {
>> - if (lxs[l] || pr)
>> - pg[0] = page2offset (mfn_list[lp++]) | INTERMEDIATE_OR;
>> - if (pr)
>> - pg += POINTERS_PER_PAGE;
>> - for (j = 0; j < lx[l - 1]; j++)
>> - pg[j + lxs[l]] = page2offset (mfn_list[lp++]) | INTERMEDIATE_OR;
>> - pg += lx[l] * POINTERS_PER_PAGE;
>> - if (lxs[l])
>> - pr = 1;
>> + pfn_s = mappings[m].lvls[NUMBER_OF_LEVELS - 1].pfn_start;
>> + pfn_e = mappings[m].area.n_pt_pages + pfn_s;
>> + if (pfn >= pfn_s && pfn < pfn_e)
>> + return 5;
>> }
>> + return 7;
>
> What 7 and 5 means?
Page table protection bits.
> I am exhausted. Please fix above stuff and I will review this patch
> again after fixes.
I'd really like to have the maintainer's opinion on usage of constants
vs. pure numbers. Up to now I have the impression that constants are
used only to abstract i386 vs. x86-64. I wouldn't mind changing that,
but I don't like modifying all my patches which are then rejected due to
that change.
Thanks for doing the review,
Juergen
- Re: [PATCH v2 1/6] xen: factor out p2m list allocation into separate function, (continued)
[PATCH v2 3/6] xen: factor out allocation of page tables into separate function, Juergen Gross, 2016/02/11
[PATCH v2 5/6] xen: modify page table construction, Juergen Gross, 2016/02/11