grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/7] Initial support for ARMv7 architecture


From: Francesco Lavra
Subject: Re: [PATCH 2/7] Initial support for ARMv7 architecture
Date: Sat, 30 Mar 2013 16:15:54 +0100
User-agent: Mozilla/5.0 (X11; Linux i686; rv:7.0.1) Gecko/20110929 Thunderbird/7.0.1

On 03/24/2013 06:01 PM, Leif Lindholm wrote:
[...]
> === added file 'grub-core/kern/arm/cache.S'
> --- grub-core/kern/arm/cache.S        1970-01-01 00:00:00 +0000
> +++ grub-core/kern/arm/cache.S        2013-03-24 12:56:20 +0000
> @@ -0,0 +1,251 @@
> +/*
> + *  GRUB  --  GRand Unified Bootloader
> + *  Copyright (C) 2013  Free Software Foundation, Inc.
> + *
> + *  GRUB is free software: you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation, either version 3 of the License, or
> + *  (at your option) any later version.
> + *
> + *  GRUB is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <grub/symbol.h>
> +#include <grub/dl.h>

This include is not needed.

> +
> +     .file   "cache.S"
> +     .text
> +     .syntax unified
> +#if !defined (__thumb2__)
> +     .arm
> +#define ARM(x...)    x
> +#define THUMB(x...)
> +#else
> +     .thumb
> +#define THUMB(x...)  x
> +#define ARM(x...)
> +#endif
> +
> +     .align  2
> +
> +/*
> + * Simple cache maintenance functions
> + */
> +
> +@ r0 - *beg (inclusive)
> +@ r1 - *end (exclusive)      
> +clean_dcache_range:
> +     @ Clean data cache range for range to point-of-unification
> +     ldr     r2, dlinesz
> +1:   cmp     r0, r1
> +     bge     2f
> +#ifdef DEBUG
> +     push    {r0-r2, lr}
> +     mov     r1, r2
> +     mov     r2, r0
> +     ldr     r0, =dcstr
> +     bl      EXT_C(grub_printf)
> +     pop     {r0-r2, lr}
> +#endif
> +     mcr     p15, 0, r0, c7, c11, 1  @ DCCMVAU
> +     add     r0, r0, r2              @ Next line
> +     b       1b
> +2:   dsb
> +     bx      lr

If the start address (initial value of r0 in this function) is not
aligned to a cache line boundary, the last cache line in the range to be
cleaned might not be cleaned, depending on the value of r1 (the end
address). To handle correctly such cases, the initial value of r0 should
be aligned to the cache line boundary.

> +
> +@ r0 - *beg (inclusive)
> +@ r1 - *end (exclusive)      
> +invalidate_icache_range:
> +     @ Invalidate instruction cache for range to point-of-unification
> +     ldr     r2, ilinesz
> +1:   cmp     r0, r1
> +     bge     2f
> +#ifdef DEBUG
> +     push    {r0-r2, lr}
> +     mov     r1, r2
> +     mov     r2, r0
> +     ldr     r0, =icstr
> +     bl      EXT_C(grub_printf)
> +     pop     {r0-r2, lr}
> +#endif
> +     mcr     p15, 0, r0, c7, c5, 1   @ ICIMVAU
> +     add     r0, r0, r2              @ Next line
> +     b       1b

The same applies here: the initial value of r0 should be aligned to the
cache line boundary.

> +     @ Branch predictor invalidate all
> +2:   mcr     p15, 0, r0, c7, c5, 6   @ BPIALL
> +     dsb
> +     isb
> +     bx      lr
> +     
> address@hidden __wrap___clear_cache(char *beg, char *end);
> +FUNCTION(__wrap___clear_cache)
> +     dmb
> +     dsb
> +     push    {r4-r6, lr}
> +     ldr     r2, probed      @ If first call, probe cache sizes
> +     cmp     r2, #0

If the -mimplicit-it=thumb assembler option is removed from patch 1/7
(as I think it should), then here goes the instruction:

        it      eq

> +     bleq    probe_caches    @ This call corrupts r3
> +     mov     r4, r0
> +     mov     r5, r1
> +     bl      clean_dcache_range
> +     mov     r0, r4
> +     mov     r1, r5
> +     bl      invalidate_icache_range
> +     pop     {r4-r6, pc}
> +
> +probe_caches:
> +     push    {r4-r6, lr}
> +     mrc     p15, 0, r4, c0, c0, 1   @ Read Cache Type Register
> +     mov     r5, #1
> +     ubfx    r6, r4, #16, #4         @ Extract min D-cache num word log2
> +     add     r6, r6, #2              @ words->bytes
> +     lsl     r6, r5, r6              @ Convert to num bytes
> +     ldr     r3, =dlinesz
> +     str     r6, [r3]
> +     and     r6, r4, #0xf            @ Extract min I-cache num word log2
> +     add     r6, r6, #2              @ words->bytes
> +     lsl     r6, r5, r6              @ Convert to num bytes
> +     ldr     r3, =ilinesz
> +     str     r6, [r3]
> +     ldr     r3, =probed             @ Flag cache probing done
> +     str     r5, [r3]
> +     pop     {r4-r6, pc}
> +
> +#ifdef DEBUG
> +dcstr:       .asciz  "cleaning %d bytes of D cache @ 0x%08x\n"
> +icstr:       .asciz  "invalidating %d bytes of I cache @ 0x%08x\n"
> +#endif
> +     
> +     .align  3
> +probed:      .long   0
> +dlinesz:
> +     .long   0
> +ilinesz:
> +     .long   0
> +
> address@hidden grub_arch_sync_caches (void *address, grub_size_t len)
> +FUNCTION(grub_arch_sync_caches)
> +     add     r1, r0, r1
> +     b       __wrap___clear_cache
> +
> +     @ r0  - CLIDR
> +     @ r1  - LoC
> +     @ r2  - current level
> +     @ r3  - num sets
> +     @ r4  - num ways
> +     @ r5  - current set
> +     @ r6  - current way
> +     @ r7  - line size
> +     @ r8  - scratch
> +     @ r9  - scratch
> +     @ r10 - scratch
> +     @ r11 - scratch
> +clean_invalidate_dcache:
> +     push    {r4-r12, lr}
> +     mrc     p15, 1, r0, c0, c0, 1   @ Read CLIDR
> +     ubfx    r1, r0, #24, #3         @ Extract LoC
> +     
> +     mov     r2, #0                  @ First level, L1
> +2:   and     r8, r0, #7              @ cache type at current level
> +     cmp     r8, #2
> +     blt     5f                      @ instruction only, or none, skip level
> +
> +     @ set current cache level/type (for CSSIDR read)
> +     lsl     r8, r2, #1
> +     mcr     p15, 2, r8, c0, c0, 0   @ Write CSSELR (level, type: data/uni)
> +
> +     @ read current cache information
> +     mrc     p15, 1, r8, c0, c0, 0   @ Read CSSIDR
> +     ubfx    r3, r8, #13, #14        @ Number of sets -1
> +     ubfx    r4, r8, #3, #9          @ Number of ways -1
> +     and     r7, r8, #7              @ log2(line size in words) - 2
> +     add     r7, r7, #2              @  adjust
> +     mov     r8, #1
> +     lsl     r7, r8, r7              @  -> line size in words
> +     lsl     r7, r7, #2              @  -> bytes
> +
> +     @ set loop
> +     mov     r5, #0                  @ current set = 0
> +3:   lsl     r8, r2, #1              @ insert level
> +     clz     r9, r7                  @ calculate set field offset
> +     mov     r10, #31
> +     sub     r9, r10, r9
> +     lsl     r10, r5, r9
> +     orr     r8, r8, r10             @ insert set field
> +
> +     @ way loop
> +     @ calculate way field offset
> +     mov     r6, #0                  @ current way = 0
> +     add     r10, r4, #1
> +     clz     r9, r10                 @ r9 = way field offset
> +     add     r9, r9, #1
> +4:   lsl     r10, r6, r9
> +     orr     r11, r8, r10            @ insert way field      
> +     
> +     @ clean line by set/way

Nitpick: the comment should be "clean and invalidate line by set/way".

[...]
> === added file 'grub-core/kern/arm/dl.c'
> --- grub-core/kern/arm/dl.c   1970-01-01 00:00:00 +0000
> +++ grub-core/kern/arm/dl.c   2013-03-24 12:56:20 +0000
[...]
> +/*
> + * R_ARM_THM_JUMP19
> + *
> + * Relocate conditional Thumb (T32) B<c>.W
> + */
> +grub_err_t
> +reloc_thm_jump19 (grub_uint16_t *addr, Elf32_Addr sym_addr)
> +{
> +  grub_int32_t offset;
> +  grub_uint32_t insword, insmask;
> +
> +  /* Extract instruction word in alignment-safe manner */
> +  insword = (*addr) << 16 | *(addr + 1);
> +  insmask = 0xfbc0d800;

Shouldn't it be 0xfbc0d000? Bit 11 of the second half-word corresponds
to field J2, and as such is part of the offset.

> +
> +  /* Extract and sign extend offset */
> +  offset = ((insword >> 26) & 1) << 18
> +    | ((insword >> 11) & 1) << 17
> +    | ((insword >> 13) & 1) << 16
> +    | ((insword >> 16) & 0x3f) << 11
> +    | (insword & 0x7ff);
> +  offset <<= 1;
> +  if (offset & (1 << 19))
> +    offset -= (1 << 20);

It looks to me like some shifts are wrong here. It should be:

  offset = ((insword >> 26) & 1) << 19
    | ((insword >> 11) & 1) << 18
    | ((insword >> 13) & 1) << 17
    | ((insword >> 16) & 0x3f) << 11
    | (insword & 0x7ff);
  offset <<= 1;
  if (offset & (1 << 20))
    offset -= (1 << 21);

> +
> +  /* Adjust and re-truncate offset */
> +#ifdef GRUB_UTIL
> +  offset += sym_addr;
> +#else
> +  offset += sym_addr - (grub_uint32_t) addr;
> +#endif
> +  if ((offset > 1048574) || (offset < -1048576))
> +    {
> +      return grub_error
> +        (GRUB_ERR_OUT_OF_RANGE, N_("THM_JUMP19 Relocation out of range."));
> +    }
> +
> +  offset >>= 1;
> +  offset &= 0x7ffff;

The mask should be 0xfffff.

> +
> +  /* Reassemble instruction word and write back */
> +  insword &= insmask;
> +  insword |= ((offset >> 18) & 1) << 26
> +    | ((offset >> 17) & 1) << 11
> +    | ((offset >> 16) & 1) << 13
> +    | ((offset >> 11) & 0x3f) << 16
> +    | (offset & 0x7ff);

As above, it looks like some shifts are wrong. It should be:

  insword |= ((offset >> 19) & 1) << 26
    | ((offset >> 18) & 1) << 11
    | ((offset >> 17) & 1) << 13
    | ((offset >> 11) & 0x3f) << 16
    | (offset & 0x7ff);

> +  *addr = insword >> 16;
> +  *(addr + 1) = insword & 0xffff;
> +  return GRUB_ERR_NONE;
> +}
> 
> 
> 
> /***********************************************************
>  * ARM (A32) relocations:                                  *
>  *                                                         *
>  * ARM instructions are 32-bit in size and 32-bit aligned. *
>  ***********************************************************/
> 
> /*
>  * R_ARM_JUMP24
>  *
>  * Relocate ARM (A32) B
>  */
> grub_err_t
> reloc_jump24 (grub_uint32_t *addr, Elf32_Addr sym_addr)
> {
>   grub_uint32_t insword;
>   grub_int32_t offset;
> 
>   insword = *addr;
> 
>   offset = (insword & 0x00ffffff) << 2;
>   if (offset & 0x02000000)
>     offset -= 0x04000000;
> #ifdef GRUB_UTIL
>   offset += sym_addr;
> #else
>   offset += sym_addr - (grub_uint32_t) addr;
> #endif
> 
>   insword &= 0xff000000;
>   insword |= (offset >> 2) & 0x00ffffff;
> 
>   *addr = insword;
> 
>   return GRUB_ERR_NONE;
> }

If sym_addr has its least significant bit set, it means it addresses a
Thumb instruction, and in this case an inter-working veneer must be used
to effect the transition from ARM to Thumb state. Implementing veneers
is probably overkill here, but I think in this case this function should
error out as the relocation is not supported, instead of silently
performing a wrong relocation.

[...]
> +/*
> + * do_relocations():
> + *   Iterate over all relocations in section, calling appropriate functions
> + *   for patching.
> + */
> +static grub_err_t
> +do_relocations (Elf_Shdr * relhdr, Elf_Ehdr * e, grub_dl_t mod)
> +{
> +  grub_dl_segment_t seg;
> +  Elf_Rel *rel;
> +  Elf_Sym *sym;
> +  int i, entnum;
> +
> +  entnum = relhdr->sh_size / sizeof (Elf_Rel);
> +
> +  /* Find the target segment for this relocation section. */
> +  seg = find_segment (mod->segment, relhdr->sh_info);
> +  if (!seg)
> +    return grub_error (GRUB_ERR_EOF, N_("relocation segment not found"));
> +
> +  rel = (Elf_Rel *) ((grub_addr_t) e + relhdr->sh_offset);
> +
> +  /* Step through all relocations */
> +  for (i = 0, sym = mod->symtab; i < entnum; i++)
> +    {
> +      Elf_Addr *target, sym_addr;
> +      int relsym, reltype;
> +      grub_err_t retval;
> +
> +      if (seg->size < rel[i].r_offset)
> +     return grub_error (GRUB_ERR_BAD_MODULE,
> +                        "reloc offset is out of the segment");
> +      relsym = ELF_R_SYM (rel[i].r_info);
> +      reltype = ELF_R_TYPE (rel[i].r_info);
> +      target = (Elf_Word *) ((grub_addr_t) seg->addr + rel[i].r_offset);

target is declared as Elf_Addr *, so the cast to Elf_Word * seems
inappropriate, even though they are practically the same type.
Since target can point at either a 32 bit word or a 16 bit half-word,
I'm wondering whether void * would be a more appropriate type here.

[...]
> === added file 'grub-core/kern/arm/misc.S'
> --- grub-core/kern/arm/misc.S 1970-01-01 00:00:00 +0000
> +++ grub-core/kern/arm/misc.S 2013-03-24 12:56:20 +0000
> @@ -0,0 +1,44 @@
> +/*
> + *  GRUB  --  GRand Unified Bootloader
> + *  Copyright (C) 2013  Free Software Foundation, Inc.
> + *
> + *  GRUB is free software: you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation, either version 3 of the License, or
> + *  (at your option) any later version.
> + *
> + *  GRUB is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <grub/symbol.h>
> +#include <grub/dl.h>

Unneeded include.

> +
> +     .file   "misc.S"
> +     .text
> +     .syntax unified
> +#if !defined (__thumb2__)
> +     .arm
> +#define ARM(x...)    x
> +#define THUMB(x...)
> +#else
> +     .thumb
> +#define THUMB(x...)  x
> +#define ARM(x...)
> +#endif
> +
> +     .align  2
> +
> +/*
> + * Null divide-by-zero handler
> + */
> +FUNCTION(raise)
> +     mov     r0, #0
> +     bx      lr
> +     
> +     .end
> 
[...]
> === added file 'grub-core/lib/arm/setjmp.S'
> --- grub-core/lib/arm/setjmp.S        1970-01-01 00:00:00 +0000
> +++ grub-core/lib/arm/setjmp.S        2013-03-24 12:56:20 +0000
> @@ -0,0 +1,55 @@
> +/*
> + *  GRUB  --  GRand Unified Bootloader
> + *  Copyright (C) 2013  Free Software Foundation, Inc.
> + *
> + *  GRUB is free software: you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation, either version 3 of the License, or
> + *  (at your option) any later version.
> + *
> + *  GRUB is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
> + */
> +
> +#include <grub/symbol.h>
> +#include <grub/dl.h>

Unneeded include.

> +
> +     .file   "setjmp.S"
> +     .syntax unified
> +#if !defined (__thumb2__)
> +     .arm
> +#define ARM(x...)    x
> +#define THUMB(x...)
> +#else
> +     .thumb
> +#define THUMB(x...)  x
> +#define ARM(x...)
> +#endif
> +
> +     .text
> +
> +/*
> + * int grub_setjmp (grub_jmp_buf env)
> + */
> +FUNCTION(grub_setjmp)
> + THUMB(      mov     ip, sp                  )
> + THUMB(      stm     r0, { r4-r11, ip, lr }  )
> + ARM(        stm     r0, { r4-r11, sp, lr }  )
> +     mov     r0, #0
> +     bx      lr
> +
> +/*
> + * int grub_longjmp (grub_jmp_buf env, int val)
> + */
> +FUNCTION(grub_longjmp)
> + THUMB(      ldm     r0, { r4-r11, ip, lr }  )
> + THUMB(      mov     sp, ip                  )
> + ARM(        ldm     r0, { r4-r11, sp, lr }  )
> +     movs    r0, r1

As above, I would drop the -mimplicit-it=thumb assembler option and here
I would insert the instruction:

        it      eq

> +     moveq   r0, #1
> +     bx      lr
> 

--
Francesco



reply via email to

[Prev in Thread] Current Thread [Next in Thread]