grub-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 2/7] Initial support for ARMv7 architecture


From: Leif Lindholm
Subject: Re: [PATCH 2/7] Initial support for ARMv7 architecture
Date: Wed, 3 Apr 2013 15:36:22 +0000
User-agent: Mutt/1.5.20 (2009-06-14)

On Sat, Mar 30, 2013 at 04:15:54PM +0100, Francesco Lavra wrote:
> On 03/24/2013 06:01 PM, Leif Lindholm wrote:
> [...]
> > === added file 'grub-core/kern/arm/cache.S'
> > --- grub-core/kern/arm/cache.S      1970-01-01 00:00:00 +0000
> > +++ grub-core/kern/arm/cache.S      2013-03-24 12:56:20 +0000
> > @@ -0,0 +1,251 @@
> > +/*
> > + *  GRUB  --  GRand Unified Bootloader
> > + *  Copyright (C) 2013  Free Software Foundation, Inc.
> > + *
> > + *  GRUB is free software: you can redistribute it and/or modify
> > + *  it under the terms of the GNU General Public License as published by
> > + *  the Free Software Foundation, either version 3 of the License, or
> > + *  (at your option) any later version.
> > + *
> > + *  GRUB is distributed in the hope that it will be useful,
> > + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> > + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> > + *  GNU General Public License for more details.
> > + *
> > + *  You should have received a copy of the GNU General Public License
> > + *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
> > + */
> > +
> > +#include <grub/symbol.h>
> > +#include <grub/dl.h>
> 
> This include is not needed.
> 
> > +
> > +   .file   "cache.S"
> > +   .text
> > +   .syntax unified
> > +#if !defined (__thumb2__)
> > +   .arm
> > +#define ARM(x...)  x
> > +#define THUMB(x...)
> > +#else
> > +   .thumb
> > +#define THUMB(x...)        x
> > +#define ARM(x...)
> > +#endif
> > +
> > +   .align  2
> > +
> > +/*
> > + * Simple cache maintenance functions
> > + */
> > +
> > +@ r0 - *beg (inclusive)
> > +@ r1 - *end (exclusive)    
> > +clean_dcache_range:
> > +   @ Clean data cache range for range to point-of-unification
> > +   ldr     r2, dlinesz
> > +1: cmp     r0, r1
> > +   bge     2f
> > +#ifdef DEBUG
> > +   push    {r0-r2, lr}
> > +   mov     r1, r2
> > +   mov     r2, r0
> > +   ldr     r0, =dcstr
> > +   bl      EXT_C(grub_printf)
> > +   pop     {r0-r2, lr}
> > +#endif
> > +   mcr     p15, 0, r0, c7, c11, 1  @ DCCMVAU
> > +   add     r0, r0, r2              @ Next line
> > +   b       1b
> > +2: dsb
> > +   bx      lr
> 
> If the start address (initial value of r0 in this function) is not
> aligned to a cache line boundary, the last cache line in the range to be
> cleaned might not be cleaned, depending on the value of r1 (the end
> address). To handle correctly such cases, the initial value of r0 should
> be aligned to the cache line boundary.
 
ACK. Fixed, will be in next version.

> > +
> > +@ r0 - *beg (inclusive)
> > +@ r1 - *end (exclusive)    
> > +invalidate_icache_range:
> > +   @ Invalidate instruction cache for range to point-of-unification
> > +   ldr     r2, ilinesz
> > +1: cmp     r0, r1
> > +   bge     2f
> > +#ifdef DEBUG
> > +   push    {r0-r2, lr}
> > +   mov     r1, r2
> > +   mov     r2, r0
> > +   ldr     r0, =icstr
> > +   bl      EXT_C(grub_printf)
> > +   pop     {r0-r2, lr}
> > +#endif
> > +   mcr     p15, 0, r0, c7, c5, 1   @ ICIMVAU
> > +   add     r0, r0, r2              @ Next line
> > +   b       1b
> 
> The same applies here: the initial value of r0 should be aligned to the
> cache line boundary.
 
And again.

> > +   @ Branch predictor invalidate all
> > +2: mcr     p15, 0, r0, c7, c5, 6   @ BPIALL
> > +   dsb
> > +   isb
> > +   bx      lr
> > +   
> > address@hidden __wrap___clear_cache(char *beg, char *end);
> > +FUNCTION(__wrap___clear_cache)
> > +   dmb
> > +   dsb
> > +   push    {r4-r6, lr}
> > +   ldr     r2, probed      @ If first call, probe cache sizes
> > +   cmp     r2, #0
> 
> If the -mimplicit-it=thumb assembler option is removed from patch 1/7
> (as I think it should), then here goes the instruction:
> 
>       it      eq
> 
> > +   bleq    probe_caches    @ This call corrupts r3
> > +   mov     r4, r0
> > +   mov     r5, r1
> > +   bl      clean_dcache_range
> > +   mov     r0, r4
> > +   mov     r1, r5
> > +   bl      invalidate_icache_range
> > +   pop     {r4-r6, pc}
> > +
> > +probe_caches:
> > +   push    {r4-r6, lr}
> > +   mrc     p15, 0, r4, c0, c0, 1   @ Read Cache Type Register
> > +   mov     r5, #1
> > +   ubfx    r6, r4, #16, #4         @ Extract min D-cache num word log2
> > +   add     r6, r6, #2              @ words->bytes
> > +   lsl     r6, r5, r6              @ Convert to num bytes
> > +   ldr     r3, =dlinesz
> > +   str     r6, [r3]
> > +   and     r6, r4, #0xf            @ Extract min I-cache num word log2
> > +   add     r6, r6, #2              @ words->bytes
> > +   lsl     r6, r5, r6              @ Convert to num bytes
> > +   ldr     r3, =ilinesz
> > +   str     r6, [r3]
> > +   ldr     r3, =probed             @ Flag cache probing done
> > +   str     r5, [r3]
> > +   pop     {r4-r6, pc}
> > +
> > +#ifdef DEBUG
> > +dcstr:     .asciz  "cleaning %d bytes of D cache @ 0x%08x\n"
> > +icstr:     .asciz  "invalidating %d bytes of I cache @ 0x%08x\n"
> > +#endif
> > +   
> > +   .align  3
> > +probed:    .long   0
> > +dlinesz:
> > +   .long   0
> > +ilinesz:
> > +   .long   0
> > +
> > address@hidden grub_arch_sync_caches (void *address, grub_size_t len)
> > +FUNCTION(grub_arch_sync_caches)
> > +   add     r1, r0, r1
> > +   b       __wrap___clear_cache
> > +
> > +   @ r0  - CLIDR
> > +   @ r1  - LoC
> > +   @ r2  - current level
> > +   @ r3  - num sets
> > +   @ r4  - num ways
> > +   @ r5  - current set
> > +   @ r6  - current way
> > +   @ r7  - line size
> > +   @ r8  - scratch
> > +   @ r9  - scratch
> > +   @ r10 - scratch
> > +   @ r11 - scratch
> > +clean_invalidate_dcache:
> > +   push    {r4-r12, lr}
> > +   mrc     p15, 1, r0, c0, c0, 1   @ Read CLIDR
> > +   ubfx    r1, r0, #24, #3         @ Extract LoC
> > +   
> > +   mov     r2, #0                  @ First level, L1
> > +2: and     r8, r0, #7              @ cache type at current level
> > +   cmp     r8, #2
> > +   blt     5f                      @ instruction only, or none, skip level
> > +
> > +   @ set current cache level/type (for CSSIDR read)
> > +   lsl     r8, r2, #1
> > +   mcr     p15, 2, r8, c0, c0, 0   @ Write CSSELR (level, type: data/uni)
> > +
> > +   @ read current cache information
> > +   mrc     p15, 1, r8, c0, c0, 0   @ Read CSSIDR
> > +   ubfx    r3, r8, #13, #14        @ Number of sets -1
> > +   ubfx    r4, r8, #3, #9          @ Number of ways -1
> > +   and     r7, r8, #7              @ log2(line size in words) - 2
> > +   add     r7, r7, #2              @  adjust
> > +   mov     r8, #1
> > +   lsl     r7, r8, r7              @  -> line size in words
> > +   lsl     r7, r7, #2              @  -> bytes
> > +
> > +   @ set loop
> > +   mov     r5, #0                  @ current set = 0
> > +3: lsl     r8, r2, #1              @ insert level
> > +   clz     r9, r7                  @ calculate set field offset
> > +   mov     r10, #31
> > +   sub     r9, r10, r9
> > +   lsl     r10, r5, r9
> > +   orr     r8, r8, r10             @ insert set field
> > +
> > +   @ way loop
> > +   @ calculate way field offset
> > +   mov     r6, #0                  @ current way = 0
> > +   add     r10, r4, #1
> > +   clz     r9, r10                 @ r9 = way field offset
> > +   add     r9, r9, #1
> > +4: lsl     r10, r6, r9
> > +   orr     r11, r8, r10            @ insert way field      
> > +   
> > +   @ clean line by set/way
> 
> Nitpick: the comment should be "clean and invalidate line by set/way".
 
Definitely, the current comment is incorrect.

> [...]
> > === added file 'grub-core/kern/arm/dl.c'
> > --- grub-core/kern/arm/dl.c 1970-01-01 00:00:00 +0000
> > +++ grub-core/kern/arm/dl.c 2013-03-24 12:56:20 +0000
> [...]
> > +/*
> > + * R_ARM_THM_JUMP19
> > + *
> > + * Relocate conditional Thumb (T32) B<c>.W
> > + */
> > +grub_err_t
> > +reloc_thm_jump19 (grub_uint16_t *addr, Elf32_Addr sym_addr)
> > +{
> > +  grub_int32_t offset;
> > +  grub_uint32_t insword, insmask;
> > +
> > +  /* Extract instruction word in alignment-safe manner */
> > +  insword = (*addr) << 16 | *(addr + 1);
> > +  insmask = 0xfbc0d800;
> 
> Shouldn't it be 0xfbc0d000? Bit 11 of the second half-word corresponds
> to field J2, and as such is part of the offset.

Err, yes - certainly.
 
> > +
> > +  /* Extract and sign extend offset */
> > +  offset = ((insword >> 26) & 1) << 18
> > +    | ((insword >> 11) & 1) << 17
> > +    | ((insword >> 13) & 1) << 16
> > +    | ((insword >> 16) & 0x3f) << 11
> > +    | (insword & 0x7ff);
> > +  offset <<= 1;
> > +  if (offset & (1 << 19))
> > +    offset -= (1 << 20);
> 
> It looks to me like some shifts are wrong here. It should be:
> 
>   offset = ((insword >> 26) & 1) << 19
>     | ((insword >> 11) & 1) << 18
>     | ((insword >> 13) & 1) << 17
>     | ((insword >> 16) & 0x3f) << 11
>     | (insword & 0x7ff);
>   offset <<= 1;
>   if (offset & (1 << 20))
>     offset -= (1 << 21);
 
ACK.

> > +
> > +  /* Adjust and re-truncate offset */
> > +#ifdef GRUB_UTIL
> > +  offset += sym_addr;
> > +#else
> > +  offset += sym_addr - (grub_uint32_t) addr;
> > +#endif
> > +  if ((offset > 1048574) || (offset < -1048576))
> > +    {
> > +      return grub_error
> > +        (GRUB_ERR_OUT_OF_RANGE, N_("THM_JUMP19 Relocation out of range."));
> > +    }
> > +
> > +  offset >>= 1;
> > +  offset &= 0x7ffff;
> 
> The mask should be 0xfffff.
 
ACK.

> > +
> > +  /* Reassemble instruction word and write back */
> > +  insword &= insmask;
> > +  insword |= ((offset >> 18) & 1) << 26
> > +    | ((offset >> 17) & 1) << 11
> > +    | ((offset >> 16) & 1) << 13
> > +    | ((offset >> 11) & 0x3f) << 16
> > +    | (offset & 0x7ff);
> 
> As above, it looks like some shifts are wrong. It should be:
> 
>   insword |= ((offset >> 19) & 1) << 26
>     | ((offset >> 18) & 1) << 11
>     | ((offset >> 17) & 1) << 13
>     | ((offset >> 11) & 0x3f) << 16
>     | (offset & 0x7ff);

ACK. The scary thing is I have a GRUB happily running under UEFI,
successfully loading a Linux kernel from MMC, using this code...
 
Fixing for next version.

> > +  *addr = insword >> 16;
> > +  *(addr + 1) = insword & 0xffff;
> > +  return GRUB_ERR_NONE;
> > +}
> > 
> > 
> > 
> > /***********************************************************
> >  * ARM (A32) relocations:                                  *
> >  *                                                         *
> >  * ARM instructions are 32-bit in size and 32-bit aligned. *
> >  ***********************************************************/
> > 
> > /*
> >  * R_ARM_JUMP24
> >  *
> >  * Relocate ARM (A32) B
> >  */
> > grub_err_t
> > reloc_jump24 (grub_uint32_t *addr, Elf32_Addr sym_addr)
> > {
> >   grub_uint32_t insword;
> >   grub_int32_t offset;
> > 
> >   insword = *addr;
> > 
> >   offset = (insword & 0x00ffffff) << 2;
> >   if (offset & 0x02000000)
> >     offset -= 0x04000000;
> > #ifdef GRUB_UTIL
> >   offset += sym_addr;
> > #else
> >   offset += sym_addr - (grub_uint32_t) addr;
> > #endif
> > 
> >   insword &= 0xff000000;
> >   insword |= (offset >> 2) & 0x00ffffff;
> > 
> >   *addr = insword;
> > 
> >   return GRUB_ERR_NONE;
> > }
> 
> If sym_addr has its least significant bit set, it means it addresses a
> Thumb instruction, and in this case an inter-working veneer must be used
> to effect the transition from ARM to Thumb state. Implementing veneers
> is probably overkill here, but I think in this case this function should
> error out as the relocation is not supported, instead of silently
> performing a wrong relocation.
 
Yes, fair enough - I did it for R_THM_JUMP24, so just being lazy here.

> [...]
> > +/*
> > + * do_relocations():
> > + *   Iterate over all relocations in section, calling appropriate functions
> > + *   for patching.
> > + */
> > +static grub_err_t
> > +do_relocations (Elf_Shdr * relhdr, Elf_Ehdr * e, grub_dl_t mod)
> > +{
> > +  grub_dl_segment_t seg;
> > +  Elf_Rel *rel;
> > +  Elf_Sym *sym;
> > +  int i, entnum;
> > +
> > +  entnum = relhdr->sh_size / sizeof (Elf_Rel);
> > +
> > +  /* Find the target segment for this relocation section. */
> > +  seg = find_segment (mod->segment, relhdr->sh_info);
> > +  if (!seg)
> > +    return grub_error (GRUB_ERR_EOF, N_("relocation segment not found"));
> > +
> > +  rel = (Elf_Rel *) ((grub_addr_t) e + relhdr->sh_offset);
> > +
> > +  /* Step through all relocations */
> > +  for (i = 0, sym = mod->symtab; i < entnum; i++)
> > +    {
> > +      Elf_Addr *target, sym_addr;
> > +      int relsym, reltype;
> > +      grub_err_t retval;
> > +
> > +      if (seg->size < rel[i].r_offset)
> > +   return grub_error (GRUB_ERR_BAD_MODULE,
> > +                      "reloc offset is out of the segment");
> > +      relsym = ELF_R_SYM (rel[i].r_info);
> > +      reltype = ELF_R_TYPE (rel[i].r_info);
> > +      target = (Elf_Word *) ((grub_addr_t) seg->addr + rel[i].r_offset);
> 
> target is declared as Elf_Addr *, so the cast to Elf_Word * seems
> inappropriate, even though they are practically the same type.
> Since target can point at either a 32 bit word or a 16 bit half-word,
> I'm wondering whether void * would be a more appropriate type here.
 
void * makes sense to me.

/
    Leif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]