Hi David,
What exactly is wrong with my code?
Thanks by the way for __get_PSP and all your help.
This is what it looks like on my end:
/* CMSIS accessor as quoted from core_cmFunc.h: returns the value of a
 * register variable that is bound by name to "psp". */
__STATIC_INLINE uint32_t __get_PSP(void)
{
/* Named register variable; no explicit instruction is written here. */
register uint32_t __regProcessStackPointer __ASM("psp");
return(__regProcessStackPointer);
}
in
C:\Program Files (x86)\Atmel\Atmel Toolchain\ARM GCC\Native\4.8.1443\CMSIS_Atmel\CMSIS\Include\core_cmFunc.h
Which for me looks a lot like my code except for volatile.
Actually I already had an inline asm version, but I prefer to use C if I can:
/* Returns the current value of r13 (shown as sp in the disassembly) by
 * reading a local register variable bound to that register. */
unsigned int GetStackPointer()
{
/* Earlier inline-asm variant, kept for comparison: */
//asm volatile ("mov %0, r13" : "=l" (sp));
/* volatile is a deliberate hack: in the measurements in this message it
 * produced smaller code than the non-volatile form. */
volatile register unsigned int sp asm("r13");
return sp;
}
So why volatile?
Because it generates smaller code:
while (1)
{
unsigned int sp=GetStackPointer();
sum+=sp;
400270: 9b01 ldr r3, [sp, #4]
400272: 446b add r3, sp
400274: 9301 str r3, [sp, #4]
400276: e7fb b.n 400270 <main+0xc>
Without it:
//volatile
register unsigned int sp asm("r13");
return sp;
400270: 466a mov r2, sp
volatile
uint32_t sum=0;
while (1)
{
unsigned int sp=GetStackPointer();
sum+=sp;
400272: 9b01 ldr r3, [sp, #4]
400274: 4413 add r3, r2
400276: 9301 str r3, [sp, #4]
400278: e7fb b.n 400272 <main+0xe>
the code gets bigger.
__builtin_frame_address(0) returns r7, not sp. That should be the same value, but it forces the use of r7:
400264: b580 push {r7, lr}
400266: b082 sub sp, #8
400268: af00 add r7, sp, #0
My version doesn't have this:
400264: b500 push {lr}
400266: b083 sub sp, #12
Here's the complete test case with code size statistics, in case someone wants to try it with different compiler versions...
#include "sam.h"
/* Variant 1: volatile register variable bound to r13.
 * This is the smallest variant in the size table in main() (2080 bytes). */
static inline unsigned int GetStackPointer() {
/* The volatile qualifier is the only difference from GetStackPointerNV(). */
volatile register unsigned int sp asm("r13");
return sp;
}
/* Variant 2: copies r13 into a local with an explicit inline-asm mov. */
static inline unsigned int GetStackPointerAsm() {
register uint32_t sp;
/* "=l" output constraint: presumably restricts the destination to a low
 * register in Thumb state -- confirm against GCC's ARM machine constraints. */
asm volatile ("mov %0, r13" : "=l" (sp));
return sp;
}
/* Variant 3 ("NV" = non-volatile): same as GetStackPointer() but without the
 * volatile qualifier on the r13-bound register variable. */
static inline unsigned int GetStackPointerNV() {
register unsigned int sp asm("r13");
return sp;
}
/* Variant 4: reads the "psp" special register with an MRS instruction.
 * NOTE(review): unlike the other variants this reads PSP rather than r13;
 * the two presumably differ when the main stack is the active one -- confirm. */
__attribute__( ( always_inline ) ) __STATIC_INLINE uint32_t GetStackPointerAsm2(void)
{
register uint32_t result;
/* "=r" lets the compiler choose the destination register. */
__ASM volatile ("MRS %0, psp\n" : "=r" (result) );
return(result);
}
/* Variant 5: same mov as GetStackPointerAsm(), but with a plain (non-register)
 * local and the "=r" output constraint instead of "=l". */
static inline uint32_t GetStackPointerAsm3(void)
{
uint32_t sp;
asm volatile("mov %0, r13" : "=r" (sp));
return sp;
}
/* Size benchmark driver: enable exactly one of the sum+= lines below, rebuild,
 * and compare the reported program memory usage. */
int main(void)
{
SystemInit();
/* volatile so that each accumulation is an actual load/add/store, as seen in
 * the disassembly earlier in this message. */
volatile uint32_t sum=0;
/* Loops forever; there is no exit path. */
while (1) {
/// Program Memory Usage
/* The trailing figure on each line is the size measured with that variant enabled. */
sum+=GetStackPointer(); /// 2080 bytes
//sum+=GetStackPointerAsm(); /// 2084 bytes
//sum+=GetStackPointerAsm2(); /// 2084 bytes
//sum+=GetStackPointerAsm3(); /// 2084 bytes
//sum+=GetStackPointerNV(); /// 2084 bytes
//sum+=__get_PSP(); /// 2084 bytes
//sum+=__builtin_frame_address(0); /// 2084 bytes
}
}
I agree that the use of volatile is a hack, but that's the best solution I had found back when I wrote it.
We might consider moving the discussion off the list, since it is no longer AVR related.