[avr-gcc-list] modulo vs. loop (gcc 4.3.x bug)

From: Szikra Istvan
Subject: [avr-gcc-list] modulo vs. loop (gcc 4.3.x bug)
Date: Sun, 28 Mar 2010 00:24:43 +0100

Category: bug: false optimization leads to critical bug
affects: avr-gcc 4.3.x
Problem: A simple subtraction loop is "optimized" into __udivmodqi4
(divide/modulo) calls, without my asking for it

-simple loop is smaller
-it is faster for small input numbers (which I know will be the case)
-bootloader section cannot have calls to the application section !

- If I had wanted modulo, I would have written %. I did not waste my time
writing ten times as many characters just for fun.

-also, the generated divide+modulo code is inefficient: it calls
__udivmodqi4 twice with the same parameters

-environment: WinAVR-20080610, 20081205, 20090313, 20100110 (avr-gcc 4.3.x)
(WinAVR-20060421 and 20070525 work)

-build command:
avr-gcc -mmcu=atmega16 -save-temps -Os test.c
or WinAVR Makefile Template

Here is the test code:
/** Note (to self): __udivmodqi4 is a pencil-and-paper (shift-and-subtract) long division

other possible methods:
d = i/3 = (i*85+85)/256
m = i%3 = i-3*d

d = i/10 = (i*51+51)/512
m = i%10 = i-10*d

#include <avr/io.h>
#include <avr/boot.h>

void test(void) BOOTLOADER_SECTION;

int main (void)
  for (;;)

void test(void)
  unsigned char m =  PORTA;
  unsigned char d=0;

  while (m>=3){

/**  ------------------ What I got ---------------------  gcc 4.3.3
  unsigned char m =  PORTA;
    3800:       2b b3           in      r18, 0x1b       ; 27

  while (m>=3){
    3802:       82 2f           mov     r24, r18
    3804:       63 e0           ldi     r22, 0x03       ; 3
    3806:       0e 94 37 00     call    0x6e    ; 0x6e <__udivmodqi4>
    380a:       38 2f           mov     r19, r24
    380c:       82 2f           mov     r24, r18
    380e:       0e 94 37 00     call    0x6e    ; 0x6e <__udivmodqi4>

    3812:       38 bb           out     0x18, r19       ; 24
    3814:       95 bb           out     0x15, r25       ; 21

0000006e <__udivmodqi4>:
  6e:   99 1b           sub     r25, r25
  70:   79 e0           ldi     r23, 0x09       ; 9
  72:   04 c0           rjmp    .+8             ; 0x7c <__udivmodqi4_ep>

00000074 <__udivmodqi4_loop>:
  74:   99 1f           adc     r25, r25
  76:   96 17           cp      r25, r22
  78:   08 f0           brcs    .+2             ; 0x7c <__udivmodqi4_ep>
  7a:   96 1b           sub     r25, r22

0000007c <__udivmodqi4_ep>:
  7c:   88 1f           adc     r24, r24
  7e:   7a 95           dec     r23
  80:   c9 f7           brne    .-14            ; 0x74 <__udivmodqi4_loop>
  82:   80 95           com     r24
  84:   08 95           ret


/**  ------------------ What I wanted --------------------- gcc 4.1.2
  unsigned char m =  PORTA;
    3800:       8b b3           in      r24, 0x1b       ; 27
  unsigned char d=0;
    3802:       90 e0           ldi     r25, 0x00       ; 0

  while (m>=3){
    3804:       02 c0           rjmp    .+4             ; 0x380a <test+0xa>
    3806:       9f 5f           subi    r25, 0xFF       ; 255
    3808:       83 50           subi    r24, 0x03       ; 3
    380a:       83 30           cpi     r24, 0x03       ; 3
    380c:       e0 f7           brcc    .-8             ; 0x3806 <test+0x6>
    380e:       98 bb           out     0x18, r25       ; 24
    3810:       85 bb           out     0x15, r24       ; 21
/** ------------------      or       --------------------- gcc 3.4.6
  unsigned char m =  PORTA;
    3800:       8b b3           in      r24, 0x1b       ; 27
  unsigned char d=0;
    3802:       90 e0           ldi     r25, 0x00       ; 0

  while (m>=3){
    3804:       83 30           cpi     r24, 0x03       ; 3
    3806:       18 f0           brcs    .+6             ; 0x380e <test+0xe>
    3808:       9f 5f           subi    r25, 0xFF       ; 255
    380a:       83 50           subi    r24, 0x03       ; 3
    380c:       fb cf           rjmp    .-10            ; 0x3804 <test+0x4>
    380e:       98 bb           out     0x18, r25       ; 24
    3810:       85 bb           out     0x15, r24       ; 21

-------- begin --------
avr-gcc (WinAVR 20100110) 4.3.3
Copyright (C) 2008 Free Software Foundation, Inc.
avr-gcc -c -mmcu=atmega16 -I. -gdwarf-2 -DF_CPU=16000000UL
-DBOOTSIZE=1024  -Os -funsigned-char -funsigned-bitfields
-fpack-struct -fshort-enums -fno-strict-aliasing
-fno-inline-small-functions  -save-temps  -Wall -Winline
-Wstrict-prototypes -Wa,-adhlns=test.lst -ID:/Lib/ -std=gnu99 -MD -MP
-MF .dep/test.o.d test.c -o test.o
avr-gcc -mmcu=atmega16 -I. -gdwarf-2 -DF_CPU=16000000UL
-DBOOTSIZE=1024  -Os -funsigned-char -funsigned-bitfields
-fpack-struct -fshort-enums -fno-strict-aliasing
-fno-inline-small-functions  -save-temps  -Wall -Winline
-Wstrict-prototypes -Wa,-adhlns=test.o -ID:/Lib/ -std=gnu99 -MD -MP
-MF .dep/test.elf.d test.o --output test.elf -Wl,-Map=test.map,--cref
-lm -Wl,--section-start=.bootloader=0x3800
-------- begin --------
avr-gcc (GCC) 3.4.6
Copyright (C) 2006 Free Software Foundation, Inc.
avr-gcc -c -mmcu=atmega16 -I. -gdwarf-2 -DF_CPU=16000000UL
-DBOOTSIZE=1024  -Os -funsigned-char -funsigned-bitfields
-fpack-struct -fshort-enums -fno-strict-aliasing -Wall -Winline
-Wstrict-prototypes -Wa,-adhlns=test_il.lst -ID:/Lib/ -std=gnu99 -MD
-MP -MF .dep/test_il.o.d test_il.c -o test_il.o
avr-gcc -mmcu=atmega16 -I. -gdwarf-2 -DF_CPU=16000000UL
-DBOOTSIZE=1024  -Os -funsigned-char -funsigned-bitfields
-fpack-struct -fshort-enums -fno-strict-aliasing -Wall -Winline
-Wstrict-prototypes -Wa,-adhlns=test_il.o -ID:/Lib/ -std=gnu99 -MD -MP
-MF .dep/test_il.elf.d test_il.o --output test_il.elf
-Wl,-Map=test_il.map,--cref    -lm

Szikra Istvan

