[avr-libc-dev] Working octal code (FYI)

From: George Spelvin
Subject: [avr-libc-dev] Working octal code (FYI)
Date: 16 Dec 2016 02:41:06 -0500

(Am I annoying everyone by using this mailing list as my personal
coding blog?)

After considerable rearranging (and fixing one nasty logic bug in
the first algorithm posted), I have octal conversion working to my
satisfaction.

The logic bug was that I assumed I'd need at most one byte of zero-padding
to print a number.  But I was checking for termination before printing
a digit.  That ended up not working with 1-byte octal numbers where the
top digit is non-zero.  By the time I was ready to print the fourth digit
(when the termination check would fire), the lsbyte wanted to hold bits
9..16, and that meant loading a *second* byte (bits 16..23).

So I changed to checking for termination *after* printing a digit,
which I knew would save time, but I unexpectedly found additional
space savings, too.

Not counting preamble code shared with decimal printing (all the
stuff before the label "3:"), it's down to 29 instructions.  Still
a bit more than 20, but I'm satisfied.

It's even slightly faster than the previous code:

Bits    Old     New
 0       56      42
 8      144     113
16      276     232
24      364     314
32      496     430
40              546
48              628
56              744
64              860

/*
 * Register assignments for binprint.
 *
 * The two pointer arguments arrive in r24:r25 and r22:r23.  They are
 * copied into the X and Z pointer pairs on entry, and the registers they
 * arrived in are reused as scratch (msb, lsb, digit, delta) once the
 * routine has finished reading them.
 */

/* Arguments */
#define out     X       /* Output pointer; arrives in r24:r25, moved to X on entry */
#define out_lo  r26
#define out_hi  r27
#define bin     Z       /* Input pointer; arrives in r22:r23, moved to Z on entry */
#define bin_lo  r30
#define bin_hi  r31
#define len     r20     /* Byte count of the input; counted down as bytes are consumed */
#define flags   r18     /* Two lsbits are shifted out as flags; the rest is the digit mask */

/* Local variables */
#define msb     r25     /* Overlaps input (out); pending input bits plus a marker bit */
#define lsb     r24     /* Overlaps input (out); bit window the next digit is taken from */
#define digit   r23     /* Overlaps input (bin); character being built for output */
#define delta   r22     /* Overlaps input (bin); offset added to digit values above 9 */
#define tmask   r21     /* Loop counter: bytes while negating, bits while shifting */
// len = r20
#define k       r19     /* Negation mask, 0 or -1 */
// flags = r18

/*
 * binprint - convert a little-endian binary number to digit characters
 * in a power-of-two base.
 *
 * In:   r24:r25 = out, where the digit characters are stored
 *       r22:r23 = bin, the input bytes, least-significant byte first
 *       len     = number of input bytes
 *       flags   = bit 0: negate flag, bit 1: letter-case selector (set
 *                 selects 'a'-based letters); the bits above are used as
 *                 the digit mask once those two have been shifted out
 * Out:  digits are stored through X with post-increment, least
 *       significant digit first.  The return sequence is at .L_epilogue,
 *       which is not visible here.
 *
 * NOTE(review): the listing as seen here is not self-contained; confirm
 * each of these against the complete source file:
 *   - there is no "binprint:" entry label after the .type directive;
 *   - "#if 1" has no matching #else / #endif;
 *   - "zero" is not defined in the visible text (every other line uses
 *     __zero_reg__);
 *   - "brne 1b" has no visible "1:" label; it presumably belongs on the
 *     first instruction of the negate loop;
 *   - .L_bitloop and .L_epilogue are not defined in the visible text.
 */
        .global binprint
        .type   binprint, @function
        movw    out_lo, r24     ; X = out
        movw    bin_lo, r22     ; Z = bin (r22:r23 keeps the original pointer)
#if 1
        ; Advance bin by len bytes (16-bit add).
        ; NOTE(review): "zero" is not defined in view - confirm.
        add     bin_lo, len
        adc     bin_hi, zero
        mov     tmask, len      ; Byte counter for the negate loop below
        ; Conditional negate using the standard identity -x = ~x + 1.
        ; Given mask of -1 or 0, (x ^ mask) - mask returns -x or x.
        ; However, we would need the carry bit clear to start this, and
        ; forming "mask" from the carry bit in one instruction preserves
        ; the carry bit.  So instead add zero with carry: the initial
        ; carry supplies the "+ 1" and then ripples up through the bytes.
        lsr     flags           ; Lsbit is negate flag
        sbc     k, k            ; Set to 0 or -1, carry preserved
        ; Loop body: one byte per pass, tmask passes in total.
        ; NOTE(review): the branch target "1:" is missing from the visible text.
        ld      __tmp_reg__, bin
        eor     __tmp_reg__, k                  ; Complement the byte if negating
        adc     __tmp_reg__, __zero_reg__       ; Add the incoming carry
        st      bin+, __tmp_reg__
        dec     tmask           ; Preserves carry for the next byte
        brne    1b
        ; Strip trailing (most-significant) zeros from bin.
        ; Walks down from the top byte; the lowest byte is never tested,
        ; so at least one byte always remains.
2:      dec     len
        breq    3f              ; If we've reached the end, stop
        ld      __tmp_reg__, -bin
        or      __tmp_reg__, __tmp_reg__        ; Set Z flag from the byte
        breq    2b              ; Continue as long as bytes are zero

3:      movw    bin_lo, r22     ; Reset bin to lsbyte
        ; Len is now pre-decremented: it counts the bytes that remain
        ; after the first one, which is loaded into lsb below.

        ; Done with args in r22-r25; now allowed to use delta, digit, lsb, msb
        ldi     delta, 'A'-'0'-10       ; Default: upper-case letters above 9
        lsr     flags                   ; Second flag bit into carry; flags is now the mask
        brcc    4f
         ldi    delta, 'a'-'0'-10       ; Flag set: lower-case letters
4:      ldi     msb, 1          ; Marker bit only: no pending bits, fetch on first shift
        ld      lsb, bin+       ; First (least-significant) byte

.L_digit_out:                   ; Spit out a digit
        mov     digit, lsb
        and     digit, flags    ; Low bits of the window, selected by the mask
        cpi     digit, 10
        brcs    5f
         add    digit, delta    ; Hex digit > 9
5:      subi    digit, -'0'     ; Add '0' (there is no add-immediate)
        st      X+, digit
        ; Check for done.  The cp/cpc pair is a 16-bit unsigned compare of
        ; 0:flags against len:lsb; carry ends up clear exactly when
        ; len == 0 and lsb <= flags, i.e. no bytes are left to fetch and
        ; the window holds nothing above the digit just stored.
        ; NOTE(review): this test does not look at msb.  Assuming
        ; .L_bitloop labels the "lsr msb" below, a hand trace of 0x8000
        ; with mask 7 reaches here after two digits with len == 0 and
        ; lsb == 0 while msb still holds bit 15 - confirm whether such
        ; values can terminate early.
        cp      flags, lsb
        cpc     __zero_reg__, len
        brcc    .L_epilogue     ; if (len == 0 && lsb <= flags) finish
        ; Shift the msb:lsb pair right by one digit, one bit per pass.
        mov     tmask, flags    ; tmask is shifted to zero to count the passes
        ; NOTE(review): the .L_bitloop label is missing from the visible text.
        lsr     msb
        brne    7f      ; if ((msb >>= 1) == 0) get another byte
        ; Fetch another byte.  msb was just the marker bit, so carry is
        ; now 1 and msb is 0.
        or      len, len        ; Preserves carry
        breq    6f              ; Nothing left to load: msb stays 0, giving zero padding
         dec    len             ; Preserves carry
         ld     msb, Z+
6:      ror     msb             ; Shift carry=1 into msbit as the new marker
7:      ror     lsb             ; Bit shifted out of msb enters the top of lsb
        lsr     tmask
        brne    .L_bitloop      ; Repeat until tmask has been shifted to zero
        rjmp    .L_digit_out    ; Whole digit shifted in; go print it
.size   binprint, .-binprint

