AVR Code for Dividing 32-Bit Register by 24-Bit Constant

Status
Not open for further replies.

Kerim

Member
The assembly code presented in my previous thread, “AVR Code for Dividing 16 Bits by 8-Bit Constant”, can be extended to divide 24 bits by 16 bits, 32 bits by 24 bits, and so on.

Below is the general code that divides a 32-bit number [0 to 4,294,967,295] by a 24-bit constant [2 to 16,777,215], with rounding, in 129 clocks maximum. Not bad, right?

Code:
;===============================================================
; D32_nnn: rounded division of a 32-bit register value {A}
;          by a 24-bit assemble-time constant {N}
;===============================================================
;
; In:        r19:r16 = {A}, unsigned dividend [0 to 2^32-1] (preserved)
; Out:       r31:r28 = {R} = {A}/{N}, rounded to nearest (halves round up)
; Constant:  {N}  = reg_N, the divisor [2 to 2^24-1]
;            {Kd} = round(2^32/{N}), the reciprocal used for the multiply
;            {Kr} = -INT(({N}-1)/2), the rounding threshold (<= 0)
; Clobbers:  r0, r1, r13, r14, r20..r27, SREG; r15 is left holding 0
; Untouched: r2..r12
; Requires:  a core with the hardware multiplier (MUL instruction)
;
; Method:
;   1. {B} = INT({A}*{Kd} / 2^32). Because {Kd} is within 1/2 of
;      2^32/{N}, {B} equals either {R} or {R}-1.
;   2. {C}  = low 32 bits of {B}*{N}
;      {D1} = {C} - {A}      (modulo 2^32)
;      no borrow            -> {B}*{N} >= {A}          -> {R} = {B}
;   3. {D2} = {D1} - {Kr}
;      {D2} >= 0 (signed)   -> remainder <= ({N}-1)/2  -> {R} = {B}
;      {D2} <  0            -> remainder >  ({N}-1)/2  -> {R} = {B}+1
;
; Code cycles (MUL = 2, ADIW = 2, taken branch = 2): 128 / 133 / 136
; for the three exits above, plus call overhead (RCALL 3 + RET 4 on
; devices with a 16-bit program counter).

.set reg_N = 70000                      ; {N}

; Reject divisors the routine cannot handle ({N}=1 would need a 33-bit {Kd}).
.if (reg_N < 2) || (reg_N > 0xFFFFFF)
.error "reg_N must be in the range 2 .. 2^24-1"
.endif

; '^' is bitwise XOR in AVR assembler expressions, so 2^32 is written 1<<32.
; NOTE: needs an assembler that evaluates expressions in more than 33 bits.
.set reg_Kd = (2*(1<<32)/reg_N+1)/2     ; {Kd} = round(2^32/{N})
; Written with a non-negative division so the result does not depend on
; how the assembler rounds a negative quotient.
.set reg_Kr = -((reg_N-1)/2)            ; {Kr}

D32_nnn:
    LDI   r20,   low(reg_Kd)
    LDI   r21,  high(reg_Kd)
    LDI   r22, byte3(reg_Kd)
    LDI   r23, byte4(reg_Kd)
; r23:r20 = {Kd} = k3:k2:k1:k0

;---------------------------------------------------------------
; 64-bit product P7..P0 = {A} * {Kd}
;   a3:a2:a1:a0 = r19:r18:r17:r16
;   k3:k2:k1:k0 = r23:r22:r21:r20
;   P7..P0      = r31,r30,r29,r28,r27,r26,r14,r13
;   r15         = 0, used to propagate carries
; MUL overwrites the carry flag, so every carry must be rippled
; upward with ADC rX,r15 before the next MUL is issued.
;---------------------------------------------------------------
    CLR   r15

; Diagonal products occupy disjoint byte pairs and are simply stored.
    MUL   r16,r20               ; a0*k0 -> P1:P0
    MOV   r13,r0
    MOV   r14,r1
    MUL   r17,r21               ; a1*k1 -> P3:P2
    MOVW  r27:r26,r1:r0
    MUL   r18,r22               ; a2*k2 -> P5:P4
    MOVW  r29:r28,r1:r0
    MUL   r19,r23               ; a3*k3 -> P7:P6
    MOVW  r31:r30,r1:r0

; Stage A: remaining products of (a1:a0)*(k1:k0). That product is
; below 2^32, so P3..P0 cannot carry into P4 during this stage.
    MUL   r17,r20               ; a1*k0, weight 2^8
    ADD   r14,r0
    ADC   r26,r1
    ADC   r27,r15
    MUL   r16,r21               ; a0*k1, weight 2^8
    ADD   r14,r0
    ADC   r26,r1
    ADC   r27,r15

; Stage B: remaining products of (a2:a1:a0)*(k2:k1:k0). That product
; is below 2^48, so P5..P0 cannot carry into P6 during this stage.
    MUL   r18,r20               ; a2*k0, weight 2^16
    ADD   r26,r0
    ADC   r27,r1
    ADC   r28,r15
    ADC   r29,r15
    MUL   r16,r22               ; a0*k2, weight 2^16
    ADD   r26,r0
    ADC   r27,r1
    ADC   r28,r15
    ADC   r29,r15
    MUL   r18,r21               ; a2*k1, weight 2^24
    ADD   r27,r0
    ADC   r28,r1
    ADC   r29,r15
    MUL   r17,r22               ; a1*k2, weight 2^24
    ADD   r27,r0
    ADC   r28,r1
    ADC   r29,r15

; Stage C: products involving a3 or k3. Carries are rippled to P7;
; the full product is below 2^64, so P7 never carries out.
    MUL   r19,r20               ; a3*k0, weight 2^24
    ADD   r27,r0
    ADC   r28,r1
    ADC   r29,r15
    ADC   r30,r15
    ADC   r31,r15
    MUL   r16,r23               ; a0*k3, weight 2^24
    ADD   r27,r0
    ADC   r28,r1
    ADC   r29,r15
    ADC   r30,r15
    ADC   r31,r15
    MUL   r19,r21               ; a3*k1, weight 2^32
    ADD   r28,r0
    ADC   r29,r1
    ADC   r30,r15
    ADC   r31,r15
    MUL   r17,r23               ; a1*k3, weight 2^32
    ADD   r28,r0
    ADC   r29,r1
    ADC   r30,r15
    ADC   r31,r15
    MUL   r19,r22               ; a3*k2, weight 2^40
    ADD   r29,r0
    ADC   r30,r1
    ADC   r31,r15
    MUL   r18,r23               ; a2*k3, weight 2^40
    ADD   r29,r0
    ADC   r30,r1
    ADC   r31,r15
; {B} = r31:r28 = INT({A}*{Kd} / 2^32);  {R} = {B} or {B}+1

;---------------------------------------------------------------
; {C} = low 32 bits of {B} * {N}
;   b3:b2:b1:b0 = r31:r30:r29:r28
;   n2:n1:n0    = r26:r25:r24   (r26 no longer holds P2)
;   c3:c2:c1:c0 = r23:r22:r21:r20
; Only products of weight below 2^32 are formed; carries out of c3
; are discarded on purpose because {C} is used modulo 2^32.
;---------------------------------------------------------------
    LDI   r26, byte3(reg_N)
    LDI   r25,  high(reg_N)
    LDI   r24,   low(reg_N)

    MUL   r28,r24               ; b0*n0 -> c1:c0
    MOVW  r21:r20,r1:r0
    MUL   r29,r25               ; b1*n1 -> c3:c2
    MOVW  r23:r22,r1:r0

    MUL   r29,r24               ; b1*n0, weight 2^8
    ADD   r21,r0
    ADC   r22,r1
    ADC   r23,r15               ; carry out of c2 must reach c3
    MUL   r28,r25               ; b0*n1, weight 2^8
    ADD   r21,r0
    ADC   r22,r1
    ADC   r23,r15

    MUL   r30,r24               ; b2*n0, weight 2^16
    ADD   r22,r0
    ADC   r23,r1
    MUL   r28,r26               ; b0*n2, weight 2^16
    ADD   r22,r0
    ADC   r23,r1

    MUL   r31,r24               ; b3*n0, weight 2^24 (low byte only)
    ADD   r23,r0
    MUL   r30,r25               ; b2*n1, weight 2^24 (low byte only)
    ADD   r23,r0
    MUL   r29,r26               ; b1*n2, weight 2^24 (low byte only)
    ADD   r23,r0
; {C} = r23:r20 = ({B}*{N}) mod 2^32

;---------------------------------------------------------------
; Rounding decision
;---------------------------------------------------------------
    SUB   r20, r16
    SBC   r21, r17
    SBC   r22, r18
    SBC   r23, r19
; {D1} = r23:r20 = {C} - {A}

    BRCC  DIV32_R               ; no borrow: {B}*{N} >= {A}, {R} = {B}

    SUBI  r20,   low(reg_Kr)
    SBCI  r21,  high(reg_Kr)
    SBCI  r22, byte3(reg_Kr)
    SBCI  r23, byte4(reg_Kr)
; {D2} = r23:r20 = {D1} - {Kr}

    BRPL  DIV32_R               ; {D2} >= 0: remainder below half, {R} = {B}

; {R} = {B}+1. ADIW leaves C = 1 on overflow of the low word, so the
; upper bytes are completed with ADC of zero (not SBCI, which would
; treat that flag as a borrow).
    ADIW  r29:r28, 1
    ADC   r30, r15
    ADC   r31, r15

DIV32_R:
    RET
; {R} = r31:r28 = {B} or {B}+1 [ {A}/{N} rounded ]
 
Status
Not open for further replies.
Cookies are required to use this site. You must accept them to continue using the site. Learn more…