Kerim
Member
The assembly code presented in my previous thread, “AVR Code for Dividing 16 Bits by 8-Bit Constant”, can be extended to divide 24 bits by 16 bits, 32 bits by 24 bits, and so on.
Below is the common code that divides a 32-bit number [0 to 4,294,967,295] by a 24-bit constant [2 to 16,777,215] with rounding, in 129 clocks maximum (not counting the call/return overhead) – not bad, right?
Code:
;===============================================================
; *R* Division of 32-bit register {A} by 24-bit constant {N} ***
;===============================================================
; {R} = round({A}/{N}), computed without a division loop:
;       {B} = ({A} * {Kd}) / 2^32      (upper 32 bits of a 64-bit product)
;       {R} = {B} or {B}+1             (decided from {B}*{N} - {A})
;
; In:   {A} dividend in r19,r18,r17,r16 [0 to 2^32-1], left unchanged
;       {N} assemble-time constant divisor [N = 2 to 2^24-1]
; Out:  {R} result in r31,r30,r29,r28 = {A}/{N} rounded
;
; {Kd} = round(256*256*256*256/{N},0) , the division constant
; {Kr} = INT[ (1-{N})/2 ]             , the rounding constant
;
; Clobbers:  r0, r1, r13, r14, r15, r20..r27, SREG
; Not used:  r2..r12
;
; Cycles by instruction count (MUL and ADIW = 2, taken branch = 2,
; everything else = 1): 128, 133 or 136 for the three exit paths,
; plus 4 for RET and 3 for RCALL on devices with a 16-bit PC
; (135, 140 or 143 in total).
;
; NOTE: the constant expressions below need more than 32 bits, so the
; assembler must evaluate expressions in 64 bits (AVRASM2 does).
; '^' is bitwise XOR in AVR assembler expressions, NOT "to the power of",
; therefore 2^32 is spelled 256*256*256*256.
.set reg_N = 70000 ; {N}
.if reg_N < 2
.error "reg_N must be in the range 2 .. 16777215"
.endif
.if reg_N > 16777215
.error "reg_N must be in the range 2 .. 16777215"
.endif
.set reg_Kd=(2*256*256*256*256/reg_N+1)/2 ; {Kd} = round(2^32/{N})
.set reg_Kr=(1-reg_N)/2 ; {Kr}
D32_nnn:
    LDI r20, low(reg_Kd)        ;1
    LDI r21, high(reg_Kd)       ;1
    LDI r22, byte3(reg_Kd)      ;1
    LDI r23, byte4(reg_Kd)      ;1, =4
; r23:r20 = {Kd}
;---------------------------------------------------------------
; 64-bit product {A}*{Kd}
;   multiplicand a3..a0 = r19,r18,r17,r16 = {A}
;   multiplier   b3..b0 = r23,r22,r21,r20 = {Kd}
;   product bytes 7..0  = r31,r30,r29,r28,r27,r26,r14,r13
;   only bytes 7..4 (r31:r28) are kept as {B}
;
; MUL overwrites the C flag, so every carry produced while adding a
; partial product has to be absorbed (ADC rX,r15 with r15 = 0) BEFORE
; the next MUL is executed.
; The partial products are added rectangle by rectangle so that the
; carry chain can stop early where an overflow is impossible:
;   (a1:a0)*(b1:b0)       < 2^32  -> no carry leaves byte 3
;   (a2:a0)*(b2:b0)       < 2^48  -> no carry leaves byte 5
;   full product          < 2^64  -> no carry leaves byte 7
;---------------------------------------------------------------
    CLR r15                     ;1  r15 = 0, used to add the carry only
; the four "diagonal" products do not overlap, so they are just stored
    MUL r16,r20                 ;2  a0*b0 -> bytes 1:0
    MOV r13,r0                  ;1  (byte 0 never receives anything else)
    MOV r14,r1                  ;1
    MUL r17,r21                 ;2  a1*b1 -> bytes 3:2
    MOVW r27:r26,r1:r0          ;1
    MUL r18,r22                 ;2  a2*b2 -> bytes 5:4
    MOVW r29:r28,r1:r0          ;1
    MUL r19,r23                 ;2  a3*b3 -> bytes 7:6
    MOVW r31:r30,r1:r0          ;1, +14 =18
; --- complete the 2x2 rectangle: carries stop at byte 3 ---
    MUL r17,r20                 ;2  a1*b0 -> bytes 2:1
    ADD r14,r0                  ;1
    ADC r26,r1                  ;1
    ADC r27,r15                 ;1
    MUL r16,r21                 ;2  a0*b1 -> bytes 2:1
    ADD r14,r0                  ;1
    ADC r26,r1                  ;1
    ADC r27,r15                 ;1, +10 =28
; --- complete the 3x3 rectangle: carries stop at byte 5 ---
    MUL r18,r20                 ;2  a2*b0 -> bytes 3:2
    ADD r26,r0                  ;1
    ADC r27,r1                  ;1
    ADC r28,r15                 ;1
    ADC r29,r15                 ;1
    MUL r16,r22                 ;2  a0*b2 -> bytes 3:2
    ADD r26,r0                  ;1
    ADC r27,r1                  ;1
    ADC r28,r15                 ;1
    ADC r29,r15                 ;1
    MUL r18,r21                 ;2  a2*b1 -> bytes 4:3
    ADD r27,r0                  ;1
    ADC r28,r1                  ;1
    ADC r29,r15                 ;1
    MUL r17,r22                 ;2  a1*b2 -> bytes 4:3
    ADD r27,r0                  ;1
    ADC r28,r1                  ;1
    ADC r29,r15                 ;1, +22 =50
; --- remaining products with a3 or b3: carries run up to byte 7 ---
    MUL r19,r20                 ;2  a3*b0 -> bytes 4:3
    ADD r27,r0                  ;1
    ADC r28,r1                  ;1
    ADC r29,r15                 ;1
    ADC r30,r15                 ;1
    ADC r31,r15                 ;1
    MUL r16,r23                 ;2  a0*b3 -> bytes 4:3
    ADD r27,r0                  ;1
    ADC r28,r1                  ;1
    ADC r29,r15                 ;1
    ADC r30,r15                 ;1
    ADC r31,r15                 ;1
    MUL r19,r21                 ;2  a3*b1 -> bytes 5:4
    ADD r28,r0                  ;1
    ADC r29,r1                  ;1
    ADC r30,r15                 ;1
    ADC r31,r15                 ;1
    MUL r17,r23                 ;2  a1*b3 -> bytes 5:4
    ADD r28,r0                  ;1
    ADC r29,r1                  ;1
    ADC r30,r15                 ;1
    ADC r31,r15                 ;1
    MUL r19,r22                 ;2  a3*b2 -> bytes 6:5
    ADD r29,r0                  ;1
    ADC r30,r1                  ;1
    ADC r31,r15                 ;1
    MUL r18,r23                 ;2  a2*b3 -> bytes 6:5
    ADD r29,r0                  ;1
    ADC r30,r1                  ;1
    ADC r31,r15                 ;1, +36 =86
; {B} = r31:r28 = {A}*{Kd}/256/256/256/256
; {R} = {B} or {B}+1
;---------------------------------------------------------------
; for rounding: {C} = low 32 bits of {B}*{N}
;   multiplicand c3..c0 = r31,r30,r29,r28 = {B}
;   multiplier   n2..n0 = r26,r25,r24     = {N}
;   result bytes 3..0   = r23,r22,r21,r20
; r27:r26 (product bytes 3:2) are no longer needed, r26 is reused.
; Anything that would land above byte 3 is dropped on purpose.
;---------------------------------------------------------------
    LDI r26, byte3(reg_N)       ;1
    LDI r25, high(reg_N)        ;1
    LDI r24, low(reg_N)         ;1, +3 =89
; r26:r24 = {N}
    MUL r28,r24                 ;2  c0*n0 -> bytes 1:0
    MOVW r21:r20,r1:r0          ;1
    MUL r29,r25                 ;2  c1*n1 -> bytes 3:2
    MOVW r23:r22,r1:r0          ;1, +6 =95
    MUL r29,r24                 ;2  c1*n0 -> bytes 2:1
    ADD r21,r0                  ;1
    ADC r22,r1                  ;1
    ADC r23,r15                 ;1  carry into byte 3 before MUL kills it
    MUL r28,r25                 ;2  c0*n1 -> bytes 2:1
    ADD r21,r0                  ;1
    ADC r22,r1                  ;1
    ADC r23,r15                 ;1, +10 =105
    MUL r30,r24                 ;2  c2*n0 -> bytes 3:2
    ADD r22,r0                  ;1
    ADC r23,r1                  ;1
    MUL r28,r26                 ;2  c0*n2 -> bytes 3:2
    ADD r22,r0                  ;1
    ADC r23,r1                  ;1, +8 =113
    MUL r31,r24                 ;2  c3*n0 -> byte 3 (low byte only)
    ADD r23,r0                  ;1
    MUL r30,r25                 ;2  c2*n1 -> byte 3 (low byte only)
    ADD r23,r0                  ;1
    MUL r29,r26                 ;2  c1*n2 -> byte 3 (low byte only)
    ADD r23,r0                  ;1, +9 =122
; {C} = r23:r20 = {B}*{N} = r31:r28 * r26:r24 (modulo 2^32)
; the following conditions were deduced empirically
; if( Carry_1=0, {R}={B}, if( {D2}>=0, {R}={B}, {R}={B}+1 ) )
    SUB r20, r16                ;1
    SBC r21, r17                ;1
    SBC r22, r18                ;1
    SBC r23, r19                ;1, +4 =126
; {D1} = r23:r20 = {C} - {A} = r23:r20 - r19:r16
    BRCC DIV32_R                ;2a|1bc  exit a: [128a]
; if Carry_1=0 (no borrow), {R}={B}
    SUBI r20, low(reg_Kr)       ;1bc
    SBCI r21, high(reg_Kr)      ;1bc
    SBCI r22, byte3(reg_Kr)     ;1bc
    SBCI r23, byte4(reg_Kr)     ;1bc
; {D2} = r23:r20 = {D1} - {Kr} = r23:r20 - {Kr}
    BRPL DIV32_R                ;2b|1c   exit b: [133b]
; if {D2} is not negative, {R}={B}
; otherwise {R}={B}+1 as a full 32-bit increment.
; ADIW leaves an ADDITION carry in C, so the upper bytes must take it
; with ADC (r15 = 0); SBCI rX,0xFF would treat C as a borrow instead.
    ADIW r29:r28, 1             ;2c
    ADC r30, r15                ;1c
    ADC r31, r15                ;1c      exit c: [136c]
DIV32_R:
    RET                         ;4
; {R} = r31:r28 = {B} or {B}+1 [ {A}/{N} rounded ]