; 64-bit by 64-bit unsigned divide (produces quotient and remainder)
; Bit-match algorithm
;
; Tim Victor, January 2, 1993
;
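; Callable from C (small model, near pointers) as follows:
; int ExtDiv(dividend, divisor, quotient, remainder);
;   each argument points to four 16-bit words, least-significant word first
;   returns 1 for attempted divide-by-zero, 0 otherwise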
;

        .model  small
        .data
; Dispatch table used by _ExtDiv: indexed by 2 * word-shift count (0..3).
; Each entry is the routine that seeds the accumulator and the quotient
; buffer for that many whole-word shifts of the dividend.
jtable  dw   store0, store1, store2, store3

        .code
        public _ExtDiv
;-----------------------------------------------------------------------
; int ExtDiv(dividend, divisor, quotient, remainder)
;
; Unsigned 64-bit divide by shift-and-subtract.  The dividend is first
; shifted right so that its highest set bit lines up with the highest set
; bit of the divisor; only the bits shifted out are then fed back through
; the compare/subtract loop, so the loop runs (bit difference + 1) times
; instead of a fixed 64.
;
; In:    four near pointers on the stack (cdecl order), each addressing
;        four 16-bit words, least-significant word first:
;          [bp+4]  dividend   (read only)
;          [bp+6]  divisor    (read only)
;          [bp+8]  quotient   (written)
;          [bp+0Ah] remainder (written)
; Out:   ax = 1 if the divisor is zero (quotient and remainder are left
;             untouched), ax = 0 otherwise.
; Clobb: ax, bx, cx, dx, flags.  si, di and bp are saved and restored.
;
; Notes:
;  - The quotient buffer doubles as scratch storage for the dividend bits
;    that still have to be shifted into the accumulator, and the divisor
;    is re-read from memory on every iteration.  The quotient buffer must
;    therefore not overlap the divisor; it also must not overlap the
;    remainder, which is stored last.
;  - The operands themselves are addressed through DS.  The pointer
;    arguments are always fetched through SS, including after bp has been
;    reused as part of the accumulator, so fetching them does not depend
;    on SS and DS being equal.
;
; Main-loop register roles:
;    bp:dx:bx:ax  accumulator (becomes the remainder)
;    si           -> divisor
;    di           -> quotient / pending dividend bits
;    cx           iterations left
;-----------------------------------------------------------------------

; argument offsets relative to bp after the standard prologue
ARG_DIVIDEND    equ  4
ARG_DIVISOR     equ  6
ARG_QUOTIENT    equ  8
ARG_REMAINDER   equ  0Ah

; after "push si / push di" sp sits this many bytes below the frame base,
; so an argument at [bp+N] is also reachable at [sp+FRAME_BIAS+N]
FRAME_BIAS      equ  4

_ExtDiv proc near

        push bp         ; save caller's stack frame
        mov  bp,sp      ; address arguments
        push si         ; save caller's register vars
        push di

; scan divisor from the most significant word down, looking for a
; non-zero word; ch counts the all-zero words skipped
        sub  ax,ax
        sub  cx,cx
        mov  si,[bp+ARG_DIVISOR]        ; get divisor addr
        or   ax,[si+6]
        jnz  nzdivis

        inc  ch
        or   ax,[si+4]
        jne  nzdivis

        inc  ch
        or   ax,[si+2]
        jne  nzdivis

        inc  ch
        or   ax,[si]
        jne  nzdivis

; divide-by-zero detected; return 1 without touching the outputs
dbz:    mov  ax,1               ; retcode <- 1
        jmp  exithere

; ax contains highest-order non-zero divisor word,
; ch contains the number of zero words above it;
; shift left until the top set bit falls into carry, counting in cl
; (cl = 16 - index of the highest set bit)
nzdivis:
        inc  cl
        shl  ax,1
        jnc  nzdivis

; scan dividend the same way, this time counting DOWN, so that
; ch ends up as (dividend top word index - divisor top word index)
        sub  ax,ax
        mov  di,[bp+ARG_DIVIDEND]       ; get dividend addr
        or   ax,[di+6]
        jnz  nzdivid

        dec  ch
        or   ax,[di+4]
        jne  nzdivid

        dec  ch
        or   ax,[di+2]
        jne  nzdivid

        dec  ch
        or   ax,[di]
        je   zeroquot           ; dividend is zero: quotient = remainder = 0

; ax contains highest-order non-zero dividend word;
; count its leading position back out of cl, leaving
; cl = (dividend top bit index - divisor top bit index) within the word
nzdivid:
        dec  cl
        shl  ax,1
        jnc  nzdivid

; if bit-shift count went negative, borrow from word-shift count
        test cl,080h
        jz   nobitbor
        add  cl,16
        dec  ch
nobitbor:

; double word-shift for jump-table offset;
; the shift moves the sign bit into carry, so a negative word-shift
; (divisor's top bit above dividend's) bails out with quotient zero
        shl  ch,1
        jnc  nzquot

; divisor > dividend (or dividend is zero), so quotient is zero
; and the remainder is the dividend itself
zeroquot:
        mov  si,[bp+ARG_DIVIDEND]       ; address dividend
        mov  di,[bp+ARG_REMAINDER]      ; address remainder
        mov  ax,[si]                    ; remainder <- dividend
        mov  [di],ax
        mov  ax,[si+2]
        mov  [di+2],ax
        mov  ax,[si+4]
        mov  [di+4],ax
        mov  ax,[si+6]
        mov  [di+6],ax

        sub  ax,ax                      ; retcode <- 0
        mov  di,[bp+ARG_QUOTIENT]       ; address quotient
        mov  [di],ax                    ; quotient <- 0
        mov  [di+2],ax
        mov  [di+4],ax
        mov  [di+6],ax
        jmp  exithere
nzquot:

; save bit shift (cl) for the loop count; ch holds 2 * word shift
        push cx

; load 5-word accum bp:dx:bx:ax:si from dividend (addr still in di);
; si starts at zero and catches the bits shifted out at the bottom
;  - bp reg gets blasted here, so from now on the arguments are found
;    relative to sp instead
        sub  si,si
        mov  ax,[di]
        mov  bx,[di+2]
        mov  dx,[di+4]
        mov  bp,[di+6]

; right-shift dividend to match high-bit of MSW w/ divisor
        or   cl,cl
        je   nobitshft
bshftlp:
        shr  bp,1
        rcr  dx,1
        rcr  bx,1
        rcr  ax,1
        rcr  si,1
        dec  cl
        jnz  bshftlp
nobitshft:

; switch based on word-shift count, load accum appropriately
        xchg ch,cl              ; cl is zero here, so cx = 2 * word shift
        mov  di,cx
        pop  cx                 ; saved bit-shift value makes loop count
        sub  ch,ch              ; only interested in low byte
        jmp  [jtable+di]

; Each storeN routine below:
;   - parks the words shifted out of the accumulator in the top of the
;     quotient buffer (they are fed back in by divloop),
;   - zeroes the rest of the quotient buffer,
;   - moves the remaining accumulator words down N places,
;   - sets cx = bit shift + 16*N + 1 (one compare per shifted-out bit
;     plus the initial compare at entry1).

; init accum and quotient storage with zero-word shift
store0:
        mov  di,sp              ; find quotient storage
        mov  di,ss:[di+FRAME_BIAS+ARG_QUOTIENT] ; stack read: force SS
        mov  [di+6],si          ; store partial word in quotient
        sub  si,si
        mov  [di+4],si          ; zero the other three
        mov  [di+2],si
        mov  [di],si
        add  cx,1               ; adjust loop count
        jmp  cmnstore

; init accum and quotient storage with one-word shift
store1:
        mov  di,sp              ; find quotient storage
        mov  di,ss:[di+FRAME_BIAS+ARG_QUOTIENT] ; stack read: force SS
        mov  [di+6],ax          ; store two words in quotient
        mov  [di+4],si
        mov  ax,bx              ; juggle words in accum
        mov  bx,dx
        mov  dx,bp
        sub  bp,bp              ; zero high word of accum
        mov  [di+2],bp          ; zero out two other quotient words
        mov  [di],bp
        add  cx,17              ; adjust loop count
        jmp  cmnstore

; init accum and quotient storage with two-word shift
store2:
        mov  di,sp              ; find quotient storage
        mov  di,ss:[di+FRAME_BIAS+ARG_QUOTIENT] ; stack read: force SS
        mov  [di+6],bx          ; store three words in quotient
        mov  [di+4],ax
        mov  [di+2],si
        mov  ax,dx              ; juggle words in accum
        mov  bx,bp
        sub  bp,bp              ; zero two highest words of accum
        mov  dx,bp
        mov  [di],bp            ; zero out other quotient word
        add  cx,33              ; adjust loop count
        jmp  cmnstore

; init accum and quotient storage with three-word shift
store3:
        mov  di,sp              ; find quotient storage
        mov  di,ss:[di+FRAME_BIAS+ARG_QUOTIENT] ; stack read: force SS
        mov  [di+6],dx          ; store four words in quotient
        mov  [di+4],bx
        mov  [di+2],ax
        mov  [di],si
        mov  ax,bp              ; one (partial) word stays in accum
        sub  bp,bp              ; zero out other accum words
        mov  dx,bp
        mov  bx,bp
        add  cx,49              ; adjust loop count
cmnstore:

; point si to divisor, then do it
; (di still points to quotient)
        mov  si,sp
        mov  si,ss:[si+FRAME_BIAS+ARG_DIVISOR]  ; stack read: force SS
        jmp  entry1             ; first pass compares without shifting

divloop:
; shift the quotient buffer left one bit: makes room for the next
; quotient bit at the bottom and pushes the next pending dividend bit
; out of the top into carry
        shl  word ptr [di],1
        rcl  word ptr [di+2],1
        rcl  word ptr [di+4],1
        rcl  word ptr [di+6],1

; shift that bit into accumulator
        rcl  ax,1
        rcl  bx,1
        rcl  dx,1
        rcl  bp,1

; compare accum to divisor, most significant word first (unsigned)
entry1:
        cmp  bp,[si+6]
        jb   nosub
        ja   subdiv

        cmp  dx,[si+4]
        jb   nosub
        ja   subdiv

        cmp  bx,[si+2]
        jb   nosub
        ja   subdiv

        cmp  ax,[si]
        jb   nosub

subdiv:
; accum >= divisor, so subtract divisor
        sub  ax,[si]
        sbb  bx,[si+2]
        sbb  dx,[si+4]
        sbb  bp,[si+6]

; put a one bit in quotient (bit 0 is always clear at this point)
        inc  word ptr [di]

nosub:
        loop divloop

; done; whatever is left in the accumulator is the remainder
        mov  si,sp
        mov  si,ss:[si+FRAME_BIAS+ARG_REMAINDER] ; stack read: force SS
        mov  [si],ax
        mov  [si+2],bx
        mov  [si+4],dx
        mov  [si+6],bp

; signal success
        sub  ax,ax

; restore caller's regs
exithere:
        pop  di
        pop  si
        pop  bp

        ret

_ExtDiv endp
        end

