        page    ,132
;---------------------------Module-Header-------------------------------;
; Module Name: MATH.ASM
;
; Contains FIXED point math routines.
;
; Copyright (c) 1987  Microsoft Corporation
;-----------------------------------------------------------------------;

;     (C) Copyright Microsoft Corp. 1987,1991.  All rights reserved.
;
;     You have a royalty-free right to use, modify, reproduce and 
;     distribute the Sample Files (and/or any modified version) in 
;     any way you find useful, provided that you agree that 
;     Microsoft has no warranty obligations or liability for any 
;     Sample Application Files which are modified. 

?WIN	= 0
?PLM	= 1
?NODATA = 0
?386	= 1

	.286
        .xlist
        include cmacros.inc
        .list

UQUAD   struc
uq0     dw      ?
uq1     dw      ?
uq2     dw      ?
uq3     dw      ?
UQUAD	ends

PTL	struc
ptl_x	dd	?
ptl_y	dd	?
PTL	ends

POINTFX struc
x       dd      ?
y       dd      ?
z       dd      ?
w       dd      ?
POINTFX ends


MAXINT equ 7FFFh
MININT equ 8000h


;       The following two equates are just used as shorthand
;       for the "word ptr" and "byte ptr" overrides.

wptr    equ     word ptr
bptr    equ     byte ptr

; The following structure should be used to access high and low
; words of a DWORD.  This means that "word ptr foo[2]" -> "foo.hi".

LONG    struc
lo      dw      ?
hi      dw      ?
LONG    ends

FARPOINTER      struc
off     dw      ?
sel     dw      ?
FARPOINTER      ends

assert macro   a,b,c,d
endm

EAXtoDXAX   macro
        shld    edx,eax,16      ; move HIWORD(eax) to dx
        endm

DXAXtoEAX   macro
        ror     eax,16          ; xchg HIWORD(eax) and LOWORD(eax)
        shrd    eax,edx,16      ; move LOWORD(edx) to HIWORD(eax)
        endm

createSeg MATH_TEXT,MathSeg,word,public,CODE
createSeg TRIG_TEXT,TrigSeg,word,public,CODE

;---------------------------Public-Routine------------------------------;
; multiply                                                              ;
;                                                                       ;
; Multiplies two 32 bit fixed point numbers.  FIXED is a signed 32 bit  ;
; number with 16 bits of integer and 16 bits of fraction.               ;
;                                                                       ;
; This routine is pretty small and very popular.  We therefore put a    ;
; copy in each of several segments.                                     ;
;                                                                       ;
; Entry:                                                                ;
;       DX:AX = FIXED x                                                 ;
;       CX:BX = FIXED y                                                 ;
; Returns:                                                              ;
;       OF = 0                                                          ;
;         DX:AX = FIXED x*y                                             ;
;         BX    = error term                                            ;
; Error Returns:                                                        ;
;       OF = 1                                                          ;
; Registers destroyed:                                                  ;
;       BX,CX                                                           ;
;-----------------------------------------------------------------------;
; Breaks the 32 bit multiply into four 16 bit multiplies.  The key      ;
; feature of this routine is the way that the 32 bit numbers are        ;
; "SPLIT".  This means that we break up the 32 bit signed numbers into  ;
; two 16 bit signed numbers.  After the breakup, the upper word,        ;
; say X.1, is assumed to be the upper word of a 32 bit signed integer   ;
; whose lower word is 0000.  The lower word is assumed to be the lower  ;
; 16 bits of a 32 bit signed word, THE UPPER WORD OF WHICH CAN BE       ;
; GENERATED AS A SIGN EXTENSION OF THE LOWER WORD.  As an example,      ;
; consider what we do with the number 2.500:                            ;
;                                                                       ;
;     In FIXED notation:   2.500 = 00028000h                            ;
;                                                                       ;
;     We break it up as:         = 00030000h + FFFF8000h = 3.000 - .500 ;
;                                                                       ;
;     So, in SPLIT notation:     = 00038000h                            ;
;                                                                       ;
; This allows the upper and lower words to each be treated as a signed  ;
; 16 bit integer in their own right.  Thus, we can use the signed       ;
; multiply instruction IMUL.                                            ;
;-----------------------------------------------------------------------;

multiply_fixed  macro
        local   multiply_fixed_done

; make arg1 a SPLIT

        mov     di,dx
        cwd
        sub     di,dx
        mov     si,ax                   ; SPLIT arg1 in DI:SI

; make arg2 a SPLIT

        mov     ax,bx
        cwd
        sub     cx,dx                   ; SPLIT arg2 in CX:BX

                                        ;  AX  BX  CX  DX  SI  DI
                                        ;  --  y.0 y.1 --  x.0 x.1

; start multiplying the high order words, this saves a register

        mov     ax,cx                   ;  Y.1 y.0 Y.1 --  x.0 X.1

; X.1 * Y.1

        imul    di                      ;  p.1 y.0 Y.1 --  x.0 X.1
        jo      multiply_fixed_done
        xchg    ax,cx                   ;  Y.1 y.0 p.1 --  x.0 X.1

; x.0 * Y.1

        imul    si                      ;  p.0 y.0 p.1 p.1 x.0 X.1
        add     cx,dx                   ;  p.0 y.0 p.1 --  x.0 X.1
        jo      multiply_fixed_done
        xchg    ax,di                   ;  X.1 y.0 p.1 --  x.0 p.0

; X.1 * y.0

        imul    bx                      ;  p.0 y.0 p.1 p.1 x.0 p.0
        add     di,ax                   ;  --  y.0 p.1 p.1 x.0 p.0
        adc     cx,dx                   ;  --  y.0 p.1 --  x.0 p.0
        jo      multiply_fixed_done
        xchg    ax,bx                   ;  y.0 --  p.1 --  x.0 p.0

; x.0 * y.0

        imul    si                      ;  p.e --  p.1 p.0 x.0 p.0
        xchg    ax,bx                   ;  --  p.e p.1 p.0 --  p.0
        xchg    ax,dx                   ;  p.0 p.e p.1 --  --  p.0
        cwd                             ;  p.0 p.e p.1 cwd --  p.0
        add     ax,di                   ;  p.0 p.e p.1 cwd --  -- 
        adc     dx,cx                   ;  p.0 p.e --  p.1 --  -- 
multiply_fixed_done:
        endm

sBegin	MathSeg
	assumes cs,MathSeg
	assumes ds,nothing
	assumes es,nothing

if ?386
    .386
endif

if ?386
    cProc   multiply,<FAR,PUBLIC>
	    ParmD  fx1
	    ParmD  fx2
    cBegin
	    mov     eax,fx1
	    imul    fx2 	; result is in	LOWORD(edx):HIWORD(eax)
	    shr     eax,16
    cEnd
else
    cProc   multiply,<FAR,PUBLIC>,<si,di>
	    ParmD  fx1
	    ParmD  fx2
    cBegin
	    mov     ax,fx1.lo
	    mov     dx,fx1.hi

	    mov     bx,fx2.lo
	    mov     cx,fx2.hi

	    call    idmul	    ; dx:cx:bx:ax = dx:ax * cx:bx

	    mov     dx,cx	    ; dx:ax = dx:cx:bx:ax >> 16
	    mov     ax,bx
    ;;	      multiply_fixed
    cEnd
endif

;------------------------------    Public   -------------------------------;
; fxRound
;
; Return the fixed number corresponding to the rounding of the input
; fixed number.
;
; Entry:
;       Rhi,Rlo: registers containing the .hi and .lo portions
;                of the fixed number.
; Returns:
;       Rhi,Rlo:        The answer is returned the the same registers
; Error Returns:
;       none
;
;--------------------------------------------------------------------------;
cProc fxRound, <FAR, PASCAL>
	ParmD  fx
cBegin
	mov	ax,fx.lo
	mov	dx,fx.hi

	add	ax,ax
	adc	dx,0
	xor	ax,ax
cEnd

;---------------------------Public-Routine------------------------------;
; ulNormalize
;
; Normalizes a ULONG so that the highest order bit is 1.  Returns the
; number of shifts done.  Also returns ZF=1 if the ULONG was zero.
;
; Entry:
;       DX:AX = ULONG
; Returns:
;       DX:AX = normalized ULONG
;       CX    = shift count
;       ZF    = 1 if the ULONG is zero, 0 otherwise
; Registers Destroyed:
;       none
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   ulNormalize,<PUBLIC,NEAR>
cBegin

; shift by words

        xor     cx,cx
        or      dx,dx
        js      ulNormalize_exit
        jnz     top_word_ok
        xchg    ax,dx
        or      dx,dx
        jz      ulNormalize_exit        ; the zero exit
        mov     cl,16
        js      ulNormalize_exit
top_word_ok:

; shift by bytes

        or      dh,dh
        jnz     top_byte_ok
        xchg    dh,dl
        xchg    dl,ah
        xchg    ah,al
        add     cl,8
        or      dh,dh
        js      ulNormalize_exit
top_byte_ok:

; do the rest by bits

        inc     cx
        add     ax,ax
        adc     dx,dx
        js      ulNormalize_exit
        inc     cx
        add     ax,ax
        adc     dx,dx
        js      ulNormalize_exit
        inc     cx
        add     ax,ax
        adc     dx,dx
        js      ulNormalize_exit
        inc     cx
        add     ax,ax
        adc     dx,dx
        js      ulNormalize_exit
        inc     cx
        add     ax,ax
        adc     dx,dx
        js      ulNormalize_exit
        inc     cx
        add     ax,ax
        adc     dx,dx
        js      ulNormalize_exit
        inc     cx
        add     ax,ax
        adc     dx,dx
ulNormalize_exit:
cEnd

;---------------------------Public-Routine------------------------------;
; far_idmul
;       see idmul below.
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   far_idmul,<PUBLIC,FAR,NONWIN,NODATA>
cBegin
        cCall   idmul

cEnd

;---------------------------Public-Routine------------------------------;
; idmul
;
; This is an extended precision multiply routine, intended to emulate
; 80386 imul instruction.
;
; Entry:
;       DX:AX = LONG
;       CX:BX = LONG
; Returns:
;       DX:CX:BX:AX = QUAD product
; Registers Destroyed:
;       none
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   idmul,<PUBLIC,NEAR>,<si,di>
        localQ  qTemp
cBegin

; put one argument in safe registers

        mov     si,dx
        mov     di,ax

; do the low order unsigned product

        mul     bx
        mov     qTemp.uq0,ax
        mov     qTemp.uq1,dx

; do the high order signed product

        mov     ax,si
        imul    cx
        mov     qTemp.uq2,ax
        mov     qTemp.uq3,dx

; do a mixed product

        mov     ax,si
        cwd
        and     dx,bx
        sub     qTemp.uq2,dx            ; adjust for sign bit
        sbb     qTemp.uq3,0
        mul     bx
        add     qTemp.uq1,ax
        adc     qTemp.uq2,dx
        adc     qTemp.uq3,0

; do the other mixed product

        mov     ax,cx
        cwd
	and	dx,di
        sub     qTemp.uq2,dx
        sbb     qTemp.uq3,0
        mul     di

; pick up the answer

        mov     bx,ax
        mov     cx,dx
        xor     dx,dx

        mov     ax,qTemp.uq0
        add     bx,qTemp.uq1
        adc     cx,qTemp.uq2
        adc     dx,qTemp.uq3
cEnd

;---------------------------Public-Routine------------------------------;
; far_dmul
;       see dmul below.
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   far_dmul,<PUBLIC,FAR,NONWIN,NODATA>
cBegin
        cCall   dmul
cEnd

;---------------------------Public-Routine------------------------------;
; dmul
;
; This is an extended precision multiply routine, intended to emulate
; 80386 mul instruction.
;
; Entry:
;       DX:AX = LONG
;       CX:BX = LONG
; Returns:
;       DX:CX:BX:AX = QUAD product
; Registers Destroyed:
;       none
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   dmul,<PUBLIC,NEAR>,<si,di>
        localQ  qTemp
cBegin

; put one argument in safe registers

        mov     si,dx
        mov     di,ax

; do the low order product

        mul     bx
        mov     qTemp.uq0,ax
        mov     qTemp.uq1,dx

; do the high order product

        mov     ax,si
        mul     cx
        mov     qTemp.uq2,ax
        mov     qTemp.uq3,dx

; do a mixed product

        mov     ax,si
        mul     bx
        add     qTemp.uq1,ax
        adc     qTemp.uq2,dx
        adc     qTemp.uq3,0

; do the other mixed product

        mov     ax,cx
        mul     di

; pick up the answer

        mov     bx,ax
        mov     cx,dx
        xor     dx,dx
        mov     ax,qTemp.uq0
        add     bx,qTemp.uq1
        adc     cx,qTemp.uq2
        adc     dx,qTemp.uq3
cEnd

;---------------------------Public-Routine------------------------------;
; far_iqdiv
;       see iqdiv below.
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   far_iqdiv,<PUBLIC,FAR,NONWIN,NODATA>
cBegin
        cCall   iqdiv
cEnd

;---------------------------Public-Routine------------------------------;
; iqdiv
;
; This is an extended precision divide routine which is intended to
; emulate the 80386 64 bit/32 bit IDIV instruction.  We don't have the
; 32 bit registers to work with, but we pack the arguments and results
; into what registers we do have.  We will divide two signed numbers
; and return the quotient and remainder.  We will do INT 0 for overflow,
; just like the 80386 microcode.  This should ease conversion later.
;
; This routine just keeps track of the signs and calls qdiv to do the
; real work.
;
; Entry:
;       DX:CX:BX:AX = QUAD Numerator
;       SI:DI       = LONG Denominator
; Returns:
;       DX:AX = quotient
;       CX:BX = remainder
; Registers Destroyed:
;       DI,SI
; Wrote it.
;-----------------------------------------------------------------------;

WIMP    equ     1

IQDIV_RESULT_SIGN       equ     1
IQDIV_REM_SIGN          equ     2

        assumes ds,nothing
        assumes es,nothing

cProc   iqdiv,<PUBLIC,NEAR>
        localB  flags
cBegin
        mov     flags,0

; take the absolute value of the denominator

        or      si,si
        jns     denominator_is_cool
        xor     flags,IQDIV_RESULT_SIGN
        neg     di
        adc     si,0
        neg     si
denominator_is_cool:

; take the absolute value of the denominator

        or      dx,dx
        jns     numerator_is_cool
        xor     flags,IQDIV_RESULT_SIGN + IQDIV_REM_SIGN
        not     ax
        not     bx
        not     cx
        not     dx
        add     ax,1
        adc     bx,0
        adc     cx,0
        adc     dx,0
numerator_is_cool:

; do the unsigned division

        call    qdiv
ifdef WIMP
        jo      iqdiv_exit
endif

; check for overflow

        or      dx,dx
        jns     have_a_bit_to_spare
ifdef WIMP
        mov     ax,8000h
        dec     ah
        jmp     short iqdiv_exit
else
        int     0                       ; You're toast, Jack!
endif
have_a_bit_to_spare:

; negate the result, if required

        test    flags,IQDIV_RESULT_SIGN
        jz      result_is_done
        neg     ax
        adc     dx,0
        neg     dx
result_is_done:

; negate the remainder, if required

        test    flags,IQDIV_REM_SIGN
        jz      remainder_is_done
        neg     bx
        adc     cx,0
        neg     cx
remainder_is_done:
iqdiv_exit:
cEnd

;---------------------------Public-Routine------------------------------;
; far_qdiv
;       see qdiv below.
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   far_qdiv,<PUBLIC,FAR,NONWIN,NODATA>
cBegin
        cCall   qdiv
cEnd

;---------------------------Public-Routine------------------------------;
; qdiv
;
; This is an extended precision divide routine which is intended to
; emulate the 80386 64 bit/32 bit DIV instruction.  We don't have the
; 32 bit registers to work with, but we pack the arguments and results
; into what registers we do have.  We will divide two unsigned numbers
; and return the quotient and remainder.  We will do INT 0 for overflow,
; just like the 80386 microcode.  This should ease conversion later.
;
; Entry:
;       DX:CX:BX:AX = UQUAD Numerator
;       SI:DI       = ULONG Denominator
; Returns:
;       DX:AX = quotient
;       CX:BX = remainder
; Registers Destroyed:
;       none
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   qdiv,<PUBLIC,NEAR>,<si,di>
        localQ  uqNumerator
        localD  ulDenominator
        localD  ulQuotient
        localW  cShift
cBegin

; stuff the quad word into local memory

        mov     uqNumerator.uq0,ax
        mov     uqNumerator.uq1,bx
        mov     uqNumerator.uq2,cx
        mov     uqNumerator.uq3,dx


; check for overflow

qdiv_restart:
        cmp     si,dx
        ja      qdiv_no_overflow
        jb      qdiv_overflow
        cmp     di,cx
        ja      qdiv_no_overflow
qdiv_overflow:
ifdef WIMP
        mov     ax,8000h
        dec     ah
        jmp     qdiv_exit
else
        int     0                       ; You're toast, Jack!
        jmp     qdiv_restart
endif
qdiv_no_overflow:

; check for a zero Numerator

        or      ax,bx
        or      ax,cx
        or      ax,dx
        jz      qdiv_exit_relay         ; quotient = remainder = 0

; handle the special case when the denominator lives in the low word

        or      si,si
        jnz     not_that_special

; calculate (DX=0):CX:BX:uqNumerator.uq0 / (SI=0):DI

        cmp     di,1                    ; separate out the trivial case
        jz      div_by_one
        xchg    dx,cx                   ; CX = remainder.hi = 0
        mov     ax,bx
        div     di
        mov     bx,ax                   ; BX = quotient.hi
        mov     ax,uqNumerator.uq0
        div     di                      ; AX = quotient.lo
        xchg    bx,dx                   ; DX = quotient.hi, BX = remainder.lo
ifdef WIMP
        or      ax,ax           ; clear OF
endif
qdiv_exit_relay:
        jmp     qdiv_exit

; calculate (DX=0):(CX=0):BX:uqNumerator.uq0 / (SI=0):(DI=1)

div_by_one:
        xchg    dx,bx                   ; DX = quotient.hi, BX = remainder.lo = 0
        mov     ax,uqNumerator.uq0      ; AX = quotient.lo
        jmp     qdiv_exit
not_that_special:

; handle the special case when the denominator lives in the high word

        or      di,di
        jnz     not_this_special_either

; calculate DX:CX:BX:uqNumerator.uq0 / SI:(DI=0)

        cmp     si,1                    ; separate out the trivial case
        jz      div_by_10000h
        mov     ax,cx
        div     si
        mov     cx,ax                   ; CX = quotient.hi
        mov     ax,bx
        div     si                      ; AX = quotient.lo
        xchg    cx,dx                   ; DX = quotient.hi, CX = remainder.hi
        mov     bx,uqNumerator.uq0      ; BX = remainder.lo
ifdef WIMP
        or      ax,ax           ; clear OF
endif
        jmp     qdiv_exit

; calculate (DX=0):CX:BX:uqNumerator.uq0 / (SI=1):(DI=0)

div_by_10000h:
        xchg    cx,dx                   ; DX = quotient.hi, CX = remainder.hi = 0
        mov     ax,bx                   ; AX = quotient.lo
        mov     bx,uqNumerator.uq0      ; BX = remainder.lo
        jmp     qdiv_exit
not_this_special_either:

; normalize the denominator

        mov     dx,si
        mov     ax,di
        call    ulNormalize             ; DX:AX = normalized denominator
        mov     cShift,cx               ; CX < 16
        mov     ulDenominator.lo,ax
        mov     ulDenominator.hi,dx


; shift the Numerator by the same amount

        jcxz    numerator_is_shifted
        mov     si,-1
        shl     si,cl
        not     si                      ; SI = mask
        mov     bx,uqNumerator.uq3
        shl     bx,cl
        mov     ax,uqNumerator.uq2
        rol     ax,cl
        mov     di,si
        and     di,ax
        or      bx,di
        mov     uqNumerator.uq3,bx
        xor     ax,di
        mov     bx,uqNumerator.uq1
        rol     bx,cl
        mov     di,si
        and     di,bx
        or      ax,di
        mov     uqNumerator.uq2,ax
        xor     bx,di
        mov     ax,uqNumerator.uq0
        rol     ax,cl
        mov     di,si
        and     di,ax
        or      bx,di
        mov     uqNumerator.uq1,bx
        xor     ax,di
        mov     uqNumerator.uq0,ax
numerator_is_shifted:

; set up registers for division

        mov     dx,uqNumerator.uq3
        mov     ax,uqNumerator.uq2
        mov     di,uqNumerator.uq1
        mov     cx,ulDenominator.hi
        mov     bx,ulDenominator.lo

; check for case when Denominator has only 16 bits

        or      bx,bx
        jnz     must_do_long_division
        div     cx
        mov     si,ax
        mov     ax,uqNumerator.uq1
        div     cx
        xchg    si,dx                   ; DX:AX = quotient
        mov     di,uqNumerator.uq0      ; SI:DI = remainder (shifted)
        jmp     short unshift_remainder
must_do_long_division:

; do the long division, part IZ@NL@%

        cmp     dx,cx                   ; we only know that DX:AX < CX:BX!
        jb      first_division_is_safe
        mov     ulQuotient.hi,0         ; i.e. 10000h, our guess is too big
        mov     si,ax
        sub     si,bx                   ; ... remainder is negative
        jmp     short first_adjuster
first_division_is_safe:
        div     cx
        mov     ulQuotient.hi,ax
        mov     si,dx
        mul     bx                      ; fix remainder for low order term
        sub     di,ax
        sbb     si,dx
        jnc     first_adjuster_done     ; The remainder is UNSIGNED!  We have
first_adjuster:                         ; to use the carry flag to keep track
        dec     ulQuotient.hi           ; of the sign.  The adjuster loop
        add     di,bx                   ; watches for a change to the carry
        adc     si,cx                   ; flag which would indicate a sign
        jnc     first_adjuster          ; change IF we had more bits to keep
first_adjuster_done:                    ; a sign in.

; do the long division, part II

        mov     dx,si
        mov     ax,di
        mov     di,uqNumerator.uq0
        cmp     dx,cx                   ; we only know that DX:AX < CX:BX!
        jb      second_division_is_safe
        mov     ulQuotient.lo,0         ; i.e. 10000h, our guess is too big
        mov     si,ax
        sub     si,bx                   ; ... remainder is negative
        jmp     short second_adjuster
second_division_is_safe:
        div     cx
        mov     ulQuotient.lo,ax
        mov     si,dx
        mul     bx                      ; fix remainder for low order term
        sub     di,ax
        sbb     si,dx
        jnc     second_adjuster_done
second_adjuster:
        dec     ulQuotient.lo
        add     di,bx
        adc     si,cx
        jnc     second_adjuster
second_adjuster_done:
        mov     ax,ulQuotient.lo
        mov     dx,ulQuotient.hi

; unshift the remainder in SI:DI

unshift_remainder:
        mov     cx,cShift
        jcxz    remainder_unshifted
        mov     bx,-1
        shr     bx,cl
        not     bx
        shr     di,cl
        ror     si,cl
        and     bx,si
        or      di,bx
        xor     si,bx
remainder_unshifted:
        mov     cx,si
        mov     bx,di
ifdef WIMP
        or      ax,ax           ; clear OF
endif
qdiv_exit:
cEnd

;---------------------------Public-Routine------------------------------;
; divide
;
; Computes the FIXED quotient of two longs.  Works by noting that all
; we want is 16 bits of fraction tacked onto the quotient.  To get this,
; we multiply the numerator by 10000h, and let iqdiv do the rest.
;
; Entry:
;       DX:AX = long or FIXED numerator
;       CX:BX = long or FIXED denominator
; Returns:
;       OF = 0
;         DX:AX = FIXED quotient
; Error Returns:
;       OF = 1          ;!!! for now !!!
; Registers Destroyed:
;       BX,CX
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

if 0; ?386
    cProc   divide,<PUBLIC,FAR,NODATA,NONWIN>
	    ParmD  fx1
	    ParmD  fx2
    cBegin
	    mov     eax,fx1
            mov     ebx,fx2         ; ebx     = denominator
            xor     edx,edx
            shld    edx,eax,16      ; edx:eax = numerator * 10000h
            shl     eax,16
            idiv    ebx
            EAXtoDXAX
    cEnd
else
    cProc   divide,<PUBLIC,FAR,NODATA,NONWIN>,<di,si>
	    ParmD  fx1
	    ParmD  fx2
    cBegin
	    mov     ax,fx1.lo
	    mov     dx,fx1.hi

	    mov     bx,fx2.lo
	    mov     cx,fx2.hi

	    mov     si,cx
	    mov     di,bx	    ; SI:DI = denominator
	    mov     bx,ax
	    mov     cx,dx
	    mov     ax,cx
	    cwd
	    xor     ax,ax	    ; DX:CX:BX:AX = numerator * 10000h
	    call    iqdiv
    cEnd
endif

;---------------------------Private-Macro-------------------------------;
; set_ov
;
; Sets the overflow flag
;
; Entry:
;       reg = word register to use for scratch
; Returns:
;       OF set
; Error Returns:
;       none
; Registers Destroyed:
;       reg
;-----------------------------------------------------------------------;

set_ov  macro   reg
        mov     reg,8000h
        dec     reg
        endm

;---------------------------Public-Routine------------------------------;
; square_root
;
; Computes the Fixed square root of a Fixed.  This algorithm
; comes from Knuth D.E; Metafont:The Program (Addison-Weseley, 1986)
; Part 8:Algebraic and Transcendental Functions.
;
; Notation used below:
;
; Entry:
;       DX:AX = number to square root
; Returns:
;         DX:AX = square root of input
; Error Returns:
;       OF = 1
; Registers Destroyed:
;       BX,CX
; Registers Preserved:
;       DS,ES,SI,DI
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   fxsqrt,<PUBLIC,FAR,NODATA,NONWIN>,<>
        ParmD   fx
cBegin
	mov	ax, fx.lo
        mov     dx, fx.hi

        cCall   square_root

        mov     dh,dl           ; DX:AX *= sqrt(1000h)
        mov     dl,ah
        mov     ah,al
        xor     al,al
cEnd

cProc   ulsqrt,<PUBLIC,FAR,NODATA,NONWIN>,<>
        ParmD   ul
cBegin
        mov     ax, ul.lo
        mov     dx, ul.hi

        cCall   square_root
cEnd

;
;   compute the square root of EDX:EAX
;
;   retult returned in EAX and DX:AX
;
;   We dont have a 64-bit square_root function
;   so approximate it by dividing by 4 until we can use our
;   32-bit function
;

cProc   sqrt64,<PUBLIC,NEAR,NODATA,NONWIN>,<>
cBegin
        xor     cx,cx

test_less_than_32:
        or      edx,edx             ; is it less than 32-bit?
        jz      less_than_32

        inc     cx

        shrd    eax,edx,2           ; divide by 4
        shr     edx,2

        jmp     short test_less_than_32

less_than_32:
        ;;
        ;;  EDX:EAX is now less than 32 bits, CX contains count of
        ;;  divisions by 4 we had to do to get here.  call square_root
        ;;  and then re-normalize
        ;;

        push    cx
        EAXtoDXAX
        call    square_root
        pop     cx
        jcxz    sqrt64_exit

@@:     shl     ax,1
        rcl     dx,1
        loop    @b

sqrt64_exit:
        DXAXtoEAX
cEnd

cProc   square_root,<PUBLIC,NEAR,NODATA,NONWIN>,<SI,DI>
;;        ParmD   fx
cBegin
;;        mov     ax, fx.lo
;;        mov     dx, fx.hi
; check for < 1 since this algorithm returns 0000:0001

        or      dx,dx
        jnz     non_zero_arg
        cmp     ax,1
        ja      non_zero_arg
        jmp     square_root_exit

negative_arg:                           ; can't have number negative
        xor     ax,ax
        cwd
        set_ov  bx
        jmp     square_root_exit

non_zero_arg:
        cCall   ulNormalize
;;;;;;;;jcxz    negative_arg
        jz      exit_relay
        shr     cx,1
        jc      add_shifts_only
        shr     dx,1
        rcr     ax,1
        dec     cx
add_shifts_only:

;;
;; use 23 for a FIXED square_root, and 15 for a ULONG square_root!
;;

if 0
        mov     ch,23                   ; FIXED square_root
else
        mov     ch,15                   ; ULONG square_root
endif

        sub     ch,cl
        mov     cl,8
        sub     ch,cl
        jg      more_than_8             ; CL = Min(count,8)
        add     cl,ch
        xor     ch,ch                   ; CH = Max(count-8,0)
more_than_8:


        xchg    ax,si                   ; DI:SI = x
        mov     di,dx

        xor     ax,ax                   ; AX = y = lower bound = 0

        mov     bx,2                    ; BX = q = estimate of root = 2

        add     si,si                   ; intiialize y = top bit of x
        adc     di,di
        adc     ax,ax

first_8_loop:
        add     si,si                   ; move 2 bits from top
        adc     di,di                   ; of x to bottom of y
        adc     ax,ax
        assert  NC

        add     si,si
        adc     di,di
        adc     ax,ax
        assert  NC

        sub     ax,bx                   ; AX = y - q
        jle     y_neg_or_zero_8

;!!!CR loops can be cleaned up some

        add     bx,bx                   ; BX = 2q
        sub     ax,bx                   ; AX = y - 3q
        jg      y_greater_q_8

        add     ax,bx                   ; AX = y - q

        dec     cl
        jnz     first_8_loop

        jcxz    first_8_was_enough
        jmp     short done_with_1st_8

y_greater_q_8:
        add     bx,2                    ; BX = 2q + 2 (or 2q if jg not taken)

        dec     cl
        jnz     first_8_loop

        or      ch,ch
        jnz     done_with_1st_8

first_8_was_enough:
        xchg    ax,bx                   ; DX:AX = q
        xor     dx,dx
        shr     ax,1                    ; root = q >> 1
        or      ax,ax                   ; clear overflow flag
exit_relay:
        jmp     square_root_exit

y_neg_or_zero_8:                       ; come here if y <= 0
        dec     bx
        add     bx,bx                   ; BX = 2q - 2
        add     ax,bx                   ; AX = y + q - 2

        dec     cl
        jnz     first_8_loop
        jcxz    first_8_was_enough

;si is now empty, di contains all signifigant bits
done_with_1st_8:

        mov     cl,4
        sub     ch,cl
        jg      second_4_loop           ; CL = Min(count,4)
        add     cl,ch
        xor     ch,ch                   ; CH = Max(count-8,0)

second_4_loop:
                                        ; move 2 bits from top
        add     di,di                   ; of x to bottom of y
        adc     ax,ax
        assert  NC

        add     di,di
        adc     ax,ax
        assert  NC

        sub     ax,bx                   ; AX = y - q
        jle     y_neg_or_zero_2nd_4

        add     bx,bx                   ; BX = 2q

;!!!CR use compare and jump rather than sub, branch, and restore
        sub     ax,bx                   ; AX = y - 3q
        jg      y_greater_q_2nd_4

        add     ax,bx                   ; AX = y - q

        dec     cl
        jnz     second_4_loop

        jcxz    second_4_was_enough
        jmp     short done_with_2nd_4

y_greater_q_2nd_4:
        add     bx,2                    ; BX = 2q + 2 (or 2q if jg not taken)

        dec     cl
        jnz     second_4_loop

        or      ch,ch
        jnz     done_with_2nd_4


second_4_was_enough:
        xchg    ax,bx                   ; DX:AX = q
        xor     dx,dx
        shr     ax,1                    ; root = q >> 1
        or      ax,ax                   ; clear overflow flag
        jmp     square_root_exit


y_neg_or_zero_2nd_4:                       ; come here if y <= 0
        dec     bx
        add     bx,bx                   ; BX = 2q - 2
        add     ax,bx                   ; AX = y + q - 2

        dec     cl
        jnz     second_4_loop
        jcxz    second_4_was_enough

done_with_2nd_4:

        xor     dx,dx                   ; DX:AX = y   SI:BX = q

        mov     cl,4
        sub     ch,cl
        jg      third_4_loop             ; CL = Min(count,4)
        add     cl,ch
        xor     ch,ch                   ; CH = Max(count-13,0)

third_4_loop:
;!!!CR Add comment noting that 32 bit math is being used now
                                        ; move 2 bits from top
        add     di,di                   ; of x to bottom of y
        adc     ax,ax
        adc     dx,dx
        assert  NC

        add     di,di
        adc     ax,ax
        adc     dx,dx
        assert  NC

        sub     ax,bx                   ; DX:AX = y - q
        sbb     dx,si
        js      y_negative_3rd_4
        jz      y_might_be_zero_3rd_4

sorry_y_not_zero_3rd_4:
        add     bx,bx
        adc     si,si                   ; SI:BX = 2q
        sub     ax,bx                   ; DX:AX = y - 3q
        sbb     dx,si
        jg      y_greater_q_3rd_4
        jl      y_less_or_equal_q_3rd_4
        or      ax,ax
        jnz     y_greater_q_3rd_4

y_less_or_equal_q_3rd_4:
        add     ax,bx                   ; DX:AX = y - q
        adc     dx,si

        dec     cl
        jnz     third_4_loop

        jcxz    third_4_was_enough
        jmp     short done_with_3rd_4

y_greater_q_3rd_4:
        add     bx,2
        adc     si,0

        dec     cl
        jnz     third_4_loop

        or      ch,ch
        jnz     done_with_3rd_4

third_4_was_enough:
        xchg    ax,bx                   ; DX:AX = q
        mov     dx,si
        shr     dx,1                    ; root = q >> 1
        rcr     ax,1
        or      ax,ax                   ; clear overflow flag
	jmp	square_root_exit ;;; short!

y_might_be_zero_3rd_4:
        or      ax,ax
        jnz     sorry_y_not_zero_3rd_4

y_negative_3rd_4:                       ; come here if y <= 0

        sub     bx,1
        sbb     si,0
        add     bx,bx                   ; DL:BX = 2q - 2
        adc     si,si

        add     ax,bx
        adc     dx,si                   ; DH:AX = y + q - 2

        dec     cl
        jnz     third_4_loop
        jcxz    third_4_was_enough


done_with_3rd_4:

        xchg    ch,cl

last_7_loop:
        add     ax,ax
        adc     dx,dx

        add     ax,ax
        adc     dx,dx

        sub     ax,bx                   ; DX:AX = y - q
        sbb     dx,si
        js      y_negative_last_7
        jz      y_might_be_zero_last_7

;!!!CR Clean up the exit so that a single exit point is used.

sorry_y_not_zero_last_7:
        add     bx,bx
        adc     si,si                   ; SI:BX = 2q
        sub     ax,bx                   ; DX:AX = y - 3q
        sbb     dx,si
        jg      y_greater_q_last_7
        jl      y_less_or_equal_q_last_7
        or      ax,ax
        jnz     y_greater_q_last_7

y_less_or_equal_q_last_7:
        add     ax,bx                   ; DX:AX = y - q
        adc     dx,si

        loop    last_7_loop

        xchg    ax,bx                   ; DX:AX = q
        mov     dx,si
        shr     dx,1                    ; root = q >> 1
        rcr     ax,1
        or      ax,ax                   ; clear overflow flag
        jmp     short square_root_exit

y_greater_q_last_7:
        add     bx,2
        adc     si,0

        loop    last_7_loop

        xchg    ax,bx                   ; DX:AX = q
        mov     dx,si
        shr     dx,1                    ; root = q >> 1
        rcr     ax,1
        or      ax,ax                   ; clear overflow flag
        jmp     short square_root_exit

y_might_be_zero_last_7:
        or      ax,ax
        jnz     sorry_y_not_zero_last_7

y_negative_last_7:                       ; come here if y <= 0

        sub     bx,1
        sbb     si,0
        add     bx,bx                   ; DL:BX = 2q - 2
        adc     si,si

        add     ax,bx
        adc     dx,si                   ; DH:AX = y + q - 2

        loop    last_7_loop

        xchg    ax,bx                   ; DX:AX = q
        mov     dx,si
        shr     dx,1                    ; root = q >> 1
        rcr     ax,1

        or      ax,ax                   ; clear overflow flag
square_root_exit:
cEnd

;---------------------------Public-Routine------------------------------;
; QUAD far_big_cross_product(ptlA,ptlB)
;
;       See big_cross_product
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   far_big_cross_product,<PUBLIC,FAR,NONWIN,NODATA>
        parmQ   ptlA
        parmQ   ptlB
cBegin
        cCall   big_cross_product,<ptlA,ptlB>
cEnd

;---------------------------Public-Routine------------------------------;
; QUAD big_cross_product(ptlA,ptlB)
;
; Computes the cross product A B - A B  with total accuracy.
;                             x y   y x
;
; Answer is returned in DX:CX:BX:AX.
;
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   big_cross_product,<PUBLIC,NEAR>
        parmQ   ptlA
        parmQ   ptlB
        localQ  Result
cBegin

; first do A B
;           y x

        mov     ax,ptlA.ptl_y.lo
        mov     dx,ptlA.ptl_y.hi
        mov     bx,ptlB.ptl_x.lo
        mov     cx,ptlB.ptl_x.hi
        call    idmul
        mov     Result.uq0,ax
        mov     Result.uq1,bx
        mov     Result.uq2,cx
        mov     Result.uq3,dx

; now do A B
;         x y

        mov     ax,ptlA.ptl_x.lo
        mov     dx,ptlA.ptl_x.hi
        mov     bx,ptlB.ptl_y.lo
        mov     cx,ptlB.ptl_y.hi
        call    idmul
        sub     ax,Result.uq0
        sbb     bx,Result.uq1
        sbb     cx,Result.uq2
        sbb     dx,Result.uq3
cEnd

;---------------------------Public-Routine------------------------------;
; QUAD far_big_dot_product(ptlA,ptlB)
;
;       See big_dot_product
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   far_big_dot_product,<PUBLIC,FAR,NONWIN,NODATA>
        parmQ   ptlA
        parmQ   ptlB
cBegin
        cCall   big_dot_product,<ptlA,ptlB>
cEnd

;---------------------------Public-Routine------------------------------;
; QUAD big_dot_product(ptlA,ptlB)
;
; Computes the dot product A B + A B  with total accuracy.
;                           x x   y y
;
; Answer is returned in DX:CX:BX:AX.
;
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   big_dot_product,<PUBLIC,NEAR>
        parmQ   ptlA
        parmQ   ptlB
        localQ  Result
cBegin

; first do A B
;           x x

        mov     ax,ptlA.ptl_x.lo
        mov     dx,ptlA.ptl_x.hi
        mov     bx,ptlB.ptl_x.lo
        mov     cx,ptlB.ptl_x.hi
        call    idmul
        mov     Result.uq0,ax
        mov     Result.uq1,bx
        mov     Result.uq2,cx
        mov     Result.uq3,dx

; now do A B
;         y y

        mov     ax,ptlA.ptl_y.lo
        mov     dx,ptlA.ptl_y.hi
        mov     bx,ptlB.ptl_y.lo
        mov     cx,ptlB.ptl_y.hi
        call    idmul
        add     ax,Result.uq0
        adc     bx,Result.uq1
        adc     cx,Result.uq2
        adc     dx,Result.uq3
cEnd

;---------------------------Public-Routine------------------------------;
; ULONG lDot4D(ptA,ptB)
;
; Computes the dot product Ax Bx + Ay By + Az Bz  with total accuracy.
;
; Answer is returned in DX:AX:CX.
;
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   lDot4D,<PUBLIC,FAR,PASCAL,NODATA,NONWIN>
        parmD   Pw
        parmD   Pz
        parmD   Py
        parmD   Px

        parmD   Qw
        parmD   Qz
        parmD   Qy
        parmD   Qx

cBegin
        xor     ebx,ebx             ; accum result in EBX:ECX
        mov     ecx,ebx

irp     xyz,<x,y,z>
        mov     eax,P&xyz
        imul    Q&xyz

        add     ecx,eax
        adc     ebx,edx
endm
        ;;
        ;;  return result in DX:AX:CX
        ;;
        shr     ecx,16
        mov     eax,ebx
        EAXtoDXAX
cEnd

;---------------------------Public-Routine------------------------------;
; fxCross4D(pvV,vP,vQ)
;
; Computes the cross product of vector vP and vector vQ
;
; result is stored in *pvV
;
;    v.x =   P.y*Q.z - P.z*Q.y
;    v.y =  -P.x*Q.z - P.z*Q.x
;    v.z =   P.x*Q.y - P.y*Q.x
;
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   fxCross4D,<PUBLIC,FAR,PASCAL,NODATA,NONWIN>
        parmD   ppR

        parmD   Pw
        parmD   Pz
        parmD   Py
        parmD   Px

        parmD   Qw
        parmD   Qz
        parmD   Qy
        parmD   Qx

cBegin
        les     di,ppR
;
;    v.x =   P.y*Q.z - P.z*Q.y
;
        mov     eax,Py
        mov     edx,Qz
        imul    edx
        mov     ebx,edx
        mov     ecx,eax
        mov     eax,Pz
        mov     edx,Qy
        imul    edx
        sub     ecx,eax
        sbb     ebx,edx

        shrd    ecx,ebx,16
        mov     es:[di].x,ecx
;
;    v.y =  -P.x*Q.z - P.z*Q.x
;
        mov     eax,Px
        mov     edx,Qz
        neg     eax
        imul    edx
        mov     ebx,edx
        mov     ecx,eax
        mov     eax,Pz
        mov     edx,Qx
        imul    edx
        sub     ecx,eax
        sbb     ebx,edx

        shrd    ecx,ebx,16
        mov     es:[di].y,ecx
;
;    v.z =   P.x*Q.y - P.y*Q.x
;
        mov     eax,Px
        mov     edx,Qy
        imul    edx
        mov     ebx,edx
        mov     ecx,eax
        mov     eax,Py
        mov     edx,Qx
        imul    edx
        sub     ecx,eax
        sbb     ebx,edx

        shrd    ecx,ebx,16
        mov     es:[di].z,ecx

        xor     eax,eax
        mov     es:[di].w,eax
cEnd

;---------------------------Public-Routine------------------------------;
; FIXED fxLength3D(Vx,Vy,Vz)
;
; Computes length of the vector V as a FIXED number
;
; Answer is returned in DX:AX:CX.
;
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc   fxLength3D,<PUBLIC,FAR,PASCAL,NODATA,NONWIN>
        parmD   Vx
        parmD   Vy
        parmD   Vz
cBegin
        xor     ebx,ebx             ; accum sum of squares in EBX:ECX
        mov     ecx,ebx

irp     xyz,<x,y,z>
        mov     eax,V&xyz           ; EAX = Vi
        imul    eax                 ; EDX:EAX = Vi^2

        add     ecx,eax             ; EBX:ECX += Vi^2
        adc     ebx,edx
endm
        mov     eax,ecx
        mov     edx,ebx
        ;;
        ;;  EDX:EAX now contains sum of squares*1000h, take the square_root

        cCall   sqrt64              ; calculates sqrt(EDX:EAX) ==> DX:AX
cEnd

sEnd	MathSeg

.286

if ?386
    .386
endif

sBegin TrigSeg
	assumes cs,TrigSeg

	include vttable.inc

;----------------------------Public-Routine-----------------------------;
; CalcSine
;
; Computes sine(angle).
;
; Entry:
;       DX:AX = angle UNSIGNED FIXED
;
; Returns:
;       BX = 1 => OK.
;       DX:AX = result.
;
; Error Returns:
;       BX = 0.
;
; Registers Destroyed:
;       CX, flags.
;
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing
        
cProc	CalcSine,<FAR,PUBLIC>
	ParmD	fx
cBegin
	mov	ax,fx.lo
	mov	dx,fx.hi

        sub     dx,90
        jnc     no_wrap
        add     dx,360
no_wrap:
	push	dx
	push	ax
;;;	   errn$   CalcCosine
	call	FAR PTR CalcCosine
cEnd

;----------------------------Public-Routine-----------------------------;
; CalcCosine
;
; Computes Cosine(angle).
;
; Entry:
;       DX:AX = angle UNSIGNED FIXED
;
; Returns:
;       BX = 1 => OK.
;       DX:AX = result.
;
; Error Returns:
;       BX = 0.
;
; Registers Destroyed:
;       CX, flags.
;
;-----------------------------------------------------------------------;

;----------------------------Pseudo-Code--------------------------------;
; INTEGER FAR PASCAL CalcCosine(FIXED)
; short R;
; short Angle;
; {
;   negateflag = 0;         // don't want result negated 
;
;   AX = Angle;
;   AX -= 900;
;
;   if (Angle <= 900)
;   {
;       jump to negate_cos_result;     // return RTable(R, 900 - Angle) 
;   }
;
;   negateflag = -1;        // want result negated 
;
;   if (Angle <= 1800)
;   {
;       jump to no_cos_negation;     // return -RTable(R, Angle - 900) 
;   }
;
;   AX -= 1800;
;
;   if (Angle <= 2700)
;   {
;       jump to negate_cos_result;     // return -RTable(R, 2700 - Angle) 
;   }
;
;   negateflag = 0;         // dont want result negated 
;
;   jump to no_cos_negation;        // return RTable(R, Angle - 2700) 
;
;negate_cos_result:
;   negate AX;
;
;no_cos_negation:
;   jump to RTable;
; }
;-----------------------------------------------------------------------;

        assumes ds,nothing
        assumes es,nothing

cProc	CalcCosine,<FAR,PUBLIC>
	ParmD	fx
cBegin
	mov	ax,fx.lo
	mov	dx,fx.hi

        xor     cx,cx                   ;Don't want result negated
        sub     dx,90                   ; if angle <= 90
        jg      greater_than_90
        jl      negate_cos_result       ;  return r_table(R,90 - angle)
        or      ax,ax
        jz      negate_cos_result
greater_than_90:

        dec     cx                      ;Want result negated
        mov     bx,dx                   ;If Angle <= 180
        sub     bx,90                   ;  return -RTable(R,Angle-90)
        jg      greater_than_180
        jl      no_cos_negation
        or      ax,ax
        jz      no_cos_negation
greater_than_180:

        sub     dx,180                  ;If Angle < 270
        jl      negate_cos_result       ;  return -RTable(R,270-Angle)

        inc     cx                      ;Don't want result negated
        jmp     short no_cos_negation   ;  return RTable(R,Angle-270)

negate_cos_result:
        neg     ax                      ; do the negation.
        adc     dx,0
        neg     dx

no_cos_negation:
        push    cx                      ; Save negation flag

	push	di			; Interpolation
	mov	di,dx

	imul	di,10

	shl	di,1			; make it a word index.
	mov	dx,cs:vttable[di][0]	; Get base value
if 0
        push    dx                      ; save base value.
	mov	cx,cs:vttable[di][2]	; find (difference between adjacent
        sub     cx,dx
        xor     dx,dx
	xor	bx,bx

	push	dx
	push	ax
	push	cx
	push	bx
	call	multiply		; call	far_multiply

        pop     ax
        add     dx,ax
endif
        xor     ax,ax
        mov     cx,10000
        xor     bx,bx

	push	dx
	push	ax
	push	cx
	push	bx
        call    divide

        pop     di
        pop     cx                      ; Negate result if needed
        jcxz    r_table_exit

        neg     ax                      ; do the negation.
        adc     dx,0
        neg     dx
r_table_exit:

cEnd

sEnd TrigSeg

       end
