; mem.asm:
;
;     (C) Copyright Microsoft Corp. 1991.  All rights reserved.
;
;     You have a royalty-free right to use, modify, reproduce and 
;     distribute the Sample Files (and/or any modified version) in 
;     any way you find useful, provided that you agree that 
;     Microsoft has no warranty obligations or liability for any 
;     Sample Application Files which are modified. 
;
; masm -Mx -Zi -DSEGNAME=????? mem.asm
;
	TITLE MEM.ASM

;****************************************************************
;* MEM.ASM - Assembly mem-fill and mem-copy routines		*
;*		for 80286 and 80386				*
;****************************************************************
;

?PLM=1	    ; PASCAL Calling convention is DEFAULT
?WIN=1	    ; Windows calling convention
?386=0	    ; Use 386 code?

.xlist
include cmacros.inc
include windows.inc
.list

	externA	    __WinFlags	    ; in KERNEL
	externA	    __AHINCR	    ; in KERNEL
	externA	    __AHSHIFT	    ; in KERNEL

; The following structure should be used to access high and low
; words of a DWORD.  This means that "word ptr foo[2]" -> "foo.hi".

LONG	struc			; overlay for a DWORD accessed as two words
lo	dw	?		; low-order word  (byte offset 0)
hi	dw	?		; high-order word (byte offset 2)
LONG	ends

FARPOINTER	struc		; overlay for a far pointer accessed as two words
off	dw	?		; offset word	(byte offset 0)
sel	dw	?		; selector word (byte offset 2)
FARPOINTER	ends

; -------------------------------------------------------
;		DATA SEGMENT DECLARATIONS
; -------------------------------------------------------

; Code segment name: taken from -DSEGNAME=... on the command line,
; defaulting to _TEXT when none is given.
ifndef SEGNAME
    SEGNAME equ <_TEXT>
endif

; Declare the code segment under the logical name CodeSeg
; (word aligned, public, class CODE).
createSeg %SEGNAME, CodeSeg, word, public, CODE

; This module defines no data of its own; the Data segment is opened
; and closed empty.
sBegin Data
sEnd Data

; Everything below is assembled into CodeSeg.
sBegin CodeSeg
assumes cs,CodeSeg
assumes ds,DATA

;---------------------------Public-Routine------------------------------;
; fmemfill
;
;   fills memory with a bunch of bytes
;
; Entry:
;	lpMem	LPSTR to memory to fill
;	cbMem	DWORD count of bytes to fill
;	bFill	BYTE  byte to fill
;
; Returns:
;	nothing
; Error Returns:
;	None
; Registers Preserved:
;	BP,DS,SI,DI
; Registers Destroyed:
;	AX,BX,CX,DX,FLAGS
; Calls:
;	fmemfill286 or fmemfill386
;-----------------------------------------------------------------------;

; fmemfill - entry point that only selects a worker routine.
; It is declared <nogen> and touches nothing but AX, so the caller's
; arguments (listed in the commented-out ParmX lines) are passed on
; unchanged to fmemfill286 / fmemfill386, which declare the same three
; parameters.
cProc fmemfill,<FAR,PUBLIC,NODATA>,<>
;	 ParmD	 lpMem
;	 ParmD	 cbMem
;	 ParmB	 bFill
cBegin	<nogen>
	mov	ax,__WinFlags		; AX = __WinFlags (imported from KERNEL)
	test	ax,WF_CPU286		; is the WF_CPU286 bit set?
	jnz	fmemfill286		;  yes - use the 16-bit worker
; Otherwise execution runs straight on into fmemfill386, which must
; therefore be the very next procedure in this segment.
; NOTE(review): errn$ is presumably the cmacros check that the named
; label is at the current location - confirm against cmacros.inc.
	errn$	fmemfill386
cEnd <nogen>

;-----------------------------------------------------------------------;
; fmemfill386
;
;   Worker for fmemfill that uses 32-bit registers.  Fills cbMem bytes
;   at lpMem with bFill in three steps:
;	1. 0..3 single bytes, until EDI is DWORD aligned
;	2. as many whole DWORDs as remain
;	3. 0..3 trailing bytes
;
; Entry:
;	lpMem	far pointer to the memory to fill
;	cbMem	DWORD count of bytes to fill (0 is allowed: nothing is
;		written)
;	bFill	byte value to store
; Returns:
;	nothing
; Registers Preserved:
;	EDI (saved/restored here), plus whatever cBegin/cEnd save
; Registers Destroyed:
;	EAX,EBX,ECX,ES,FLAGS
;
; NOTE(review): the fill addresses memory as ES:[EDI] with a 32-bit
; offset, so it presumably relies on the selector in lpMem having a
; limit that covers the whole block - confirm for the target mode.
;-----------------------------------------------------------------------;
cProc fmemfill386,<FAR,PUBLIC,NODATA>,<>
	ParmD	lpMem
	ParmD	cbMem
	ParmB	bFill
cBegin
	.386
	push	edi			; full 32-bit EDI is used below

	cld
	mov	bl, byte ptr bFill	    ; repeat the byte through EAX
	mov	bh, bl
	mov	ax,bx
	shl	eax,16
	mov	ax,bx			; EAX = bFill in all four bytes

	les	di, lpMem
	movzx	edi,di			; EDI = 16-bit offset, zero extended
	mov	ebx,cbMem		; EBX = bytes still to fill

	mov	ecx,edi
	neg	ecx
	and	ecx,0011b		; ECX = bytes up to next DWORD boundary
	cmp	ecx,ebx			; never fill more than was asked for:
	jbe	short mf386_lead	;  without this clamp a cbMem smaller
	mov	ecx,ebx			;  than the lead count (including 0)
mf386_lead:				;  made EBX wrap to a huge value
	sub	ebx,ecx
	rep	stos byte ptr es:[edi]	    ; note can optimize WORD/DWORD writes
; The 67H byte below is an address-size prefix that attaches to the
; next instruction (a register-only MOV), where it has no operand to
; change.
; NOTE(review): presumably a workaround for an early-80386 erratum
; around string instructions using 32-bit addressing - confirm.
	db	67H		; Fix strange 386 bug
	mov	ecx,ebx
	shr	ecx,2			; ECX = whole DWORDs remaining
	rep	stos dword ptr es:[edi]
	db	67H		; Fix strange 386 bug
	mov	ecx,ebx
	and	ecx,0011b		; ECX = 0..3 trailing bytes
	rep	stos byte ptr es:[edi]
	db	67H		; Fix strange 386 bug (prefixes the POP)

mf386_exit:
	pop	edi
	.286
cEnd

;-----------------------------------------------------------------------;
; fmemfill286
;
;   Worker for fmemfill that uses only 16-bit registers.  Fills cbMem
;   bytes at lpMem with bFill, one 64K segment at a time, adding
;   __AHINCR to ES each time DI wraps past the end of a segment.
;
;   Layout of the fill:
;	- one leading byte if the start offset is odd
;	- a run of WORD stores (count kept in DX:BX)
;	- one trailing byte if an odd byte is left over
;
; Entry:
;	lpMem	far pointer to the memory to fill
;	cbMem	DWORD count of bytes to fill (0 = nothing to do)
;	bFill	byte value to store
; Returns:
;	nothing
; Registers Destroyed:
;	AX,BX,CX,DX,ES,FLAGS
;-----------------------------------------------------------------------;
cProc fmemfill286,<FAR,PUBLIC,NODATA>,<di>
	ParmD	lpMem
	ParmD	cbMem
	ParmB	bFill
cBegin
	cld
	les	di, lpMem
	mov	al, byte ptr bFill
	mov	ah, al			; AX = fill byte in both halves

	mov	bx,cbMem.lo		; dx:bx = cbMem
	mov	dx,cbMem.hi

	mov	cx,bx			; cbMem == 0?
	or	cx,dx			;  Yes == get out
	jz	mf_exit

	mov	cx,di			; should we move the first byte
	and	cx,1			; CX = 1 if start offset is odd
	sub	bx,cx			; take the leading byte off the count
	sbb	dx,0

	shr	dx,1			; divide by two for WORD count
	rcr	bx,1
	pushf				; save CF = "one odd byte left at the end"

	test	di,1
	jz	mf_loop

	stosb				; move first byte

	or	di,di			; did that byte end the segment?
	jz	mf_next_seg
mf_loop:
	mov	cx,di			; calc remaining words in seg
	shr	cx,1
	neg	cx
	add	cx,8000h		; CX = (10000h - DI) / 2
	sub	bx,cx			; DX:BX -= words left in this segment
	sbb	dx,0
	jl	mf_last_seg		; went negative: fill ends in this seg

	rep	stosw			; fill to the end of the segment, DI -> 0

mf_next_seg:
	mov	cx,bx			; is dx:bx == 0?
	or	cx,dx			; yes go check last byte
	jz	mf_last_byte

	mov	cx,es			; step ES to the next 64K tile
	add	cx,__AHINCR
	mov	es,cx

	jmp	mf_loop
mf_last_seg:
	add	cx,bx			; CX = words that were really left
	rep	stosw
;
; The WORD run stopped short of the end of this segment, so ES is still
; correct for the trailing byte no matter what DI holds.  (DI can be 0
; here without any wrap: offset 0 with cbMem == 1.  Testing DI on this
; path used to send that byte to the next selector.)
;
	popf				; CF = trailing odd byte
	jnc	mf_exit
	jmp	short mf_ack
;
; Reached only with DI == 0 after the segment was filled to its end:
; a trailing byte belongs to the next tile.
;
mf_last_byte:
	popf				; CF = trailing odd byte
	jnc	mf_exit
	or	di,di
	jnz	mf_ack
	mov	cx,es
	add	cx,__AHINCR
	mov	es,cx
mf_ack:
	stosb
mf_exit:
cEnd

;---------------------------Public-Routine------------------------------;
; hmemcpy
;
;   copy memory
;
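; Entry (in the order the ParmD lines below declare them):
;	lpDst	HPSTR to copy to
;	lpSrc	HPSTR to copy from
;	cbMem	DWORD count of bytes to move
;
;	NOTE: an earlier version of this note said overlapped copies work
;		only if lpSrc.sel == lpDst.sel.  That was wrong: both
;		workers choose the copy direction from the full
;		selector:offset difference, so overlap is handled
;		across selectors as well.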
;
; Returns:
;	destination pointer
; Error Returns:
;	None
; Registers Preserved:
;	BP,DS,SI,DI
; Registers Destroyed:
;	AX,BX,CX,DX,FLAGS
; Calls:
;	fmemcpy286 or fmemcpy386
;-----------------------------------------------------------------------;

; hmemcpy - entry point that only selects a worker routine.
; It is declared <nogen> and touches nothing but AX, so the caller's
; arguments (listed in the commented-out ParmD lines) are passed on
; unchanged to fmemcpy386 / fmemcpy286, which declare the same three
; parameters.
cProc hmemcpy,<FAR,PASCAL,PUBLIC,NODATA>,<>
;	 ParmD	 lpDst
;	 ParmD	 lpSrc
;	 ParmD	 cbMem
cBegin	<nogen>
	mov	ax,__WinFlags		; AX = __WinFlags (imported from KERNEL)
	test	ax,WF_CPU286		; is the WF_CPU286 bit set?
	jz	fmemcpy386		;  no  - use the 32-bit worker
	jmp	FAR PTR	fmemcpy286	;  yes - use the 16-bit worker
cEnd <nogen>

;
; fmemcpy386 - hmemcpy worker that uses 32-bit registers.
;
;   Copies cbMem bytes from lpSrc to lpDst as DWORDs plus 0..3 bytes.
;   The copy runs backwards (from the ends of the blocks) whenever the
;   source lies below the destination, and forwards otherwise.
;   Returns lpDst in DX:AX.  A count of zero copies nothing.
;
;   NOTE(review): the copy addresses memory as DS:[ESI] / ES:[EDI] with
;   32-bit offsets, so it presumably relies on the selectors having
;   limits that cover the whole blocks - confirm for the target mode.
;
cProc fmemcpy386,<FAR,PASCAL,PUBLIC,NODATA>,<ds>
	ParmD	lpDst
	ParmD	lpSrc
	ParmD	cbMem
cBegin
	.386
	push	edi		; the save list above only covers DS, so the
	push	esi		; full 32-bit index registers are saved here
	cld

	mov	ecx,cbMem
	jecxz	mc386_exit	; nothing to copy

	movzx	edi,di		; clear the high halves first: the LDS/LES
	movzx	esi,si		; below load only the low 16 bits
	lds	si,lpSrc
	les	di,lpDst
;
; calculate difference of pointers in "selector" space
;
	mov	ax,si		; DX:AX = lpSrc
	mov	dx,ds

	mov	bx,es		; BX = selector of ptr B

	mov	cx,__AHSHIFT	; number of selector bits per 64K 'segment'
	shr	dx,cl		; linearize ptr A
	shr	bx,cl		; linearize ptr B
;
; DX and BX contain normalized selectors
;
	sub	ax,di
	sbb	dx,bx		; do long subtraction.

	mov	ecx,cbMem	; reload the count (CX was used for the shift)

	or	dx,dx		; sign of (lpSrc - lpDst)
	jns	mc_copy_forward	; source at or above destination

;
; Source is below destination: copy from the top down.
;
	std
	add	edi,ecx
	add	esi,ecx

	sub	edi,4		; point at the last DWORD of each block
	sub	esi,4

	push	ecx
	shr	ecx,2		; get count in DWORDs
	rep	movs dword ptr es:[edi], dword ptr ds:[esi]
; The 67H byte is an address-size prefix that attaches to the next
; instruction.
; NOTE(review): presumably a workaround for an early-80386 erratum
; around string instructions using 32-bit addressing - confirm.
	db	67H		; Fix strange 386 bug
	add	edi,3		; from "next DWORD down" to "next byte down"
	add	esi,3
	pop	ecx
	and	ecx,3		; 0..3 bytes left at the bottom
	rep	movs byte ptr es:[edi], byte ptr ds:[esi]
	db	67H		; Fix strange 386 bug
	jmp	mc386_exit

mc_copy_forward:
	push	ecx
	shr	ecx,2		; get count in DWORDs
	rep	movs dword ptr es:[edi], dword ptr ds:[esi]
	db	67H		; same prefix workaround as above
	pop	ecx
	and	ecx,3		; 0..3 trailing bytes
	rep	movs byte ptr es:[edi], byte ptr ds:[esi]
	db	67H		; same prefix workaround; it lands on the NOP
	nop
mc386_exit:
	cld			; the backward path leaves DF set
	pop	esi
	pop	edi
	mov	dx,lpDst.sel	; return destination address
	mov	ax,lpDst.off
	.286
cEnd

;-----------------------------------------------------------------------;
; fmemcpy286
;
;   hmemcpy worker that uses only 16-bit registers.  Copies cbMem bytes
;   from lpSrc to lpDst in pieces that never cross a 64K boundary of
;   either pointer, stepping DS/ES by __AHINCR whenever SI/DI wraps.
;
;   Direction: if lpSrc is below lpDst and the two blocks touch or
;   overlap, the copy runs from the ends of the blocks downwards;
;   otherwise it runs forwards.
;
; Entry:
;	lpDst	far (huge) pointer to copy to
;	lpSrc	far (huge) pointer to copy from
;	cbMem	DWORD count of bytes to move (0 = nothing to do).
;		The high word of this stack argument is used as the
;		running count and is modified.
; Returns:
;	DX:AX = lpDst
; Registers Destroyed:
;	AX,BX,CX,DX,ES,FLAGS
;-----------------------------------------------------------------------;
cProc fmemcpy286,<FAR,PASCAL,PUBLIC,NODATA>,<ds,si,di>
	ParmD	lpDst
	ParmD	lpSrc
	ParmD	cbMem
cBegin
	cld			; the forward copy below needs DF = 0
	mov	cx,cbMem.lo	; CX holds count
	or	cx,cbMem.hi	; or with high word
	jnz	@f
	jmp	empty_copy
@@:
	lds	si,lpSrc	  ; DS:SI = src
	les	di,lpDst	  ; ES:DI = dst
;
; calculate difference of pointers in "selector" space
;
	mov	ax,si		; DX:AX = lpSrc
	mov	dx,ds

	mov	bx,es		; BX = selector of ptr B

	mov	cx,__AHSHIFT	; number of selector bits per 64K 'segment'
	shr	dx,cl		; linearize ptr A
	shr	bx,cl		; linearize ptr B
;
; DX and BX contain normalized selectors
;
	sub	ax,di
	sbb	dx,bx		; DX:AX = lpSrc - lpDst

	mov	cx,cbMem.lo	; CX = low word of Count from here on

	or	dx,dx
	jns	next		; difference is positive, so copy forward

; see if the blocks intersect: is source + count > dest?
; equivalently, does (source - dest) + count carry out of 32 bits?

	add	ax,cx
	adc	dx,cbMem.hi
	jnc	next		; no carry: blocks are disjoint, copy forward
	jmp	reverse_copy	; carry, so they do hit.

;
;	While Count != 0 Do
;		Num = MIN(Count, 65536-SI, 65536-DI)
;		Copy "Num" Bytes from DS:SI to ES:DI (SI += Num, DI += Num)
;		Count -= Num
;		If Count == 0 Then
;			BREAK
;		If SI == 0 Then
;			DS += __AHINCR
;		If DI == 0 Then
;			ES += __AHINCR
;
next:
	mov	ax,di
	not	ax		; AX = 65535-DI

	mov	dx,si
	not	dx		; DX = 65535-SI

	sub	ax,dx
	sbb	bx,bx
	and	ax,bx
	add	ax,dx		; AX = MIN(AX,DX) = MIN(65535-SI,65535-DI)

	; the MINs are taken on "value - 1" so that 65536 still fits
	; in 16 bits; the 1 is added back (with carry) below.

	test	cbMem.hi,-1
	jnz	plentytogo	; at least 64k still to copy

	dec	cx		; this is ok, since high word is zero
	sub	ax,cx
	sbb	bx,bx
	and	ax,bx
	add	ax,cx		; AX = MIN(AX,Count-1)
	inc	cx

plentytogo:
	xor	bx,bx
	add	ax,1		; AX = Num = MIN(count,65536-SI,65536-DI)
				; we must check the carry here!
	adc	bx,0		; BX:AX = Num; BX is 1 only for exactly 64k
	xchg	ax,cx		; AX = low word of Count, CX = low word of Num
	sub	ax,cx		; Count -= Num
	sbb	cbMem.hi,bx

	shr	bx,1
	rcr	cx,1		; CX = Num/2 (8000h for 64k), CF = odd byte
	rep	movsw		; (string moves leave CF alone)
	jnc	@f
	movsb			; move last byte, if necessary
@@:
	mov	cx,ax		; put low word of count back in cx
	or	ax,cbMem.hi

	jz	mc286_fwd_done	; If Count == 0 Then BREAK

	or	si,si		; if SI wraps, update DS
	jnz	@f
;
	mov	ax,ds
	add	ax,__AHINCR
	mov	ds,ax		; update DS if appropriate
@@:
	or	di,di		; if DI wraps, update ES
	jnz	next
;
	mov	ax,es
	add	ax,__AHINCR
	mov	es,ax		; update ES if appropriate
	jmp	next

mc286_fwd_done:
	jmp	done

;
; Reverse copy.  DS:SI and ES:DI address the LAST BYTE still to be
; copied in each block:
;
;	DS:SI += Count-1
;	ES:DI += Count-1
;	While Count != 0 Do
;		Num = MIN(Count,SI+1,DI+1)
;		Reverse Copy "Num" Bytes from DS:SI to ES:DI
;			(SI -= Num, DI -= Num)
;		Count -= Num
;		If Count == 0 Then
;			BREAK
;		If SI == 0xFFFF Then
;			DS -= __AHINCR
;		If DI == 0xFFFF Then
;			ES -= __AHINCR
;
; Byte addressing keeps every pointer inside the blocks.  (Pointing at
; the last WORD instead broke when a block ended on byte 0 of a tile,
; and could compute Num == 0 and loop forever.)
;
reverse_copy:
	mov	ax,cx
	mov	dx,cbMem.hi
	sub	ax,1
	sbb	dx,0		; DX:AX = Count-1 (Count != 0 here)

	mov	bx,dx
	add	si,ax
	adc	bx,0		; BX = number of 64K tiles DS must advance
	add	di,ax
	adc	dx,0		; DX = number of 64K tiles ES must advance

	push	cx
	mov	cx,__AHSHIFT
	shl	bx,cl		; tile counts -> selector increments
	shl	dx,cl
	pop	cx

	mov	ax,ds
	add	ax,bx		; advance DS
	mov	ds,ax

	mov	ax,es
	add	ax,dx		; advance ES
	mov	es,ax

next_r:
	mov	ax,si

	sub	ax,di
	sbb	bx,bx
	and	ax,bx
	add	ax,di		; AX = MIN(SI, DI)

	test	cbMem.hi,-1	; is high word not zero?
	jnz	mc286_rev_plenty ; at least 64k to go

	dec	cx		; this is ok, since high word is zero
	sub	ax,cx
	sbb	bx,bx
	and	ax,bx
	add	ax,cx		; AX = MIN(SI,DI,Count-1)
	inc	cx

mc286_rev_plenty:
	xor	bx,bx
	add	ax,1		; AX = Num = MIN(Count,SI+1,DI+1)
	adc	bx,0		; BX:AX = Num; BX is 1 only for exactly 64k

	xchg	ax,cx		; AX = low word of Count, CX = low word of Num
	sub	ax,cx		; Count -= Num
	sbb	cbMem.hi,bx

	std
	shr	bx,1
	rcr	cx,1		; CX = Num/2 (8000h for 64k), CF = odd byte
	jnc	@f
	movsb			; odd Num: move the top byte first
@@:
	dec	si		; address the top WORD rather than the
	dec	di		;  top byte for the word moves
	rep	movsw
	inc	si		; back to "last byte still to copy"
	inc	di
	cld

	mov	cx,ax		; restore cx
	or	ax,cbMem.hi

	jz	done		; If Count == 0 Then BREAK

	cmp	si,-1		; if SI wraps, update DS
	jne	@f
;
	mov	ax,ds
	sub	ax,__AHINCR
	mov	ds,ax		; update DS if appropriate
@@:
	cmp	di,-1		; if DI wraps, update ES
	jne	next_r
;
	mov	ax,es
	sub	ax,__AHINCR
	mov	es,ax		; update ES if appropriate
	jmp	next_r
;
; Restore registers and return
;
done:
empty_copy:
	mov	dx,lpDst.sel	; return destination address
	mov	ax,lpDst.off
cEnd

sEnd

sEnd CodeSeg
end
