	TITLE	SQRTI - INTEGER SQRT AND MAGNITUDE

	COMMENT $

/* Routines for integer sqrt and magnitude for REND386 */

// All code by Dave Stampe, last updated 23/12/93
// These routines are (c) 1993 by Dave Stampe

/*
 This code is part of the VR-386 project, created by Dave Stampe.
 VR-386 is a desendent of REND386, created by Dave Stampe and
 Bernie Roehl.  Almost all the code has been rewritten by Dave
 Stampre for VR-386.

 Copyright (c) 1994 by Dave Stampe:
 May be freely used to write software for release into the public domain
 or for educational use; all commercial endeavours MUST contact Dave Stampe
 (dstampe@psych.toronto.edu) for permission to incorporate any part of
 this software or source code into their products!  Usually there is no
 charge for under 50-100 items for low-cost or shareware products, and terms
 are reasonable.  Any royalties are used for development, so equipment is
 often acceptable payment.

 ATTRIBUTION:  If you use any part of this source code or the libraries
 in your projects, you must give attribution to VR-386 and Dave Stampe,
 and any other authors in your documentation, source code, and at startup
 of your program.  Let's keep the freeware ball rolling!

 DEVELOPMENT: VR-386 is a effort to develop the process started by
 REND386, improving programmer access by rewriting the code and supplying
 a standard API.  If you write improvements, add new functions rather
 than rewriting current functions.  This will make it possible to
 include you improved code in the next API release.  YOU can help advance
 VR-386.  Comments on the API are welcome.

 CONTACT: dstampe@psych.toronto.edu
*/

		$

	.MODEL large

	.CODE INTMATH

			   ; one iteration of 32->16 sqrt
SQRT32  MACRO
	LOCAL skip
	shld	edx,eax,2  ;; get 2 bits of input to error
	shl	eax,2
	add	ebx,ebx    ;; estimate*2
	mov	ecx,ebx    ;; temp = est*2
	add	ecx,ecx
	cmp	edx,ecx    ;; error>2*est?
	jle	skip
	inc	ebx        ;; yes, update for new bit added
	inc	ecx
	sub	edx,ecx
skip:
	ENDM


;long squareroot16(long arg)

; takes root of 32 bit number to 16 bit result
; about 220 clocks worst case:
; 3 us on 486/66 and 10 us on 386/25


larg	equ	DWORD PTR [bp+8]

	PUBLIC	_squareroot32

_squareroot32 proc	far

	.386
	push	ebp
	mov	ebp,esp

	push	esi
	push	ecx

	xor	edx,edx
	xor	ebx,ebx

	mov	eax,DWORD PTR larg
	test	eax,0FFFF0000h		; can we cut it in half?
	jne	hasupper

	shl	eax,16		; yes, so prescale
	test	eax,0FF000000h		; half again?
	jne	do16

	shl	eax,8		; yes, prescale
	jmp	do8		; do 8 loops

hasupper:
	test	eax,0FF000000h		; half again?
	jne	do32
	shl	eax,8
	jmp	do24
do32:
	SQRT32
	SQRT32
	SQRT32
	SQRT32
do24:
	SQRT32
	SQRT32
	SQRT32
	SQRT32
do16:
	SQRT32
	SQRT32
	SQRT32
	SQRT32
do8:
	SQRT32
	SQRT32
	SQRT32
	SQRT32

	mov	eax,ebx
	shld	edx,eax,16   	; returns in both eax and dx:ax

	pop	ecx
	pop	esi

	mov	esp,ebp
	pop	ebp
	ret

_squareroot32  endp


			    ; one iteration of 62->31 sqrt
SQRT64  MACRO
	LOCAL skip
	shld	edi,edx,2
	shld	edx,eax,2  ; get 2 bits of input to error
	shl	eax,2
	add	ebx,ebx    ; estimate*2
	mov	ecx,ebx    ; temp = est*2
	add	ecx,ecx
	cmp	edi,ecx    ; error>2*est?
	jle	skip
	inc	ebx        ; yes, update for new bit added
	inc	ecx
	sub	edi,ecx
skip:
	ENDM



;long squareroot62(long hiarg, long loarg)

; takes root of 62 bit number to 31 bit result
; about 500 clocks worst case:
; 8 us on 486/66 and 20 us on 386/25


hiarg	equ	DWORD PTR [bp+8]
loarg	equ	DWORD PTR [bp+12]

	PUBLIC	_squareroot62

_squareroot62 proc	far

	.386
	push	ebp
	mov	ebp,esp

	mov	edx,DWORD PTR hiarg
	mov	eax,DWORD PTR loarg
	or	edx,edx
	jne	dohigh

	push	eax		; can use short root!
	call	_squareroot32
	sub	esp,4
	mov	esp,ebp
	pop	ebp
	ret

dohigh:
	push	ecx		; have to do 2 dwords
	push	esi
	push	edi

	xor	edi,edi
	xor	ebx,ebx

	shld	edx,eax,2	; prescale for 62 bits in 64 bit word
	shl	eax,2

	test	edx,0FFFF0000h		; can we cut it in half?
	jne	hashigh

	shld	edx,eax,16      ; yes, so prescale
	shl	eax,16
	test	edx,0FF000000h	; half again?
	jne	do48            ; no, do 48 loops

	shld	edx,eax,8
	shl	eax,8		; yes, prescale
	jmp	do40		; do 40 loops

hashigh:
	test	edx,0FF000000h		; half again?
	jne	do64
	shld	edx,eax,8
	shl	eax,8
	jmp	do56

do64:
	SQRT64
	SQRT64
	SQRT64
	SQRT64
do56:
	SQRT64
	SQRT64
	SQRT64
	SQRT64
do48:
	SQRT64
	SQRT64
	SQRT64
	SQRT64
do40:
	SQRT64
	SQRT64
	SQRT64
	SQRT64

	SQRT64
	SQRT64
	SQRT64
	SQRT64

	SQRT64
	SQRT64
	SQRT64
	SQRT64

	SQRT64
	SQRT64
	SQRT64
	SQRT64

	SQRT64
	SQRT64
	SQRT64
		; one missing because of prescale

	mov	eax,ebx

	pop	edi
	pop	esi
	pop	ecx

	shld	edx,eax,16	; returns in both eax and dx:ax

	mov	esp,ebp
	pop	ebp
	ret

_squareroot62  endp


;long magnitude32(long x, long y, long z)

; computes overall magnitude of vector
; no scaling or shortcuts: does 3x32-bit multiplies
; time: worst case of 650 clocks, best case of 200
; 3 to 10 us on 486/66, 8 to 25 us on 386/25


x	equ	DWORD PTR [bp+8]
y	equ	DWORD PTR [bp+12]
z	equ	DWORD PTR [bp+16]

	PUBLIC	_magnitude32

_magnitude32 proc	far

	.386
	push	ebp
	mov	ebp,esp
	push	ecx

	mov	eax,x           ; sum of squares
	imul	x
	mov	ebx,eax
	mov	ecx,edx

	mov	eax,y
	imul	y
	add	ebx,eax
	adc	ecx,edx

	mov	eax,z
	imul	z
	add	ebx,eax
	adc	ecx,edx

	push	ebx		; square root
	push	ecx
	call	_squareroot62
	add	esp,8

	pop	ecx
	mov	esp,ebp
	pop	ebp
	ret

_magnitude32  endp


;long magnitude16(int x, int y, int z)

; computes overall magnitude of vector
; no scaling or shortcuts: does 3x16-bit multiplies
; time: worst case of 300 clocks, best case of 150
; 2 to 5 us on 486/66, 6 to 12 us on 386/25


x	equ	WORD PTR [bp+8]
y	equ	WORD PTR [bp+10]
z	equ	WORD PTR [bp+12]

	PUBLIC	_magnitude16

_magnitude16 proc	far

	.386
	push	ebp
	mov	ebp,esp
	push	ecx

	mov	ax,x           ; sum of squares
	imul	x
	mov	bx,ax
	mov	cx,dx

	mov	ax,y
	imul	y
	add	bx,ax
	adc	cx,dx

	mov	ax,z
	imul	z
	add	bx,ax
	adc	cx,dx

	push	cx		; square root
	push	bx
	call	_squareroot32
	add	esp,4

	pop	ecx
	mov	esp,ebp
	pop	ebp
	ret

_magnitude16  endp



;void set_vector_length32(long length, long *xp, long *yp, long *zp)

; sets overall magnitude of vector


length	equ	DWORD PTR [bp+8]
xp	equ	DWORD PTR [bp+12]
yp	equ	DWORD PTR [bp+16]
zp	equ	DWORD PTR [bp+20]


	PUBLIC	_set_vector_magnitude32

_set_vector_magnitude32 proc	far

	.386
	push	ebp
	mov	ebp,esp
	sub	esp,20

	push	ecx
	push	edi
	push	esi

	les	bx,xp  			; compute magnitude
	push	DWORD PTR es:[bx]
	les	bx,yp
	push	DWORD PTR es:[bx]
	les	bx,zp
	push	DWORD PTR es:[bx]

	call	_magnitude32
	add	esp,12
	mov	esi,eax
	or	eax,eax
	je	zero_magnitude

	les	bx,xp  			; scale each part
	mov	eax, es:[bx]
	imul	length
	idiv	esi
	les	bx,xp
	mov	es:[bx],eax

	les	bx,yp
	mov	eax, es:[bx]
	imul	length
	idiv	esi
	les	bx,yp
	mov	es:[bx],eax

	les	bx,zp
	mov	eax, es:[bx]
	imul	length
	idiv	esi
	les	bx,zp
	mov	es:[bx],eax

zero_magnitude:
	pop	esi
	pop	edi
	pop	ecx

	mov	esp,ebp
	pop	ebp
	ret

_set_vector_magnitude32  endp



	end