; 
;  Function to calc 16 bit square root of 32 bit number in DISI, ret in AX
; 
.DATA
SRRoots	DB 0,1,4,9,16,25,36,49,64,81,100,121,144,169,196,225
	EVEN
SRUnp	DW ? ; Number of bytes unprocessed

.CODE
	EVEN
SqRoot	PROC
	mov	ax,ds
	mov	es,ax
	mov	dx,di
	mov	cx,si
	xor	ah,ah
	or	dh,dh		; Is most sig. byte nonzero?
	jz	SRL1
	mov	al,dh
	mov	dh,dl
	mov	dl,ch
	mov	ch,cl
	mov	[SRUnp],3	; 3 bytes unprocessed
	jmp	SRL6
SRL1:	or	dl,dl
	jz	SRL2
	mov	al,dl
	mov	dh,ch
	mov	dl,cl
	mov	[SRUnp],2	; 2 bytes unprocessed
	jmp	SRL6
SRL2:	or	ch,ch
	jz	SRL3
	mov	al,ch
	mov	dh,cl
	mov	[SRUnp],1	; 1 byte unprocessed
	jmp	SRL6
SRL3:	or	cl,cl
	jz	SRL5
	mov	al,cl
	mov	[SRUnp],0	; 0 bytes unprocessed
	jmp	SRL6
SRL5:	xor	ax,ax
	ret
SRL6:	mov	di,OFFSET SRRoots
	mov	si,10h
SRL7:	scasb
	je	SRL9
	jb	SRL8
	dec	si
	jnz	SRL7
	inc	di
SRL8:	dec	di
	inc	si
SRL9:	mov	bl,[di-1]
	xor	bh,bh		; BX: max square below AL
	dec	si
	xor	si,0Fh		; SI in (0,15) (index into sqr table)
	mov	di,si
	mov	bp,[SRUnp]	; Test if we have already finished
	or	bp,bp		; ^
	jz	SRExit		; ^
	sub	ax,bx		; Substract max square (already taken into
				;  account in DI)
	; Loop for unprocessed bytes
SRMainLoop:
	mov	ah,al		; Shift into AX next byte
	mov	al,dh		; ^
	mov	dh,dl		; ^
	mov	dl,ch		; ^
	mov	ch,cl		; ^
	mov	si,di
	REPT 5
	shl	si,1
	ENDM
	mov	bp,dx		; Save for later restore
	xor	dx,dx
	mov	cx,ax		; Save for later restore (can use CX
				;  because we have already processed
				;  2 bytes, so there can only 2 left
				;  in DX)
	div	si
	REPT 4
	rol	di,1
	ENDM
	add	di,ax
	add	si,ax
SRL10:	imul	si
	mov	dx,ax
	mov	ax,cx		; Restore previous (before DIV) value
	sub	ax,dx
	jc	SRL12
	mov	dx,bp		; Restore previous DX value (unp. bytes)
	dec	[SRUnp]
	jnz	SRMainLoop
SRExit: mov	ax,di
	ret

SRL12:	dec	si
	dec	di
	mov	ax,si
	and	ax,001Fh
	jmp	SRL10

SqRoot	ENDP