;----------------------------------------------------------------------
; ProjectSky - Project a 256x256 map to the sky
;
; Turbo Pascal near-callable as
; ProjectSky(Map:Pointer;Buffer:Pointer;X,Y,Height:longint);
;
; X,Y are the coords of the viewpoint (16.16 fixed point)
; Height is the distance from the sky to the viewpoint (16.16)
;
; This routine has been timed at ~4.8 cycles/pixel for the inner loop
; rendering to an offscreen buffer on a 486 with 256K cache.
;
; WARNING: this code is somewhat kludgy.
;
; In particular, I have done some nasty things to gain an extra
; register (sp) for the main unrolled loop, and at the same time
; modified the stack segment register as a general segment register.
; This is VERY NAUGHTY and I don't recommend doing it :). The instruction
; ordering is also a bit counter-intuitive in the inner loop to avoid
; register contentions and AGIs on both the Pentium and the 486.
;
; The code is also somewhat more spaghetti-ish than it need be, since
; this is cut down from a more general routine that allows pitching of
; the viewpoint as well as other things.
;
; One obvious improvement is to have two inner loops: one for close bits
; which dithers the sky map, the other for distant bits which interpolates
; between values, thus avoiding some of the aliasing you get close to the
; horizon. If anyone does this and wants to send me a copy, I'd be
; grateful :)
;
;				Mark Mackey (mdm1004@cus.cam.ac.uk)
;
; This code is Copyright (C) 1996 Mark Mackey. Use, modification,
; and redistribution of this code is freely permitted, provided that
; the original author is acknowledged.
;----------------------------------------------------------------------

.model small,pascal
.386

BORDER		equ	0	; Bytes skipped at each end of a buffer row
BUFFERWIDTH	equ	256	; Bytes between the starts of successive buffer rows

; General view parameters
d		equ  	1	; 2^d is the focal length
StartLine       equ     100	; Line counter value for the first row drawn
EndLine		equ     4	; Line counter value for the last row drawn.
				; The counter is used as a divisor, so this
				; must be at least 1.

				; The counter runs from StartLine down to
				; EndLine inclusive, so the total number of
				; rows drawn is StartLine-EndLine+1. The
				; horizon is at 0 but it's generally worth
				; clipping the sky a few lines above the
				; horizon since you get nasty aliasing in
				; the last few lines. Change StartLine to
				; 97 and EndLine to 1 to see what I
				; mean :).

;Sky
xscale		equ	6       ; scaling factors: change these to taste.
zscale		equ	1       ; Note that these values are logarithmic,
				; ie increasing them by 1 doubles the
				; scale.

; bp-relative offsets of ProjectSky's two dword locals, and the number of
; bytes it reserves below bp to hold them.
Mapz		equ	-4
Mapx    	equ 	-8	; local variables.
STACK_FRAME_SIZE equ 	8

.DATA
; Scratch words addressed through gs while ss:sp are borrowed as
; general-purpose registers in the inner loop.
Saved_ss	dw	(?)	; Used to hold ss and sp during inner loop.
Saved_sp	dw	(?)

.CODE

; MyShl reg,count - shift reg left by an assembly-time constant.
;   count = 0   emits nothing
;   count = 1   emits add reg,reg
;   count > 1   emits shl reg,count
;   count < 0   emits an arithmetic right shift by -(count) via MySar
; count may be a constant expression (e.g. 7-xscale-d). Macro arguments are
; substituted as text, so the negation must be written -(count): a bare
; leading minus would only negate the first term of the expression.
MyShl  MACRO  reg,count
IF (count) NE 0                 ; ignore shifts by zero
  IF (count) LT 0
     MySar reg,-(count)         ; negative left shift = right shift
  ELSE
     IF (count) EQ 1
           add   reg,reg        ; Faster than shl xx,1
     ELSE
           shl   reg,count
     ENDIF
  ENDIF
ENDIF
ENDM

; MySar reg,count - arithmetic shift of reg right by an assembly-time constant.
;   count = 0   emits nothing
;   count > 0   emits sar reg,count
;   count < 0   emits a left shift by -(count) via MyShl
; count may be a constant expression. Macro arguments are substituted as
; text, so the negation must be written -(count): a bare leading minus
; would only negate the first term of the expression.
MySar  MACRO  reg,count
IF (count) NE 0                 ; Ignore shifts by zero
  IF (count) LT 0
     MyShl reg,-(count)         ; negative right shift = left shift
  ELSE
     sar   reg,count		; Arithmetic shift.
  ENDIF
ENDIF
ENDM

	PUBLIC ProjectSky
;----------------------------------------------------------------------
; ProjectSky(Map,Buffer:Pointer; X,Y,Height:longint)
;
; For each line counter value from StartLine down to EndLine (inclusive,
; i.e. StartLine-EndLine+1 rows) this computes a sky distance for the
; line, derives a starting map position and a per-pixel step from it,
; and writes one row of BUFFERWIDTH-2*BORDER pixels to Buffer. Rows are
; BUFFERWIDTH bytes apart.
;
; In:    Map     far pointer to the sky map. A row's start offset from
;                the pointer is (map y byte)*256 + (map x byte).
;        Buffer  far pointer to the output buffer
;        X,Y     viewpoint coordinates, 16.16 fixed point
;        Height  distance from the sky to the viewpoint, 16.16
; Kills: all registers except ds,ss,sp,bp (es and gs are overwritten)
;
; Frame: [bp+Mapz] and [bp+Mapx] are dword locals living in the
;        STACK_FRAME_SIZE bytes reserved below bp on entry and released
;        again before returning.
;
; Inner-loop register roles (ss:sp are NOT a stack while it runs):
;        ds:si.ax  map position for even pixels (si integer, ax fraction)
;        ds:di.sp  map position for odd pixels  (di integer, sp fraction)
;        dx.bx     2*delta, the step between pixels of the same parity
;        ss:bp     start of the current buffer row
;        cl,ch     even/odd pixel of the pair being stored
;        gs        data segment holding Saved_ss/Saved_sp
;
; Interrupts are disabled while ss:sp are borrowed. The caller's flags
; are saved with pushf beforehand and restored with popf afterwards, so
; a caller that entered with interrupts disabled gets them back disabled.
;----------------------------------------------------------------------
ProjectSky proc near Pascal Map:FAR PTR,Buffer:FAR PTR,X:DWORD,Y:DWORD,Height:DWORD
	sub   sp,STACK_FRAME_SIZE	; reserve the Mapz/Mapx locals
	push  ds
	cld
	mov   ax,_DATA
	mov   gs,ax			; gs reaches Saved_ss/Saved_sp for the
					; whole call (ds and ss get repurposed)
	les   di,[Buffer]
	add   di,BORDER			; es:di = first pixel written in row 0
	xor   ecx,ecx
	mov   cx,StartLine		; Counter=value of first line; only cx
					; is written below, so the upper half
					; of ecx stays zero
align 16
@@startloop:
	; ---- Mapz = (Height shl (8+d)) / (counter shl 8) ----
	mov   ebx,ecx
	shl   ebx,8			; ebx holds counter in 24.8
	xor   edx,edx
	mov   eax,[Height]
	shld  edx,eax,8+d		; edx:eax = Height shl (8+d), as the
	MyShl eax,8+d			; 64-bit dividend for div
	div   ebx			; eax = Height*2^d/counter in 16.16
	mov   dword ptr [bp+Mapz],eax

	; ---- Mapx = X - (Mapz shl (7-xscale-d)) : starting x on the map ----
	MyShl eax,7-xscale-d
	neg   eax
	add   eax,[X]
	mov   [bp+Mapx],eax		; 16.16

	; ---- map y for this line = (Mapz shl zscale) + Y ----
	mov   eax,[bp+Mapz]
	MyShl eax,zscale		; mapz=mapz*2^zscale (scaling)
	add   eax,[Y]			; eax = mapz + y in 16.16
	shr   eax,8			; ah holds map y value for this line.

	mov   ebx,[bp+Mapx]
	sar   ebx,16			; Whole value of mapx in bx
	mov   al,bl			; mapx mod 256 placed in al
					; => ax holds map starting point (integer)
	lds   si,[Map]			; ds now addresses the map; arguments
					; and locals are still reachable
					; through bp
	add   si,ax
	mov   eax,[bp+Mapx]		; ax holds fractional part of mapx
					; => si.ax holds complete map starting
					; point in 16.16 fixed point

	; ---- delta = Mapz shr (xscale+d) : step across the sky map ----
	mov   ebx,[bp+Mapz]
	shr   ebx,xscale+d
	mov   edx,ebx
	shr   edx,16			; delta in dx.bx in 16:16 fixed point

	push  cx			; save counter

	; ---- borrow ss:sp; nothing may touch the stack until restored ----
	pushf				; remember the caller's interrupt flag
	cli				; Prevent interrupts since stack is
					; clobbered below
	push  bp
	mov   gs:[Saved_sp],sp		; save stack pointer: can't use stack
	mov   gs:[Saved_ss],ss		; until restored!

	mov   sp,es
	mov   ss,sp			; ss points to buffer. We do this to
					; avoid segment overrides in the inner
					; loop, which really slow the Pentium
					; down.

	mov   bp,di			; ss:bp points to current location in buffer

	mov   di,si
	mov   sp,ax			; di.sp = si.ax = map starting point

	add   sp,bx			; odd stream starts one delta further on
	adc   di,dx

	and   cx,0001			; Swap odd/even dither depending on
					; lsb of cx
	jnz   @odd
@even:
	add   ax,8000h			; add 0.5 for dithering on even bytes
	adc   si,0
	jmp   @ditherOK
@odd:
	add   sp,8000h			; add 0.5 for dithering on odd bytes
	adc   di,0

@ditherOK:
	add   bx,bx			; double delta: each stream steps two
	adc   dx,dx			; pixels at a time

	; The loop body advances each stream AFTER it would logically be
	; read, so both streams are stepped once here. The carry left by
	; the final add below is consumed by the first adc di,dx in the
	; loop (mov does not alter flags).
	add   ax,bx			; setup due to rearranged instructions in
	adc   si,dx			; main loop
	add   sp,bx			;

	offs=0
rept (BUFFERWIDTH-BORDER*2)/2	 ; 2 pixels/loop. Pentium pipeline use below:
	mov   cl,ds:[si]         ;V0   even pixel
	adc   di,dx              ;U1   finish odd-stream step (carry from add sp,bx)
	add   ax,bx              ;V1   even-stream fraction step
	mov   ch,ds:[di]         ;U3 (AGI on di)   odd pixel
	adc   si,dx              ;U4   finish even-stream step (carry from add ax,bx)
	add   sp,bx              ;V4   odd-stream fraction step
	mov   ss:[bp+offs],cx    ;U5   store the pixel pair
	offs=offs+2
endm
	mov   di,bp			; di = start of the row just written

	mov   ss,gs:[Saved_ss]		; restore stack
	mov   sp,gs:[Saved_sp]
	pop   bp

	popf				; restore the caller's interrupt flag
					; rather than forcing interrupts on
	add   di,BUFFERWIDTH		; set up for next row
	pop   cx
	dec   cx
	cmp   cx,EndLine		; Have we finished?
	jge   @@startloop		; row for counter == EndLine is drawn too

@End:
	pop   ds
	add   sp,STACK_FRAME_SIZE	; release the locals explicitly so the
					; return does not depend on the
					; generated epilogue resetting sp
	ret				; Done!
ProjectSky  endp

end