;1997 Nickolay CHirkov, rzv@si.moris.rosmail.com
;This program is an example of how to write very fast, locally optimized code.
;It draws lines into the screen raster or into a memory block
;(max 64 Kbyte).
;The main procedure has been tested on several types of machine (see table) in
;a loop drawing 100,000,000 pixels, with lines of different directions,
;each of them 130 pixels long.
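;+------------+----------------------+---------------------------------------+
;|  CPU       | Video System         | Performance,time-sec/tact-per-pixel   |
;|            |                      +---------+---------+---------+---------+
;|            |                      |   *1    |   *2    |   *3    |   *4    |
;+------------+----------------------+---------+---------+---------+---------+
;| 486DX2 66  | VLB,CirrusLogic 5429 | 19.25s  | 15.13s  | 10.40s  | 10.12s  |
;|            |                      | 12.7t   | 9.98t   | 6.86t   | 6.68t   |
;+------------+----------------------+---------+---------+---------+---------+
;| 486DX4 100 | PCI,Avance Logic     | 10.51s  | 12.49s  | 8.8s    | 6.77s   |
;|            | ALG-2302 V.5.10.A    | 10.51t  | 12.49t  | 8.8t    | 6.77t   |
;+------------+----------------------+---------+---------+---------+---------+
;| Pentium100 | PCI,S3 TRIO 64       | 15.79s  | 10.67s  |         | 4.24s   |
;|            |                      | 15.79t  | 10.67t  |         | 4.24t   |
;+------------+----------------------+---------+---------+---------+---------+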
;  *1 - lines drawn into video memory in standard VGA mode 320*200*8bit
;  *2 - lines drawn into base memory (Windows 95, DOS mode)
;  *3 - lines drawn into base memory (Windows 95, DOS mode, without himem.sys)
;  *4 - without writing into memory (Windows 95, DOS mode)

; For Masm
.model small
; The stack must hold more than one INT frame (FLAGS,CS,IP = 6 bytes): the
; program also nests "call OutLine" + "push ds", and every BIOS/DOS/hardware
; interrupt handler pushes its own registers on this same stack.  With only
; 6 bytes SP wraps below 0 and the pushes land outside the stack segment.
; 1024 bytes is the MASM default for .STACK.
.stack 400h
.data

; NOTE: OutLine fetches the four coordinates from DS offsets 0,2,4,6 instead of
; by name, so X0,Y0,X1,Y1 must stay the first four words of this segment, in
; exactly this order.
X0            dw    160               ;the coordinates of the line: start point X
Y0            dw    100               ;start point Y
X1            dw    0                 ;end point X
Y1            dw    0                 ;end point Y
VIDRAM        dw    0A000h            ;segment (or selector) of the memory block the line is drawn into
COLOR         db    3                 ;byte value stored for every pixel of the line

.code
.286
;------------------------------Entry point-------------------------------------
; Set up DS, switch to the 320x200 256-colour mode, draw one line, wait for a
; key, restore text mode and return to DOS.
; The equates below emit no bytes; the instruction encodings are unchanged.
BIOS_VIDEO      equ  10h              ;BIOS video services interrupt
BIOS_KEYBOARD   equ  16h              ;BIOS keyboard services interrupt
DOS_SERVICES    equ  21h              ;DOS function-call interrupt
SET_MODE_13H    equ  0013h            ;AH=00h set video mode, AL=13h (320*200*8bit)
SET_MODE_03H    equ  0003h            ;AH=00h set video mode, AL=03h (text mode)
DOS_TERMINATE   equ  04Ch             ;AH=4Ch terminate process

Begin:
                mov  ax,@DATA         ;DS must address the data segment before OutLine runs
                mov  ds,ax

                mov  ax,SET_MODE_13H  ;enter graphics mode
                int  BIOS_VIDEO

                call OutLine          ;near call: draw the line (X0,Y0)-(X1,Y1)

                xor  ax,ax            ;AH=00h: wait for a keystroke
                int  BIOS_KEYBOARD

EndOfProgram:
                mov  ax,SET_MODE_03H  ;back to text mode
                int  BIOS_VIDEO

                mov  ah,DOS_TERMINATE ;exit to DOS (AL is whatever the previous call left)
                int  DOS_SERVICES

;------------------------------Line-------------------------------------------
; OutLine - draw a line starting at (X0,Y0) towards (X1,Y1) into a raster of
;           320 bytes per row, one byte per pixel, using an integer error-term
;           (Bresenham-style) stepper.  The loop is unrolled so that the loop
;           counter is decremented only once per four pixels.
; In:    DS      = data segment.  The four coordinate words are read from DS
;                  offsets 0,2,4,6 (X0,Y0,X1,Y1), NOT by name.
;        VIDRAM  = segment of the destination raster
;        COLOR   = byte stored for every pixel
; Out:   nothing; DS is saved and restored
; Clobb: AX,BX,CX,DX,SI,DI,BP,flags
; Notes: Max(|X1-X0|,|Y1-Y0|)+1 pixels are stored.  No clipping is done.
;        The group counter lives in AH (length/4, 8 bits).
;
; Register roles inside the loop:
;        AL = colour          AH = remaining groups of four pixels
;        DI = current address DX = error term
;        BX = address step along the longer (major) axis
;        BP = address step for a diagonal move (major step + minor step)
;        SI = length on the shorter axis, CX = longer length - shorter length
; Loop labels: LineStepXn stores a pixel and then steps along the major axis,
;        LineStepYn stores a pixel and then steps diagonally; n (0..3) is the
;        pixel's position inside the current group of four.
OutLine:        mov  bx,1             ;X address step (+1); also the base for the short-form loads below
;Equivalent loads by name (one byte longer for DI, CX and SI):
;                mov  cx,X1
;                mov  di,X0
;                mov  ax,Y0
;                mov  si,Y1
                mov  di,[bx-1]        ;DI = X0 (DS:0); 3 bytes instead of 4 for mov di,X0
                mov  ax,[bx+1]        ;AX = Y0 (DS:2)
                mov  cx,[bx+3]        ;CX = X1 (DS:4)
                mov  si,[bx+5]        ;SI = Y1 (DS:6)
                mov  bp,320           ;Y address step: bytes per raster row
                sub  cx,di            ;X1-X0
                jge  LinePlusX
                neg  cx               ;make the X length non-negative
                neg  bx               ;line runs to the left: X step becomes -1
LinePlusX:      inc  cx               ;length on X is |X1-X0|+1
                sub  si,ax            ;Y1-Y0
                mul  bp               ;DX:AX = Y0*320 (DX is not yet in use)
                add  di,ax            ;start address of the line is 320*Y0+X0
                test si,si
                jge  LinePlusY
                neg  si               ;make the Y length non-negative
                neg  bp               ;line runs upwards: Y step becomes -320
LinePlusY:      inc  si               ;length on Y is |Y1-Y0|+1
                cmp  cx,si            ;make CX the longer length and BX the step along that axis
                jge  LineXGY
                xchg cx,si            ;Y is the major axis: swap the lengths...
                xchg bp,bx            ;...and the steps
LineXGY:        mov  ax,si            ;start value of the error term is U=Max(Len(X),Len(Y))-Int(Min(Len(X),Len(Y))/2)-1
                mov  dx,cx            ;(the Min/2 bias centres the steps along the line)
                shr  ax,1             ;error-term and counter computations are interleaved (author: for better Pentium performance)
                add  bp,bx            ;BP = diagonal step = minor step + major step
                sub  dx,ax
                mov  ax,cx
                dec  dx               ;DX = Max - Min/2 - 1
                shl  ax,6             ;AH = bits 2..9 of the length = number of pixels divided by 4
                mov  al,cl            ;AL = low byte of the length, for the remainder test below
                inc  ah               ;+1: every entry point passes one "dec ah" before its first full group
                sub  cx,si            ;CX = Max - Min, added to the error term after a diagonal step
                shl  al,7             ;CF = bit 1 of the length, ZF = (bit 0 of the length is 0)
                mov  al,COLOR         ;the next three instructions leave the flags untouched
                push ds
                mov  ds,VIDRAM        ;DS = destination segment from here to LineEnd
                ja   LineStart1       ;CF=0,ZF=0: length mod 4 is 1
                jnz  LineStart3       ;CF=1,ZF=0: length mod 4 is 3
                jc   LineStart2       ;CF=1,ZF=1: length mod 4 is 2
                jmp  LineStart4       ;CF=0,ZF=1: length mod 4 is 0

db       11     dup (?)               ;padding intended to put LineStepX0 on a 16-byte boundary; the count is hand-computed, so re-check it if any code before this line changes size

LineStepX0:     mov  [di],al          ;main loop, pixel 1 of 4: store, then step along the major axis
                add  di,bx
LineStart3:     sub  dx,si            ;error -= Min; entry point when 3 pixels precede the first counter check
                jge  LineStepX1       ;still non-negative: next pixel is followed by a major-axis step
LineStepY1:     mov  [di],al          ;pixel 2 of 4: store, then step diagonally
                add  di,bp
                add  dx,cx            ;error += Max-Min (the += Max of the wrap and the -= Min of the next pixel combined)
                jl   LineStepY2
LineStepX2:     mov  [di],al          ;pixel 3 of 4: store, then step along the major axis
                add  di,bx
LineStart1:     sub  dx,si            ;entry point when 1 pixel precedes the first counter check
                jge  LineStepX3
LineStepY3:     mov  [di],al          ;pixel 4 of 4 (diagonal variant)
                dec  ah               ;one group of four finished
                jz   LineEnd
                add  di,bp
                add  dx,cx
                jge  LineStepX0

LineStepY0:     mov  [di],al          ;pixel 1 of 4: store, then step diagonally
                add  di,bp
                add  dx,cx
                jl   LineStepY1
LineStepX1:     mov  [di],al          ;pixel 2 of 4: store, then step along the major axis
                add  di,bx
LineStart2:     sub  dx,si            ;entry point when 2 pixels precede the first counter check
                jge  LineStepX2
LineStepY2:     mov  [di],al          ;pixel 3 of 4: store, then step diagonally
                add  di,bp
                add  dx,cx
                jl   LineStepY3
LineStepX3:     mov  [di],al          ;pixel 4 of 4 (major-axis variant)
                add  di,bx
LineStart4:     dec  ah               ;one group finished; also the entry point when length mod 4 is 0
                jz   LineEnd
                sub  dx,si
                jl   LineStepY0
                jmp  LineStepX0

LineEnd:        pop  ds               ;restore the caller's data segment
                ret

        END   Begin