/*

NanoTech - a 3d game engine
Copyright (C) 1996  Sean Lane Fuller

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.

Sean Lane Fuller
124 Autumn Lane
Tullahoma, TN 37388
615-393-4550
email: fuller@edge.net

A lot of the sbuffer code here was taken from Michael Abrash's sample code
which was available from ftp.idsoftware.com.

*/

extern int do_time;

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

extern int no_perspective_correct;

#include "pix.h"
#include "keyboard.h"
#include "sbuffer.h"

// Camera state defined in another translation unit.  Within this file
// only yaw and pitch are read (DrawSpans uses them to scroll the
// star-field background).
extern float mx, my, mz;
extern float yaw, pitch, roll;
extern float sin_yaw, cos_yaw, sin_pitch, cos_pitch, sin_roll, cos_roll;

// The hand-written "rep movsb" / "rep stosb" versions below are disabled;
// mymemcpy and mymemset are plain aliases for the C library routines.
//void mymemcpy(char *dest, char *src, int len);
//#pragma aux mymemcpy = " rep movsb " parm [EDI] [ESI] [ECX] modify [EDI ESI ECX];
//void mymemset(char *dest, char val, int len);
//#pragma aux mymemset = " rep stosb " parm [EDI] [AL] [ECX] modify [EDI ECX];
#define mymemset memset
#define mymemcpy memcpy

// Pixel run length used by DrawSpans when it subdivides a span for
// perspective-correct texture mapping.
#define SUBDIV 32
// Capacity of polygon_t.verts[]
#define MAX_POLY_VERTS      8       // after clip
//typedef int INT;
//typedef unsigned int UINT;
// Note that TRUE is all bits set (-1 as an int), not 1.
#define TRUE (~0)
#define FALSE 0
// Number of per-scan-line buckets in newedges[] and removeedges[]
#define MAX_SCREEN_HEIGHT   MAXNUMYPIXELS
// Capacities of the spans[], surfs[] and edges[] pools
#define MAX_SPANS           10000
#define MAX_SURFS           2000
#define MAX_EDGES           5000

/* A three-component vector plus a pair of texture coordinates. */
typedef struct {
    float v[3];     /* vector components */
    float tu;       /* first texture coordinate */
    float tv;       /* second texture coordinate */
} point_t;

/*
 * One horizontal run of pixels belonging to a single surface, produced
 * by ScanEdges and consumed by DrawSpans.  A span whose x is -1 marks
 * the end of the span list.
 */
typedef struct {
    int   x;        /* first pixel column of the run */
    int   y;        /* scan line */
    float invu;     /* u/z at the first pixel */
    float invv;     /* v/z at the first pixel */
    float invz;     /* 1/z at the first pixel */
    float dinvu;    /* change of u/z per pixel */
    float dinvv;    /* change of v/z per pixel */
    float dinvz;    /* change of 1/z per pixel */
    int   count;    /* number of pixels in the run */
    int   color;    /* surface color; DrawSpans takes the texture number from color >> 8 */
} span_t;

/*
 * A plane stored as a normal and a distance.
 * NOTE(review): presumably the usual "normal . p = distance" form; no
 * code visible in this part of the file reads it - confirm.
 */
typedef struct {
    float   distance;
    point_t normal;
} plane_t;


/*
 * A polygon of at most MAX_POLY_VERTS vertices together with its plane.
 * Only the first num_verts entries of verts[] are meaningful.
 */
typedef struct {
    int      color;
    int      num_verts;                 /* number of used entries in verts[] */
    point_t  verts[MAX_POLY_VERTS];
    plane_t  plane;
} polygon_t;

struct edge_s;

/*
 * One surface (polygon) on the active surface stack built by ScanEdges.
 * The stack is a circular doubly linked list kept in front-to-back order.
 */
typedef struct surf_s {
    struct surf_s *pnext;       /* next surface further back in the stack */
    struct surf_s *pprev;       /* previous surface nearer the viewer */
    int            color;       /* copied into every span emitted for this surface */
    int            visxstart;   /* column where the surface last became the visible top */
    float          zinv00;      /* only set on the background surface in ScanEdges */
    float          zinvstepx;
    float          zinvstepy;
    int            state;       /* leading edges seen minus trailing edges seen */
    float          invu;        /* u/z, v/z and 1/z extrapolated back to column 0 */
    float          invv;
    float          invz;
    float          dinvu;       /* per-pixel gradients along the current scan line */
    float          dinvv;
    float          dinvz;
    struct edge_s *edge;        /* most recent leading edge of this surface */
} surf_t;

/*
 * One polygon edge.  The first member must stay x: maxedge is
 * initialised positionally with only its x value.
 */
typedef struct edge_s {
    int            x;            /* current x in 16.16 fixed point */
    int            xstep;        /* change of x per scan line, 16.16 fixed point */
    int            leading;      /* non-zero for a leading edge, 0 for a trailing edge */
    float          invu;         /* u/z, v/z and 1/z at the current scan line */
    float          invv;
    float          invz;
    float          dinvu;        /* change of the above per scan line */
    float          dinvv;
    float          dinvz;
    surf_t        *psurf;        /* surface this edge belongs to */
    struct edge_s *pnext;        /* links in a new-edge list or the active edge list */
    struct edge_s *pprev;
    struct edge_s *pnextremove;  /* next edge that ends on the same scan line */
} edge_t;

/* Number of texture maps */
#define NTEXTURES 16

/* Width, height and row pitch of the output image; set by init_sbuffer() */
int DIBWidth;
int DIBHeight;
int DIBPitch;

/* Storage for all texture maps, with 256 * 256 spare bytes at the end */
unsigned char texture_map_buf[NTEXTURES * TMAPH * TMAPW + 256 * 256];
/* Start of each texture inside texture_map_buf; filled in by init_textures() */
unsigned char *texture_map[NTEXTURES];
/* Full-screen star-field image copied into background spans by DrawSpans */
unsigned char background_texture[MAXNUMXPIXELS * MAXNUMYPIXELS];

/*
 * Build the star-field background and all NTEXTURES texture maps.
 *
 * - background_texture is cleared and NUMXPIXELS randomly placed pixels
 *   are set to a color in 0x61..0x67.
 * - Textures 0..6 are procedural patterns, textures 7..14 are copies of
 *   texture (tnum % 6), and texture 15 is an XOR test pattern.
 * - Finally draw_shadows() is applied to texture 0.
 *
 * Texel (row, col) of a texture lives at index (row << TSHIFT) + col.
 */
void init_textures()
{
   extern int draw_shadows(char *, int, int);
   int i, j, x, y, tnum;
   unsigned char *tex;
   unsigned char *texel;

//   pix_loadpcx(background_texture, "bg.pcx", 0);
   memset(background_texture, 0, NUMXPIXELS * NUMYPIXELS);
   for (i=0; i<NUMXPIXELS; i++)
   {
      background_texture[(rand() % NUMYPIXELS) * NUMXPIXELS + (rand() % NUMXPIXELS)]
         = 0x61 + (rand() % 7);
   }
   pix_setup_palette(1.0);

   for (tnum=0; tnum<NTEXTURES; tnum++)
   {
      // Textures are packed back to back from the start of the buffer.
      // (An alignment offset used to be computed here but was always
      // overridden with 0, so it has been dropped.)
      tex = &texture_map_buf[tnum * TMAPH * TMAPW];
      texture_map[tnum] = tex;

      // Default contents: a flat color per texture, or the XOR pattern
      // for texture 15.
      for (i=0; i<TMAPH; i++)
      {
         for (j=0; j<TMAPW; j++)
         {
            if (tnum != 15) tex[(i<<TSHIFT) + j] = 0xf + (tnum << 4);
            else tex[(i<<TSHIFT) + j] = i ^ j;
         }
      }
      if (tnum == 15) continue;

      // The original code drew two random numbers here and never used
      // them.  They are still drawn so that every later rand() caller
      // sees the same sequence as before.
      (void)rand();
      (void)rand();

      if (tnum > 6)
      {
         // Textures 7..14 reuse one of the procedural textures 0..5
         memcpy(tex, texture_map[tnum % 6], TMAPH * TMAPW);
//      sprintf(tfile, "txt%d.pcx", tnum);
//      pix_loadpcx(texture_map[tnum], tfile, 0);
         continue;
      }

      for (x=0; x<TMAPH; x++)
      {
         for (y=0; y<TMAPW; y++)
         {
            texel = &tex[(x << TSHIFT) + y];
            switch (tnum)
            {
               case 6:
                  *texel = (sin(0.5 - (float)x/(float)(TMAPH * 2))
                        + cos(0.5 + (float)y/(float)(TMAPW * 2))) * 6 + 0xa0;
                  break;
               case 5:
                  *texel = (sin((float)x/(float)TMAPH)
                        + cos((float)y/(float)TMAPW)) * 6 + 0x50;
                  break;
               case 4:
                  *texel = 0x4a + (((x/4) ^ (y/4)) & 0x0f);
                  break;
               case 3:
                  // x/TMAPH used to be an integer division, which is
                  // always 0 for x < TMAPH and made the sin() term
                  // vanish; divide in floating point as texture 5 does.
                  *texel = 0x38 + ((x/8) ^ (y/8))
                        + (int)(sin((float)x/(float)TMAPH) * 4);
                  break;
               case 2:
                  // Same integer-division fix as texture 3.  '+' binds
                  // tighter than '^'; the parentheses only spell out
                  // how the expression was already parsed.
                  *texel = (0x25 + ((x/16) ^ (y/16)))
                        ^ (int)(sin((float)x/(float)TMAPH) * 4);
                  break;
               case 1:
                  // '+' binds tighter than '&'; the parentheses only
                  // spell out how the expression was already parsed.
                  *texel = (0x10
                        + (int)sqrt((TMAPW/2 - x)*(TMAPW/2 - x)
                              + (TMAPH/4 - y)*(TMAPH/4 - y))) & 0x1f;
                  break;
               default: // texture 0
                  *texel = 7 +
                        ((int)sqrt((TMAPW/2 - x)*(TMAPW/2 - x)
                              + (TMAPH/2 - y)*(TMAPH/2 - y)) & 0x3);
                  break;
            }
         }
      }
   }
   draw_shadows((char *)texture_map[0], TMAPH, TMAPW);
}

/*
 * One-time set-up of the span-buffer renderer: record the output
 * dimensions, build the textures and reset the edge lists.
 */
void init_sbuffer()
{
   /* Rows are stored back to back, so the pitch equals the width */
   DIBHeight = NUMYPIXELS;
   DIBPitch = DIBWidth = NUMXPIXELS;

   init_textures();
   ClearEdgeLists();
}

/* Counterpart of init_sbuffer(); there is currently nothing to tear down. */
void term_sbuffer()
{
   return;
}

// Span, edge, and surface lists.
// spans[] is filled by ScanEdges and read by DrawSpans; edges[] and
// surfs[] are pools handed out through pavailedge and pavailsurf.
span_t  spans[MAX_SPANS];
edge_t  edges[MAX_EDGES];
surf_t  surfs[MAX_SURFS];

// Bucket list of new edges to add on each scan line.
// Each entry is the dummy head of a list sorted by x and ended by maxedge.
edge_t  newedges[MAX_SCREEN_HEIGHT];

// Bucket list of edges to remove on each scan line
// (singly linked through edge_t.pnextremove)
edge_t  *removeedges[MAX_SCREEN_HEIGHT];

// Head and tail for the active edge list
// (sentinels at the left and right screen borders, set up by ScanEdges)
edge_t  edgehead;
edge_t  edgetail;

// Edge used as sentinel of new edge lists.
// Only the first member (x) is initialised: the largest int, so that
// every real edge sorts in front of it.
edge_t  maxedge = {0x7FFFFFFF};

// Head/tail/sentinel/background surface of active surface stack
surf_t  surfstack;

// pointers to next available surface and edge
surf_t  *pavailsurf;
edge_t  *pavailedge;

// Color passed to the most recent AddPolygonEdges call
int currentcolor;

void UpdateWorld(void);


/////////////////////////////////////////////////////////////////////
// Add the polygon's edges to the global edge table.
//
// screenpoly  polygon in screen coordinates; its vertices are clamped
//             in place to the visible area
// color       value stored in the surface created for this polygon
//
// Every edge that crosses at least one scan line is taken from the
// edges[] pool, sorted by x into newedges[first scan line] and pushed
// onto removeedges[last scan line].  One surface is then taken from
// surfs[] and shared by all of these edges.
//
// The call does nothing (apart from recording the color) when
// screenpoly is null, when it has fewer than 3 vertices, or when the
// edge or surface pool cannot hold the polygon any more.
/////////////////////////////////////////////////////////////////////
void AddPolygonEdges (polygon2D_t *screenpoly, int color)
{
    float  deltax, deltay, slope, temp_invz, temp_ninvz;
    int a, b;
    int     i, nextvert, num_verts, temp, topy, bottomy, height;
    edge_t  *pedge;

    currentcolor = color;

    // Check the pointer before reading through it
    if (/* plane == 0 || */ screenpoly == 0) return;

    num_verts = screenpoly->num_verts;
    if (num_verts < 3) return;

    // Drop the polygon when the pools are exhausted.  Without this the
    // last pool slot would be handed out again while it is still linked
    // into newedges[]/removeedges[], which corrupts those lists.  The
    // last slot of each pool is therefore never handed out.
    if (pavailsurf >= &surfs[MAX_SURFS - 1]) return;
    if (&edges[MAX_EDGES - 1] - pavailedge < num_verts) return;

    // Clamp the polygon's vertices just in case some very near
    // points have wandered out of range due to floating-point
    // imprecision
    for (i=0 ; i<num_verts ; i++)
    {
        if (screenpoly->verts[i].x < -0.5)
            screenpoly->verts[i].x = -0.5;
        if (screenpoly->verts[i].x > ((float)DIBWidth - 0.5))
            screenpoly->verts[i].x = (float)DIBWidth - 0.5;
        if (screenpoly->verts[i].y < -0.5)
            screenpoly->verts[i].y = -0.5;
        if (screenpoly->verts[i].y > ((float)DIBHeight - 0.5))
            screenpoly->verts[i].y = (float)DIBHeight - 0.5;
    }

    // Add each edge in turn
    for (i=0 ; i<num_verts ; i++)
    {
        nextvert = i + 1;
        if (nextvert >= num_verts) nextvert = 0;
        topy = (int)ceil(screenpoly->verts[i].y);
        bottomy = (int)ceil(screenpoly->verts[nextvert].y);
        height = bottomy - topy;
        if (height == 0) continue;       // doesn't cross any scan lines

        // Let a be the vertex on the first scan line of the edge and b
        // the vertex on its last one
        if (height < 0)
        {
            // Leading edge: it runs upwards in vertex order
            temp = topy;
            topy = bottomy;
            bottomy = temp;
            pavailedge->leading = 1;
            a = nextvert;
            b = i;
        }
        else
        {
            // Trailing edge
            pavailedge->leading = 0;
            a = i;
            b = nextvert;
        }

        deltax = screenpoly->verts[b].x - screenpoly->verts[a].x;
        deltay = screenpoly->verts[b].y - screenpoly->verts[a].y;
        if (deltay != 0)
            slope = deltax / deltay;
        else
            slope = 0;

        // Edge coordinates are in 16.16 fixed point; x is evaluated at
        // the first scan line the edge covers
        pavailedge->xstep = (int)(slope * (float)0x10000);
        pavailedge->x = (int)((screenpoly->verts[a].x +
            ((float)topy - screenpoly->verts[a].y) * slope) *
            (float)0x10000);

        // 1/z at both ends; a vertex with z == 0 is treated as very near
        if (screenpoly->verts[a].z == 0.0)
            temp_invz = 999999.0;
        else
            temp_invz = 1.0 / screenpoly->verts[a].z;
        if (screenpoly->verts[b].z == 0.0)
            temp_ninvz = 999999.0;
        else
            temp_ninvz = 1.0 / screenpoly->verts[b].z;

        // u/z, v/z and 1/z are linear in screen space, so they are
        // what gets stepped along the edge
        pavailedge->invz = temp_invz;
        pavailedge->invu = screenpoly->verts[a].u * temp_invz;
        pavailedge->invv = screenpoly->verts[a].v * temp_invz;

        if (deltay != 0)
        {
            // Per-scan-line steps: spread the change over the number
            // of scan lines the edge covers
            deltay = fabs(height);
            pavailedge->dinvz = (temp_ninvz - temp_invz) / deltay;
            pavailedge->dinvu = (screenpoly->verts[b].u * temp_ninvz
               - pavailedge->invu) / deltay;
            pavailedge->dinvv = (screenpoly->verts[b].v * temp_ninvz
               - pavailedge->invv) / deltay;
        }
        else
        {
            pavailedge->dinvz = 0;
            pavailedge->dinvu = 0;
            pavailedge->dinvv = 0;
        }

        // Put the edge on the list to be added on top scan, keeping
        // the list sorted by x.  The walk stops at the latest at the
        // sentinel that ends the list.
        pedge = &newedges[topy];
        while (pedge->pnext->x < pavailedge->x)
            pedge = pedge->pnext;
        pavailedge->pnext = pedge->pnext;
        pedge->pnext = pavailedge;

        // Put the edge on the list to be removed after final scan
        pavailedge->pnextremove = removeedges[bottomy - 1];
        removeedges[bottomy - 1] = pavailedge;

        // Associate the edge with the surface we'll create for
        // this polygon
        pavailedge->psurf = pavailsurf;

        // Make sure we don't overflow the edge array
        if (pavailedge < &edges[MAX_EDGES-1]) pavailedge++;
    }

    // Create the surface, so we'll know how to sort and draw from
    // the edges
    pavailsurf->state = 0;
    pavailsurf->color = currentcolor;

    // Make sure we don't overflow the surface array
    if (pavailsurf < &surfs[MAX_SURFS - 1]) pavailsurf++;
}

/////////////////////////////////////////////////////////////////////
// Write the span of surface psurf that ends just before column xend
// on scan line y to *pspan.  The span starts at psurf->visxstart; it
// is only kept when it is at least one pixel wide.
//
// Returns the slot to use for the next span.  The last slot of spans[]
// is never advanced past, so it always stays available for the
// end-of-list marker.
/////////////////////////////////////////////////////////////////////
static span_t *EmitSurfSpan (span_t *pspan, const surf_t *psurf,
                             int xend, int y)
{
    pspan->count = xend - psurf->visxstart;
    if (pspan->count > 0)
    {
        pspan->y = y;
        pspan->x = psurf->visxstart;
        pspan->color = psurf->color;

        // The surface gradients are relative to column 0; evaluate
        // them at the first pixel of the span
        pspan->invu = psurf->invu + psurf->dinvu * (float)pspan->x;
        pspan->invv = psurf->invv + psurf->dinvv * (float)pspan->x;
        pspan->invz = psurf->invz + psurf->dinvz * (float)pspan->x;
        pspan->dinvu = psurf->dinvu;
        pspan->dinvv = psurf->dinvv;
        pspan->dinvz = psurf->dinvz;

        // Make sure we don't overflow the span array.  The bound used
        // to be &spans[MAX_SPANS], which let the pointer move one past
        // the array and be written there.
        if (pspan < &spans[MAX_SPANS - 1])
            pspan++;
    }
    return pspan;
}

/////////////////////////////////////////////////////////////////////
// Scan all the edges in the global edge table into spans.
//
// For every scan line: merge the edges starting on it into the active
// edge list, walk that list left to right while maintaining a stack of
// surfaces sorted by 1/z, emit a span whenever the front surface
// changes, then drop finished edges and step the rest to the next line.
// The result is written to spans[] and ended by an entry with x == -1.
/////////////////////////////////////////////////////////////////////
void ScanEdges (void)
{
    int     x, y;
    float  fx;
    float tinvz, tinvz2;
    edge_t  *pedge, *pedge2, *ptemp, *matching_edge;
    // static because surfstack.edge keeps pointing at it after return
    static edge_t  left_edge;
    span_t  *pspan;
    surf_t  *psurf, *psurf2;
    float  slice_width;

    pspan = spans;

    left_edge.invu = 0;
    left_edge.invv = 0;
    left_edge.invz = -999999.0;
    left_edge.dinvu = 0;
    left_edge.dinvz = 0;
    left_edge.dinvv = 0;

    // Set up the active edge list as initially empty, containing
    // only the sentinels (which are also the background fill). Most
    // of these fields could be set up just once at start-up
    edgehead.pnext = &edgetail;
    edgehead.pprev = NULL;
    edgehead.x = -0xFFFF;           // left edge of screen
    edgehead.leading = 1;
    edgehead.psurf = &surfstack;

    edgetail.pnext = NULL;          // mark edge of list
    edgetail.pprev = &edgehead;
    edgetail.x = DIBWidth << 16;    // right edge of screen
    edgetail.leading = 0;
    edgetail.psurf = &surfstack;

    // The background surface is the entire stack initially, and
    // is infinitely far away, so everything sorts in front of it.
    // This could be set just once at start-up
    surfstack.pnext = surfstack.pprev = &surfstack;
    surfstack.color = 0;
    surfstack.zinv00 = -999999.0;
    surfstack.invz = -999999.0;
    surfstack.dinvz = 0;
    surfstack.zinvstepx = surfstack.zinvstepy = 0.0;
    surfstack.edge = &left_edge;

    for (y=0 ; y<DIBHeight ; y++)
    {
        // Sort in any edges that start on this scan.  Both lists are
        // sorted by x, so pedge2 only ever moves forward.
        pedge = newedges[y].pnext;
        pedge2 = &edgehead;
        while (pedge != &maxedge)
        {
            if (pedge == NULL || pedge2 == NULL || pedge2->pnext == 0)
            {
              printf("pedge==NULL||pedge2==NULL||pedge2->pnext==0 in ScanEdges\n");
              exit(0);
            }
            while (pedge->x > pedge2->pnext->x)
                pedge2 = pedge2->pnext;

            ptemp = pedge->pnext;
            pedge->pnext = pedge2->pnext;
            pedge->pprev = pedge2;
            pedge2->pnext->pprev = pedge;
            pedge2->pnext = pedge;

            pedge2 = pedge;
            pedge = ptemp;
        }

        // Scan out the active edges into spans

        // Start out with the left background edge already inserted,
        // and the surface stack containing only the background
        surfstack.state = 1;
        surfstack.visxstart = 0;

        for (pedge=edgehead.pnext ; pedge ; pedge=pedge->pnext)
        {
            psurf = pedge->psurf;
            if (psurf == 0)
            {
               printf("psurf == 0 in ScanEdges\n");
               exit(0);
            }

            if (pedge->leading)
            {
                // set surface edge upon finding leading edge
                psurf->edge = pedge;

                // Find the next edge of the same surface further right
                // to derive the per-pixel gradients across this line
                matching_edge = pedge->pnext;
                while (matching_edge && matching_edge != &maxedge)
                {
                   if (matching_edge->psurf == psurf) break;
                   matching_edge = matching_edge->pnext;
                }
                if (matching_edge == 0)
                  slice_width = 0;
                else
                  slice_width = (float)matching_edge->x * (1.0 / (float)0x10000)
                            - (float)pedge->x * (1.0 / (float)0x10000);
                if (slice_width != 0 && matching_edge != &maxedge)
                {
                   psurf->dinvu = (matching_edge->invu - pedge->invu)
                                 / slice_width;
                   psurf->dinvv = (matching_edge->invv - pedge->invv)
                                 / slice_width;
                   psurf->dinvz = (matching_edge->invz - pedge->invz)
                                 / slice_width;
                }
                else
                {
                  psurf->dinvv = psurf->dinvz = psurf->dinvu = 0;
                }

                // Store the values extrapolated back to column 0 so
                // that value(x) = base + gradient * x
                fx = (float)pedge->x * (1.0 / (float)0x10000);
                psurf->invu = pedge->invu - fx * psurf->dinvu;
                psurf->invv = pedge->invv - fx * psurf->dinvv;
                psurf->invz = pedge->invz - fx * psurf->dinvz;

                // It's a leading edge. Figure out where it is
                // relative to the current surfaces and insert in
                // the surface stack; if it's on top, emit the span
                // for the current top.
                // First, make sure the edges don't cross
                if (++psurf->state == 1)
                {
                    // Compare 1/z of this surface and of the current
                    // top at this pixel; larger 1/z is nearer
                    psurf2 = surfstack.pnext;
                    tinvz2 = psurf2->invz + psurf2->dinvz * fx;
                    tinvz = psurf->invz + psurf->dinvz * fx;
                    if (tinvz >= tinvz2)
                    {
                        // It's a new top surface
                        // emit the span for the current top
                        x = (pedge->x + 0xFFFF) >> 16;
                        pspan = EmitSurfSpan(pspan, psurf2, x, y);

                        psurf->visxstart = x;

                        // Add the edge to the stack
                        psurf->pnext = psurf2;
                        psurf2->pprev = psurf;
                        surfstack.pnext = psurf;
                        psurf->pprev = &surfstack;
                    }
                    else
                    {
                        // Not a new top; sort into the surface stack.
                        // Guaranteed to terminate due to sentinel
                        // background surface
                        do {
                           psurf2 = psurf2->pnext;
                           tinvz2 = psurf2->invz + psurf2->dinvz * fx;
                        } while (tinvz < tinvz2);
                        // Insert the surface into the stack
                        psurf->pnext = psurf2;
                        psurf->pprev = psurf2->pprev;
                        psurf2->pprev->pnext = psurf;
                        psurf2->pprev = psurf;
                    }
                }
            }
            else
            {
                // It's a trailing edge; if this was the top surface,
                // emit the span and remove it.
                // First, make sure the edges didn't cross
                if (--psurf->state == 0)
                {
                    if (surfstack.pnext == psurf)
                    {
                        // It's on top, emit the span
                        x = ((pedge->x + 0xFFFF) >> 16);
                        pspan = EmitSurfSpan(pspan, psurf, x, y);

                        // The surface behind becomes visible from here
                        psurf->pnext->visxstart = x;
                    }

                    // Remove the surface from the stack
                    psurf->pnext->pprev = psurf->pprev;
                    psurf->pprev->pnext = psurf->pnext;
                }
            }
        }

        // Remove edges that are done
        pedge = removeedges[y];
        while (pedge)
        {
            pedge->pprev->pnext = pedge->pnext;
            pedge->pnext->pprev = pedge->pprev;
            pedge = pedge->pnextremove;
        }

        // Step the remaining edges one scan line, and re-sort
        for (pedge=edgehead.pnext ; pedge != &edgetail ; )
        {
            ptemp = pedge->pnext;

            // Step the edge
            pedge->x += pedge->xstep;

            // Step the texture
            pedge->invu += pedge->dinvu;
            pedge->invv += pedge->dinvv;
            pedge->invz += pedge->dinvz;

            // Move the edge back to the proper sorted location,
            // if necessary
            while (pedge->x < pedge->pprev->x)
            {
                pedge2 = pedge->pprev;
                pedge2->pnext = pedge->pnext;
                pedge->pnext->pprev = pedge2;
                pedge2->pprev->pnext = pedge;
                pedge->pprev = pedge2->pprev;
                pedge->pnext = pedge2;
                pedge2->pprev = pedge;
            }

            pedge = ptemp;
        }
    }

    pspan->x = -1;  // mark the end of the list
}

// State shared between DrawSpans and the inline-assembly loops below,
// which refer to these variables by name.
// iru, irv: texture coordinates scaled by 65536 (16.16 fixed point);
// irdu, irdv: their per-pixel steps.
int iru, irv, irdu, irdv;
// Length of the current subdivision run in DrawSpans
unsigned int lim;
// Destination pixel buffer, defined elsewhere
extern char *pix_mem;
// Texture the texture loops read from; DrawSpans sets it per span
unsigned char *texture_map1;
// Written by timestart() / timestop()
unsigned long clock_start, clock_cycles;

// Thanks to John DiCamillo for telling me the undocumented instructions
// used in timestart and timestop
//
// The two bytes 0x0f 0x31 are the opcode of RDTSC, which loads the
// processor's time-stamp counter into EDX:EAX.  timestart keeps the low
// 32 bits in clock_start.
void timestart();
#pragma aux timestart = \
   "db 0x0f"\
   "db 0x31"\
   "mov clock_start,eax"\
   modify [EDX EAX];

// Reads the time-stamp counter again (0x0f 0x31 = RDTSC) and stores the
// low 32 bits minus clock_start in clock_cycles, i.e. the cycles
// elapsed since timestart().  Only the low 32 bits are used.
void timestop();
#pragma aux timestop = \
   "db 0x0f"\
   "db 0x31"\
   "sub eax,clock_start"\
   "mov clock_cycles,eax"\
   modify [EDX EAX];

// EBP  count
// EAX  Vf       0   color
// EBX  Tex Base Vi  Ui
// ECX  dVf      ?   ?
// EDX  Uf       dVi dUi
// ESI  dUf      0   0
// EDI  Screen Pointer

// Texture-mapped span, one byte per pixel.
//   scr   -> EDI  address of the first destination pixel
//   count -> CX   number of pixels (16 bit).  The counter is tested
//                 after each pixel, so a count of 0 does not mean
//                 "no pixels".
//   iru   -> EBX  starting u, 16.16 fixed point, stepped by global irdu
//   irv   -> EDX  starting v, 16.16 fixed point, stepped by global irdv
// Each pixel is texture_map1[((v >> 16) << 8) + (u >> 16)], so texture
// rows are taken to be 256 bytes apart.  EBP is saved and restored
// around the loop.
void texloop(char *scr, short count, long iru, long irv);
#pragma aux texloop = \
   "push ebp"\
   "mov ebp,irdu"\
"$1:"\
   "mov esi,edx"\
   "mov eax,ebx"\
   "shr esi,16"\
   "add ebx,ebp"\
   "shl esi,8"\
   "shr eax,16"\
   "add esi,eax"\
   "add esi,texture_map1"\
   "add edx,irdv"\
   "mov al,[esi]"\
   "mov [edi],al"\
   "inc edi"\
   "dec cx"\
   "jnz $1"\
   "pop ebp"\
   parm [EDI] [CX] [EBX] [EDX]\
   modify [EDI ECX EBX EDX EAX ESI];

// Texture-mapped span that keeps both texture coordinates packed in one
// register and uses add/adc to carry the fractional part.
//   scr   -> EDI  address of the first destination pixel
//   count -> ECX  number of pixels; must not be 0 (the loop counts a
//                 negative index up to zero and tests after each pixel)
//   pos   -> EDX  packed starting position
//   step  -> EBX  packed per-pixel step
// Texels are read relative to texture_map1.
// NOTE(review): the only call in this file is commented out; it packs
// the arguments as ((iru << 8) & 0xffff0000) | ((irv >> 8) & 0x0000ffff)
// and the same for the steps.  The exact bit layout expected here
// should be confirmed against that call before the routine is reused.
void texloop8(char *scr, long count, long pos, long step);
#pragma aux texloop8 = \
   "push ebp"\
   "mov esi,texture_map1"\
   "mov ebp,ecx"\
   "mov eax,edx"\
   "rol edx,16"\
   "shr eax,24"\
   "mov ecx,ebx"\
   "rol ebx,16"\
   "sar ecx,24"\
   "mov dh,dl"\
   "mov bh,bl"\
   "xor dl,dl"\
   "ror edx,16"\
   "xor bl,bl"\
   "ror ebx,16"\
   "mov ah,dh"\
   "add edi,ebp"\
   "neg ebp"\
"$1:"\
   "add edx,ebx"\
   "mov ah,[eax+esi]"\
   "adc eax,ecx"\
   "mov [edi+ebp],ah"\
   "mov ah,dh"\
   "inc ebp"\
   "jnz $1"\
   "pop ebp"\
   parm [EDI] [ECX] [EDX] [EBX]\
   modify [EDI ECX EBX EDX EAX ESI];

// Stores the byte found at [ESI] into `count` consecutive bytes at scr.
//   scr   -> EDI  address of the first destination pixel
//   count -> ECX  number of pixels; tested after each pixel, so it must
//                 not be 0
//   iru   -> EBX  not used by the loop
//   irv   -> EDX  not used by the loop
// NOTE(review): ESI is neither a parameter nor loaded or advanced here,
// so the source address is whatever ESI happens to hold at the call
// site.  This looks like a stub for timing the store loop - confirm
// before using it for real drawing.
void quickloop(char *scr, long count, long iru, long irv);
#pragma aux quickloop = \
   "mov eax,0x0f"\
"$1:"\
   "mov al,[esi]"\
   "mov [edi],al"\
   "inc edi"\
   "dec ecx"\
   "jnz $1"\
   parm [EDI] [ECX] [EBX] [EDX]\
   modify [ECX EAX EDI ESI];

// Linearly interpolated texture span; this is the loop DrawSpans calls.
//   scr   -> EDI  address of the first destination pixel
//   count -> ECX  number of pixels; must not be 0 (the loop counts a
//                 negative index up to zero and tests after each pixel)
//   iru   -> EBX  starting u, 16.16 fixed point, stepped by global irdu
//   irv   -> EDX  starting v, 16.16 fixed point, stepped by global irdv
// Each pixel is texture_map1[(((v >> 16) << 8) & 0xff00) + (u >> 16)]:
// rows are 256 bytes apart and the row number wraps at 256, while the
// column (u >> 16) is not masked.  EBP is saved and restored.
void lineartexloop(char *scr, long count, long iru, long irv);
#pragma aux lineartexloop = \
   "push ebp"\
   "mov ebp,texture_map1"\
   "add edi,ecx"\
   "neg ecx"\
"$1:"\
   "mov esi,edx"\
   "mov eax,ebx"\
   "shr esi,16"\
   "shl esi,8"\
   "and esi,0x0000ff00"\
   "shr eax,16"\
   "add esi,ebp"\
   "add ebx,irdu"\
   "mov al,[esi+eax]"\
   "add edx,irdv"\
   "mov [edi+ecx],al"\
   "inc ecx"\
   "jnz $1"\
   "pop ebp"\
   parm [EDI] [ECX] [EBX] [EDX]\
   modify [EDI ECX EBX EDX EAX ESI];

// Variant of lineartexloop for textures whose rows are 64 bytes apart.
//   scr   -> EDI  address of the first destination pixel
//   count -> ECX  number of pixels; must not be 0 (tested after each
//                 pixel)
//   iru   -> EBX  starting u, 16.16 fixed point, stepped by global irdu
//   irv   -> EDX  starting v, 16.16 fixed point, stepped by global irdv
// Each pixel is texture_map1[((v >> 16) << 6) + (u >> 16)]; neither
// coordinate is masked.  EBP is saved and restored.
void okaylineartexloop(char *scr, long count, long iru, long irv);
#pragma aux okaylineartexloop = \
   "push ebp"\
   "mov ebp,irdu"\
   "add edi,ecx"\
   "neg ecx"\
"$1:"\
   "mov esi,edx"\
   "mov eax,ebx"\
   "shr esi,16"\
   "add ebx,ebp"\
   "shl esi,6"\
   "shr eax,16"\
   "add esi,texture_map1"\
   "add edx,irdv"\
   "mov al,[esi+eax]"\
   "mov [edi+ecx],al"\
   "inc ecx"\
   "jnz $1"\
   "pop ebp"\
   parm [EDI] [ECX] [EBX] [EDX]\
   modify [EDI ECX EBX EDX EAX ESI];

// OldFPUCW: x87 control word saved by fpu32(); FPUCW: the modified copy.
short OldFPUCW, FPUCW;
// Saves the current x87 control word in OldFPUCW and loads a copy
// masked with 0xcff.  The mask clears bits 8-9, the precision-control
// field, which selects single precision, and also clears bits 12-15.
// DrawSpans calls this before its floating-point work; fpu64() undoes it.
void fpu32();
#pragma aux fpu32 = \
   "fstcw [OldFPUCW]"\
   "mov ax,OldFPUCW"\
   "and eax,0xcff"\
   "mov [FPUCW],ax"\
   "fldcw [FPUCW]"\
   modify [EAX];

// Reloads the x87 control word saved in OldFPUCW.  OldFPUCW is only
// written by fpu32(), so fpu32() has to run first.
void fpu64();
#pragma aux fpu64 = \
   "fldcw [OldFPUCW]";

// Earlier version of texloop that copies each texel with MOVSB.
//   scr   -> EDI  address of the first destination pixel
//   count -> CX   number of pixels (16 bit), tested after each pixel
//   iru   -> EBX  starting u, 16.16 fixed point, stepped by global irdu
//   irv   -> EDX  starting v, 16.16 fixed point, stepped by global irdv
// ESI is recomputed for every pixel as
// texture_map1 + ((v >> 16) << 8) + (u >> 16); MOVSB then copies that
// byte to [EDI] and advances EDI.
// NOTE(review): MOVSB moves forward only while the direction flag is
// clear; nothing here clears it - presumably guaranteed by the calling
// convention, confirm.
void oldtexloop(char *scr, short count, long iru, long irv);
#pragma aux oldtexloop = \
"$1:"\
   "mov esi,edx"\
   "mov eax,ebx"\
   "shr esi,16"\
   "shl esi,8"\
   "shr eax,16"\
   "add esi,eax"\
   "add ebx,irdu"\
   "add esi,texture_map1"\
   "add edx,irdv"\
   "movsb"\
   "dec cx"\
   "jnz $1"\
   parm [EDI] [CX] [EBX] [EDX]\
   modify [EDI ECX EBX EDX EAX ESI];

// File-scope scratch values used by DrawSpans: zinv holds a reciprocal
// of the interpolated 1/z, z2 the interpolated 1/z at the end of a run.
float zinv, z2;

/////////////////////////////////////////////////////////////////////
// Draw all the spans
/////////////////////////////////////////////////////////////////////
void DrawSpans (void)
{  
   unsigned long eax, ebx, ecx, edx, esi, edi;
   extern int no_stars;
   extern int no_textures;
   extern float psin[256];
   float u, v, z, u2, v2, ru, rv, ru2, rv2, dvperg, duperg, dzperg;
   float dinvu2, dinvv2, dinvz2;
   span_t  *pspan;
   unsigned int n, k, d;
   extern int do_ccpp;
   int left, right, start, end;
   int remain, num_subdiv;
   int i, j, y, x;
   float du, dv, dz;
   int tnum;
   char c;
   int bg_offset;
   i = 0;   
   if (!no_stars)
   {
      bg_offset = (int)((6.28 - yaw)/6.28 * 1280
                  + ((6.28 - pitch)/6.28 * NUMYPIXELS * NUMXPIXELS));
      bg_offset = bg_offset % (NUMXPIXELS * NUMYPIXELS);
      if (bg_offset < 0) bg_offset += NUMXPIXELS * NUMYPIXELS;
   }
   fpu32();
   if (do_ccpp) timestart();
   for (pspan=spans ; pspan->x != -1 ; pspan++)
   {
      tnum = pspan->color >> 8;
      if (tnum > NTEXTURES || tnum < 0) tnum = 15;
      texture_map1 = texture_map[tnum];
      // The blackness or stars of space
      if (pspan->color == 0)
      {
         if (no_stars) // if stars are turned off then just black
         {
            mymemset(&pix_mem[i], pspan->color, pspan->count);
            i += pspan->count;
         }
         else // draw the stars
         {
            start = i + bg_offset;
            if (start > NUMXPIXELS * NUMYPIXELS) start = start - NUMXPIXELS * NUMYPIXELS;
            end = start + pspan->count;
            right = end - NUMXPIXELS * NUMYPIXELS;
            if (right > 0) left = NUMXPIXELS * NUMYPIXELS - start;
            else left = pspan->count;
            mymemcpy(&pix_mem[i], &background_texture[start], left);
            i += left;
            if (right > 0)
            {
               mymemcpy(&pix_mem[i], &background_texture[0], right);
               i += right;
            }
         }
      }
      else if (no_textures) // flat shaded polygon strip
      {
         mymemset(&pix_mem[i], pspan->color + 4, pspan->count);
         i += pspan->count;
      }
/*
      else if (0 && pspan->count < 4 && tnum != 0) // short span
      {
        u = pspan->invu; v = pspan->invv; z = pspan->invz;
        for (j=0; j<pspan->count; j++)
        {
           if (z != 0)
           {
              ru = u / z; rv = v / z;
              pix_mem[i] = texture_map1[(((int)rv)<<TSHIFT)+((int)ru)];
           }
           else pix_mem[i] = 0;
           u += pspan->dinvu; v += pspan->dinvv; z += pspan->dinvz;
           i++;
        }
      }
*/
      // The next code is for texture mapping spans that are
      // flat on in Z with no color modification
      else if (no_perspective_correct)
      {
         if (do_time)
         {
            iru = 0;
            irv = 0;
            irdu = 1;
            irdv = 1;
         }
         else
         {
            ru = pspan->invu / pspan->invz;
            rv = pspan->invv / pspan->invz;
            u2 = pspan->invu + pspan->dinvu * (float)(pspan->count);
            v2 = pspan->invv + pspan->dinvv * (float)(pspan->count);
            z2 = pspan->invz + pspan->dinvz * (float)(pspan->count);
            if (z2 != 0) {
                ru2 = u2 / z2;
                rv2 = v2 / z2;
            }
            else {
               ru2 = 0;
               rv2 = 0;
            }
            du = (ru2 - ru) / (pspan->count + 1);
            dv = (rv2 - rv) / (pspan->count + 1);
            iru = ru * 65536.0;
            irv = rv * 65536.0;
            irdu = du * 65536.0;
            irdv = dv * 65536.0;
         }
//         k = pspan->count + i;
//         while (i < k)
//         {
//              pix_mem[i++] = texture_map1[((irv >> 16)<<TSHIFT)+(iru >> 16)];
//              irv+=irdv; iru+=irdu;
//         }

         lineartexloop(&pix_mem[i], pspan->count, iru, irv);

//         texloop8(&pix_mem[i], pspan->count, ((iru << 8) & 0xffff0000)
//               | ((irv >> 8) & 0x0000ffff), ((irdu << 8) & 0xffff0000)
//               | ((irdv >> 8) & 0x0000ffff));

         i += pspan->count;
      }
/*
      else if (pspan->count < 20
         || fabs(pspan->dinvz) < .00001 && pspan->dinvz != 0)
      {
         ru = pspan->invu / pspan->invz;
         rv = pspan->invv / pspan->invz;
         u2 = pspan->invu + pspan->dinvu * (float)(pspan->count);
         v2 = pspan->invv + pspan->dinvv * (float)(pspan->count);
         z2 = pspan->invz + pspan->dinvz * (float)(pspan->count);
         if (z2 != 0) {
             ru2 = u2 / z2;
             rv2 = v2 / z2;
         }
         else {
            ru2 = 0;
            rv2 = 0;
         }
         du = (ru2 - ru) / (pspan->count + 1);
         dv = (rv2 - rv) / (pspan->count + 1);
         iru = ru * 65536.0;
         irv = rv * (float)0x10000;
         irdu = du * (float)0x10000;
         irdv = dv * (float)0x10000;
//         k = pspan->count + i;
//         while (i < k)
//         {
//              pix_mem[i++] = texture_map1[((irv >> 16)<<TSHIFT)+(iru >> 16)]
//                     + pspan->color;
//              irv+=irdv; iru+=irdu;
//         }
         texloop(&pix_mem[i], pspan->count, iru, irv);
         i += pspan->count;
      }
*/
      else // Perspective correct texture mapping every n pixels
      {
        j = 0;
        u = pspan->invu;
        v = pspan->invv;
        z = pspan->invz;
        if (z != 0) {
           zinv = 1.0 / z;
           ru = u * zinv; rv = v * zinv;
        } else { ru = rv = 0; }
#ifdef OLDTEXLOOP
        while (j < pspan->count) {
           lim = min(SUBDIV, pspan->count - j + 1);
           u2 = pspan->invu + pspan->dinvu * (float)(j + lim - 1);
           v2 = pspan->invv + pspan->dinvv * (float)(j + lim - 1);
           z2 = pspan->invz + pspan->dinvz * (float)(j + lim - 1);
           if (z2 != 0) {
               zinv = 1.0 / z2;
               ru2 = u2 * zinv; rv2 = v2 * zinv;
           } else {
              ru2 = rv2 = 0;
           }
           iru = (ru * 65536.0);
           irv = (rv * 65536.0);
           zinv = 1.0 / (float)lim * 65536.0;
           irdu = ((ru2 - ru) * zinv);
           irdv = ((rv2 - rv) * zinv);
           lim--;
           lineartexloop(&pix_mem[i], lim, iru, irv);
           i += lim;
           j += lim;
           ru = ru2;
           rv = rv2;
        }
#else
        num_subdiv = pspan->count / (SUBDIV - 1);
        remain = pspan->count % (SUBDIV - 1);
        dinvu2 = (float)pspan->dinvu * (float)(SUBDIV - 1);
        dinvv2 = (float)pspan->dinvv * (float)(SUBDIV - 1);
        dinvz2 = (float)pspan->dinvz * (float)(SUBDIV - 1);
        u2 = pspan->invu;
        v2 = pspan->invv;
        z2 = pspan->invz;
        for (k=0; k<num_subdiv; k++)
        {
           lim = SUBDIV;
           u2 += dinvu2;
           v2 += dinvv2;
           z2 += dinvz2;
           if (z2 != 0) {
               zinv = 1.0 / z2;
               ru2 = u2 * zinv; rv2 = v2 * zinv;
           } else {
              ru2 = ru;
              rv2 = rv;
           }
           iru = (ru * 65536.0);
           irv = (rv * 65536.0);
           irdu = (int)((ru2 - ru) * (float)(65536 / SUBDIV));
           irdv = (int)((rv2 - rv) * (float)(65536 / SUBDIV));
           lineartexloop(&pix_mem[i], SUBDIV-1, iru, irv);
           i += SUBDIV-1;
           ru = ru2;
           rv = rv2;
        }
        if (remain)
        {
           lim = remain + 1;
           u2 = pspan->invu + pspan->dinvu * (float)(pspan->count - 1);
           v2 = pspan->invv + pspan->dinvv * (float)(pspan->count - 1);
           z2 = pspan->invz + pspan->dinvz * (float)(pspan->count - 1);
           if (z2 != 0) {
               zinv = 1.0 / z2;
               ru2 = u2 * zinv; rv2 = v2 * zinv;
           } else {
              ru2 = ru;
              rv2 = rv;
           }
           iru = (ru * 65536.0);
           irv = (rv * 65536.0);
           zinv = 1.0 / (float)lim * 65536.0;
           irdu = ((ru2 - ru) * zinv);
           irdv = ((rv2 - rv) * zinv);
           lim--;
           lineartexloop(&pix_mem[i], lim, iru, irv);
           i += lim;
         }
#endif
      }
   }
   if (do_ccpp) timestop();
   fpu64();
   if (do_ccpp) clock_cycles /= (NUMXPIXELS * NUMYPIXELS);
   else clock_cycles = 0;
}

/////////////////////////////////////////////////////////////////////
// Clear the lists of edges to add and remove on each scan line.
/////////////////////////////////////////////////////////////////////
void ClearEdgeLists(void)
{
    int row = 0;

    // Point the next-available surface and edge pointers back at the
    // beginning of surfs and edges.
    pavailedge = edges;
    pavailsurf = surfs;

    // Reset both per-scanline lists for every row in [0, DIBHeight).
    while (row < DIBHeight)
    {
        removeedges[row] = NULL;          // no edges queued for removal
        newedges[row].pnext = &maxedge;   // add-list head links straight to maxedge
        row++;
    }
}
