/*  rxgrm.c */
/*  REGULAR EXPRESSION GRAMMAR  */
Goal
  -> RegularExpression <eof>
RegularExpression
  -> AltExpr              => rx_emit  RXNT_ROOT
AltExpr
  -> RegExpr
  -> AltExpr '|' RegExpr  => rx_emit  RXNT_ALT
RegExpr
  -> RXpr
  -> ^ RXpr               => rx_emit  RXNT_RX_BEGIN
  -> RXpr $               => rx_emit  RXNT_RX_END
  -> ^ RXpr $             => rx_emit  RXNT_RX_BOTH
RXpr
  -> Span
  -> RXpr Span            => rx_emit  RXNT_RX
Span
  -> TermList
  -> Span .*              => rx_emit  RXNT_SPAN_0_FROM
  -> Span .+              => rx_emit  RXNT_SPAN_1_FROM
  -> .* TermList          => rx_emit  RXNT_SPAN_0_TO
  -> .+ TermList          => rx_emit  RXNT_SPAN_1_TO
  -> Span .* TermList     => rx_emit  RXNT_SPAN_0
  -> Span .+ TermList     => rx_emit  RXNT_SPAN_1
TermList
  -> Term
  -> TermList Term        => rx_emit  RXNT_TERM_LIST
Term
  -> Primary *            => rx_emit  RXNT_STAR
  -> Primary +            => rx_emit  RXNT_PLUS
  -> Primary ?            => rx_emit  RXNT_BIN
  -> Primary
  -> .                    => rx_emit  RXNT_ANY_C
Primary
  -> Char
  -> <esc>                => rx_emit  RXNT_ESC_C
  -> ( AltExpr )
  -> [ ClassList ]        => rx_emit  RXNT_CLASS
  -> [^ ClassList ]       => rx_emit  RXNT_NOT_CLASS
ClassList
  -> ClassItem
  -> ClassList ClassItem  => rx_emit  RXNT_CLASS_LIST
ClassItem
  -> Char - Char          => rx_emit  RXNT_RANGE
  -> Char
  -> <esc>                => rx_emit  RXNT_ESC_C
Char  -> <c>              => rx_emit  RXNT_C

/*  rx.h */
/*  RX.H -- Header file for regular expression processing   */
#define  SPACE        ' '
#define  TAB          '\t'
#define  NL           '\n'
#define  NULLP        (char *) 0
#define  AND          &&
#define  OR           ||
#define  BACK_SLASH   '\\'
#define  isoctal(x)   ((x) >= '0'  AND  (x) <= '7')
typedef enum {
  ERROR_TOK,     EOF_TOK,      BEGIN_TOK,    /* --- Token Types --- */
  END_TOK,       ALT_TOK,      CHAR_TOK,
  ANY_CHAR_TOK,  ESCAPE_TOK,   LPAREN_TOK,
  RPAREN_TOK,    LBRACKET_TOK, NOT_TOK,
  RBRACKET_TOK,  STAR_TOK,     PLUS_TOK,
  BIN_TOK,       DASH_TOK,     SPAN0_TOK,
  SPAN1_TOK,
  RXNT_ROOT,          /* --- Node Types --- */
  RXNT_RX,            RXNT_RX_BEGIN,   RXNT_RX_END,
  RXNT_RX_BOTH,       RXNT_SPAN_0,     RXNT_SPAN_1,
  RXNT_SPAN_0_TO,     RXNT_SPAN_1_TO,  RXNT_SPAN_0_FROM,
  RXNT_SPAN_1_FROM,   RXNT_ALT,        RXNT_TERM_LIST,
  RXNT_STAR,          RXNT_PLUS,       RXNT_BIN,
  RXNT_C,             RXNT_ANY_C,      RXNT_ESC_C,
  RXNT_RANGE,         RXNT_CLASS,      RXNT_CLASS_LIST,
  RXNT_NOT_CLASS
}  RX_SYM;
typedef struct RX_NODE  {           /* --- REG EXPR NODE --- */
  RX_SYM   type;                    /* Node type (see above) */
  char     c;                       /* Character */
  char     flag;                    /* Flags: */
#define      RX_BEGIN    0x01       /*   Match beginning of field */
#define      RX_END      0x02       /*   Match end of field */
  struct RX_NODE *lnode;            /* Ptr to left-node */
  struct RX_NODE *rnode;            /* Ptr to right-node */
}  RXNODE;
#define  RXNODE_0   (RXNODE *) 0
#ifdef  RXTRNL        /* rx_main has this #define'd.  So has definitions */
  char   *token;                    /* Ptr to curr token. */
  char   *input;                    /* Ptr to char after curr token. */
  char   *T_beg;                    /* token-begin ptr */
  char   *T_end;                    /* token-end ptr */
  RXNODE *rx_root = (RXNODE *) 0;   /* Ptr to root node. */
  int     rx_match_pos = -1;        /* Position of match. */
  int     rx_match_len = -1;        /* Length of match. */
#else            /* Other modules just contain declarations */
  extern char   *input, *token, *T_beg, *T_end;
  extern RXNODE *rx_root;
  extern int     rx_match_pos, rx_match_len;
#endif   /* RXTRNL */
int  rx_parse      (char *);        /* --- Func Declarations --- */
void rx_print_tree (RXNODE *, int);
int  rx_match      (RXNODE *, char *);
int  rx_scan       (void);

/* rx_main.c */
/* RX_MAIN.C -- Driver program for the compiler  */
#define  RXTRNL
#include <string.h>
#include <stdio.h>
#include <dir.h>
#include <dos.h>
#include <rx.h>    /* globals defined here because RXTRNL is #define'd */
int  main (int argc, char **argv)
{
  int    i, rc, match_sw, lin;
  FILE  *stream;
  char  *rx, *cp;
  char   txt [258];
  struct ffblk ffblk;    /* For Turbo C "findfirst" & "findnext" functions. */
  if (argc != 3)  {
    printf ("RX.EXE ----- Usage is:\n");
    printf ("  RX \"<regexpr>\" <filemask>\n");
    printf ("Where <regexpr> is a regular expression, in double quotes.\n");
    printf ("And <filemask> is a DOS filename specification, wildcards OK.\n");
    exit (8);
  }
  rx = argv[1];
  i = rx_parse (rx);        /* Parse the reg. expr. */
  if (i)  {
    printf ("Parser error = %d\n", i);
    exit (4);
  }
  printf ("RegExpr ==> \"%s\"\n", rx);
  rc = findfirst (argv[2], &ffblk, 0);
  while (rc == 0)  {        /* Try to match lines */
    match_sw = 0;
    lin = 1;
    stream = fopen (ffblk.ff_name, "r");
    if (stream == (FILE *) 0)  {
      printf ("RX.EXE ----- Error OPENing data file '%s'.\n",
      strupr(ffblk.ff_name));
      exit(4);
    }
    printf ("File:  %s\n", strupr(ffblk.ff_name));
    txt[0]=0;
    while (fgets (txt+1, 256, stream) != 0)  {
      if ((cp = strchr (txt+1, NL)) != NULLP)
         *cp = 0;
      /* ----- Attempt to match the reg.expr. ----- */
      if (rx_match (rx_root, txt+1))  {
         match_sw = 1;
         printf ("%3d| %s\n", lin, txt+1);
      }
      lin++;
    }
    fclose (stream);
    if (!match_sw)
      printf ("No match occurred.\n");
    rc = findnext (&ffblk);
  }
  return;
}

/* rx_scan.c */
/* RX_SCAN -- Lexical analyzer for regular expressions  */
#include <ctype.h>
#include <rx.h>
static int in_brackets = 0;       /* In class brackets ? */
int  rx_scan  (void)
{
  token=input;
  switch (*input++)  {
    case 0:
    case NL:  return EOF_TOK;
    case '^': if (in_brackets)  return CHAR_TOK;
              return BEGIN_TOK;
    case '$': if (in_brackets)  return CHAR_TOK;
          return END_TOK;
    case '.': if (in_brackets)  return CHAR_TOK;
          if (*input=='*')  { ++input; return SPAN0_TOK; }
          if (*input=='+')  { ++input; return SPAN1_TOK; }
          return ANY_CHAR_TOK;
    case '[': ++in_brackets;
          if (*input=='^')  { ++input;  return NOT_TOK; }
          else              { return LBRACKET_TOK;  }
    case ']': --in_brackets; return RBRACKET_TOK;
    case '|': if (in_brackets)  return CHAR_TOK;
              return ALT_TOK;
    case '(': if (in_brackets)  return CHAR_TOK;
           return LPAREN_TOK;
    case ')': if (in_brackets)  return CHAR_TOK;
          return RPAREN_TOK;
    case '*': if (in_brackets)  return CHAR_TOK;
          return STAR_TOK;
    case '+': if (in_brackets)  return CHAR_TOK;
          return PLUS_TOK;
    case '-': if (in_brackets
              AND isalnum(*(input-2))
          AND isalnum(*input))
            return DASH_TOK;
          return CHAR_TOK;
    case '?': if (in_brackets)  return CHAR_TOK;
          return BIN_TOK;
    case BACK_SLASH:
     if (*input=='b'  OR  *input=='f'  OR  *input=='n'
     OR  *input=='r'  OR  *input=='t')    /* Std escape sequences */
       { ++input;  return ESCAPE_TOK;  }
     if (isoctal(*input) AND isoctal(*(input+1))
     AND isoctal(*(input+2)))        /* Octal number */
       { input+=3;  return ESCAPE_TOK; }
     ++input;                  /* Else, literal char */
     ++token;
     return CHAR_TOK;
    default:
     return CHAR_TOK;
  }
}

/* rx_emit.c */
/* RX_EMIT.C -- Emitter routines for abstract syntax tree.  */
#include <rx.h>
#define  RX_STK_SIZE      40               /* Arbitrary limits.  Should be */
#define  RX_NUM_NODES    200               /*   enough for most cases, tho. */
static RXNODE *rxstk [RX_STK_SIZE];        /* Semantic stack for reg expr. */
static int     idx = -1;                   /* Index into semantic stack. */
static RXNODE  rxnodearea [RX_NUM_NODES];  /* Stg for RXNODEs. */
static RXNODE *rxnp = rxnodearea - 1;      /* Pointer to last used node. */
static RXNODE *rxnp_max = rxnodearea + RX_NUM_NODES;   /* End of node area. */
static RXNODE *make_rxnode (int, RXNODE *, RXNODE *);   /* Func Decl. */
int rx_emit (int nodetype)
{
  char    c;
  int     i;
  RXNODE *np;
  switch (nodetype)  {
    case RXNT_ROOT:   rx_root = rxstk[idx];
              break;
    case RXNT_RX:
    case RXNT_STAR:
    case RXNT_PLUS:
    case RXNT_BIN:
    case RXNT_CLASS:
    case RXNT_NOT_CLASS:
     /* --- The single line of code below does it all!   1). Create a new
        node.  2). Pop the top node pointer off the semantic stack.
        3). Attach it as the new node's left-node pointer.  4). Push
        pointer to new node onto the stack.  */
     rxstk[idx] = make_rxnode (nodetype, rxstk[idx], RXNODE_0);
     break;
    case RXNT_RX_BEGIN:
    case RXNT_RX_END:
    case RXNT_RX_BOTH:
     rx_emit (RXNT_RX);    /* Note node-type changed to RXNT_RX */
     if (nodetype == RXNT_RX_BEGIN
     OR  nodetype == RXNT_RX_BOTH)
       rxstk[idx]->flag |= RX_BEGIN;
     if (nodetype == RXNT_RX_END
     OR  nodetype == RXNT_RX_BOTH)
       rxstk[idx]->flag |= RX_END;
     break;
    case RXNT_ALT:
    case RXNT_TERM_LIST:
    case RXNT_CLASS_LIST:
    case RXNT_RANGE:
    case RXNT_SPAN_0:
    case RXNT_SPAN_1:
     rxstk[idx-1] = make_rxnode (nodetype, rxstk[idx-1], rxstk[idx]);
     --idx;
     break;
    case RXNT_SPAN_0_TO:
    case RXNT_SPAN_1_TO:
     rxstk[idx] = make_rxnode (nodetype, RXNODE_0, rxstk[idx]);
     break;
    case RXNT_SPAN_0_FROM:
    case RXNT_SPAN_1_FROM:
     rxstk[idx] = make_rxnode (nodetype, rxstk[idx], RXNODE_0);
     break;
    case RXNT_C:
     rxstk[++idx] = make_rxnode (nodetype, RXNODE_0, RXNODE_0);
     rxstk[idx]->c = *T_beg;     /* Parser maintains "T_beg". */
     break;
    case RXNT_ANY_C:
     rxstk[++idx] = make_rxnode (nodetype, RXNODE_0, RXNODE_0);
     break;
    case RXNT_ESC_C:     /* Note node type changed to RXNT_C. */
     np = rxstk[++idx] = make_rxnode (RXNT_C, RXNODE_0, RXNODE_0);
     c = *(T_beg+1);
     if (c == 'b')  { np->c = '\b'; return 0; }
     if (c == 'f')  { np->c = '\f'; return 0; }
     if (c == 'n')  { np->c = '\n'; return 0; }
     if (c == 'r')  { np->c = '\r'; return 0; }
     if (c == 't')  { np->c = '\t'; return 0; }
     if (isoctal(*(T_beg+1))
     AND isoctal(*(T_beg+2))
     AND isoctal(*(T_beg+3)))  {
       i  = (*(T_beg+1) - '0') * 64;
       i += (*(T_beg+2) - '0') * 8;
       i +=  *(T_beg+3) - '0';
       np->c = i;
     }
     else
       np->c = *T_beg+1;
     break;
    default:  exit (3);     /* Something has gone wrong. */
  }
  if (idx >= RX_STK_SIZE)  {
    printf ("Node-pointer stack overflow.\n");
    exit (4);
  }
  return 0;
}
static RXNODE *make_rxnode (int type, RXNODE *lnode, RXNODE *rnode)
{
  if (++rxnp >= rxnp_max)  {        /* Alloc area for a new node */
    printf ("Node area overflow\n");
    exit (4);
  }
  rxnp->type = type;
  rxnp->lnode = lnode;
  rxnp->rnode = rnode;
  rxnp->c = rxnp->flag = 0;
  return (rxnp);
}

/* rx_match.c */
#include <string.h>
#include <rx.h>
static int match_node (RXNODE *, char *);     /* Local function */
int  rx_match  (RXNODE *np, char *txt)
{
  int   i, j;
  for (i=0; txt[i]; i++)  {
    j = match_node (np, txt+i);
    if (j >= 0)  {
      rx_match_pos = i;          /* A MATCH !!! */
      rx_match_len = j;
      return 1;
    }
  }
  rx_match_pos = -1;            /* No match. */
  rx_match_len = -1;
  return 0;
}
static int  match_node  (RXNODE *np, char *txt)
{
  int   i, j, n, dot;
  if (np == (RXNODE *) 0)  return 0;
  switch (np->type)  {
    case RXNT_RX:            /* Must match both sub-nodes. */
      if (np->flag & RX_BEGIN
      AND *(txt-1) != 0)
        return -1;
         i = match_node (np->lnode, txt);
         if (i<0)  return -1;
         j = match_node (np->rnode, txt+i);
         if (j<0)  return -1;
      if (np->flag & RX_END
      AND *(txt+i+j+1) != 0)
        return -1;
      return  i+j;
    case RXNT_ALT:           /* Match if either subnode matches. */
         i = match_node (np->lnode, txt);
      j = match_node (np->rnode, txt);
      return  i>j ? i : j;    /* Return the longer matching length. */
    case RXNT_TERM_LIST:     /* Must match both subnodes. */
         i = match_node (np->lnode, txt);
         if (i<0)  return -1;
         j = match_node (np->rnode, txt+i);
         if (j<0)  return -1;
         return i+j;
    case RXNT_PLUS:     /* Must match 1 or more occurance(s) of left-node. */
         i = match_node (np->lnode, txt);
         if (i<0)  return -1;
      /* ------ Fall into RXNT_STAR ----- */
    case RXNT_STAR:     /* Match 0 or more occurance(s) of left-node. */
      n = (np->type == RXNT_PLUS ? i : 0);   /* Account for fall-in. */
      while (1)  {
        i = match_node (np->lnode, txt+n);
        if (i<=0)  return n;
        n += i;
      }
    case RXNT_BIN:     /* Must match 0 or 1 occurance of left-node. */
         i = match_node (np->lnode, txt);
      return  i<0 ? 0 : i;
    case RXNT_RANGE:         /* Must be in range, inclusive. */
         return  (*txt < np->lnode->c  OR  *txt > np->rnode->c) ? -1 : 1;
    case RXNT_CLASS:         /* Must match left-node. */
         i = match_node (np->lnode, txt);
      return  i>0 ? i : -1;
    case RXNT_CLASS_LIST:    /* Must match either subnode. */
         i = match_node (np->lnode, txt);
         if (i>0)  return i;
      j = match_node (np->rnode, txt);
      if (j>0)  return j;
         return -1;
    case RXNT_NOT_CLASS:     /* Must not match left-node. */
         i = match_node (np->lnode, txt);
      return  i>0 ? -1 : 1;
    case RXNT_SPAN_0:
    case RXNT_SPAN_1:
      i = match_node (np->lnode, txt);
      if (i<0)  return -1;
      if (i >= strlen(txt)  AND  np->type == RXNT_SPAN_1)
        return -1;
      dot = (np->type == RXNT_SPAN_1 ? 1 : 0);
      for (n=strlen(txt+i+dot); n>=0; n--)  {
        j = match_node (np->rnode, txt+i+dot+n);
        if (j>=0)  return  i+dot+n+j;
      }
      return -1;
    case RXNT_SPAN_0_TO:
    case RXNT_SPAN_1_TO:
      dot = (np->type == RXNT_SPAN_1_TO ? 1 : 0);
      for (n=strlen(txt+dot); n>=0; n--)  {
        i = match_node (np->rnode, txt+dot+n);
        if (i>=0)  return dot+n+i;
      }
      return -1;
    case RXNT_SPAN_0_FROM:
    case RXNT_SPAN_1_FROM:
      i = match_node (np->lnode, txt);
      if (i<0)  return -1;
      if (i >= strlen(txt)  AND  np->type == RXNT_SPAN_1_FROM)
        return -1;
      return strlen(txt);
    case RXNT_C:        /* Must match the character. */
      return  *txt == np->c ? 1 : -1;
    case RXNT_ANY_C:     /* Matches anything except end-of-string. */
      return  *txt ? 1 : -1;
    default:    /* "I don't think we're in Kansas anymore, Toto." */
         exit (1);
  }
}
                                               