/*  Copyright (C) 1993   Marc Stern  (internet: stern@mble.philips.be)  */

#include "strings.h"
#include <stdlib.h>


/*
   Functions :   matchset
                 match
                 recursexp
                 regexp
*/



/***
 *  Function    :  matchset
 *
 *  Description :  Test if a character matches a set expression.
 *
 *  Parameters  :  in    char c          character to be matched
 *                 in    char *pattern   set expression to match against,
 *                                       without the enclosing brackets
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *              \  quote next character      -  range of values
 *              ^  non-inclusion (if first character)
 *
 *              ex: aeiou0-9   match a, e, i, o, u, and 0 thru 9
 *                  ^aeiou0-9  match anything but a, e, i, o, u, and 0 thru 9
 *
 *  Return      :  1 or 0
 *
 *  OS/Compiler :  All
 ***/

int matchset( char c, char *pattern )

{ const char *prev;       /* character preceding the one being examined */
  char low, high;         /* bounds of the range being examined */
  int match_ok = 1;       /* value returned when c is found in the set */

  if ( ! c ) return 0;    /* the string terminator never matches */

  /* A leading '^' inverts the result: found -> 0, not found -> 1 */
  if ( *pattern == '^' ) { match_ok = 0; pattern ++; }

  for ( prev = pattern; *pattern; prev = pattern++ )
      {
        /* A '-' introduces a range only when it has a character on both
           sides; as first or last character of the set it is a literal.
           (Without the test on pattern[1] the terminator would be used
           as a range bound and then stepped over.)                      */
        if ( (*pattern == '-') && (pattern != prev) && pattern[1] )
           {
             pattern ++;
             if ( (*pattern == '\\') && pattern[1] ) pattern ++;

             /* The bounds may be given in either order */
             low = *prev; high = *pattern;
             if ( low > high ) { low = *pattern; high = *prev; }

             if ( (low <= c) && (c <= high) ) return match_ok;
           }

        /* A backslash quotes the next character; a backslash that ends
           the set has nothing to quote and stands for itself.          */
        else if ( (*pattern == '\\') && pattern[1] ) pattern ++;

        if ( *pattern == c ) return match_ok;
      }

  return (! match_ok);
}




/***
 *  Function    :  match (internal)
 *
 *  Description :  Returns the number of characters of a string matched
 *                 by a one-character regular expression.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .  any character             \  quote next character
 *                 [] set of characters
 *
 *  Parameters  :  in   char *string    input string to be matched
 *                 in   char *pattern   regular expression to match
 *
 *  Side-effects:  pattern contents will be destroyed.
 *
 *  Return      :  number of character matched by regular expression
 *                 0 if not matched
 *
 *  OS/Compiler :  All
 ***/

static int near match( const char *string, char *pattern )

{ char *ptr;
  int  length = 0;

  /* In every case the last character of the element is overwritten
     with '\0'; recursexp() skips to that terminator to find the rest
     of the pattern.                                                  */
  switch ( *pattern )
         {
           /* Any character: claim the whole remaining string */
           case '.' : *pattern = '\0';
                      length = (int) strlen(string);
                      break;

           /* Set: find the closing ']' that is not preceded by a
              backslash. If there is none, the set runs up to the end
              of the pattern.                                         */
           case '[' : for ( ptr = ++pattern;
                            *ptr && ! (*ptr == ']' && *(ptr - 1) != '\\');
                            ptr ++ );

                      if ( *ptr ) *ptr = '\0';

                      /* matchset() rejects '\0', so the scan stops at
                         the end of string at the latest               */
                      while ( matchset(*string++, pattern) ) length ++;
                      break;

           /* Quoted character. A backslash that ends the pattern has
              nothing to quote and stands for itself; stepping onto the
              terminator would make the loop below compare against
              '\0' and run past the end of string.                     */
           case '\\': if ( pattern[1] ) pattern ++;
                      /* fall through */

           /* Literal character: count its consecutive occurrences */
           default  : while ( *pattern && (*string++ == *pattern) ) length ++;
                      *pattern = '\0';
                      break;
          }

  return length;
}




/***
 *  Function    :  recursexp
 *
 *  Description :  Returns the number of characters of a string matched
 *                 by a regular expression.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 .  any character             \  quote next character
 *                 *  match zero or more        +  match one or more
 *                 [] set of characters
 *
 *
 *  Parameters  :  in   char *string    input string to be matched
 *                 in   char *pattern   regular expression to match
 *
 *  Return      :  number of character matched by regular expression
 *                 -1 if not matched
 *
 *  OS/Compiler :  All
 ***/

int recursexp( const char *string, char *pattern )

{ int count1, count2 = -1;
  int minone = 1;           /* least number of characters the first
                               element must consume (0 only for '*') */
  size_t length;
  char *buffer;             /* private copy of pattern, owned here   */
  char *pattrn;             /* cursor inside buffer                  */

  /* An empty pattern matches the empty string */
  if ( ! *pattern ) return 0;

  /* match() writes into the pattern, so work on a copy */
  length = strlen( pattern );
  buffer = strdup( pattern );
  if ( ! buffer ) return -1;         /* out of memory: report no match */

  /* Longest run of characters accepted by the first element (>= 0) */
  count1 = match( string, buffer );

  /* match() ended the first element with '\0': step past it to reach
     the quantifier or the next element. If the element ran up to the
     end of the pattern (e.g. a set without its closing bracket), stay
     on the final terminator instead of leaving the buffer.            */
  for ( pattrn = buffer; *pattrn++; );
  if ( pattrn > buffer + length ) pattrn = buffer + length;

  switch ( *pattrn )
         {
           /* Last element, no quantifier: exactly one character */
           case '\0': free( buffer );
                      return count1 ? 1 : -1;

           /* Zero or more */
           case '*': pattrn ++;
                     minone = 0;
                     break;

           /* One or more */
           case '+': pattrn ++;
                     break;

           /* No quantifier: exactly one character */
           default : if ( count1 > 1 ) count1 = 1;
                     break;
         }

  /* Try the longest run first and give characters back one at a time
     until the rest of the pattern matches, never going below the
     minimum the quantifier allows.                                   */
  for ( ; count1 >= minone; count1 -- )
      {
        count2 = recursexp( string + count1, pattrn );
        if ( count2 >= 0 ) break;
      }

  /* Release the copy through its original address, not the cursor */
  free( buffer );

  if ( count2 < 0 ) return -1;

  return (count1 + count2);
}




/***
 *  Function    :  regexp
 *
 *  Description :  Returns the string matched by a regular expression
 *                 into a string.
 *
 *  Decisions   :  The following symbols are treated specially:
 *
 *                 ^  start of line             $  end of line
 *                 .  any character             \  quote next character
 *                 *  match zero or more        +  match one or more
 *                 [] set of characters
 *
 *              ex: [aeiou0-9]   match a, e, i, o, u, and 0 thru 9
 *                  [^aeiou0-9]  match anything but a, e, i, o, u, and 0 thru 9
 *
 *  Parameters  :  out   char *outstr    resulting string
 *                 in    char *string    input string in which we search
 *                 in    char *pattern   regular expression to match
 *
 *  Return      :  - pointer to resulting string
 *                 - if ( outstr == NULL ) returns pointer to matched string
 *                   inside 'string'.
 *
 *  OS/Compiler :  All
 ***/

char *regexp( char *outstr, const char *string, const char *pattern )

{ char *buffer;             /* private copy of pattern, owned here */
  char *pattrn;             /* pattern without its '^' / '$' anchors */
  size_t length;
  int count = -1;           /* length of the match, -1 while none found */
  int begin = 0, end = 0;

  buffer = strdup( pattern );
  if ( ! buffer )           /* out of memory: behave as "nothing matched" */
     {
       if ( outstr ) *outstr = '\0';
       return outstr;       /* NULL when no output buffer was supplied */
     }
  pattrn = buffer;
  length = strlen( pattrn );

  /* Match end of line: a trailing '$' that is not quoted */
  if ( length && (pattrn[length - 1] == '$')
       && ((length < 2) || (pattrn[length - 2] != '\\')) )
     {
       end = 1;
       pattrn[length - 1] = '\0';
     }

  if ( *pattrn == '^' )        /* Match begin of line */
     {
       begin = 1;
       pattrn ++;
     }

  /* Try every start position, or only the first one when anchored
     with '^'. With '$' a match that stops short of the end of string
     is rejected so that a later start position can still succeed.    */
  for ( ; *string; string ++ )
      {
        count = recursexp( string, pattrn );
        if ( end && (count >= 0) && ((size_t) count != strlen(string)) )
           count = -1;
        if ( (count >= 0) || begin ) break;
      }

  /* Release the copy through its original address: pattrn may have
     been advanced past '^'.                                         */
  free( buffer );

  /* Nothing matched: report an empty result */
  if ( count < 0 ) count = 0;

  if ( outstr )
     {
       /* presumably copies the first 'count' characters of string
          into outstr -- strleft() is defined elsewhere             */
       strleft( outstr, string, count );
       return outstr;
     }
  return (char *) string;
}


#ifdef TEST

#include <stdio.h>
#include <stdlib.h>

/* Read one line from stdin into buf (at most size - 1 characters) and
   strip the trailing newline. Returns 0 at end of input, 1 otherwise. */
static int read_line( char *buf, int size )

{ char *p;

  if ( ! fgets(buf, size, stdin) ) return 0;

  for ( p = buf; *p && (*p != '\n'); p ++ );
  *p = '\0';

  return 1;
}


/* Test driver: run one fixed check, then match interactively entered
   string / pattern pairs until end of input.                         */
int main( void )

{ char string[255], pattern[255], result[255];

  regexp( result, "I123", "[^A-Z\\-^][0-9]+\\.*" );
  printf( "\n    Result : %s\n\n", result );

  for (;;) {
             printf( "\n    String : " );
             if ( ! read_line(string, (int) sizeof string) ) break;
             printf( "    Pattern: " );
             if ( ! read_line(pattern, (int) sizeof pattern) ) break;

             regexp( result, string, pattern );
             printf( "\n    Result : %s\n\n", result );
           }

  return 0;
}

#endif
