/* xlread.c - xlisp input routines */
/*	Copyright (c) 1995, by David Michael Betz
	All Rights Reserved
	Permission is granted for unrestricted non-commercial use	*/

#include "xlisp.h"
#include <math.h>

/* symbol name constituents */
#define WSPACE "\t \f\r\n"
#define CONST1 "!$%&*+-./0123456789:<=>?@[]^_{}~"
#define CONST2 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

/* return values for readone() */
#define RO_EXPR		1
#define RO_COMMENT	2
#define RO_EOF		3

/* external variables */
extern LVAL s_package,s_quote,s_function,s_quasiquote,s_unquote,s_unquotesplicing,s_dot;
extern LVAL s_readtable,s_nmacro,s_tmacro,s_wspace,s_const,s_sescape,s_mescape;
extern LVAL eof_object;

/* forward declarations */
static int readone(LVAL fptr,LVAL *pval);
static LVAL read_list(LVAL fptr);
static void read_cdr(LVAL fptr,LVAL last);
static void read_comment(LVAL fptr);
static LVAL read_vector(LVAL fptr);
static LVAL read_comma(LVAL fptr);
static LVAL read_quote(LVAL fptr,LVAL sym);
static LVAL read_symbol(LVAL fptr);
static LVAL read_string(LVAL fptr);
static int read_special(LVAL fptr,int ch,LVAL *pval);
static LVAL read_radix(LVAL fptr,int radix);
static int isradixdigit(int ch,int radix);
static int getdigit(int ch);
static int getsymbol(LVAL fptr,char *buf);
static int scan(LVAL fptr);
static int checkeof(LVAL fptr);
static int isconstituent(int ch);
static LVAL tentry(int ch);

/* xlread - read an expression */
int xlread(LVAL fptr,LVAL *pval)
{
    int sts;

    /* read an expression skipping any leading comments */
    cpush(fptr);
    while ((sts = readone(fptr,pval)) == RO_COMMENT)
	;
        
    /* skip over any trailing spaces up to a newline */
    if (sts == RO_EXPR) {
        while (xliready(fptr)) {
            int ch = xlpeek(fptr);
            if (isspace(ch)) {
                xlgetc(fptr); /* skip over the character */
                if (ch == '\n')
                    break;
            }
            else
                break;
        }
    }
    drop(1);

    /* return with status */
    return sts == RO_EXPR;
}

/* readone - read a single expression (maybe) */
static int readone(LVAL fptr,LVAL *pval)
{
    int argc,ch;
    LVAL entry;

    /* get the next character */
    if ((ch = scan(fptr)) == EOF)
        return RO_EOF;
    
    /* check for a constituent */
    else if ((entry = tentry(ch)) == s_const) {
        xlungetc(fptr,ch);
        *pval = read_symbol(fptr);
        return RO_EXPR;
    }
    
    /* check for a read macro for this character (type . [function | vector]) */
    else if (consp(entry)) {
    	entry = cdr(entry);
        
        /* check for a dispatch macro */
        if (vectorp(entry)) {
            ch = xlgetc(fptr);
            if (ch == EOF)
                xlfmterror("unexpected end of file");
            else if (ch >= getsize(entry))
                xlfmterror("character out of bounds ~S",cvchar(ch));
            argc = CallFunction(pval,getelement(entry,ch),2,fptr,cvchar(ch));
        }
        
        /* handle a normal read macro */
        else
            argc = CallFunction(pval,entry,2,fptr,cvchar(ch));

	/* treat as white space if macro returned no values */
	return argc == 0 ? RO_COMMENT : RO_EXPR;
    }
    
    /* handle illegal characters */
    else {
        xlfmterror("unknown character ~S",cvchar(ch));
        return RO_COMMENT; /* never reached */
    }
}

/* xread - built-in function 'read' */
LVAL xread(void)
{
    LVAL val;

    /* get file pointer and eof value */
    xlval = moreargs() ? xlgetiport() : curinput();
    xllastarg();

    /* read an expression */
    if (!xlread(xlval,&val))
	val = eof_object;

    /* return the expression */
    return val;
}

/* xreaddelimitedlist - read a delimited list */
LVAL xreaddelimitedlist(void)
{
    LVAL fptr,last,val;
    int tch,ch;

    /* parse the argument list */
    tch = getchcode(xlgachar());
    fptr = moreargs() ? xlgetiport() : curinput();
    xllastarg();
    
    /* protect the input stream */
    cpush(fptr);
    
    /* build the list */
    xlval = last = v_nil;
    while ((ch = scan(fptr)) != tch) {
    	if (ch == EOF)
	    xlfmterror("unexpected EOF");
	xlungetc(fptr,ch);
	switch (readone(fptr,&val)) {
	case RO_EOF:
	    xlfmterror("unexpected EOF");
	case RO_EXPR:
	    if (val == s_dot)
		xlfmterror("misplaced dot");
	    else {
		val = cons(val,v_nil);
		if (last) rplacd(last,val);
		else xlval = val;
		last = val;
	    }
	    break;
	}
    }
    drop(1);
    return xlval;
}

/* xrmhash - read macro %RM-HASH */
void xrmhash(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* return zero arguments for comments */
    if (read_special(xlval,getchcode(mch),&xlval) == RO_COMMENT)
        mvreturn(0);
    svreturn();
}

/* xrmquote - read macro %RM-QUOTE */
void xrmquote(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* return the result */
    xlval = read_quote(xlval,s_quote);
    svreturn();
}

/* xrmdquote - read macro %RM-DOUBLE-QUOTE */
void xrmdquote(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* return the result */
    xlval = read_string(xlval);
    svreturn();
}

/* xrmbquote - read macro %RM-BACKQUOTE */
void xrmbquote(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* return the result */
    xlval = read_quote(xlval,s_quasiquote);
    svreturn();
}

/* xrmcomma - read macro %RM-COMMA */
void xrmcomma(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* return the result */
    xlval = read_comma(xlval);
    svreturn();
}

/* xrmlparen - read macro %RM-LEFT-PAREN */
void xrmlparen(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* return the result */
    xlval = read_list(xlval);
    svreturn();
}

/* xrmrparen - read macro %RM-RIGHT-PAREN */
void xrmrparen(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* illegal in this context */
    xlfmterror("misplaced right paren");
}

/* xrmsemi - read macro %RM-SEMICOLON */
void xrmsemi(void)
{
    LVAL mch;
    
    /* parse the argument list */
    xlval = xlgetiport();
    mch = xlgachar();
    xllastarg();
    
    /* skip over the comment */
    read_comment(xlval);
    mvreturn(0);
}

/* read_list - read a list */
static LVAL read_list(LVAL fptr)
{
    LVAL last,val;
    int ch;
    cpush(v_nil); last = v_nil;
    while ((ch = scan(fptr)) != ')') {
    	if (ch == EOF)
	    xlfmterror("unexpected EOF");
	xlungetc(fptr,ch);
	switch (readone(fptr,&val)) {
	case RO_EOF:
	    xlfmterror("unexpected EOF");
	case RO_EXPR:
	    if (val == s_dot) {
		if (last == v_nil)
		    xlfmterror("misplaced dot");
		read_cdr(fptr,last);
		return pop();
	    }
	    else {
		val = cons(val,v_nil);
		if (last) rplacd(last,val);
		else settop(val);
		last = val;
	    }
	    break;
	}
    }
    return pop();
}

/* read_cdr - read the cdr of a dotted pair */
static void read_cdr(LVAL fptr,LVAL last)
{
    LVAL val;
    int ch;
    
    /* read the cdr expression */
    if (!xlread(fptr,&val))
	xlfmterror("unexpected EOF");
    rplacd(last,val);
    
    /* check for the close paren */
    while ((ch = scan(fptr)) == ';')
	read_comment(fptr);
    if (ch != ')')
	xlfmterror("missing right paren");
}

/* read_comment - read a comment (to end of line) */
static void read_comment(LVAL fptr)
{
    int ch;
    while ((ch = xlgetc(fptr)) != EOF && ch != '\n')
	;
    if (ch != EOF) xlungetc(fptr,ch);
}

/* read_vector - read a vector */
static LVAL read_vector(LVAL fptr)
{
    int len=0,ch,i;
    LVAL last,val;
    
    cpush(v_nil); last = v_nil;
    while ((ch = scan(fptr)) != ')') {
	if (ch == EOF)
	    xlfmterror("unexpected EOF");
	xlungetc(fptr,ch);
	switch (readone(fptr,&val)) {
	case RO_EOF:
	    xlfmterror("unexpected EOF");
	case RO_EXPR:
	    val = cons(val,v_nil);
	    if (last) rplacd(last,val);
	    else settop(val);
	    last = val;
	    ++len;
	    break;
	}
    }
    val = newvector(len);
    for (last = pop(), i = 0; i < len; ++i, last = cdr(last))
	setelement(val,i,car(last));
    return val;
}

/* read_comma - read a unquote or unquote-splicing expression */
static LVAL read_comma(LVAL fptr)
{
    int ch;
    if ((ch = xlgetc(fptr)) == '@')
	return read_quote(fptr,s_unquotesplicing);
    else {
	xlungetc(fptr,ch);
	return read_quote(fptr,s_unquote);
    }
}

/* read_quote - parse the tail of a quoted expression */
static LVAL read_quote(LVAL fptr,LVAL sym)
{
    LVAL val;
    if (!xlread(fptr,&val))
	xlfmterror("unexpected EOF");
    cpush(cons(val,v_nil));
    settop(cons(sym,top()));
    return pop();
}

/* read_symbol - parse a symbol (or a number) */
static LVAL read_symbol(LVAL fptr)
{
    extern LVAL keywordpackage,k_external;
    char buf[STRMAX+1],*sname;
    LVAL package,val,key;
    
    /* get the symbol name */
    if (!getsymbol(fptr,buf))
	xlfmterror("expecting a symbol or number");
    
    /* check to see if it's a number */
    if (isnumber(buf,&val))
	return val;
    
    /* handle an implicit package reference */
    if ((sname = strchr(buf,':')) == '\0')
	return xlintern(buf,getvalue(s_package),&key);
	
    /* handle an explicit package reference */
    else {
	
	/* handle keywords */
	if (sname == buf) {
	    if (strchr(++sname,':'))
	        xlfmterror("invalid symbol ~A",cvcstring(sname));
	    return xlintern(sname,keywordpackage,&key);
	}
	
	/* terminate the package name */
	*sname++ = '\0';
	
	/* find the package */
	if ((package = findpackage(buf)) == v_nil)
	    xlfmterror("no package ~A",cvcstring(buf));
	    
	/* handle an internal symbol reference */
	if (*sname == ':') {
	    if (strchr(++sname,':'))
	        xlfmterror("invalid symbol ~A",cvcstring(sname));
	    return findsymbol(sname,package,&key);
	}
	
	/* handle an external symbol reference */
	else {
	    if (strchr(sname,':'))
	        xlfmterror("invalid symbol ~A",cvcstring(sname));
	    if ((val = findsymbol(sname,package,&key)) == v_nil || key != k_external)
	        xlfmterror("no external symbol ~A in ~S",cvcstring(sname),package);
	    return val;
	}
    }
}

/* read_string - parse a string */
static LVAL read_string(LVAL fptr)
{
    char buf[STRMAX],*p;
    int ch,d2,d3;
    FIXTYPE len;

    /* collect in the buffer to start */
    cpush(v_nil);
    p = buf;
    len = 0;
    
    /* loop looking for a closing quote */
    while ((ch = checkeof(fptr)) != '"') {

	/* handle escaped characters */
	switch (ch) {
	case '\\':
		switch (ch = checkeof(fptr)) {
		case 't':
			ch = '\011';
			break;
		case 'n':
			ch = '\012';
			break;
		case 'f':
			ch = '\014';
			break;
		case 'r':
			ch = '\015';
			break;
		default:
			if (ch >= '0' && ch <= '7') {
			    d2 = checkeof(fptr);
			    d3 = checkeof(fptr);
			    if (d2 < '0' || d2 > '7'
			     || d3 < '0' || d3 > '7')
				xlfmterror("invalid octal digit");
			    ch -= '0'; d2 -= '0'; d3 -= '0';
			    ch = (ch << 6) | (d2 << 3) | d3;
			}
			break;
		}
	}

	/* store the character */
	if (++len > STRMAX) {
	    if (top() == v_nil) {
	        FIXTYPE cnt = STRMAX;
	        settop(newustream());
	        for (p = buf; --cnt >= 0; )
	            xlputc(top(),*p++);    
	    }
	    xlputc(top(),ch);
	}
	else
	    *p++ = ch;
    }

    /* return the new string */
    return top() == v_nil ? pop(), cvstring(buf,len) : getstroutput(pop());
}

/* read_special - parse an atom starting with '#' */
static int read_special(LVAL fptr,int ch,LVAL *pval)
{
    char buf[STRMAX+1],buf2[STRMAX+3];
    int lastch;
    LVAL key;
    switch (ch) {
    case '!':
	if (getsymbol(fptr,buf)) {
	    if (strcmp(buf,"TRUE") == 0) {
		*pval = v_true;
		return RO_EXPR;
	    }
	    else if (strcmp(buf,"FALSE") == 0) {
		*pval = v_false;
		return RO_EXPR;
	    }
	    else if (strcmp(buf,"NULL") == 0) {
		*pval = v_nil;
		return RO_EXPR;
	    }
	    else {
		sprintf(buf2,"#!%s",buf);
		*pval = xlintern(buf2,getvalue(s_package),&key);
		return RO_EXPR;
	    }
	}
	else
	    xlfmterror("expecting symbol after '#!'");
	break;
    case '\\':
	ch = checkeof(fptr);	/* get the next character */
	xlungetc(fptr,ch);	/* but allow getsymbol to get it also */
	if (getsymbol(fptr,buf)) {
	    if (strcmp(buf,"NEWLINE") == 0)
		ch = '\n';
	    else if (strcmp(buf,"SPACE") == 0)
		ch = ' ';
	    else if (strlen(buf) > 1)
		xlerror("unexpected symbol after '#\\'",cvcstring(buf));
	}
	else			/* wasn't a symbol, get the character */
	    ch = checkeof(fptr);
	*pval = cvchar(ch);
	return RO_EXPR;
    case '(':
	*pval = read_vector(fptr);
	return RO_EXPR;
    case ':':
	if (!getsymbol(fptr,buf))
	    xlfmterror("expecting a symbol after #:");
	*pval = cvsymbol(cvcstring(buf));
	return RO_EXPR;
    case '\'':
	*pval = read_quote(fptr,s_function);
	return RO_EXPR;
    case 'b':
    case 'B':
	*pval = read_radix(fptr,2);
	return RO_EXPR;
    case 'o':
    case 'O':
	*pval = read_radix(fptr,8);
	return RO_EXPR;
    case 'd':
    case 'D':
	*pval = read_radix(fptr,10);
	return RO_EXPR;
    case 'x':
    case 'X':
        *pval = read_radix(fptr,16);
        return RO_EXPR;
    case '|':
	for (lastch = '\0'; (ch = xlgetc(fptr)) != EOF; lastch = ch)
	    if (lastch == '|' && ch == '#')
		break;
        if (ch == EOF)
            xlfmterror("End of file within comment");
        return RO_COMMENT;
    default:
	xlungetc(fptr,ch);
	if (getsymbol(fptr,buf)) {
	    if (strcmp(buf,"T") == 0) {
		*pval = v_true;
		return RO_EXPR;
	    }
	    else if (strcmp(buf,"F") == 0) {
		*pval = v_false;
		return RO_EXPR;
	    }
	    else
		xlerror("unexpected symbol after '#'",cvcstring(buf));
	}
	else
	    xlerror("unexpected character after '#'",cvchar(xlgetc(fptr)));
	break;
    }
    return RO_COMMENT; /* never reached */
}

/* read_radix - read a number in a specified radix */
static LVAL read_radix(LVAL fptr,int radix)
{
    char buf[STRMAX+1];
    LVAL val;
    int ch,i;

    /* get number */
    for (i = 0; (ch = xlgetc(fptr)) != EOF && isconstituent(ch); ) {
        if (islower(ch)) ch = toupper(ch);
	if (!isradixdigit(ch,radix))
	    xlerror("invalid digit",cvchar(ch));
	if (i < STRMAX)
            buf[i++] = ch;
    }
    buf[i] = '\0';

    /* save the break character */
    xlungetc(fptr,ch);

    /* convert the string to a number */
    isradixnumber(buf,radix,&val);
    return val;
}

/* isradixnumber - convert a string to a number in the specified radix */
int isradixnumber(char *str,int radix,LVAL *pval)
{
    FIXTYPE val = 0;
    int ch;
    
    /* get number */
    while ((ch = *str++) != '\0' && isconstituent(ch)) {
        if (islower(ch)) ch = toupper(ch);
	if (!isradixdigit(ch,radix))
	    return FALSE;
        val = val * radix + getdigit(ch);
    }

    /* return the number */
    *pval = cvfixnum(val);
    return TRUE;
}

/* isradixdigit - check to see if a character is a digit in a radix */
static int isradixdigit(int ch,int radix)
{
    switch (radix) {
    case 2:	return ch >= '0' && ch <= '1';
    case 8:	return ch >= '0' && ch <= '7';
    case 10:	return ch >= '0' && ch <= '9';
    case 16:	return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'F');
    }
    return FALSE; /* never reached */
}

/* getdigit - convert an ascii code to a digit */
static int getdigit(int ch)
{
    return ch <= '9' ? ch - '0' : ch - 'A' + 10;
}

/* getsymbol - get a symbol name */
static int getsymbol(LVAL fptr,char *buf)
{
    LVAL type;
    int ch,i;

    /* get symbol name */
    for (i = 0; (ch = xlgetc(fptr)) != EOF && (type = chartype(ch)) == s_const || type == s_nmacro; )
	if (i < STRMAX)
	    buf[i++] = (islower(ch) ? toupper(ch) : ch);
    buf[i] = '\0';

    /* save the break character */
    xlungetc(fptr,ch);
    return buf[0] != '\0';
}

/* isnumber - check if this string is a number */
int isnumber(char *str,LVAL *pval)
{
    int dl,dot,dr;
    char *p;

    /* initialize */
    p = str; dl = dot = dr = 0;

    /* check for a sign */
    if (*p == '+' || *p == '-')
	p++;

    /* check for a string of digits */
    while (isdigit(*p))
	p++, dl++;

    /* check for a decimal point */
    if (*p == '.') {
	p++; dot = 1;
	while (isdigit(*p))
	    p++, dr++;
    }

    /* check for an exponent */
    if ((dl || dr) && *p == 'E') {
	p++; dot = 1;

	/* check for a sign */
	if (*p == '+' || *p == '-')
	    p++;

	/* check for a string of digits */
	while (isdigit(*p))
	    p++, dr++;
    }

    /* make sure there was at least one digit and this is the end */
    if ((dl == 0 && dr == 0) || *p)
	return FALSE;

    /* convert the string to an integer and return successfully */
    if (pval) {
	if (*str == '+') ++str;
	if (str[strlen(str)-1] == '.') str[strlen(str)-1] = 0;
	*pval = (dot ? cvflonum(atof(str)) : cvfixnum(ICNV(str)));
    }
    return TRUE;
}

/* scan - scan for the first non-blank character */
static int scan(LVAL fptr)
{
    int ch;

    /* look for a non-blank character */
    while ((ch = xlgetc(fptr)) != EOF && tentry(ch) == s_wspace)
	;

    /* return the character */
    return ch;
}

/* checkeof - get a character and check for end of file */
static int checkeof(LVAL fptr)
{
    int ch;
    if ((ch = xlgetc(fptr)) == EOF)
	xlfmterror("unexpected EOF");
    return ch;
}

/* isconstituent - is this a symbol constituent? */
static int isconstituent(int ch)
{
    return chartype(ch) == s_const;
}

/* chartype - get readtable character type for a character */
LVAL chartype(int ch)
{
    LVAL entry = tentry(ch);
    return consp(entry) ? car(entry) : entry;
}

/* tentry - get readtable entry for a character */
static LVAL tentry(int ch)
{
    extern LVAL s_readtable;
    LVAL rtable = getvalue(s_readtable);
    if (vectorp(rtable) && ch >= 0 && ch < getsize(rtable))
	return getelement(rtable,ch);
    return v_nil;
}

/* defmacro - define a read macro */
static void defmacro(int ch,LVAL type,char *name)
{
    setelement(getvalue(s_readtable),ch,cons(type,getvalue(xlenter(name))));
}

/* defdmacro - define a dispatching read macro */
static void defdmacro(LVAL dtable,int ch,char *name)
{
    setelement(dtable,ch,getvalue(xlenter(name)));
}

/* xlrinit - initialize the reader */
void xlrinit(void)
{
    LVAL rtable,dtable;
    char *p;
    int ch;
    
    /* create the read table */
    rtable = newvector(256);
    setvalue(s_readtable,rtable);
    
    /* initialize the readtable */
    for (p = WSPACE; (ch = *p++) != '\0'; )
        setelement(rtable,ch,s_wspace);
    for (p = CONST1; (ch = *p++) != '\0'; )
        setelement(rtable,ch,s_const);
    for (p = CONST2; (ch = *p++) != '\0'; )
        setelement(rtable,ch,s_const);
        
    /* install the built-in read macros */
    defmacro('\'',s_tmacro,"%RM-QUOTE");
    defmacro('"', s_tmacro,"%RM-DOUBLE-QUOTE");
    defmacro('`' ,s_tmacro,"%RM-BACKQUOTE");
    defmacro(',', s_tmacro,"%RM-COMMA");
    defmacro('(', s_tmacro,"%RM-LEFT-PAREN");
    defmacro(')', s_tmacro,"%RM-RIGHT-PAREN");
    defmacro(';', s_tmacro,"%RM-SEMICOLON");
    
    /* setup the # dispatch table */
    dtable = newvector(256);
    setelement(rtable,'#',cons(s_nmacro,dtable));
    
    /* install the dispatch macros */
    defdmacro(dtable,'!', "%RM-HASH");
    defdmacro(dtable,'\\',"%RM-HASH");
    defdmacro(dtable,'(', "%RM-HASH");
    defdmacro(dtable,':', "%RM-HASH");
    defdmacro(dtable,'\'',"%RM-HASH");
    defdmacro(dtable,'t', "%RM-HASH");
    defdmacro(dtable,'T', "%RM-HASH");
    defdmacro(dtable,'f', "%RM-HASH");
    defdmacro(dtable,'F', "%RM-HASH");
    defdmacro(dtable,'b', "%RM-HASH");
    defdmacro(dtable,'B', "%RM-HASH");
    defdmacro(dtable,'o', "%RM-HASH");
    defdmacro(dtable,'O', "%RM-HASH");
    defdmacro(dtable,'d', "%RM-HASH");
    defdmacro(dtable,'D', "%RM-HASH");
    defdmacro(dtable,'x', "%RM-HASH");
    defdmacro(dtable,'X', "%RM-HASH");
    defdmacro(dtable,'|', "%RM-HASH");
}
