/*	EXPR_LEX.C
	Copyright (C) 1992	Keith L. Robertson	All Rights Reserved

	Lexical analyzer for the EXPR expression evaluator.
*/
#include <ctype.h>
#include "expr_lex.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>


/****************************************/
/*		Options			*/
/****************************************/

/* Maximum length of identifiers and other lexemes EXCEPT strings. */
/* lex_buffer is sized MAX_LEX_LENGTH + 1 so a terminating NUL always fits. */
#define		MAX_LEX_LENGTH		128

/* Run-time options.  The initializers make these definitions, not mere
   declarations, despite the 'extern' keyword. */
extern SHORT	ERROR_THRESHOLD = 5;	/* Max # errors before it quits; log_error treats 0 as "no limit". */
extern SHORT	TAB_STOPS = 8;		/* Tab stop spacing used by get_char when it reads '\t'. */


/****************************************/
/*	  Helper Functions		*/
/****************************************/

/* Convert a constant number to a string literal.
   Two macro levels are required: the '#' operator stringizes its operand
   WITHOUT macro-expanding it first, so a single-level macro would turn
   to_string(MAX_LEX_LENGTH) into "MAX_LEX_LENGTH" instead of "128".
   The outer macro expands its argument, the inner one stringizes the result. */
#define		to_string_raw(num)	#num
#define		to_string(num)		to_string_raw(num)

/* Format an unsigned short as a decimal string.
   The digits are built right-to-left in a static buffer and a pointer to the
   first digit is returned, so the result is overwritten by the next call.
   MX_NLEN allows for five digits plus the terminating NUL. */
#define     MX_NLEN	6
extern STRING	ustoa (USHORT num)
{   static CHAR		digits [MX_NLEN];
    USHORT		first = MX_NLEN - 1;	/* Index of the leftmost char written. */

    digits [first] = 0;
    do	{
	--first;
	digits [first] = '0' + num % 10;
	num /= 10;
    }	while  (num != 0);

    return  digits + first;
}

/* Write a string to Standard Output. */
/* No newline is appended and the result of fputs is not checked. */
extern VOID	put_string (CSTRING str)
{   fputs (str, stdout);
}


/****************************************/
/*	  Tables & Constant Data	*/
/****************************************/

/* Character classification: */
/* Every input byte is mapped to one of these classes by char_class, and
   get_token switches on the class.  ALPHAB and NUMBER must stay the two
   smallest values: is_idchar tests "class <= NUMBER". */
enum    {
	ALPHAB,		/* A-Z, a-z, _					*/
	NUMBER,		/* 0-9						*/
	WHT_SP,		/* \t \n \v \f \r space				*/
	BAD_CH,		/* Bad character: c < 32 (not white space) or	*/
			/* 127 <= c (except 255, which is EOF_CH)	*/
	SYM,		/* SYM+ 0 - SYM+11, symbols recognized by lexer	*/
	BD_SYM= SYM+12,	/* Bad symbol, not otherwise listed.		*/
	EOF_CH,		/* End-of-file character			*/
	ENDCLASS	/* One past the last class value.		*/
};

/* Map character to classification: */
/* Indexed by the character's value as a UCHAR, eight entries per row.
   The SYM+n entries are: ( ) * + - . / ; = ^ ? ,  for n = 0..11.
   Entry 255 is EOF_CH.  NOTE(review): this presumably relies on get_char
   storing getchar()'s EOF in a UCHAR, giving 255 -- confirm for the target. */
UCHAR   char_class[256] = {
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, /* 0-7 */
    BAD_CH, WHT_SP, WHT_SP, WHT_SP, WHT_SP, WHT_SP, BAD_CH, BAD_CH, /* \btnvfr?? */
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, /* 16-23 */
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, /* 24-31 */
    WHT_SP, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BD_SYM, /*  !"#$%&' */
    SYM+ 0, SYM+ 1, SYM+ 2, SYM+ 3, SYM+11, SYM+ 4, SYM+ 5, SYM+ 6, /* ()*+,-./ */
    NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, NUMBER, /* 01234567 */
    NUMBER, NUMBER, BD_SYM, SYM+ 7, BD_SYM, SYM+ 8, BD_SYM, SYM+10, /* 89:;<=>? */
    BD_SYM, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* @ABCDEFG */
    ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* HIJKLMNO */
    ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* PQRSTUVW */
    ALPHAB, ALPHAB, ALPHAB, BD_SYM, BD_SYM, BD_SYM, SYM+ 9, ALPHAB, /* XYZ[\]^_ */
    BD_SYM, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* `abcdefg */
    ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* hijklmno */
    ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, ALPHAB, /* pqrstuvw */
    ALPHAB, ALPHAB, ALPHAB, BD_SYM, BD_SYM, BD_SYM, BD_SYM, BAD_CH, /* xyz{|}~Del*/
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, /* 128+	*/
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH,
    BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, BAD_CH, EOF_CH, /* 248-255 */
};

/* True when ch may appear in an identifier: its class is ALPHAB or NUMBER,
   the two smallest class values.  ch is evaluated exactly once. */
#define		is_idchar(ch)	(char_class [ch] <= NUMBER)
/* True when ch is any symbol character, recognized (SYM+n) or not (BD_SYM).
   Requires a variable named temp_ch to be in scope at the point of use; the
   class is stored there so that ch is evaluated only once. */
#define		is_symchar(ch)	(temp_ch=char_class [ch],\
	SYM <= temp_ch  &&  temp_ch < EOF_CH)


/****************************************/
/*	       Static Data		*/
/****************************************/

/* One-character lookahead: the character most recently read by get_char
   and not yet consumed into a lexeme. */
static UCHAR	next_char;	/* Next character to process */

	/* String corresponding to returned token. */
	/* adv appends each consumed character here; the extra element
	   leaves room for the terminating NUL. */
static CHAR	lex_buffer [MAX_LEX_LENGTH + 1];
static USHORT	lex_len;		/* Length of lexeme. */

/* Line number and position within the line of next_char, as maintained by
   get_char: c_line starts at 1; c_posn restarts at 0 after each newline. */
static USHORT	c_line, c_posn;

/* Position bookkeeping, illustrated on the input "lexeme":

	lexeme
	l   c

   l:  lexeme.line, lexeme.posn -- start of the lexeme being scanned
   c:  c_line, c_posn           -- the current (lookahead) character
*/


/****************************************/
/*	Initialization Function		*/
/****************************************/

/* Number of errors reported through log_error since init_lexer was called. */
extern USHORT		error_count = 0;

/* Forward declaration: init_lexer calls adv before adv is defined.
   It must be 'static' to match the definition below; declaring the same
   identifier with external and then internal linkage in one translation
   unit is undefined behavior and is rejected by current compilers. */
static UCHAR _near	adv ();

/* Reset the lexer and prime its one-character lookahead.
   Must be called before the first call to get_token. */
extern VOID	init_lexer ()
{
    /* Input starts on line 1 with no characters read on it yet. */
    c_line = 1;
    c_posn = 0;

    /* No errors logged, no lexeme in progress. */
    error_count = 0;
    lex_len = 0;

    /* Read the first input character.  adv also appends the previous
       lookahead to lex_buffer; get_token resets lex_len on entry. */
    next_char = adv ();
}


/****************************************/
/*	  Error Output Functions	*/
/****************************************/

/* Printable names of the error classes, indexed by the etype argument of
   log_output.  NOTE(review): the order presumably matches the values of
   LEXICAL, SYNTAX, SEMANTIC and FATAL in expr_lex.h -- confirm. */
static CSTRING  err_type_str [] = {
    "Lexical",  "Syntax",  "Semantic", "Fatal",
};

/* Write one diagnostic line to Standard Output in the form
	Posn <n>: <Class><mtype><desc>
   etype -- error class; selects the name printed from err_type_str.
   desc  -- message text.
   mtype -- message type separator, " error: " or " warning: ".
   For LEXICAL diagnostics the position printed is that of the current input
   character (c_posn); for all others it is the start of the lexeme. */
static VOID _near	log_output (USHORT etype,  CSTRING desc,  CSTRING mtype)
{   USHORT  n_types = sizeof err_type_str / sizeof err_type_str [0];

    put_string ("Posn ");
    put_string (ustoa (etype==LEXICAL ? c_posn : lexeme.posn));
    put_string (": ");
    /* An out-of-range etype would read past the end of the name table. */
    put_string (etype < n_types ? err_type_str [etype] : "Unknown");
    put_string (mtype);
    put_string (desc);
    put_string ("\n");
}

/* Report an error with message desc.
   etype is the class of error: LEXICAL, SYNTAX, SEMANTIC or FATAL.
   A FATAL error ends the program at once.  Any other error is counted, and
   the program ends when the count reaches ERROR_THRESHOLD; a threshold of 0
   turns both the counting and the limit off. */
extern VOID	log_error (USHORT etype,  CSTRING desc)
{   log_output (etype, desc, " error: ");

    if  (etype == FATAL)  { exit (1); }

    /* No limit configured: nothing to count. */
    if  (ERROR_THRESHOLD == 0)  { return; }

    error_count += 1;
    if  (error_count < ERROR_THRESHOLD)  { return; }

    put_string ("Aborting...too many errors.\n");
    exit (1);
}

/* Print a warning message, desc, of class etype.  Unlike log_error this
   neither counts the message nor terminates the program. */
extern VOID	log_warning (USHORT etype,  CSTRING desc)
{   log_output (etype, desc, " warning: ");
}


/****************************************/
/*	  Advance Input Functions	*/
/****************************************/
/* Use static '_near' functions to improve call time and program space. */

/* Read the next character from Standard Input, discarding carriage returns,
   and update c_line / c_posn to describe the character returned.
   The result of getchar() is stored in a UCHAR, so end of file is not seen
   as EOF.  NOTE(review): with an 8-bit UCHAR and EOF == -1 it becomes 255,
   which char_class maps to EOF_CH -- confirm for the target platform. */
static UCHAR _near	get_char ()
{   UCHAR  ch;
	/* Skip and ignore carriage returns */
    do	{ ch = getchar (); }		while	(ch == '\r');

    if  (ch=='\n')  { ++c_line;  c_posn = 0; }
    else if  (ch=='\t'  &&  TAB_STOPS > 0)  {
	/* Move to the next multiple of TAB_STOPS. */
	c_posn = c_posn - (c_posn % TAB_STOPS) + TAB_STOPS;
    }
    /* Ordinary character.  A tab also lands here when TAB_STOPS is zero or
       negative: the option is public, and "% 0" is undefined behavior. */
    else   { ++c_posn; }

    return  ch;
}

/* Advance to next character and return it.
   The character being consumed (next_char) is appended to lex_buffer, then
   the following input character is read into next_char.

   At most MAX_LEX_LENGTH characters are stored, so the last element of
   lex_buffer stays free for a terminating NUL.  lex_len keeps counting up to
   MAX_LEX_LENGTH + 1 and then stops: a value greater than MAX_LEX_LENGTH
   means "lexeme too long", and callers must test for that before using
   lex_len as an index.  Capping the count also keeps long whitespace or
   bad-character runs from writing past the buffer or wrapping lex_len. */
static UCHAR _near	adv ()
{   UCHAR	ch;

    if  (lex_len < MAX_LEX_LENGTH)  { lex_buffer [lex_len] = next_char; }
    if  (lex_len <= MAX_LEX_LENGTH)  { ++lex_len; }

    ch = get_char ();
    return  (next_char = ch);
}


/****************************************/
/*	  External 'get_' Functions	*/
/****************************************/

/* Public */
/* Description of the token most recently returned by get_token: its token
   code, starting line and position, and either an allocated name string
   (identifiers) or a numeric value (REAL). */
LEX_INFO	lexeme;
/* Token code most recently returned by get_token. */
USHORT		last_token;


/* Get token from input stream. */
extern USHORT	get_token ()
{   register SHORT	token;
    register UCHAR	ch = next_char, temp_ch;

    lex_len	= 0;
    lexeme.name = 0;
    lexeme.line = c_line;
    lexeme.posn = c_posn;

    while  (1)  {

	switch  (char_class [ch])  {
	/*---------------------------------*/
	case  EOF_CH:
	    token = EOS;  goto RETURN;

	/*---------------------------------*/
	case  ALPHAB:
	    if  (ch == 'e')  {
		if  ((ch=adv())=='x'  &&  (ch=adv())=='i'  &&  (ch=adv())=='t')  {
		    token = EOS;  goto ENDKEY;
		}
		else	{ goto IDENT; }
	    }
	    else if  (ch == 'h')  {
		if  ((ch=adv())=='e'  &&  (ch=adv())=='l'  &&  (ch=adv())=='p')  {
		    token = HELP;  goto ENDKEY;
		}
		else	{ goto IDENT; }
	    }
	    else if  (ch == 'k')  {
		if  ((ch=adv())=='i'  &&  (ch=adv())=='l'  &&  (ch=adv())=='l')  {
		    token = KILL;  goto ENDKEY;
		}
		else	{ goto IDENT; }
	    }
	    else if  (ch == 'l')  {
		if  ((ch=adv()) == 'i'  &&  (ch=adv()) == 's')  {
		    if  ((ch=adv()) == 'p')  {
			token = LISP;  goto ENDKEY;
		    }
		    else if  (ch == 't')  {
			token = LIST;  goto ENDKEY;
		    }
		    else	{ goto IDENT; }
		}
		else	{ goto IDENT; }
	    }
	    else	{ goto IDENT; }

	ENDKEY:
	    if  (!is_idchar(ch=adv()))  { goto RETURN; }
	    ch = adv();
	IDENT:
	    while  (is_idchar(ch))  { ch = adv(); }
	    token = IDENTIFIER;
	    goto RETURN_LEXEME_STRING;

	/*---------------------------------*/
	case  NUMBER:
	    while  ('0' <= ch  &&  ch <= '9')  { ch=adv(); }
	case  SYM+ 5:
	    if  (ch == '.')  { ch=adv(); }
	    while  ('0' <= ch  &&  ch <= '9')  { ch=adv(); }

	    /* Convert to floating point value.  Don't return string. */
	    lex_buffer [lex_len] = '\0';
	    lexeme.val.f = strtod (lex_buffer, 0);
	    token = REAL;
	    goto RETURN;

	/*---------------------------------*/
	case  WHT_SP:
	    do	{
		ch = adv ();
	    }	while  (char_class [ch] == WHT_SP);
	    goto RESTART;

	/*---------------------------------*/
	case  BAD_CH:
	    log_error (LEXICAL, "unrecognized character(s) in input stream.");
	    do	{
		ch = adv ();
	    }	while  (char_class [ch] == BAD_CH);
	    goto RESTART;

	/*---------------------------------*/
	case  SYM+ 0:	/* ( */
	    token = OPEN_P;		goto ENDSYM;

	case  SYM+ 1:	/* ) */
	    token = CLOSE_P;		goto ENDSYM;

	case  SYM+ 2:	/* * */
	    token = TIMES;		goto ENDSYM;

	case  SYM+ 3:	/* + */
	    token = PLUS;		goto ENDSYM;

	case  SYM+ 4:	/* - */
	    token = MINUS;		goto ENDSYM;

	case  SYM+ 6:	/* / */
	    token = DIVIDE;		goto ENDSYM;

	case  SYM+ 8:	/* = */
	    token = EQUAL;		goto ENDSYM;

	case  SYM+ 9:	/* ^ */
	    token = POWER;		goto ENDSYM;

	case  SYM+10:	/* ? */
	    token = HELP;		goto ENDSYM;

	case  SYM+11:	/* , */
	    token = COMMA;		goto ENDSYM;

	case  SYM+ 7:	/* ; */
	    token = SEMICOLON;

	ENDSYM:
	    ch = adv();
	    goto RETURN;

	/*---------------------------------*/
	case  BD_SYM:
	    log_error (LEXICAL, "unknown symbol.");
	    do	{
		ch = adv ();
	    }	while  (is_symchar (ch));
	RESTART:
	    next_char = ch;
	    lex_len = 0;
	    lexeme.line = c_line;
	    lexeme.posn = c_posn;
	}
    }

RETURN_LEXEME_STRING:
    if  (lex_len > MAX_LEX_LENGTH)  {
	/* Memory after lex_buffer has been trampled on. */
	log_error (FATAL,
	  "only strings can exceed " to_string(MAX_LEX_LENGTH) " characters in length.");
    }
    lex_buffer [lex_len] = '\0';
    lexeme.name = malloc (lex_len + 1);
    strcpy (lexeme.name, lex_buffer);

RETURN:
    return  (lexeme.token = last_token = token);
}
