/*
	Copyright (c) 1993 by Robert Jervis
	All rights reserved.

	Permission to use, copy, modify and distribute this software is
	subject to the license described in the READ.ME file.
 */
include	file;
include	sbuffer;
include	hash;
include	target;
include	errmsg;

/*
 *	TYPE:		tokenNames
 *
 *	DESCRIPTION:
 *		The set of token codes that tokenStream.scan can return in
 *		Token.lex.  The order matters: EOF_TOK must stay first (the
 *		scanner's tables and resync use 0 for 'no token' / end of
 *		input, presumably the value of EOF_TOK) and lastToken must
 *		stay last, because the character classes in tokenClasses
 *		are numbered starting from lastToken.
 */
tokenNames:	public	type	byte =	{

		/* Punctuation */

	EOF_TOK,			/* End of file token */
	LP,				/* ( */
	RP,				/* ) */
	LB,				/* [ */
	RB,				/* ] */
	LC,				/* { */
	RC,				/* } */
	SM,				/* ; */
	CO,				/* : */
	ELLIPSIS, 			/* ... */
	RANGE,				/* .. */
	CM,				/* , */

		/* Operators */

	ASG,				/* = */
	ADA,				/* += */
	SBA,				/* -= */
	MUA,				/* *= */
	DVA,				/* /= */
	MOA,				/* %= */
	ANA,				/* &= */
	ORA,				/* |= */
	XRA,				/* ^= */
	LSA,				/* <<= */
	RSA,				/* >>= */
	ADD,				/* + */
	SUB,				/* - */
	MUL,				/* * */
	DIV,				/* / */
	MOD,				/* % */
	AND,				/* & */
	OR,				/* | */
	XOR,				/* ^ */
	LSH,				/* << */
	RSH,				/* >> */
	QUES,				/* ? */
	LOR,				/* || */
	LAND,				/* && */
	EQ,				/* == */
	NE,				/* != */
	LT,				/* < */
	GT,				/* > */
	LE,				/* <= */
	GE,				/* >= */
	IOARROW,			/* <- */
	NOT,				/* ! */
	COM,				/* ~ */
	INC,				/* ++ */
	DEC,				/* -- */
	DOT,				/* . */
	ARROW,				/* -> */
	QUAL,				/* :: */

		/* Multi-component tokens */

	MAGIC,				/* $ tokens ($ followed by a number) */
	ID,				/* normal identifier */
	ICON,				/* integer constant */
	FCON,				/* floating point constant */
	STRING,				/* string literal, also the keyword 'string' */
	LITERAL,			/* keyword 'literal': literal data */
	INTEGER,			/* keyword 'integer': integer data */
	FLOAT,				/* keyword 'float' */
	POINTER,			/* keyword 'pointer' */
	UNSIGNED,			/* keyword 'unsigned' */

	lastToken			/* not a token: first value used by tokenClasses */
	};

/*
 *	TYPE:		tokenStream
 *
 *	DESCRIPTION:
 *		A sourceBuffer that is read one token at a time.  Each call
 *		to scan classifies the next characters of the buffer and
 *		leaves the details of the token (code, source offset, value)
 *		in the global Token object.  One token of push-back is
 *		available through unscan.
 */
tokenStream:	public	type	inherit	sourceBuffer {
	public:
/*
 *	FUNCTION:	scan
 *
 *	DESCRIPTION:
 *		This function returns the next token of the stream and
 *		stores the same code in Token.lex.  If unscan was called
 *		since the previous scan, the previous token (Token.lex) is
 *		returned again and no input is consumed.
 *
 *		White space and comments are skipped.  Illegal characters
 *		are reported through error() and then skipped.
 *
 *		When FullScan is FALSE, identifiers are not looked up in the
 *		hash table and are always returned as ID.
 *
 *	RETURNS:
 *		The token code.
 */
scan:	() tokenNames =
	{
	nxtc:	int;
	ntype:	int;
	a:	int;
	cp:	* char;

	if	(Backup){
		Backup = 0;
		return Token.lex;
		}
	for	(;;){
		nxtc = getc();
		ntype = CharacType[nxtc];
		switch	(ntype){

			// Identifier or keyword.  At most IDLEN characters
			// are kept in Accum; any excess is read and dropped.
			// Note that dots are accepted inside an identifier.

		case	A_LET:
			rememberLine();
			cp = Accum;
			for	(a = IDLEN;
					 a > 0 &&
					 (CharacType[nxtc] == A_LET ||
					  CharacType[nxtc] == A_DIG ||
					  CharacType[nxtc] == A_DOT);
					a--){
				*cp++ = nxtc;
				nxtc = getc();
				}
			*cp = 0;
			while	(CharacType[nxtc] == A_LET ||
				 CharacType[nxtc] == A_DIG ||
				 CharacType[nxtc] == A_DOT)
				nxtc = getc();
			if	(nxtc != EOF_MARK)
				ungetc();
			if	(!FullScan){
				Token.lex = ID;
				return ID;
				}
			i:	int;

			i = cp - Accum;
			Token.iden = hash(Accum[:i]);

				// A non-zero Token field in the hashed
				// identifier is the keyword's token code.

			switch	(Token.iden->Token){
			case	0:
				Token.lex = ID;
				break;

			default:
				Token.lex = Token.iden->Token;
				}
			return Token.lex;

			// Any class that is not listed below is a token code
			// stored directly in CharacType (LP, RP, SM, ...).

		default:
			rememberLine();
			return Token.lex = ntype;

		case	A_ILL:
			error(ErrBadChar, nxtc, nxtc);
			break;

			// $number, only legal when BuildMachine is set.

		case	A_MAGIC:
			if	(!BuildMachine){
				error(ErrBadChar, nxtc, nxtc);
				break;
				}
			nxtc = getc();
			if	(CharacType[nxtc] != A_DIG){
				error(ErrBadNumber);

					// Do not push back at end of file,
					// as everywhere else in this scanner.

				if	(nxtc != EOF_MARK)
					ungetc();
				break;
				}
			ntype = number(nxtc);
			if	(ntype == FLOAT_CON)
				error(ErrBadNumber);
			convertnum(ntype);
			return Token.lex = MAGIC;

		case	A_SKP:
			break;

			// '/' starts a comment, '/=' or a plain divide.

		case	A_SLH:
			rememberLine();
			a = getc();
			if	(a == '*'){
				skipComment();
				break;
				}
			else if	(a == '/'){
				skipToEol();
				break;
				}
			else if	(a == '=')
				Token.lex = DVA;
			else	{
				if	(a != EOF_MARK)
					ungetc();
				Token.lex = DIV;
				}
			return Token.lex;

			// Operators of the form  X,  XX  and  X=.  The three
			// tables are indexed by the character class relative
			// to A_EXC; a zero in Dbltok means there is no
			// doubled form.

		case	A_EXC:
		case	A_PCT:
		case	A_EQ:
		case	A_AMP:
		case	A_AST:
		case	A_PLS:
		case	A_UP:
		case	A_BAR:
			rememberLine();
			a = getc();
			if	(Dbltok[ntype - A_EXC] && a == nxtc)
				Token.lex = Dbltok[ntype - A_EXC];
			else if	(a == '=')
				Token.lex = Asgtok[ntype - A_EXC];
			else	{
				if	(a != EOF_MARK)
					ungetc();
				Token.lex = Sintok[ntype - A_EXC];
				}
			return Token.lex;

		case	A_DSH:
			rememberLine();
			nxtc = getc();
			if	(nxtc == '-')
				Token.lex = DEC;
			else if	(nxtc == '=')
				Token.lex = SBA;
			else if	(nxtc == '>')
				Token.lex = ARROW;
			else	{
				if	(nxtc != EOF_MARK)
					ungetc();
				Token.lex = SUB;
				}
			return Token.lex;

			// .  ..  or  ...

		case	A_DOT:
			rememberLine();
			nxtc = getc();
			if	(nxtc == '.'){
				nxtc = getc();
				if	(nxtc != '.'){
					if	(nxtc != EOF_MARK)
						ungetc();
					Token.lex = RANGE;
					}
				else
					Token.lex = ELLIPSIS;
				}
			else	{
				if	(nxtc != EOF_MARK)
					ungetc();
				Token.lex = DOT;
				}
			return Token.lex;

			// <  <=  <-  <<  or  <<=

		case	A_LT:
			rememberLine();
			nxtc = getc();
			if	(nxtc == '=')
				Token.lex = LE;
			else if	(nxtc == '-')
				Token.lex = IOARROW;
			else if	(nxtc == '<'){
				nxtc = getc();
				if	(nxtc == '=')
					Token.lex = LSA;
				else	{
					if	(nxtc != EOF_MARK)
						ungetc();
					Token.lex = LSH;
					}
				}
			else	{
				if	(nxtc != EOF_MARK)
					ungetc();
				Token.lex = LT;
				}
			return Token.lex;

		case	A_CO:
			rememberLine();
			nxtc = getc();
			if	(nxtc == ':')
				Token.lex = QUAL;
			else	{
				if	(nxtc != EOF_MARK)
					ungetc();
				Token.lex = CO;
				}
			return Token.lex;

			// >  >=  >>  or  >>=

		case	A_GT:
			rememberLine();
			nxtc = getc();
			if	(nxtc == '=')
				Token.lex = GE;
			else if	(nxtc == '>'){
				nxtc = getc();
				if	(nxtc == '=')
					Token.lex = RSA;
				else	{
					if	(nxtc != EOF_MARK)
						ungetc();
					Token.lex = RSH;
					}
				}
			else	{
				if	(nxtc != EOF_MARK)
					ungetc();
				Token.lex = GT;
				}
			return Token.lex;

		case	A_QUO:
			rememberLine();
			getString();
			return Token.lex = STRING;

		case	A_DIG:
			rememberLine();
			ntype = number(nxtc);
			if	(!FullScan){
				if	(ntype == FLOAT_CON){
					Token.lex = FCON;
					return Token.lex;
					}
				}
			return Token.lex = convertnum(ntype);

		case	A_APO:
			rememberLine();
			getcc();
			return Token.lex = ICON;
			}
		}
	}

/*
 *	FUNCTION:	openRange
 *
 *	DESCRIPTION:
 *		This function opens the given text range through the
 *		inherited openRange and resets the scanner state: no token
 *		is pushed back and full scanning is enabled.
 */
openRange:	(t: textRange) =
	{
	super openRange(t);
	Backup = 0;
	FullScan = TRUE;
	}
/*
 *	FUNCTION:	unscan
 *
 *	DESCRIPTION:
 *		This function backs up the scan pointer so that the next
 *		call to scan will return the same token again.  Only one
 *		token of push-back is kept.
 */
unscan:	() =
	{
	Backup = 1;
	}

/*
 *	FUNCTION:	number
 *
 *	DESCRIPTION:
 *		This function collects the digits of a numeric constant into
 *		Accum as a NUL terminated, lower case string, without any
 *		0 or 0x prefix and without any suffix.  An L and/or U suffix
 *		selects the value stored in Token.constType (negative means
 *		signed); with no suffix it is -INTBITS.
 *
 *		Digits that do not fit in Accum are read and dropped.
 *
 *		dotf and expf are never set in this version, so the branches
 *		that test them cannot be taken and FLOAT_CON is never
 *		returned from here.
 *
 *	INPUTS:
 *		i	The first character of the number, already read.
 *
 *	RETURNS:
 *		The radix of the constant: OCTAL_CON, DECIMAL_CON or
 *		HEX_CON.
 */
number:	(i: char) int =
	{
	dotf:		boolean;
	expf:		boolean;
	bigDigits:	boolean;
	radix:		int;
	cp:		* char;
	limit:		* char;

	dotf = FALSE;
	expf = FALSE;
	bigDigits = FALSE;
	Token.constType = -INTBITS;		// default to a signed int

		// limit is the last byte of Accum.  It is reserved for the
		// terminating NUL, so digits are only stored below it.

	limit = &Accum[sizeof Accum];
	limit--;
	if	(i == '0'){
		i = getc();
		if	(i == 'x' || i == 'X'){
			i = getc();
			radix = HEX_CON;
			}
		else
			radix = OCTAL_CON;
		}
	else
		radix = DECIMAL_CON;
	for	(cp = Accum;; cp++, i = getc()){
		i = tolower(i);			// ignore upper case distinctions
		if	(cp < limit)
			*cp = i;
		if	(CharacType[i] == A_DIG){
			if	(i >= '8')
				bigDigits = TRUE;
			}
		else if	(i >= 'a' && i <= 'f'){
			if	(radix != HEX_CON){
				ungetc();
				break;
				}
			}
		else if	(i == 'l'){
			if	(dotf || expf){
				Token.constType = EXTENDBITS;
				break;
				}
			i = getc();
			if	(i == 'u' || i == 'U')
				Token.constType = LONGBITS;
			else	{
				Token.constType = -LONGBITS;
				if	(i != EOF_MARK)
					ungetc();
				}
			break;
			}
		else if	(i == 'u'){
			if	(dotf || expf){
				ungetc();
				break;
				}
			i = getc();
			if	(i == 'l' || i == 'L')
				Token.constType = LONGBITS;
			else	{
				Token.constType = INTBITS;
				if	(i != EOF_MARK)
					ungetc();
				}
			break;
			}
		else if	(i == EOF_MARK)
			break;
		else	{
			ungetc();
			break;
			}
		}

		// cp keeps counting past the end of Accum for an overlong
		// number, so clamp it before writing the terminator.

	if	(cp > limit)
		cp = limit;
	*cp = 0;
	if	(radix == OCTAL_CON &&
		 bigDigits)
		error(ErrNotOctalDigit);
	return radix;
	}

/*
 *	FUNCTION:	getcc
 *
 *	DESCRIPTION:
 *		This function reads the body of a character constant, the
 *		opening apostrophe having been read already.  Characters are
 *		fetched with mapc('\'') until it returns -1 (presumably at
 *		the closing apostrophe - mapc is defined elsewhere).
 *
 *		Up to CCLEN characters are packed into Token.icon.  More
 *		than CCLEN characters is reported as ErrCConstLong and the
 *		excess is ignored.  Token.constType is set to -INTBITS.
 */
getcc:	() =
	{
	i:	int;
	c:	int;
	u:	union	{
		public:

		cx:	[CCLEN] char;
		y:	long;
		};

	u.y = 0;
	for	(i = 0; ; i++){
		c = mapc('\'');
		if	(c == -1)
			break;
		if	(i < CCLEN)
			u.cx[i] = c;
		}
	if	(i > CCLEN)
		error(ErrCConstLong);

		// A single character is taken directly; anything else
		// (including an empty constant) goes through the packed
		// long value.

	if	(i == 1)
		Token.icon = u.cx[0];
	else
		Token.icon = integerConstant(u.y, -INTBITS);
	Token.constType = -INTBITS;
	}

/*
 *	FUNCTION:	getString
 *
 *	DESCRIPTION:
 *		This function reads the body of a string literal, the
 *		opening quote having been read already.  Characters are
 *		fetched with mapc('"') until it returns -1 and are stored in
 *		Accum; the count is left in Token.stringLength.
 *
 *		A string longer than Accum is reported as ErrStringLong; the
 *		rest of the literal is read and dropped.
 *
 *		When FullScan is set, the bytes are copied to newly
 *		allocated memory and Token.stringValue points to the copy.
 *		Otherwise Token.stringValue is left untouched.
 */
getString:	() =
	{
	i:	int;
	cp:	ref char;

	Token.stringLength = 0;
	cp = Accum;
	i = mapc('"');
	while	(i != -1){
		if	(Token.stringLength >= sizeof Accum){
			error(ErrStringLong);
			while	(mapc('"') != -1)
				;
			break;
			}
		else	{
			Token.stringLength++;
			*cp++ = i;
			}
		i = mapc('"');
		}
	if	(FullScan){
		Token.stringValue = alloc(Token.stringLength);
		memCopy(Token.stringValue, Accum, Token.stringLength);
		}
	}

/*
 *	FUNCTION:	resync
 *
 *	DESCRIPTION:
 *		This function reports a syntax error and then skips tokens
 *		until a likely statement boundary, so that parsing can
 *		resume.
 *
 *		The message and its arguments are passed on to errorV.
 *		Starting with the current token, tokens are discarded up to
 *		and including the next semicolon, or up to but not including
 *		the next right brace (it is pushed back with unscan), or to
 *		the end of input.  If a left brace is met first, everything
 *		through its matching right brace is discarded instead.
 *
 *		Identifiers are not hashed while skipping; the caller's scan
 *		mode is restored before returning.
 *
 *	INPUTS:
 *		s	The error message format.
 *		...	Arguments for the message.
 */
resync:	(s: [:] char, ...) =
	{
	tok:	int;
	depth:	int;
	f:	boolean;

	tok = Token.lex;
	errorV(s, ...);
	if	(tok == RC){
		unscan();
		return;
		}
	f = FullScan;
	FullScan = FALSE;

		// A token value of 0 ends the skip; scan yields the
		// CharacType entry EOF_TOK there (presumably 0, being the
		// first enumerator of tokenNames).

	while	(tok != 0 && tok != SM && tok != RC){
		if	(tok == LC){
			depth = 1;
			for	(;;){
				tok = scan();
				if	(tok == 0){
					FullScan = f;
					return;
					}
				if	(tok == RC){
					depth--;
					if	(depth <= 0)
						break;
					}
				else if	(tok == LC)
					depth++;
				}
			FullScan = f;
			return;
			}
		tok = scan();
		}
	if	(tok == RC)
		unscan();
	FullScan = f;
	}

/*
 *	FUNCTION:	setScanMode
 *
 *	DESCRIPTION:
 *		This function selects full scanning (TRUE) or the reduced
 *		mode (FALSE) in which identifiers are not hashed and string
 *		values are not allocated.
 */
setScanMode:	(f: boolean) =
	{
	FullScan = f;
	}

private:

Backup:		int;			// non-zero: scan returns Token.lex again
FullScan:	boolean;		// see setScanMode

/*
 *	FUNCTION:	rememberLine
 *
 *	DESCRIPTION:
 *		This function records in Token.offset the position of the
 *		character just read, that is, one less than tell().
 */
rememberLine:	() =
	{
	Token.offset = tell() - 1;
	}


	};

IDLEN:	public	const	int = 32;		// max len of an identifier kept by scan

	/*
	   Kind of numeric constant, as returned by tokenStream.number and
	   passed to convertnum.  Except for FLOAT_CON, the value is the
	   radix that convertnum multiplies by.
	 */

FLOAT_CON:	const	int = 0;
OCTAL_CON:	const	int = 8;
DECIMAL_CON:	const	int = 10;
HEX_CON:	const	int = 16;

	/*
	   These are the tokens returned by the scanner for the operator
	   characters ! % & * + ^ | = (in that order).  Each table is
	   indexed by the character class minus A_EXC:

		Sintok	the character alone
		Asgtok	the character followed by =
		Dbltok	the character doubled (0 if there is no such token)
	 */

Sintok:	[] tokenNames = [ NOT, MOD,  AND, MUL, ADD, XOR,  OR, ASG ];
Asgtok:	[] tokenNames = [  NE, MOA,  ANA, MUA, ADA, XRA, ORA,  EQ ];
Dbltok:	[] tokenNames = [   0,   0, LAND,   0, INC,   0, LOR,  EQ ];

	/*
	   Token describes the token most recently returned by
	   tokenStream.scan.  Which fields are meaningful depends on lex.
	 */

Token:	public	{
	public:

	lex:		tokenNames;		// the token code
	offset:		fileOffset;		// source position, set by rememberLine
	constType:	signedByte;		// size of a constant: +/-INTBITS,
						// +/-LONGBITS or EXTENDBITS;
						// negative means signed
	icon:		unsignedLong;		// value of an ICON or MAGIC token
	stringValue:	ref char;		// STRING: allocated copy of the bytes
						// (only set in full scan mode)
	stringLength:	int;			// STRING: number of bytes
	iden:		ref identifier;		// hashed identifier (full scan mode)
	};

	// Scratch buffer shared by the scanner: holds the text of the
	// identifier, number or string being scanned.

Accum:		public	[4096] char;

	// When FALSE, scan reports '$' as an illegal character; when TRUE,
	// '$' followed by a number is returned as a MAGIC token.

BuildMachine:	public	boolean;

	/*
	   Character classes used in CharacType for characters that do not
	   map directly to a single token.  They are numbered from lastToken
	   so that they cannot collide with the codes in tokenNames.

	   The order of A_EXC through A_EQ must match the order of the
	   entries in Sintok, Asgtok and Dbltok, which scan indexes by
	   (class - A_EXC).
	 */

tokenClasses:	type	tokenNames = {
	A_SKP = lastToken,		// white space, skipped
	A_EXC,				// !
	A_PCT,				// %
	A_AMP,				// &
	A_AST,				// *
	A_PLS,				// +
	A_UP,				// ^
	A_BAR,				// |
	A_EQ,				// =
	A_LET,				// letter or underscore
	A_DIG,				// decimal digit
	A_QUO,				// "
	A_APO,				// '
	A_DSH,				// -
	A_DOT,				// .
	A_LT,				// <
	A_ILL,				// illegal character
	A_GT,				// >
	A_SLH,				// /
	A_MAGIC,			// $
	A_CO,				// :
	};

	/*
	   Classification of each of the 256 character codes, eight codes
	   per row.  An entry is either a token code from tokenNames, which
	   scan returns as is, or a class from tokenClasses, which scan
	   handles case by case.
	 */

CharacType:	[] tokenClasses = [
		// 0x00 - 0x1f: NUL is end of file; tab, LF, VT, FF and CR
		// are white space
	EOF_TOK,A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_SKP,	A_SKP,	A_SKP,	A_SKP,	A_SKP,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,

		// 0x20 - 0x3f: space, punctuation and digits
	A_SKP,	A_EXC,	A_QUO,	A_ILL,	A_MAGIC,A_PCT,	A_AMP,	A_APO,
	LP,	RP,	A_AST,	A_PLS,	CM,	A_DSH,	A_DOT,	A_SLH,
	A_DIG,	A_DIG,	A_DIG,	A_DIG,	A_DIG,	A_DIG,	A_DIG,	A_DIG,
	A_DIG,	A_DIG,	A_CO,	SM,	A_LT,	A_EQ,	A_GT,	QUES,

		// 0x40 - 0x5f: @, upper case letters, [ \ ] ^ _
	A_ILL,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,
	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,
	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,
	A_LET,	A_LET,	A_LET,	LB,	A_ILL,	RB,	A_UP,	A_LET,

		// 0x60 - 0x7f: `, lower case letters, { | } ~ DEL
	A_ILL,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,
	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,
	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,	A_LET,
	A_LET,	A_LET,	A_LET,	LC,	A_BAR,	RC,	COM,	A_ILL,

		// 0x80 - 0x9f: all illegal
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,

		// 0xa0 - 0xbf: all illegal
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,

		// 0xc0 - 0xdf: all illegal
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,

		// 0xe0 - 0xff: all illegal
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,	A_ILL,
	];

/*
	This function associates each keyword spelling with its token code
	by calling identifier create (defined elsewhere).  Presumably this
	stores the code in the identifier's Token field, which scan reads
	after hashing a name - confirm against the hash unit.

	The function is declared as an entry function; presumably it runs
	at program start-up before any scanning is done.
 */
scanInit:	entry	() =
	{
	identifier create("float",		FLOAT);
	identifier create("integer",		INTEGER);
	identifier create("literal",		LITERAL);
	identifier create("pointer",		POINTER);
	identifier create("string",		STRING);
	identifier create("unsigned",		UNSIGNED);
	}

/*
	FUNCTION:	convertnum

	DESCRIPTION:
		This function converts the digit string left in Accum into
		the integer value Token.icon, using radix as the multiplier.
		Accum must hold only decimal digits and the lower case
		letters a-f.

		The scanned type, Token.constType, is the type selected by
		the L and U suffixes of the constant.  If the value fits in
		the scanned type, the type is left alone.  If it does not:

		    -	a signed type smaller than long becomes signed long
			when the value fits in a signed long,

		    -	in every other case the type becomes unsigned long.

		No check is made for a value too large for Token.icon.

	INPUTS:
		radix	The number base, as returned by tokenStream.number.

	RETURNS:
		Always ICON.
 */
convertnum:	(radix: int) int =
	{
	digit:	int;
	src:	* char;

	Token.icon = 0;
	src = Accum;
	while	(*src){
		digit = *src;
		if	(CharacType[digit] != A_DIG)
			digit += 10 - 'a';		// hex letter
		else
			digit -= '0';
		Token.icon = Token.icon * radix + digit;
		src++;
		}
	if	(!integerFits(Token.icon, Token.constType)){

			// Too big for the scanned type: promote it.

		if	(Token.constType >= 0 ||
			 Token.constType == -LONGBITS ||
			 !integerFits(Token.icon, -LONGBITS))
			Token.constType = LONGBITS;
		else
			Token.constType = -LONGBITS;
		}
	return ICON;
	}

