/* lexer.c */

/*
 * Copyright (c) 1991-1993  R. Nigel Horspool.
 * All rights reserved.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/

/* Handles reading of LaTeX lexical elements,
   looks up LaTeX command names, etc.  */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

#include "tables.h"
#include "la_mml.h"
#include "lexer.h"

/*  Shorthand for fprintf, used when writing diagnostics to stderr.  */
#define FPR		fprintf


/*  Text of the most recently scanned token: a \xxx command name
    (`_' and `^', too, as a helpful kluge), a run of letters and
    digits, or a single character.  Filled in by get_token() and the
    lookup_char()/lookup_slash_char() helpers; names longer than MAXLEN
    characters are truncated.  */
char tokenbuffer[MAXLEN+1];

int ch;				/* current input character (EOF at end of input) */
int linenum = 0;		/* position in current file; readch() adds one
				   each time it moves past a '\n' */
char *filename;			/* name of current file */
FILE *f;			/* current input stream; read by readch() */
/*  When TRUE, get_token() gives the double-quote character its
    german-style shorthand meaning ("a, "s, "`, "', "<, "> ...).  */
BOOL german = FALSE;


/*  One node of a command hash chain (see cmdhashtab).  */
typedef struct _ch {
	    struct _ch	*next;	/* next node in the same bucket, or NULL */
	    ACTIONPTR u;	/* the command's action record; u->cmd is its name */
	} CHT, *CHTPTR;

/*  One node of an environment-name hash chain (see env_hash_chain).  */
typedef struct _eh {
	    struct _eh *next_in_bkt;	/* next node in the same bucket, or NULL */
	    ENVPTR e;			/* the environment record; e->name is its name */
	} EHT, *EHTPTR;


/*  Command hash table: one chain of CHT nodes per bucket.  */
#define TDBUCKETS	512	/* must be power of 2: cmdhash() masks with TDBUCKETS-1 */
static CHTPTR cmdhashtab[TDBUCKETS];

/*  Environment-name hash table: one chain of EHT nodes per bucket.  */
#define ECHAINS	64	/* must be a power of 2: the hash is masked with ECHAINS-1 */
static EHTPTR env_hash_chain[ECHAINS];

ENVPTR curr_env = &bad_env;	/* starts out pointing at the bad_env record */
/*  Entries for the "math", "displaymath" and "tabbing" environments;
    assigned by init_env_hash_tab().  */
ENVPTR math_env, displaymath_env, tabbing_env;

/*  Defined elsewhere; when set, lookup_cmd() reports every command name
    it fails to find.  */
extern BOOL unknowns;
/*  Direct-indexed tables, keyed by character code (0..255):
    chartab      - action for a plain character,
    slashchartab - action for a backslash followed by that character.
    A NULL slot means the character has no special action.  */
static ACTIONPTR chartab[256], slashchartab[256];



/*  Maps a command name to a bucket index of cmdhashtab.

    s    the command name; its second character (s[1]) and its last
	 character (s[len-1]) are mixed into the hash
    len  length of the name, also mixed in

    The result is masked with TDBUCKETS-1, so it always lies in
    0 .. TDBUCKETS-1.  */
static int cmdhash( register char *s,  register int len ) {
    int second_part = s[1] << 4;
    int last_part = s[len-1] * 3;

    return (second_part + last_part + len) & (TDBUCKETS - 1);
}


/*  Adds one command to the command hash table.

    name     the command name, used only to select the bucket
    infoptr  the action record stored in the new node

    The new node is pushed onto the front of its bucket's chain; no
    check is made for an existing entry with the same name.  */
static void enter_cmd_hash( char *name, ACTIONPTR infoptr ) {
    CHTPTR node = (CHTPTR)malloc(sizeof(CHT));
    int bucket;

    MCHECK(node);
    bucket = cmdhash(name, strlen(name));

    node->u = infoptr;
    node->next = cmdhashtab[bucket];
    cmdhashtab[bucket] = node;
}


/*  Finds the hash-table node for a command name.

    name  NUL-terminated command name to search for
    len   length of `name'; used only to compute the hash bucket

    Returns the matching node, or NULL when the name is not in the
    table.  On a miss, a diagnostic naming the command is written to
    stderr if the global `unknowns' flag is set.  */
static CHTPTR lookup_cmd( register char *name, register int len ) {
    register CHTPTR node = cmdhashtab[ cmdhash(name,len) ];

    while (node != NULL) {
	if (strcmp(name, node->u->cmd) == 0)
	    return node;
	node = node->next;
    }

    if (!unknowns)
	return NULL;

    LN(); FPR(stderr, "unknown command: %s\n", name);
    return NULL;
}


/*  Returns the action for a single plain character.

    The character is stored in tokenbuffer as a one-character string.
    If chartab has an entry for it, that entry is returned.  Otherwise
    `other' is returned, and a character with its high bit set is
    replaced in tokenbuffer by a space.  */
static ACTIONPTR lookup_char( register int c ) {
    register ACTIONPTR action = chartab[c & 0xff];

    tokenbuffer[0] = c;
    tokenbuffer[1] = '\0';

    if (action == NULL) {
	/* unmapped 8-bit characters are turned into a space */
	if (c & 0x80)
	    tokenbuffer[0] = ' ';
	action = &other;
    }
    return action;
}


/*  Returns the action for a backslash followed by the non-letter
    character `c'.

    Only `c' itself (without the backslash) is stored in tokenbuffer.
    If slashchartab has an entry for it, that entry is returned.
    Otherwise `other' is returned, and a character with its high bit set
    is replaced in tokenbuffer by a space.  */
static ACTIONPTR lookup_slash_char( register int c ) {
    register ACTIONPTR found;

    tokenbuffer[1] = '\0';
    tokenbuffer[0] = c;

    found = slashchartab[c & 0xff];
    if (found != NULL)
	return found;

    /* no table entry: an unmapped 8-bit character becomes a space */
    if ((c & 0x80) != 0)
	tokenbuffer[0] = ' ';
    return &other;
}



/*  Enters every command of `cmdtable' into the command hash table.
    The table is an array of action records terminated by an entry
    whose `cmd' field is NULL.  */
static void enter_cmds( ACTIONPTR cmdtable ) {
    ACTIONPTR entry = cmdtable;

    while (entry->cmd != NULL) {
	enter_cmd_hash(entry->cmd, entry);
	entry++;
    }
}


/*  Advances the input by one character: reads the next character of
    stream `f' into the global `ch' (EOF at end of input).  The line
    counter is bumped when the character being left behind was a
    newline.  */
void readch() {
    int leaving = ch;

    ch = getc(f);
    if (leaving == '\n')
	linenum++;
}


/*  Returns next token in the input as its result.

    The result is the action record for the token.  On entry the global
    `ch' holds the first unconsumed input character; on return it holds
    the first character after the token.  Where a token has text, it is
    left in `tokenbuffer'.

    Forms recognized:
      end of input    returns `endoffile'
      % ...           comment; skipped up to and including the newline
      blank or tab    returns `other' with a single space as its text
      \name  \name*   looked up in the command table, starred form
		      first and then the plain name; if neither is
		      known, returns `untranslated'
      \c              (c not a letter) looked up in the slash-char table
      --  ---         return `dashdash' / `dashdashdash'
      ``  ''  $$      return `leftdquote' / `rightdquote' / `dollardollar'
      "x              german-style shorthand, only when `german' is set
      letters/digits  a maximal run is returned as one `other' token
      anything else   looked up in the single-character table

    Command names and words longer than MAXLEN characters are truncated
    in `tokenbuffer' (the excess characters are still consumed).  */
ACTIONPTR get_token() {
    register int lastch;
    CHTPTR chptr;
    register int toklen;
    BOOL star;

    tokenbuffer[0] = '\0';
    for( ; ; ) {
	/* KJT 29/03/08: "eof" renamed "endoffile" to avoid MingW clash */
	if (ch == EOF) return &endoffile;
	/* lastch is the character being classified; ch becomes lookahead */
	lastch = ch;
	readch();
	switch(lastch) {
	case EOF:
	    /* not reachable after the test above; kept as a safeguard */
	    /* KJT 29/03/08: "eof" renamed "endoffile" to avoid MingW clash */
	    return &endoffile;
	case '%':	/* skip to end of line */
	    while (ch != '\n' && ch != EOF)
		readch();
	    if (ch == '\n') readch();
	    break;	/* go round the loop again for a real token */
	case ' ':
	case '\t':
	    tokenbuffer[0] = ' ';  tokenbuffer[1] = '\0';
	    return &other;
	case '\\':
	    tokenbuffer[0] = '\\';  toklen = 1;
	    if (isalpha(ch)) {	/* read command name */
		do {
		    /* characters beyond MAXLEN are consumed but dropped */
		    if (toklen < MAXLEN)
			tokenbuffer[toklen++] = ch;
		    readch();
		} while(isalpha(ch));

		/* check for name of form \aaaa* */
		if (ch == '*' && toklen < MAXLEN) {
		    /* the '*' is copied but not yet consumed from input */
		    tokenbuffer[toklen++] = ch;
		    star = TRUE;
		} else
		    star = FALSE;
		tokenbuffer[toklen] = '\0';
		chptr = lookup_cmd(tokenbuffer, toklen);
		if (chptr != NULL) {
		    /* starred name is known: now consume the '*' */
		    if (star) readch();
		    return chptr->u;
		}

		if (star) {
		    /* try again without the * */
		    /* (the '*' stays in the input as the next token) */
		    tokenbuffer[--toklen] = '\0';
		    chptr = lookup_cmd(tokenbuffer, toklen);
		    if (chptr != NULL)
			return chptr->u;
		}
		return &untranslated;
	    }
	    /* backslash followed by a non-letter */
	    lastch = ch;
	    readch();
	    return lookup_slash_char(lastch);
	case '-':
	    if (ch != '-')
		return lookup_char('-');
	    readch();
	    if (ch != '-')
		return &dashdash;
	    readch();
	    return &dashdashdash;
	case '`':
	    if (ch != '`')
		return lookup_char('`');
	    readch();
	    return &leftdquote;
	case '\"':
	    if (!german)
		return lookup_char(lastch);
	    /* german mode: the character after the quote picks the meaning */
	    toklen = 0;
	    switch(ch) {
	    case 'a':  case 'i':
	    case 'o':  case 'u':
	    case 'A':
	    case 'O':  case 'U':
		/* treat "x like \"x; the letter is left in the input */
		return lookup_slash_char('\"');
	    case 's':
		strcpy(tokenbuffer, "\\ss");  toklen = 3;
		break;
	    case '`':
		strcpy(tokenbuffer, "\\glqq");  toklen = 5;
		break;
	    case '\'':
		strcpy(tokenbuffer, "\\grqq");  toklen = 5;
		break;
	    case '<':
		strcpy(tokenbuffer, "\\flqq");  toklen = 5;
		break;
	    case '>':
		strcpy(tokenbuffer, "\\frqq");  toklen = 5;
		break;
	    case '|':  case '-':
		/* "| and "- produce nothing: drop both characters */
		readch();  continue;
	    default:	/* we'll assume this is "ck or "ff etc. coming */
		/* drop only the quote; the letters are scanned next */
		if (isalpha(ch)) continue;
		break;
	    }
	    /* no shorthand matched: the quote is an ordinary character */
	    if (toklen == 0)
		return lookup_char(lastch);
	    /* consume the shorthand's second character */
	    readch();
	    chptr = lookup_cmd(tokenbuffer, toklen);
	    if (chptr != NULL) return chptr->u;
	    /* end the diagnostic with a newline, like the file's other
	       message, so later output starts on a fresh line */
	    LN(); FPR(stderr, "missing definition for %s\n", tokenbuffer );
	    break;	/* shorthand is dropped; scan for the next token */
	case '\'':
	    if (ch != '\'')
		return lookup_char('\'');
	    readch();
	    return &rightdquote;
	case '$':
	    if (ch != '$')
		return lookup_char('$');
	    readch();
	    return &dollardollar;
	case 'a': case 'b': case 'c': case 'd': case 'e':
	case 'f': case 'g': case 'h': case 'i': case 'j':
	case 'k': case 'l': case 'm': case 'n': case 'o':
	case 'p': case 'q': case 'r': case 's': case 't':
	case 'u': case 'v': case 'w': case 'x': case 'y':
	case 'z':
	case 'A': case 'B': case 'C': case 'D': case 'E':
	case 'F': case 'G': case 'H': case 'I': case 'J':
	case 'K': case 'L': case 'M': case 'N': case 'O':
	case 'P': case 'Q': case 'R': case 'S': case 'T':
	case 'U': case 'V': case 'W': case 'X': case 'Y':
	case 'Z':
	case '0': case '1': case '2': case '3': case '4':
	case '5': case '6': case '7': case '8': case '9':
	    /* gather a whole word (letters and digits) as one token */
	    tokenbuffer[0] = lastch;  toklen = 1;
	    while(isalnum(ch)) {
		if (toklen < MAXLEN)
		    tokenbuffer[toklen++] = ch;
		readch();
	    }
	    tokenbuffer[toklen] = '\0';
	    return &other;
	default:
	    return lookup_char(lastch);
	}
    }
}


/*  All environment names are kept in a table.  This enables us to flag
    the few that put us into math mode (or, worse, into verbatim mode).
    It also means that we keep unique copies of these names for use in
    error messages.

    Returns the table entry for `envname'.  If the name is not yet in
    the table, a new entry is created: it is a copy of `bad_env' whose
    `name' field points at a freshly allocated copy of `envname'.  The
    new entry is pushed onto the front of its hash chain, so later
    lookups of the same name return the same entry.  */
ENVPTR lookup_env( char *envname ) {
    int hc, len;
    register EHTPTR ehp;
    register ENVPTR ep;

    len = strlen(envname);
    /* Hash on length, first and last character; this must stay the same
       formula as enter_env() uses.  An empty name has no last character,
       so 0 is used rather than reading envname[-1].  */
    hc = (len + envname[0] + (len > 0 ? envname[len-1] : 0)) & (ECHAINS-1);
    for( ehp = env_hash_chain[hc];  ehp != NULL; ehp = ehp->next_in_bkt ) {
	if (strcmp(ehp->e->name, envname) == 0) return ehp->e;
    }

    /* allocate a new environment name entry */
    ep = (ENVPTR)malloc(sizeof(ENV));
    MCHECK(ep);
    *ep = bad_env;
    ep->name = malloc((unsigned)(len+1));
    MCHECK(ep->name);
    strcpy(ep->name, envname);

    /* link the new entry into its bucket */
    ehp = (EHTPTR)malloc(sizeof(EHT));
    MCHECK(ehp);	/* checked like the other allocations above */
    ehp->next_in_bkt = env_hash_chain[hc];
    env_hash_chain[hc] = ehp;
    ehp->e = ep;

    return ep;
}


/*  Fills chartab from `cmdtable', an array of action records ended by
    an entry whose `cmd' field is NULL.  Each record is stored in the
    slot indexed by the first character of its `cmd' string; a later
    record for the same character replaces an earlier one.  */
static void enter_chars( ACTIONPTR cmdtable ) {
    ACTIONPTR entry;

    for( entry = cmdtable;  entry->cmd != NULL;  entry++ ) {
	int slot = entry->cmd[0] & 0xff;

	chartab[slot] = entry;
    }
}

/*  Fills slashchartab from `cmdtable', an array of action records
    ended by an entry whose `cmd' field is NULL.  Every `cmd' string is
    expected to start with a backslash (OOPS is invoked otherwise); the
    record is stored in the slot indexed by the character that follows
    the backslash.  */
static void enter_slashchars( ACTIONPTR cmdtable ) {
    ACTIONPTR entry = cmdtable;

    while (entry->cmd != NULL) {
	char *text = entry->cmd;

	if (text[0] != '\\') OOPS("bad slash-char table entry");
	slashchartab[text[1]&0xff] = entry;
	entry++;
    }
}


/*  Links an existing environment record into the environment hash
    table, keyed by ep->name.  The record itself is not copied; a new
    chain node pointing at it is pushed onto the front of its bucket.
    No check is made for an existing entry with the same name.  */
static void enter_env( ENVPTR ep ) {
    int hc, len;
    register EHTPTR ehp;
    char *envname = ep->name;

    len = strlen(envname);
    /* Hash on length, first and last character; this must stay the same
       formula as lookup_env() uses.  An empty name has no last
       character, so 0 is used rather than reading envname[-1].  */
    hc = (len + envname[0] + (len > 0 ? envname[len-1] : 0)) & (ECHAINS-1);
    ehp = (EHTPTR)malloc(sizeof(EHT));
    MCHECK(ehp);	/* never dereference a failed allocation */
    ehp->next_in_bkt = env_hash_chain[hc];
    env_hash_chain[hc] = ehp;
    ehp->e = ep;
}


/*  Loads the environment hash table with every record of the
    `environments' array (which ends at the record whose name is NULL),
    then caches the entries for "math", "displaymath" and "tabbing" in
    math_env, displaymath_env and tabbing_env.  */
static void init_env_hash_tab() {
    register ENVPTR ep = environments;

    while (ep->name != NULL) {
	enter_env(ep);
	ep++;
    }

    math_env = lookup_env("math");
    displaymath_env = lookup_env("displaymath");
    tabbing_env = lookup_env("tabbing");
}


/*  Builds all of the lexer's lookup tables: the command hash table,
    the plain-character and backslash-character tables, and the
    environment-name hash table.  Entries are added unconditionally, so
    calling this more than once would add duplicate hash-chain nodes.  */
void initialize_lexer() {
    enter_cmds(commands);		/* \name commands -> cmdhashtab */
    enter_chars(special_chars);		/* single characters -> chartab */
    enter_slashchars(slash_chars);	/* \c sequences -> slashchartab */
    init_env_hash_tab();		/* environment names -> env_hash_chain */
}
