scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 12e0b049f96901be321d7797730555f81af2eee8
parent ad9c3eeed6939fd545333482bf951991ab3e1b25
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue, 11 Mar 2014 15:31:01 +0100

Do a prefetch of 1 token

C is a LR(2) language so it means we need 2 tokens in order to
decide what is the next step. We have solving this problem with
backtracking and a tricky ahead, that was used only to detect that
the proximus token was ':' or not. Since we want to remove the
backtracking we need a real ahead() function, and the better
way is having a prefetch token.

Diffstat:
Mlex.c | 99++++++++++++++++++++++++++++++++++++++++---------------------------------------
1 file changed, 50 insertions(+), 49 deletions(-)

diff --git a/lex.c b/lex.c @@ -9,10 +9,9 @@ #include "symbol.h" #include "sizes.h" -#define NR_KEYW_HASH 16 - unsigned char yytoken; char yytext[IDENTSIZ + 1]; +static char yybuf[IDENTSIZ + 1]; unsigned linenum; unsigned columnum; const char *filename; @@ -24,7 +23,7 @@ struct keyword { }; static FILE *yyin; - +static struct symbol yysym = {.name = ""}, *yynval = &yysym; struct symbol *yyval; struct symbol * @@ -50,6 +49,7 @@ type: switch (ch = toupper(getc(yyin))) { v = strtoll(s, NULL, base); sym = lookup(NULL, NS_IDEN); + sym->tok = CONSTANT; sym->ctype = *tp; switch (tp->type) { @@ -72,7 +72,7 @@ type: switch (ch = toupper(getc(yyin))) { return sym; } -static char +static struct symbol * number(void) { register char *bp, ch; @@ -90,7 +90,7 @@ number(void) ungetc(ch, yyin); } - for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = ch) { + for (bp = yybuf; bp < &yybuf[IDENTSIZ]; *bp++ = ch) { ch = getc(yyin); switch (base) { case 8: @@ -108,13 +108,11 @@ number(void) } } -end: if (bp == &yytext[IDENTSIZ]) - error("identifier too long %s", yytext); +end: if (bp == &yybuf[IDENTSIZ]) + error("identifier too long %s", yybuf); *bp = '\0'; ungetc(ch, yyin); - yyval = integer(yytext, base); - - return CONSTANT; + return integer(yybuf, base); } void @@ -169,35 +167,36 @@ init_keywords(void) } } -static unsigned char +static struct symbol * iden(void) { register char ch, *bp; - for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = ch) { + for (bp = yybuf; bp < &yybuf[IDENTSIZ]; *bp++ = ch) { if (!isalnum(ch = getc(yyin)) && ch != '_') break; } - if (bp == &yytext[IDENTSIZ]) - error("identifier too long %s", yytext); + if (bp == &yybuf[IDENTSIZ]) + error("identifier too long %s", yybuf); *bp = '\0'; ungetc(ch, yyin); - yyval = lookup(yytext, NS_IDEN); - return yyval->tok; + return lookup(yybuf, NS_IDEN); } static unsigned char skip(void) { - register int c; + static int c; extern char parser_out_home; - while (isspace(c = getc(yyin))) { - switch (c) { - case '\n': ++linenum, columnum = 1; break; - case '\t': columnum += 8; break; - default: ++columnum; break; + if (c != EOF) { + while (c != EOF && isspace(c = getc(yyin))) { + switch (c) { + case '\n': ++linenum, columnum = 1; break; + case '\t': columnum += 8; break; + default: ++columnum; break; + } } } if (c == EOF) { @@ -214,14 +213,14 @@ follow(unsigned char op, unsigned char eq, unsigned char rep) { register char c = getc(yyin); - yytext[1] = c; - yytext[2] = '\0'; + yybuf[1] = c; + yybuf[2] = '\0'; if (c == '=') return eq; else if (c == op && rep) return rep; - yytext[1] = '\0'; + yybuf[1] = '\0'; ungetc(c, yyin); return op; } @@ -231,23 +230,24 @@ rel_shift(unsigned char op) { static char tokens[2][3] = { {GE, SHL, SHL_EQ}, - {LE, SHR, SHR_EQ}}; + {LE, SHR, SHR_EQ} + }; register char c = getc(yyin); register char *tp = tokens[op == '>']; - yytext[1] = c; - yytext[2] = '\0'; + yybuf[1] = c; + yybuf[2] = '\0'; if (c == '=') { return tp[0]; } else if (c == op) { if ((c = getc(yyin)) == '=') { - yytext[2] = c; - yytext[3] = '\0'; + yybuf[2] = c; + yybuf[3] = '\0'; return tp[2]; } op = tp[1]; } else { - yytext[1] = '\0'; + yybuf[1] = '\0'; } ungetc(c, yyin); return op; @@ -258,13 +258,13 @@ minus(void) { register int c = getc(yyin); - yytext[1] = c; + yybuf[1] = c; switch (c) { case '-': return DEC; case '>': return INDIR; case '=': return SUB_EQ; default: - yytext[1] = '\0'; + yybuf[1] = '\0'; ungetc(c, yyin); return '-'; } @@ -275,8 +275,8 @@ operator(void) { register unsigned char c = getc(yyin); - yytext[0] = c; - yytext[1] = '\0'; + yybuf[0] = c; + yybuf[1] = '\0'; switch (c) { case '=': return follow('=', EQ, 0); case '^': return follow('^', XOR_EQ, 0); @@ -297,28 +297,28 @@ next(void) { register unsigned char c; + strcpy(yytext, yybuf); + yyval = yynval; + yytoken = yynval->tok; + yynval = &yysym; + if (!skip()) { - yytoken = EOFTOK; + yysym.tok = EOFTOK; } else { ungetc(c = getc(yyin), yyin); if (isalpha(c) || c == '_') - yytoken = iden(); + yynval = iden(); else if (isdigit(c)) - yytoken = number(); + yynval = number(); else - yytoken = operator(); + yysym.tok = operator(); } } unsigned char ahead(void) { - register char c; - - if (!skip()) - return EOFTOK; - ungetc(c = getc(yyin), yyin); - return c; + return yynval->tok; } char @@ -347,10 +347,11 @@ open_file(register const char *file) if (file == NULL) { yyin = stdin; filename = "(stdin)"; - return; + } else { + if ((yyin = fopen(file, "r")) == NULL) + die("file '%s' not found", file); + filename = file; } - if ((yyin = fopen(file, "r")) == NULL) - die("file '%s' not found", file); - filename = file; columnum = linenum = 1; + next(); /* prefetch first token */ }