commit 1ffd5f6ee169b5d76958348b3376d82a66a75d63
parent c990953ff9d588de699e0a19420b5cc2d7e0db51
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date: Sun, 10 Jun 2012 12:14:12 +0200
Improved lexical analysis
This patch removes all the complex switch in function next, which is now a
clearer one.
Diffstat:
M | decl.c | | | 15 | +++++++++++---- |
M | lex.c | | | 195 | ++++++++++++++++++++++++++++++++++++++----------------------------------------- |
M | symbol.c | | | 28 | ++++++++++++++++------------ |
3 files changed, 121 insertions(+), 117 deletions(-)
diff --git a/decl.c b/decl.c
@@ -10,10 +10,19 @@
char parser_out_home;
-#include <stdio.h> /* TODO: remove this */
static void declarator(void);
+static struct symbol *newiden(char *s, unsigned char key)
+{
+ register struct symbol *sym = lookup(yytext, yyhash);
+
+ if (!sym)
+ sym = install(yytext, yyhash);
+ if (sym->level == nested_level)
+ error("redeclaration of '%s'", yytext);
+ return sym;
+}
static void dirdcl(void)
{
@@ -21,9 +30,7 @@ static void dirdcl(void)
declarator();
expect(')');
} else if (yytoken == IDEN) {
- if (yyval.sym && yyval.sym->level == nested_level)
- error("redeclaration of '%s'", yytext);
- addsym(yytext, yyhash);
+ newiden(yytext, yyhash);
next();
} else {
error("expected '(' or identifier before of '%s'", yytext);
diff --git a/lex.c b/lex.c
@@ -111,9 +111,20 @@ static char number(void)
return CONSTANT;
}
-static unsigned char iden(void)
+static unsigned char keyword(const char *s, unsigned char key)
{
register struct keyword *kwp;
+
+ key &= NR_KWD_HASH - 1;
+ for (kwp = khash[key]; kwp; kwp = kwp->next) {
+ if (!strcmp(kwp->str, yytext))
+ return kwp->tok;
+ }
+ return 0;
+}
+
+static unsigned char iden(void)
+{
register char ch;
register char *bp = yytext;
@@ -124,27 +135,21 @@ static unsigned char iden(void)
}
if (bp == yytext + TOKSIZ_MAX)
error("identifier too long %s", yytext);
- ungetc(ch, yyin);
*bp = '\0';
- yyhash &= NR_KWD_HASH - 1;
- for (kwp = khash[yyhash]; kwp; kwp = kwp->next) {
- if (!strcmp(kwp->str, yytext))
- return kwp->tok;
- }
- yyval.sym = lookupsym(yytext, yyhash);
+ ungetc(ch, yyin);
+
+ if (ch = keyword(yytext, yyhash))
+ return ch;
return IDEN;;
}
-
-
-unsigned char next(void)
+static unsigned char skip(void)
{
- static unsigned int c;
- register unsigned char ch;
+ register int c;
extern char parser_out_home;
while (isspace(c = getc(yyin))) {
- if ((char) c == '\n')
+ if (c == '\n')
++linenum, columnum = 1;
else
++columnum;
@@ -152,97 +157,85 @@ unsigned char next(void)
if (c == EOF) {
if (parser_out_home)
error("Find EOF while parsing");
- ch = EOFTOK;
- memcpy(yytext, "EOF", sizeof("EOF"));
- goto return_token;
+ return 1;
+ }
+ ungetc(c, yyin);
+ return 0;
+}
+
+static unsigned char
+follow(unsigned char op, unsigned char eq, unsigned char rep)
+{
+ register char c;
+
+ if ((c = getc(yyin)) == '=')
+ return eq;
+ else if (c == op && rep)
+ return rep;
+ ungetc(c, yyin);
+ return op;
+}
+
+static unsigned char rel_shift(unsigned char op)
+{
+ static char tokens[2][3] = {
+ {GE, LSHIFT, LSHIFT_EQ},
+ {LE, RSHIFT, RSHIFT_EQ}};
+ register char c;
+ register char *tp = tokens[op == '>'];
+
+ if ((c = getc(yyin)) == '=') {
+ return tp[0];
+ } else if (c == op) {
+ if ((c = getc(yyin)) == '=')
+ return tp[2];
+ op = tp[1];
}
- ch = c;
- if (isalpha(ch) || ch == '_') {
- ungetc(ch, yyin);
- ch = iden();
- } else if (isdigit(ch)) {
- ungetc(ch, yyin);
- ch = number();
+ ungetc(c, yyin);
+ return c;
+}
+
+static unsigned char minus(void)
+{
+ register int c;
+
+ switch (c = getc(yyin)) {
+ case '-': return DEC;
+ case '>': return PTR;
+ case '=': return SUB_EQ;
+ default:
+ ungetc(c, yyin);
+ return '-';
+ }
+}
+
+unsigned char next(void)
+{
+ register unsigned char c;
+
+ if (!skip())
+ c = EOFTOK;
+ if (isalpha(c = getc(yyin)) || c == '_') {
+ ungetc(c, yyin);
+ c = iden();
+ } else if (isdigit(c)) {
+ ungetc(c, yyin);
+ c = number();
} else {
- register unsigned char aux;;
- aux = getc(yyin);
- yytext[0] = ch;
- yytext[1] = aux;
- yytext[2] = '\0';
-
- switch (ch) {
- case '&':
- switch (aux) {
- case '&': ch = AND; break;
- case '=': ch = AND_EQ; break;
- default: goto no_doble_character;
- }
- break;
- case '|':
- switch (aux) {
- case '|': ch = OR; break;
- case '=': ch = OR_EQ; break;
- default: goto no_doble_character;
- }
- break;
- case '<':
- switch (aux) {
- case '<': ch = LSHIFT; break;
- case '=': ch = LSHIFT_EQ; break;
- default: goto no_doble_character;
- }
- break;
- case '>':
- switch (aux) {
- case '<': ch = RSHIFT; break;
- case '=': ch = RSHIFT_EQ; break;
- default: goto no_doble_character;
- }
- break;
- case '-':
- switch (aux) {
- case '-': ch = DEC; break;
- case '>': ch = PTR; break;
- case '=': ch = SUB_EQ; break;
- default: goto no_doble_character;
- }
- break;
- case '=':
- if (aux == '=') ch = EQ;
- else goto no_doble_character;
- break;
- case '^':
- if (aux == '=') ch = XOR_EQ;
- else goto no_doble_character;
- break;
- case '*':
- if (aux == '=') ch = LSHIFT_EQ;
- else goto no_doble_character;
- break;
- case '+':
- if (aux == '+') ch = INC;
- else if (aux == '=') ch = ADD_EQ;
- else goto no_doble_character;
- break;
- case '!':
- if (aux == '=') {
- ch = NE;
- break;
- }
- no_doble_character:
- case '/': case ';': case '{': case '}':
- case '(': case ')': case '~': case ',':
- case '?': case '[': case ']': case ':':
- ungetc(aux, yyin);
- yytext[1] = '\0';
- break;
- default:
- error("Incorrect character '%02x", c);
+ switch (c) {
+ case '=': c = follow('=', EQ, 0); break;
+ case '^': c = follow('^', XOR_EQ, 0); break;
+ case '*': c = follow('*', MUL_EQ, 0); break;
+ case '!': c = follow('!', NE, 0); break;
+ case '+': c = follow('+', ADD_EQ, INC); break;
+ case '&': c = follow('&', AND_EQ, AND); break;
+ case '|': c = follow('|', OR_EQ, OR); break;
+ case '<': c = rel_shift('<'); break;
+ case '>': c = rel_shift('>'); break;
+ case '-': c = minus(); break;
}
}
-
-return_token:
- return yytoken = ch;
+ return yytoken = c;
}
char accept(unsigned char tok)
diff --git a/symbol.c b/symbol.c
@@ -21,10 +21,10 @@ static struct symctx *ctx_head = &ctx_base;
static void del_hash_ctx(struct symhash *htable, struct symbol *lim)
{
- register struct symbol *bp;
+ register struct symbol *bp, *next, *prev;
for (bp = htable->top; bp && bp != lim; bp = bp->next) {
- register struct symbol *next = bp->h_next, *prev = bp->h_prev;
+ next = bp->h_next, prev = bp->h_prev;
prev->h_next = next;
next->h_prev = prev;
free(bp->str);
@@ -46,27 +46,31 @@ void del_ctx(void)
del_hash_ctx(&iden_hash, ctx_head->next->iden);
}
-struct symbol *addsym(const char *s, unsigned char key)
+struct symbol *install(const char *s, unsigned char key)
{
static struct symbol *head;
register struct symbol *sym, *next;
sym = xmalloc(sizeof(*sym));
- sym->str = xstrdup(s);
+
sym->next = iden_hash.top;
iden_hash.top = sym;
- head = &iden_hash.buf[key], next = head->h_next;
-
- sym->h_next = next;
- sym->h_prev = next->h_prev;
- head->h_next = sym;
- next->h_prev = sym;
+ if (s) {
+ sym->str = xstrdup(s);
+ head = &iden_hash.buf[key], next = head->h_next;
+ sym->h_next = next;
+ sym->h_prev = next->h_prev;
+ head->h_next = sym;
+ next->h_prev = sym;
+ } else {
+ sym->h_next = sym->h_prev = sym->str = NULL;
+ }
return sym;
}
-struct symbol *lookupsym(char *s, unsigned char key)
+struct symbol *lookup(char *s, unsigned char key)
{
register struct symbol *bp, *head;
@@ -80,7 +84,7 @@ struct symbol *lookupsym(char *s, unsigned char key)
void init_symbol(void)
{
- struct symbol *bp;
+ register struct symbol *bp;
for (bp = iden_hash.buf; bp < &iden_hash.buf[NR_SYM_HASH]; ++bp)
bp->h_next = bp->h_prev = bp;