scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 8fa3bec21d142b3e8e14b6ae27574081ad0c967f
parent 893e56b36a5d1da429dd01ca703adbedff2898e5
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Fri, 25 Oct 2013 14:33:21 +0200

Use a different hash for keywords

With the previous implementation it was necessary to look in the
symbol table in order to know whether an identifier was a keyword
or not. This search had the problem that the correct namespace was
not yet known at that point, and because of this problem the ugly
namespace code was necessary.
This patch is the first step to remove this shit, and uses
a new hash table only for the keywords, so the problem of the
namespaces will be fixed because we defer the search to a later
moment when we already have the needed information.

Diffstat:
MMakefile | 2+-
Dkeyword.c | 60------------------------------------------------------------
Mlex.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
Msymbol.c | 8++++----
Msymbol.h | 7+++----
5 files changed, 87 insertions(+), 71 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ -OBJS = types.o decl.o lex.o error.o symbol.o flow.o main.o expr.o keyword.o \ +OBJS = types.o decl.o lex.o error.o symbol.o flow.o main.o expr.o \ wrapper.o tree.o LIBS = diff --git a/keyword.c b/keyword.c @@ -1,60 +0,0 @@ -#include <stddef.h> - -#include "tokens.h" -#include "symbol.h" - - -static struct keyword { - char *str; - unsigned char tok; -} keywords [] = {"auto", AUTO, - "break", BREAK, - "_Bool", CHAR, - "_Complex", COMPLEX, - "case", CASE, - "char", CHAR, - "const", CONST, - "continue", CONTINUE, - "default", DEFAULT, - "do", DO, - "double", DOUBLE, - "else", ELSE, - "enum", ENUM, - "extern", EXTERN, - "float", FLOAT, - "for", FOR, - "goto", GOTO, - "if", IF, - "int", INT, - "_Imaginary", IMAGINARY, - "long", LONG, - "register", REGISTER, - "restricted", RESTRICT, - "return", RETURN, - "short", SHORT, - "signed", SIGNED, - "sizeof", SIZEOF, - "static", STATIC, - "struct", STRUCT, - "switch", SWITCH, - "typedef", TYPEDEF, - "union", UNION, - "unsigned", UNSIGNED, - "void", VOID, - "volatile", VOLATILE, - "while", WHILE, - NULL, 0, -}; - -void -init_keywords(void) -{ - register struct keyword *bp; - register struct symbol *sym; - - for (bp = keywords; bp->str; bp++) { - sym = lookup(bp->str, NS_KEYWORD); - sym->tok = bp->tok; - } - new_ctx(); -} diff --git a/lex.c b/lex.c @@ -9,6 +9,8 @@ #include "symbol.h" #include "sizes.h" +#define NR_KEYW_HASH 16 + union yyval yyval; unsigned char yytoken; char yytext[IDENTSIZ + 1]; @@ -16,7 +18,15 @@ unsigned linenum; unsigned columnum; const char *filename; +struct keyword { + char *str; + unsigned char tok; + struct keyword *next; +}; + static FILE *yyin; +static struct keyword *ktab[NR_KEYW_HASH]; + static char number(void) @@ -64,11 +74,75 @@ end: if (bp == yytext + IDENTSIZ) return CONSTANT; } +void +init_keywords(void) +{ + static struct keyword buff[] = { + {"auto", AUTO, NULL}, + {"break", BREAK, NULL}, + {"_Bool", CHAR, NULL}, + {"_Complex", COMPLEX, NULL}, 
+ {"case", CASE, NULL}, + {"char", CHAR, NULL}, + {"const", CONST, NULL}, + {"continue", CONTINUE, NULL}, + {"default", DEFAULT, NULL}, + {"do", DO, NULL}, + {"double", DOUBLE, NULL}, + {"else", ELSE, NULL}, + {"enum", ENUM, NULL}, + {"extern", EXTERN, NULL}, + {"float", FLOAT, NULL}, + {"for", FOR, NULL}, + {"goto", GOTO, NULL}, + {"if", IF, NULL}, + {"int", INT, NULL}, + {"_Imaginary", IMAGINARY, NULL}, + {"long", LONG, NULL}, + {"register", REGISTER, NULL}, + {"restricted", RESTRICT, NULL}, + {"return", RETURN, NULL}, + {"short", SHORT, NULL}, + {"signed", SIGNED, NULL}, + {"sizeof", SIZEOF, NULL}, + {"static", STATIC, NULL}, + {"struct", STRUCT, NULL}, + {"switch", SWITCH, NULL}, + {"typedef", TYPEDEF, NULL}, + {"union", UNION, NULL}, + {"unsigned", UNSIGNED, NULL}, + {"void", VOID, NULL}, + {"volatile", VOLATILE, NULL}, + {"while", WHILE, NULL}, + {NULL, 0, NULL}, + }; + register struct keyword *bp; + + for (bp = buff; bp->str; ++bp) { + register unsigned char h = hash(bp->str) & NR_KEYW_HASH-1; + bp->next = ktab[h]; + ktab[h] = bp; + } +} + +static unsigned char +keyword(char *s) +{ + register struct keyword *bp; + + for (bp = ktab[hash(s) & NR_KEYW_HASH-1]; bp; bp = bp->next) { + if (!strcmp(bp->str, s)) + return bp->tok; + } + return 0; +} + static unsigned char iden(void) { register char ch, *bp; register struct symbol *sym; + static unsigned char tok; for (bp = yytext; bp < yytext + IDENTSIZ; *bp++ = ch) { if (!isalnum(ch = getc(yyin)) && ch != '_') @@ -78,9 +152,12 @@ iden(void) error("identifier too long %s", yytext); *bp = '\0'; ungetc(ch, yyin); - yyval.sym = lookup(yytext, NS_ANY); - return (yyval.sym->ns == NS_KEYWORD) ? 
yyval.sym->tok : IDEN; + if (tok = keyword(yytext)) + return tok; + + yyval.sym = lookup(yytext, NS_ANY); + return IDEN; } static unsigned char diff --git a/symbol.c b/symbol.c @@ -14,14 +14,14 @@ static struct symbol *htab[NR_SYM_HASH]; static struct symbol *head, *headfun; -static inline unsigned char +unsigned char hash(register const char *s) { register unsigned char h, ch; for (h = 0; ch = *s++; h += ch) /* nothing */; - return h & NR_SYM_HASH - 1; + return h; } void @@ -40,7 +40,7 @@ del_ctx(void) if (sym->ctx <= curctx) break; if ((s = sym->name) != NULL) - htab[hash(s)] = sym->hash; + htab[hash(s) & NR_SYM_HASH - 1] = sym->hash; next = sym->next; sym->next = headfun; headfun = sym; @@ -76,7 +76,7 @@ lookup(register const char *s, signed char ns) } l = strlen(s); - key = hash(s); + key = hash(s) & NR_SYM_HASH - 1; if (!(ins = ns >= 0)) ns = -ns; diff --git a/symbol.h b/symbol.h @@ -7,9 +7,8 @@ # include <stdbool.h> #endif -#define CTX_OUTER 1 -#define CTX_FUNC 2 -#define CTX_ANY 0 +#define CTX_OUTER 0 +#define CTX_FUNC 1 #define NOINSERT(x) (-x) @@ -52,7 +51,6 @@ struct symbol { char *name; struct { union { - unsigned char tok; /* used in keywords */ short val; /* used in integer constant */ }; }; @@ -76,6 +74,7 @@ extern void insert(struct symbol *sym, unsigned char ctx); extern struct ctype *storage(struct ctype *tp, unsigned char mod); extern struct ctype *newctype(void); extern void delctype(struct ctype *tp); +extern unsigned char hash(register const char *s); #ifndef NDEBUG extern void ptype(register struct ctype *t);