scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 8b6cc849d8e198cfa639b4de3ad84a22f2d36e07
parent 1ffd5f6ee169b5d76958348b3376d82a66a75d63
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Sun, 10 Jun 2012 19:12:51 +0200

Unified keywords and symbols

This helps simplify the code by using the same code for both keywords and
symbols. After this patch the lexer is simpler, because it no longer needs to
care about keywords.

Diffstat:
MMakefile | 2+-
Akeyword.c | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mlex.c | 103+++++++------------------------------------------------------------------------
Mmain.c | 4+---
Msymbol.c | 12+++++++++++-
Msymbol.h | 16++++++++++------
Mtokens.h | 3++-
Mtypes.c | 1+
Mtypes.h | 3++-
9 files changed, 97 insertions(+), 107 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ -OBJS = types.o decl.o lex.o error.o symbol.o flow.o main.o expr.o +OBJS = types.o decl.o lex.o error.o symbol.o flow.o main.o expr.o keyword.o LIBS = all: kcc diff --git a/keyword.c b/keyword.c @@ -0,0 +1,60 @@ +#include <stddef.h> + +#include "tokens.h" +#include "types.h" +#include "symbol.h" + + +static struct keyword { + char *str; + unsigned char tok; +} keywords [] = {"auto", AUTO, + "break", BREAK, + "_Bool", CHAR, + "case", CASE, + "char", CHAR, + "const", CONST, + "continue", CONTINUE, + "default", DEFAULT, + "do", DO, + "double", DOUBLE, + "else", ELSE, + "enum", ENUM, + "extern", EXTERN, + "float", FLOAT, + "for", FOR, + "goto", GOTO, + "if", IF, + "int", INT, + "long", LONG, + "register", REGISTER, + "restricted", RESTRICTED, + "return", RETURN, + "short", SHORT, + "signed", SIGNED, + "sizeof", SIZEOF, + "static", STATIC, + "struct", STRUCT, + "switch", SWITCH, + "typedef", TYPEDEF, + "union", UNION, + "unsigned", UNSIGNED, + "void", VOID, + "volatile", VOLATILE, + "while", WHILE, + NULL, 0, +}; + +void init_keywords(void) +{ + register struct keyword *bp; + register struct symbol *sym; + extern void init_symbol(void); + + init_symbol(); + for (bp = keywords; bp->str; bp++) { + sym = install(bp->str, hashfun(bp->str)); + sym->tok = bp->tok; + sym->type = T_KWD; + } +} diff --git a/lex.c b/lex.c @@ -5,56 +5,12 @@ #include <ctype.h> #include "cc.h" -#include "symbol.h" #include "tokens.h" +#include "symbol.h" +#include "types.h" -#define NR_KWD_HASH 32 - -static struct keyword { - char *str; - unsigned char tok; - struct keyword *next; -} keywords [] = {"auto", AUTO, NULL, - "break", BREAK, NULL, - "_Bool", CHAR, NULL, - "case", CASE, NULL, - "char", CHAR, NULL, - "const", CONST, NULL, - "continue", CONTINUE, NULL, - "default", DEFAULT, NULL, - "do", DO, NULL, - "double", DOUBLE, NULL, - "else", ELSE, NULL, - "enum", ENUM, NULL, - "extern", EXTERN, NULL, - "float", FLOAT, NULL, - "for", FOR, NULL, - 
"goto", GOTO, NULL, - "if", IF, NULL, - "int", INT, NULL, - "long", LONG, NULL, - "register", REGISTER, NULL, - "restricted", RESTRICTED, NULL, - "return", RETURN, NULL, - "short", SHORT, NULL, - "signed", SIGNED, NULL, - "sizeof", SIZEOF, NULL, - "static", STATIC, NULL, - "struct", STRUCT, NULL, - "switch", SWITCH, NULL, - "typedef", TYPEDEF, NULL, - "union", UNION, NULL, - "unsigned", UNSIGNED, NULL, - "void", VOID, NULL, - "volatile", VOLATILE, NULL, - "while", WHILE, NULL, - NULL, 0, NULL -}; - -static struct keyword *khash[NR_KWD_HASH]; static FILE *yyin; - union yyval yyval; unsigned char yytoken; unsigned char yyhash; @@ -64,37 +20,6 @@ unsigned columnum; const char *filename; -static unsigned char hashfun(register const char *s) -{ - register unsigned char h, ch; - - for (h = 0; ch = *s++; h += ch) - /* nothing */; - return h; -} - -void init_lex(void) -{ - register struct keyword *bp; - static unsigned char h; - - for (bp = keywords; bp->str; bp++) { - register struct keyword *aux, *ant; - h = hashfun(bp->str) & (NR_KWD_HASH - 1); - if (!(aux = khash[h]) || strcmp(bp->str, aux->str) < 0) { - khash[h] = bp; - bp->next = aux; - continue; - } - for (ant = aux; aux; ant = aux, aux = aux->next) { - if (strcmp(bp->str, aux->str) < 0) - break; - } - ant->next = bp; - bp->next = aux; - } -} - static char number(void) { register char *bp; @@ -106,27 +31,17 @@ static char number(void) } if (bp == yytext + TOKSIZ_MAX) error("identifier too long %s", yytext); - ungetc(ch, yyin); *bp = '\0'; - return CONSTANT; -} - -static unsigned char keyword(const char *s, unsigned char key) -{ - register struct keyword *kwp; + ungetc(ch, yyin); - key &= NR_KWD_HASH - 1; - for (kwp = khash[key]; kwp; kwp = kwp->next) { - if (!strcmp(kwp->str, yytext)) - return kwp->tok; - } - return 0; + return CONSTANT; } static unsigned char iden(void) { register char ch; register char *bp = yytext; + register struct symbol *sym; for (yyhash = 0; bp < yytext + TOKSIZ_MAX; *bp++ = ch) { if 
(!isalnum(ch = getc(yyin)) && ch != '_') @@ -137,10 +52,10 @@ static unsigned char iden(void) error("identifier too long %s", yytext); *bp = '\0'; ungetc(ch, yyin); - - if (ch = keyword(yytext, yyhash)) - return ch; - return IDEN;; + if ((sym = lookup(yytext, yyhash)) && sym->type == T_KWD) + return sym->tok; + yyval.sym = sym; + return IDEN; } static unsigned char skip(void) diff --git a/main.c b/main.c @@ -6,7 +6,6 @@ #include "syntax.h" extern void open_file(const char *file); -extern void init_lex(); extern void init_symbol(); struct user_opt user_opt; @@ -15,8 +14,7 @@ struct user_opt user_opt; int main(int argc, char *argv[]) { - init_lex(); - init_symbol(); + init_keywords(); open_file(NULL); for (next(); yytoken != EOFTOK; decl()) /* nothing */; diff --git a/symbol.c b/symbol.c @@ -65,7 +65,8 @@ struct symbol *install(const char *s, unsigned char key) head->h_next = sym; next->h_prev = sym; } else { - sym->h_next = sym->h_prev = sym->str = NULL; + sym->h_next = sym->h_prev = NULL; + sym->str = NULL; } return sym; } @@ -89,3 +90,12 @@ void init_symbol(void) for (bp = iden_hash.buf; bp < &iden_hash.buf[NR_SYM_HASH]; ++bp) bp->h_next = bp->h_prev = bp; } + +unsigned char hashfun(register const char *s) +{ + register unsigned char h, ch; + + for (h = 0; ch = *s++; h += ch) + /* nothing */; + return h; +} diff --git a/symbol.h b/symbol.h @@ -3,18 +3,21 @@ #ifndef SYMBOL_H #define SYMBOL_H - struct type; struct symbol { - char *str; - unsigned char level; struct type *type; + union { + struct { /* used in usual symbols */ + char *str; + unsigned char level; + }; + unsigned char tok; /* used in keywords */ + }; struct symbol *next; struct symbol *h_next, *h_prev; }; - struct symctx { struct symbol *iden; struct symctx *next; @@ -23,7 +26,8 @@ struct symctx { extern void new_ctx(struct symctx *ctx); extern void del_ctx(void); -extern struct symbol *addsym(const char *s, unsigned char key); -extern struct symbol *lookupsym(char *s, unsigned char key); +extern struct 
symbol *install(const char *s, unsigned char key); +extern struct symbol *lookup(char *s, unsigned char key); +extern unsigned char hashfun(register const char *s); #endif diff --git a/tokens.h b/tokens.h @@ -33,7 +33,7 @@ enum { }; - +struct symbol; union yyval { struct symbol *sym; }; @@ -51,4 +51,5 @@ extern void init_lex(void); extern unsigned char next(void); extern char accept(unsigned char tok); extern void expect(unsigned char tok); +extern void init_keywords(void); #endif diff --git a/types.c b/types.c @@ -23,6 +23,7 @@ struct type tulong = {.btype = LONG, .sign = 0}; struct type tllong = {.btype = LLONG, .sign = 1}; struct type tullong = {.btype = LLONG, .sign = 0}; struct type tvoid = {.btype = VOID, .sign = 0}; +struct type tkeyword; #define TYPEOP_MAX PTRLEVEL_MAX /* TODO: take a look of the ANSI standard */ diff --git a/types.h b/types.h @@ -24,7 +24,7 @@ struct type { extern struct type tschar, tuchar, tshort, tushort, tint, tuint; extern struct type tfloat, tdouble, tldouble, tlong; -extern struct type tulong, tllong, tullong, tvoid; +extern struct type tulong, tllong, tullong, tvoid, tkeyword; #define T_SCHAR (&tschar) #define T_UCHAR (&tuchar) @@ -40,6 +40,7 @@ extern struct type tulong, tllong, tullong, tvoid; #define T_LLONG (&tllong) #define T_ULLONG (&tullong) #define T_VOID (&tvoid) +#define T_KWD (&tkeyword) #define ARY 1 #define PTR 2