scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit fc8190e4863a2d300391578be850614b7b3f4b9d
parent c1c6db84269708d87a5b8d0e78225768c36b36ed
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Wed, 20 May 2015 18:16:40 +0200

Rewrite lookup() in cc1

This new version removes some ugly cases in decl.c, where we had to
look up a symbol and later on insert it. In this new version, lookup()
accepts a new parameter that indicates that we want to insert a new element.

Diffstat:
Mcc1/cc1.h | 7+++++--
Mcc1/cpp.c | 15+++++++++++----
Mcc1/decl.c | 53+++++++++++++++++++++++------------------------------
Mcc1/expr.c | 8+++-----
Mcc1/lex.c | 20++++++++++++++------
Mcc1/stmt.c | 78++++++++++++++++++++++++++++++++++++------------------------------------------
Mcc1/symbol.c | 63+++++++++++++++++++++++++++++++++++++++++++++------------------
7 files changed, 137 insertions(+), 107 deletions(-)

diff --git a/cc1/cc1.h b/cc1/cc1.h @@ -91,6 +91,7 @@ enum { NS_TAG, NS_LABEL, NS_CPP, + NS_KEYWORD, NS_STRUCTS }; @@ -251,8 +252,9 @@ extern Type *ctype(uint8_t type, uint8_t sign, uint8_t size); extern Type *mktype(Type *tp, uint8_t op, short nelem, void *data); /* symbol.c */ -extern Symbol *lookup(char *s, unsigned char ns); -extern Symbol *install(char *s, unsigned char ns); +extern Symbol *lookup(uint8_t ns); +extern Symbol *install(uint8_t ns); +extern Symbol *newsym(uint8_t ns); extern void pushctx(void), popctx(void); /* stmt.c */ @@ -273,6 +275,7 @@ extern void setfname(char *name); extern void setfline(unsigned short line); extern bool addinput(char *fname); extern void delinput(void); +extern void setnamespace(uint8_t ns); #define accept(t) ((yytoken == (t)) ? next() : 0) /* code.c */ diff --git a/cc1/cpp.c b/cc1/cpp.c @@ -132,7 +132,8 @@ mkdefine(char *s, Symbol *sym) static char * define(char *s) { - char *t, name[IDENTSIZ+1]; + extern char yytext[]; + char *t; size_t len; Symbol *sym; @@ -142,9 +143,15 @@ define(char *s) /* nothing */; if ((len = t - s) > IDENTSIZ) goto too_long; - strncpy(name, s, len); - name[len] = '\0'; - sym = install(name, NS_CPP); + strncpy(yytext, s, len); + yytext[len] = '\0'; + sym = lookup(NS_CPP); + if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) { + warn("'%s' redefined", yytext); + free(sym->u.s); + } + sym->flags |= ISDEFINED; + sym->ns = NS_CPP; for (s = t; isspace(*s); ++s) /* nothing */; diff --git a/cc1/decl.c b/cc1/decl.c @@ -88,19 +88,6 @@ fundcl(struct dcldata *dp) return queue(dp, FTN, n, tp); } -static Symbol * -newiden(uint8_t ns) -{ - Symbol *sym; - extern uint8_t curctx; - - if (yylval.sym && yylval.sym->ctx == curctx && yylval.sym->ns == ns) - error("redeclaration of '%s'", yytext); - sym = install(yytext, ns); - next(); - return sym; -} - static struct dcldata *declarator0(struct dcldata *dp, uint8_t ns); static struct dcldata * @@ -112,10 +99,13 @@ directdcl(struct dcldata *dp, uint8_t ns) dp = 
declarator0(dp, ns); expect(')'); } else { - if (yytoken == IDEN || yytoken == TYPEIDEN) - sym = newiden(ns); - else - sym = install(NULL, ns); + if (yytoken == IDEN || yytoken == TYPEIDEN) { + if ((sym = install(ns)) == NULL) + error("redeclaration of '%s'", yytext); + next(); + } else { + sym = newsym(ns); + } dp = queue(dp, IDEN, 0, sym); } @@ -269,19 +259,22 @@ initializer(Symbol *sym) } static Symbol * -newtag(uint8_t tag) +newtag(void) { Symbol *sym; + uint8_t tag = yylval.token; static uint8_t ns = NS_STRUCTS; + setnamespace(NS_TAG); + next(); switch (yytoken) { - case IDEN: case TYPEIDEN: - if ((sym = lookup(yytext, NS_TAG)) == NULL) - sym = install(yytext, NS_TAG); + case IDEN: + case TYPEIDEN: + sym = yylval.sym; next(); break; default: - sym = install(NULL, NS_TAG); + sym = newsym(NS_TAG); break; } if (!sym->type) { @@ -290,7 +283,8 @@ newtag(uint8_t tag) sym->type = mktype(NULL, tag, 0, NULL); sym->type->ns = ns++; } - + + sym->flags |= ISDEFINED; if (sym->type->op != tag) error("'%s' defined as wrong kind of tag", yytext); return sym; @@ -303,12 +297,10 @@ structdcl(void) { Type *tagtype, *buff[NR_MAXSTRUCTS], **bp = &buff[0]; Symbol *tagsym, *sym; - uint8_t tag, n; + uint8_t n; size_t siz; - tag = yylval.token; - next(); - tagsym = newtag(tag); + tagsym = newtag(); tagtype = tagsym->type; if (!accept('{')) return tagtype; @@ -368,8 +360,7 @@ enumdcl(void) Symbol *sym; int val = 0; - next(); - tp = newtag(ENUM)->type; + tp = newtag()->type; if (yytoken == ';') return tp; @@ -381,7 +372,9 @@ enumdcl(void) while (yytoken != '}') { if (yytoken != IDEN) unexpected(); - sym = newiden(NS_IDEN); + if ((sym = install(NS_IDEN)) == NULL) + error("duplicated member '%s'", yytext); + next(); sym->type = inttype; if (accept('=')) initializer(sym); diff --git a/cc1/expr.c b/cc1/expr.c @@ -333,18 +333,16 @@ logic(char op, Node *lp, Node *rp) static Node * field(Node *np) { - extern uint8_t lex_ns; Symbol *sym; switch (BTYPE(np)) { case STRUCT: case UNION: - lex_ns = 
np->type->ns; + setnamespace(np->type->ns); next(); if (yytoken != IDEN) unexpected(); if ((sym = yylval.sym) == NULL) error("incorrect field in struct/union"); - lex_ns = NS_IDEN; next(); return node(OFIELD, sym->type, varnode(sym), np); default: @@ -470,9 +468,9 @@ primary(void) next(); break; case IDEN: - if (yylval.sym == NULL) { - yylval.sym = install(yytext, NS_IDEN); + if (!(yylval.sym->flags & ISDEFINED)) { yylval.sym->type = inttype; + yylval.sym->flags |= ISDEFINED; error("'%s' undeclared", yytext); } np = varnode(yylval.sym); diff --git a/cc1/lex.c b/cc1/lex.c @@ -23,7 +23,7 @@ struct input { struct input *next; }; -uint8_t lex_ns = NS_IDEN; +static uint8_t lex_ns = NS_IDEN; uint8_t yytoken; struct yystype yylval; @@ -246,7 +246,7 @@ integer(char *s, char base) convert: tp = ctype(INT, sign, size); - sym = install(NULL, NS_IDEN); + sym = newsym(NS_IDEN); sym->type = tp; v = strtol(s, NULL, base); if (tp == inttype) @@ -345,7 +345,7 @@ character(void) error("invalid character constant"); ++input->p; - sym = install(NULL, NS_IDEN); + sym = newsym(NS_IDEN); sym->u.i = c; sym->type = inttype; yylval.sym = sym; @@ -386,7 +386,7 @@ repeat: } *bp = '\0'; - sym = install(NULL, NS_IDEN); + sym = newsym(NS_IDEN); sym->u.s = xstrdup(buf); sym->type = mktype(chartype, ARY, (bp - buf) + 1, NULL); yylval.sym = sym; @@ -403,8 +403,8 @@ iden(void) /* nothing */; input->p = p; tok2str(); - sym = yylval.sym = lookup(yytext, lex_ns); - if (!sym || sym->token == IDEN) + sym = yylval.sym = lookup(lex_ns); + if (sym->token == IDEN) return IDEN; yylval.token = sym->u.token; return sym->token; @@ -502,6 +502,13 @@ operator(void) return t; } +/* TODO: Ensure that lex_ns is NS_IDEN after a recovery */ +void +setnamespace(uint8_t ns) +{ + lex_ns = ns; +} + uint8_t next(void) { @@ -525,6 +532,7 @@ next(void) } else { yytoken = operator(); } + lex_ns = NS_IDEN; return yytoken; } diff --git a/cc1/stmt.c b/cc1/stmt.c @@ -13,28 +13,6 @@ extern Node *convert(Node *np, Type *tp1, char 
iscast); extern Node *iszero(Node *np), *eval(Node *np); static void stmt(Symbol *lbreak, Symbol *lcont, Caselist *lswitch); -static Symbol * -label(char *s, char define) -{ - Symbol *sym; - - if ((sym = lookup(s, NS_LABEL)) != NULL) { - if (define) { - if (sym->flags & ISDEFINED) - error("label '%s' already defined", s); - sym->flags |= ISDEFINED; - } - return sym; - } - - sym = install(s, NS_LABEL); - if (define) - sym->flags |= ISDEFINED; - else - sym->flags &= ~ISDEFINED; - return sym; -} - static void stmtexp(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) { @@ -72,9 +50,9 @@ While(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) Symbol *begin, *cond, *end; Node *np; - begin = install(NULL, NS_LABEL); - end = install(NULL, NS_LABEL); - cond = install(NULL, NS_LABEL); + begin = newsym(NS_LABEL); + end = newsym(NS_LABEL); + cond = newsym(NS_LABEL); expect(WHILE); np = condition(); @@ -95,9 +73,9 @@ For(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) Symbol *begin, *cond, *end; Node *econd, *einc, *einit; - begin = install(NULL, NS_LABEL); - end = install(NULL, NS_LABEL); - cond = install(NULL, NS_LABEL); + begin = newsym(NS_LABEL); + end = newsym(NS_LABEL); + cond = newsym(NS_LABEL); expect(FOR); expect('('); @@ -127,8 +105,8 @@ Dowhile(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) Symbol *begin, *end; Node *np; - begin = install(NULL, NS_LABEL); - end = install(NULL, NS_LABEL); + begin = newsym(NS_LABEL); + end = newsym(NS_LABEL); expect(DO); emit(OBLOOP, NULL); emit(OLABEL, begin); @@ -179,9 +157,24 @@ static void stmt(Symbol *lbreak, Symbol *lcont, Caselist *lswitch); static void Label(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) { + Symbol *sym; + switch (yytoken) { - case IDEN: case TYPEIDEN: - emit(OLABEL, label(yytext, 1)); + case IDEN: + case TYPEIDEN: + /* + * We cannot call to insert() because the call to lookup in + * lex.c was done in NS_IDEN namespace, and it is impossibe + * to fix this point, because an identifier at the beginning + 
* of a statement may be part of an expression or part of a + * label. This double call to lookup() is going to generate + * an undefined symbol that is not going to be used ever. + */ + sym = lookup(NS_LABEL); + if (sym->flags & ISDEFINED) + error("label '%s' already defined", yytoken); + sym->flags |= ISDEFINED; + emit(OLABEL, sym); next(); expect(':'); stmt(lbreak, lcont, lswitch); @@ -204,11 +197,11 @@ Continue(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) static void Goto(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) { - expect(GOTO); - + setnamespace(NS_LABEL); + next(); if (yytoken != IDEN) unexpected(); - emit(OJUMP, label(yytext, 0)); + emit(OJUMP, yylval.sym); next(); expect(';'); } @@ -229,8 +222,8 @@ Switch(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) error("incorrect type in switch statement"); expect (')'); - lbreak = install(NULL, NS_LABEL); - lcond = install(NULL, NS_LABEL); + lbreak = newsym(NS_LABEL); + lcond = newsym(NS_LABEL); emit(OJUMP, lcond); stmt(lbreak, lcont, &lcase); emit(OLABEL, lcond); @@ -263,7 +256,7 @@ Case(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) pcase = xmalloc(sizeof(*pcase)); pcase->expr = np; pcase->next = lswitch->head; - emit(OLABEL, pcase->label = install(NULL, NS_LABEL)); + emit(OLABEL, pcase->label = newsym(NS_LABEL)); lswitch->head = pcase; ++lswitch->nr; } @@ -271,7 +264,7 @@ Case(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) static void Default(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) { - Symbol *ldefault = install(NULL, NS_LABEL); + Symbol *ldefault = newsym(NS_LABEL); expect(DEFAULT); expect(':'); @@ -285,14 +278,14 @@ If(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) Symbol *end, *lelse; Node *np; - lelse = install(NULL, NS_LABEL); + lelse = newsym(NS_LABEL); expect(IF); np = condition(); emit(OBRANCH, lelse); emit(OEXPR, negate(np)); stmt(lbreak, lcont, lswitch); if (accept(ELSE)) { - end = install(NULL, NS_LABEL); + end = newsym(NS_LABEL); emit(OJUMP, end); emit(OLABEL, lelse); 
stmt(lbreak, lcont, lswitch); @@ -355,7 +348,8 @@ stmt(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) case CASE: fun = Case; break; case DEFAULT: fun = Default; break; default: fun = stmtexp; break; - case TYPEIDEN: case IDEN: + case TYPEIDEN: + case IDEN: fun = (ahead() == ':') ? Label : stmtexp; break; case '@': diff --git a/cc1/symbol.c b/cc1/symbol.c @@ -69,7 +69,7 @@ newsym(uint8_t ns) sym->id = (curctx) ? ++localcnt : ++globalcnt; sym->ctx = curctx; sym->token = IDEN; - sym->flags = 0; + sym->flags = ISDEFINED; sym->name = NULL; sym->type = NULL; sym->hash = NULL; @@ -79,32 +79,58 @@ newsym(uint8_t ns) } Symbol * -lookup(char *s, uint8_t ns) +lookup(uint8_t ns) { - Symbol *sym; - - for (sym = htab[hash(s)]; sym; sym = sym->hash) { - if (!strcmp(sym->name, s) && sym->ns == ns) + Symbol *sym, **h; + uint8_t sns; + char *t, c; + + h = &htab[hash(yytext)]; + c = *yytext; + for (sym = *h; sym; sym = sym->hash) { + t = sym->name; + if (*t != c || strcmp(t, yytext)) + continue; + sns = sym->ns; + if (sns == NS_KEYWORD || sns == NS_CPP) return sym; + if (sns != ns) + continue; + return sym; } - return NULL; + sym = newsym(ns); + sym->name = xstrdup(yytext); + sym->flags &= ~ISDEFINED; + sym->hash = *h; + *h = sym; + return sym; } Symbol * -install(char *s, uint8_t ns) +install(uint8_t ns) { - Symbol *sym, **t; + Symbol *sym, **h; + /* + * install() is always called after a call to lookup(), so + * yylval.sym always points to a symbol with yytext name. + * if the symbol is an undefined symbol and in the same + * context, then it was generated in the previous lookup() + * call. 
If the symbol is defined and in the same context + * then there is a redefinition + */ + if (yylval.sym->ctx == curctx) { + if (yylval.sym->flags & ISDEFINED) + return NULL; + yylval.sym->flags |= ISDEFINED; + return yylval.sym; + } + h = &htab[hash(yytext)]; sym = newsym(ns); - sym->flags |= ISDEFINED; - - if (s) { - sym->name = xstrdup(s); - t = &htab[hash(s)]; - sym->hash = *t; - *t = sym; - } + sym->name = xstrdup(yytext); + sym->hash = *h; + *h = sym; return sym; } @@ -154,7 +180,8 @@ ikeywords(void) Symbol *sym; for (bp = buff; bp->str; ++bp) { - sym = install(bp->str, NS_IDEN); + strcpy(yytext, bp->str); + sym = lookup(NS_KEYWORD); sym->token = bp->token; sym->u.token = bp->value; }