scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 1c93045ca831a57e47da5ed0c513b1fc987de308
parent 36fbb9b46ef9beed84eb0f14588c8cebc1c92c81
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Mon, 24 Aug 2015 17:55:35 +0200

Rewrite symbol table again

Lookup() was inserting a symbol in the table whenever it was not
defined. This was a bad idea because sometimes the namespace is not
known at lexing time. It was creating symbols in the incorrect
namespace, it was difficult to reuse these symbols, and sometimes
they were masking other symbols. The solution is to allocate a
symbol when it is not found, but not to insert it in the symbol
table.

Diffstat:
Mcc1/cc1.h | 15+++++++--------
Mcc1/cpp.c | 19+++++++++++++------
Mcc1/decl.c | 22++++++++++++++++++----
Mcc1/expr.c | 6++++--
Mcc1/lex.c | 18++++--------------
Mcc1/stmt.c | 9+++++++--
Mcc1/symbol.c | 194++++++++++++++++++++++++++++++++++++++++++-------------------------------------
Mcc1/tests/test014.c | 31++++++++++++++++---------------
8 files changed, 171 insertions(+), 143 deletions(-)

diff --git a/cc1/cc1.h b/cc1/cc1.h @@ -22,7 +22,7 @@ typedef struct input Input; */ struct type { unsigned char op; /* type builder operator */ - unsigned char ns; /* namespace for struct members */ + char ns; /* namespace for struct members */ short id; /* type id, used in dcls */ char letter; /* letter of the type */ bool defined : 1; /* type defined */ @@ -48,7 +48,7 @@ struct symbol { Type *type; unsigned short id; unsigned char ctx; - unsigned char ns; + char ns; unsigned char token; short flags; union { @@ -311,10 +311,10 @@ extern Type *duptype(Type *base); /* symbol.c */ extern void dumpstab(char *msg); -extern Symbol *lookup(unsigned ns, char *name); -extern Symbol *nextsym(Symbol *sym, unsigned ns); -extern Symbol *install(unsigned ns, Symbol *sym); -extern Symbol *newsym(unsigned ns); +extern Symbol *lookup(int ns, char *name); +extern Symbol *nextsym(Symbol *sym, int ns); +extern Symbol *install(int ns, Symbol *sym); +extern Symbol *newsym(int ns); extern void pushctx(void), popctx(void); extern void ikeywords(void); extern void delmacro(Symbol *sym); @@ -334,7 +334,6 @@ extern bool moreinput(void); extern void expect(unsigned tok); extern void discard(void); extern bool addinput(char *fname); -extern void setnamespace(int ns); extern void setsafe(int type); extern void ilex(char *fname); #define accept(t) ((yytoken == (t)) ? 
next() : 0) @@ -369,7 +368,7 @@ extern unsigned short yylen; extern int cppoff, disexpand; extern unsigned cppctx; extern Input *input; -extern int lexmode; +extern int lexmode, namespace; extern unsigned curctx; extern Symbol *curfun, *zero, *one; diff --git a/cc1/cpp.c b/cc1/cpp.c @@ -327,8 +327,9 @@ define(void) if (cppoff) return; - setnamespace(NS_CPP); + namespace = NS_CPP; next(); + if (yytoken != IDEN) { cpperror("macro names must be identifiers"); return; @@ -342,7 +343,7 @@ define(void) sym->flags |= ISDECLARED; } - setnamespace(NS_IDEN); /* Avoid polution in NS_CPP */ + namespace = NS_IDEN; /* Avoid polution in NS_CPP */ next(); if ((n = getpars(args)) == NR_MACROARG) goto delete; @@ -371,7 +372,7 @@ include(void) if (cppoff) return; - setnamespace(NS_IDEN); + namespace = NS_IDEN; next(); switch (*yytext) { @@ -488,7 +489,7 @@ ifclause(int negate, int isifdef) error("too much nesting levels of conditional inclusion"); n = cppctx++; - setnamespace(NS_CPP); + namespace = NS_CPP; next(); if (isifdef) { @@ -578,7 +579,7 @@ undef(void) if (cppoff) return; - setnamespace(NS_CPP); + namespace = NS_CPP; next(); if (yytoken != IDEN) { error("no macro name given in #undef directive"); @@ -609,6 +610,7 @@ cpp(void) {ERROR, usererr}, {0, NULL} }; + int ns; if (*input->p != '#') return 0; @@ -616,8 +618,11 @@ cpp(void) disexpand = 1; lexmode = CPPMODE; - setnamespace(NS_CPPCLAUSES); + ns = namespace; + namespace = NS_CPPCLAUSES; next(); + namespace = NS_IDEN; + for (bp = clauses; bp->token && bp->token != yytoken; ++bp) /* nothing */; if (!bp->token) @@ -629,8 +634,10 @@ cpp(void) if (yytoken != EOFTOK && !cppoff) errorp("trailing characters after preprocessor directive"); + disexpand = 0; lexmode = CCMODE; + namespace = ns; return 1; } diff --git a/cc1/decl.c b/cc1/decl.c @@ -372,14 +372,16 @@ newtag(void) int op, tag = yylval.token; static unsigned ns = NS_STRUCTS; - setnamespace(NS_TAG); + namespace = NS_TAG; next(); + switch (yytoken) { case IDEN: case TYPEIDEN: 
sym = yylval.sym; if ((sym->flags & ISDECLARED) == 0) install(NS_TAG, yylval.sym); + namespace = NS_IDEN; next(); break; default: @@ -413,15 +415,20 @@ structdcl(void) Symbol *sym; Type *tp; static int nested; + int ns; + ns = namespace; sym = newtag(); tp = sym->type; + namespace = tp->ns; + if (!accept('{')) - return tp; + goto restore_name; if (tp->defined) error("redefinition of struct/union '%s'", sym->name); tp->defined = 1; + namespace = tp->ns; if (nested == NR_STRUCT_LEVEL) error("too levels of nested structure or union definitions"); @@ -431,6 +438,8 @@ structdcl(void) fieldlist(tp); --nested; +restore_name: + namespace = ns; return tp; } @@ -439,16 +448,19 @@ enumdcl(void) { Type *tp; Symbol *sym, *tagsym; - int val, nctes; + int ns, val, nctes; + ns = namespace; tagsym = newtag(); tp = tagsym->type; if (!accept('{')) - return tp; + goto restore_name; if (tp->defined) error("redefinition of enumeration '%s'", tagsym->name); tp->defined = 1; + namespace = NS_IDEN; + for (nctes = val = 0; yytoken != ')'; ++nctes, ++val) { if (yytoken != IDEN) unexpected(); @@ -475,6 +487,8 @@ enumdcl(void) } expect('}'); +restore_name: + namespace = ns; return tp; } diff --git a/cc1/expr.c b/cc1/expr.c @@ -588,8 +588,10 @@ field(Node *np) switch (BTYPE(np)) { case STRUCT: case UNION: - setnamespace(np->type->ns); + namespace = np->type->ns; next(); + namespace = NS_IDEN; + if (yytoken != IDEN) unexpected(); if ((sym = yylval.sym) == NULL) @@ -710,7 +712,7 @@ primary(void) next(); break; case IDEN: - if (!(yylval.sym->flags & ISDECLARED)) { + if ((yylval.sym->flags & ISDECLARED) == 0) { yylval.sym->type = inttype; yylval.sym->flags |= ISDECLARED; error("'%s' undeclared", yytext); diff --git a/cc1/lex.c b/cc1/lex.c @@ -18,7 +18,7 @@ unsigned short yylen; int cppoff; int lexmode = CCMODE; -static unsigned lex_ns = NS_IDEN, saved_ns; +int namespace = NS_IDEN; static int safe, eof; Input *input; @@ -378,7 +378,7 @@ iden(void) /* nothing */; input->p = p; tok2str(); - sym = 
lookup(lex_ns, yytext); + sym = lookup(namespace, yytext); if (sym->ns == NS_CPP) { if (!disexpand && expand(begin, sym)) return next(); @@ -386,7 +386,7 @@ iden(void) * it is not a correct macro call, so try to find * another definition. */ - sym = nextsym(sym, lex_ns); + sym = nextsym(sym, namespace); } yylval.sym = sym; if (sym->flags & ISCONSTANT) @@ -488,13 +488,7 @@ operator(void) return t; } -/* TODO: Ensure that lex_ns is NS_IDEN after a recovery */ -void -setnamespace(int ns) -{ - saved_ns = (ns == NS_CPPCLAUSES) ? lex_ns : 0; - lex_ns = ns; -} +/* TODO: Ensure that namespace is NS_IDEN after a recovery */ static void skipspaces(void) @@ -518,8 +512,6 @@ next(void) skipspaces(); c = *input->begin; if ((eof || lexmode == CPPMODE) && c == '\0') { - if (lexmode == CPPMODE) - lex_ns = saved_ns; strcpy(yytext, "<EOF>"); if (cppctx && eof) error("#endif expected"); @@ -540,8 +532,6 @@ next(void) exit: DBG(stderr, "TOKEN %s\n", yytext); - if (lexmode == CCMODE) - lex_ns = NS_IDEN; return yytoken; } diff --git a/cc1/stmt.c b/cc1/stmt.c @@ -20,8 +20,11 @@ label(void) switch (yytoken) { case IDEN: case TYPEIDEN: - if ((sym = install(NS_LABEL, yylval.sym)) == NULL) + sym = lookup(NS_LABEL, yytext); + if (sym->flags & ISDEFINED) error("label '%s' already defined", yytoken); + if ((sym->flags & ISDECLARED) == 0) + sym = install(NS_LABEL, sym); sym->flags |= ISDEFINED; emit(OLABEL, sym); next(); @@ -169,8 +172,10 @@ Goto(Symbol *lbreak, Symbol *lcont, Caselist *lswitch) { Symbol *sym; - setnamespace(NS_LABEL); + namespace = NS_LABEL; next(); + namespace = NS_IDEN; + if (yytoken != IDEN) unexpected(); sym = yylval.sym; diff --git a/cc1/symbol.c b/cc1/symbol.c @@ -54,30 +54,6 @@ hash(const char *s) return h & NR_SYM_HASH-1; } -static Symbol * -linkhash(Symbol *sym, char *name, unsigned hval) -{ - Symbol **h, *p, *prev; - - sym->name = xstrdup(name); - h = &htab[hval]; - - for (prev = p = *h; p; prev = p, p = p->hash) { - if (p->ctx <= sym->ctx) - break; - } - if (p == 
prev) { - sym->hash = *h; - *h = sym; - } else { - p = prev->hash; - prev->hash = sym; - sym->hash = p; - } - - return sym; -} - static void unlinkhash(Symbol *sym) { @@ -99,41 +75,47 @@ pushctx(void) error("too much nested blocks"); } +static void +killsym(Symbol *sym) +{ + short f; + char *name; + + f = sym->flags; + if (f & ISSTRING) + free(sym->u.s); + if (sym->ns == NS_TAG) + sym->type->defined = 0; + if ((name = sym->name) != NULL) { + unlinkhash(sym); + if ((f & (ISUSED|ISGLOBAL|ISDECLARED)) == ISDECLARED) + warn("'%s' defined but not used", name); + if ((f & ISDEFINED) == 0 && sym->ns == NS_LABEL) + errorp("label '%s' is not defined", name); + free(name); + } + free(sym); +} + void popctx(void) { Symbol *next, *sym; + char *name; short f; if (--curctx == GLOBALCTX) { localcnt = 0; for (sym = labels; sym; sym = next) { next = sym->next; - f = sym->flags; - if ((f & (ISUSED|ISDEFINED)) == ISDEFINED) - warn("'%s' defined but not used", sym->name); - if ((f & ISDEFINED) == 0) - errorp("label '%s' is not defined", sym->name); - free(sym->name); - free(sym); + killsym(sym); } labels = NULL; } for (sym = head; sym && sym->ctx > curctx; sym = next) { next = sym->next; - f = sym->flags; - if (sym->ns == NS_TAG) - sym->type->defined = 0; - if (sym->name) { - unlinkhash(sym); - if ((f & (ISUSED|ISGLOBAL|ISDECLARED)) == ISDECLARED) - warn("'%s' defined but not used", sym->name); - } - free(sym->name); - if (f & ISSTRING) - free(sym->u.s); - free(sym); + killsym(sym); } head = sym; } @@ -141,7 +123,7 @@ popctx(void) static unsigned short newid(void) { - unsigned id; + unsigned short id; id = (curctx) ? 
++localcnt : ++globalcnt; if (id == 0) { @@ -161,41 +143,86 @@ duptype(Type *base) return tp; } -Symbol * -newsym(unsigned ns) +static Symbol * +allocsym(int ns, char *name) { - Symbol *sym, *p, *prev; + Symbol *sym; - sym = malloc(sizeof(*sym)); + sym = xmalloc(sizeof(*sym)); + if (name) + name = xstrdup(name); + sym->name = name; sym->id = 0; sym->ns = ns; sym->ctx = (ns == NS_CPP) ? UCHAR_MAX : curctx; sym->token = IDEN; - sym->flags = ISDECLARED | ISUSED; - sym->u.s = sym->name = NULL; + sym->flags = 0; + sym->u.s = NULL; sym->type = NULL; sym->next = sym->hash = NULL; + return sym; +} - if (ns == NS_CPP) +static Symbol * +linksym(Symbol *sym) +{ + Symbol *p, *prev; + + sym->flags |= ISDECLARED; + switch (sym->ns) { + case NS_CPP: return sym; - if (ns == NS_LABEL) { + case NS_LABEL: sym->next = labels; return labels = sym; + default: + for (prev = p = head; p; prev = p, p = p->next) { + if (p->ctx <= sym->ctx) + break; + } + if (p == prev) { + sym->next = head; + head = sym; + } else { + p = prev->next; + prev->next = sym; + sym->next = p; + } + return sym; } +} + +static Symbol * +linkhash(Symbol *sym) +{ + Symbol **h, *p, *prev; + + h = &htab[hash(sym->name)]; - for (prev = p = head; p; prev = p, p = p->next) { + for (prev = p = *h; p; prev = p, p = p->hash) { if (p->ctx <= sym->ctx) break; } if (p == prev) { - sym->next = head; - head = sym; + sym->hash = *h; + *h = sym; } else { - p = prev->next; - prev->next = sym; - sym->next = p; + p = prev->hash; + prev->hash = sym; + sym->hash = p; } + if (sym->ns != NS_CPP) + sym->id = newid(); + return linksym(sym); +} + +Symbol * +newsym(int ns) +{ + Symbol *sym; + + sym = linksym(allocsym(ns, NULL)); return sym; } @@ -204,33 +231,26 @@ newlabel(void) { Symbol *sym = newsym(NS_LABEL); sym->id = newid(); - sym->flags |= ISDEFINED; return sym; } Symbol * -lookup(unsigned ns, char *name) +lookup(int ns, char *name) { - Symbol *sym, **h; - unsigned sns, v; + Symbol *sym; + int sns; char *t, c; - v = hash(name); - h = 
&htab[v]; c = *name; - for (sym = *h; sym; sym = sym->hash) { + for (sym = htab[hash(name)]; sym; sym = sym->hash) { t = sym->name; if (*t != c || strcmp(t, name)) continue; sns = sym->ns; - if (sns == NS_KEYWORD || sns == NS_CPP) + if (sns == NS_KEYWORD || sns == NS_CPP || sns == ns) return sym; - if (sns != ns) - continue; - return sym; } - sym = linkhash(newsym(ns), name, v); - sym->flags &= ~(ISDECLARED | ISUSED); + sym = allocsym(ns, name); return sym; } @@ -245,10 +265,10 @@ delmacro(Symbol *sym) } Symbol * -nextsym(Symbol *sym, unsigned ns) +nextsym(Symbol *sym, int ns) { char *s, *t, c; - Symbol *new, *p; + Symbol *p; /* * This function is only called when a macro with parameters @@ -263,29 +283,18 @@ nextsym(Symbol *sym, unsigned ns) if (c == *t && !strcmp(s, t)) return sym; } - new = linkhash(newsym(ns), s, hash(s)); - new->flags &= ~ISDECLARED; - return new; + return linkhash(allocsym(ns, s)); } Symbol * -install(unsigned ns, Symbol *sym) +install(int ns, Symbol *sym) { - if (sym->ctx == curctx && ns == sym->ns) { - if (sym->flags & ISDECLARED) + if (sym->flags & ISDECLARED) { + if (sym->ctx == curctx && ns == sym->ns) return NULL; - } else { - sym = lookup(ns, sym->name); - if (sym->flags & ISDECLARED) - return sym; + sym = allocsym(ns, sym->name); } - - sym->flags |= ISDECLARED; - if (ns == NS_CPP) - return sym; - sym->id = newid(); - - return sym; + return linkhash(sym); } void @@ -354,7 +363,7 @@ ikeywords(void) for (lp = list; *lp; ++lp) { for (bp = *lp; bp->str; ++bp) { - sym = lookup(ns, bp->str); + sym = linkhash(allocsym(ns, bp->str)); sym->token = bp->token; sym->u.token = bp->value; } @@ -362,8 +371,9 @@ ikeywords(void) } /* * Remove all the predefined symbols from * the symbol list. It - * will make faster someoperations. There is no problem of memory + * will make faster some operations. 
There is no problem of memory * leakeage because this memory is not ever freed */ + globalcnt = 0; head = NULL; } diff --git a/cc1/tests/test014.c b/cc1/tests/test014.c @@ -2,21 +2,21 @@ name: TEST014 description: Basic storage class test output: -test014.c:22: warning: 'a' defined but not used -test014.c:22: warning: 'k' defined but not used -test014.c:22: warning: 'j' defined but not used -test014.c:22: warning: 'i' defined but not used -test014.c:22: warning: 'h' defined but not used -test014.c:28: warning: 'par' defined but not used -test014.c:28: warning: 'par' defined but not used -test014.c:33: warning: 'par' defined but not used -test014.c:35: error: incorrect storage class for file-scope declaration -test014.c:35: error: invalid storage class for function 'd' -test014.c:38: error: bad storage class in function parameter -test014.c:39: error: invalid storage class for function 'func4' -test014.c:40: error: invalid type specification -test014.c:41: warning: 'f' defined but not used -test014.c:44: error: conflicting types for 'd' +test014.c:16: warning: 'a' defined but not used +test014.c:16: warning: 'k' defined but not used +test014.c:16: warning: 'j' defined but not used +test014.c:16: warning: 'i' defined but not used +test014.c:16: warning: 'h' defined but not used +test014.c:22: warning: 'par' defined but not used +test014.c:22: warning: 'par' defined but not used +test014.c:27: warning: 'par' defined but not used +test014.c:29: error: incorrect storage class for file-scope declaration +test014.c:29: error: invalid storage class for function 'd' +test014.c:32: error: bad storage class in function parameter +test014.c:33: error: invalid storage class for function 'func4' +test014.c:34: error: invalid type specification +test014.c:35: warning: 'f' defined but not used +test014.c:38: error: conflicting types for 'd' G1 I a Y2 M b X3 I c @@ -46,6 +46,7 @@ R1 I par ???? */ +#line 1 int a; static char b; extern int c;