scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 08bb3b622e7cc180758648979cdc5e602a59da00
parent 5ab49e1bb06b5266b817527557114bada4bfa432
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Thu, 28 Jan 2016 11:09:20 +0100

[cc2] Set up parser.c to parse the full IR

After this macro commit scc is able to parse the full
IR, and it was tested against the full test suite of cc1.

Diffstat:
Mcc2/cc2.h | 13++++++++++++-
Mcc2/node.c | 1+
Mcc2/parser.c | 313++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------------
Mcc2/symbol.c | 1+
4 files changed, 236 insertions(+), 92 deletions(-)

diff --git a/cc2/cc2.h b/cc2/cc2.h @@ -76,8 +76,12 @@ enum op { OCAST = 'g', OCONST = '#', OSTRING = '"', + OINC = 'i', + ODEC = 'd', /*statements */ + ONOP = 'n', OJMP = 'j', + OBRANCH = 'y', ORET = 'h', OBLOOP = 'b', OELOOP = 'e', @@ -90,6 +94,8 @@ enum op { }; enum nerrors { + EEOFFUN, /* EOF while parsing function */ + ENLABEL, /* label without statement */ EIDOVER, /* identifier overflow */ EOUTPAR, /* out pf params */ ESYNTAX, /* syntax error */ @@ -117,7 +123,10 @@ struct symbol { unsigned short numid; char *name; char kind; - TSIZE off; + union { + TSIZE off; + Node *label; + } u; Symbol *next; Symbol *h_next; }; @@ -129,7 +138,9 @@ struct node { TUINT i; char *s; Symbol *sym; + char subop; } u; + Symbol *label; Node *left, *right; Node *stmt; }; diff --git a/cc2/node.c b/cc2/node.c @@ -40,6 +40,7 @@ newnode(void) np->right = NULL; np->left = NULL; np->stmt = NULL; + np->label = NULL; return np; } diff --git a/cc2/parser.c b/cc2/parser.c @@ -33,11 +33,11 @@ union tokenop { typedef void parsefun(char *, union tokenop); static parsefun type, symbol, getname, unary, binary, ternary, call, parameter, constant, composed, begininit, endinit, - jump; + jump, oreturn, loop, assign, ocase, odefault, casetable; typedef void evalfun(void); static evalfun vardecl, beginfun, endfun, endpars, stmt, - array, aggregate, flddecl; + array, aggregate, flddecl, labeldcl; static struct decoc { void (*eval)(void); @@ -51,7 +51,7 @@ static struct decoc { [PRIVAT] = {vardecl, symbol, .u.op = MEM}, [LOCAL] = {vardecl, symbol, .u.op = MEM}, [MEMBER] = {flddecl, symbol}, - [LABEL] = {flddecl, symbol}, + [LABEL] = {labeldcl, symbol}, [INT8] = {NULL, type, .u.arg = &int8type}, [INT16] = {NULL, type, .u.arg = &int16type}, @@ -84,14 +84,14 @@ static struct decoc { [OCPL] = {NULL, unary}, [ONEG] = {NULL, unary}, - [OFIELD] = {NULL, unary}, [OADDR] = {NULL, unary}, - [OAND] = {NULL, unary}, - [OOR] = {NULL, unary}, [OPTR] = {NULL, unary}, [OCAST] = {NULL, unary}, [OPAR ] = {NULL, unary}, + 
[OAND] = {NULL, binary}, + [OOR] = {NULL, binary}, + [OFIELD] = {NULL, binary}, [OADD] = {NULL, binary}, [OSUB] = {NULL, binary}, [OMUL] = {NULL, binary}, @@ -108,28 +108,33 @@ static struct decoc { [OBAND] = {NULL, binary}, [OBOR] = {NULL, binary}, [OBXOR] = {NULL, binary}, - [OASSIG] = {NULL, binary}, [OCOMMA] = {NULL, binary}, + [OASSIG] = {NULL, assign}, [OASK] = {NULL, ternary}, [OCALL] = {NULL, call}, [OCONST] = NULL, constant, - [OJMP] = NULL, NULL, - [ORET] = NULL, jump, + [OJMP] = NULL, jump, + [OBRANCH] = NULL, jump, + [ORET] = NULL, oreturn, - [OCASE] = NULL, - [ODEFAULT] = NULL, - [OTABLE] = NULL, - [OSWITCH] = NULL + [OBLOOP] = NULL, loop, + [OELOOP] = NULL, loop, + + [OCASE] = NULL, jump, + [OSWITCH] = NULL, jump, + + [ODEFAULT] = NULL, odefault, + [OTABLE] = NULL, casetable, }; +static Symbol *curfun; +static int funpars = -1, sclass, ininit, endf, lineno; +static Node *stmtp; static void *stack[STACKSIZ], **sp = stack; -static Symbol *lastsym, *curfun, *lastaggreg; static Symbol *params[NR_FUNPARAM]; -static int funpars = -1, sclass, callpars, ininit, injump; -static Node *stmtp, *callp; static void push(void *elem) @@ -147,6 +152,12 @@ pop(void) return *--sp; } +static int +empty(void) +{ + return sp == stack; +} + static void type(char *token, union tokenop u) { @@ -221,6 +232,39 @@ constant(char *token, union tokenop u) } static void +assign(char *token, union tokenop u) +{ + int c, op = *token++; + Node *np = newnode(); + + switch (*token) { + case ODIV: + case OMOD: + case OADD: + case OSUB: + case OSHL: + case OSHR: + case OBAND: + case OBXOR: + case OBOR: + case OINC: + case ODEC: + c = *token++; + break; + default: + c = 0; + break; + } + + np->u.subop = c; + np->op = op; + np->type = *gettype(token); + np->right = pop(); + np->left = pop(); + push(np); +} + +static void ternary(char *token, union tokenop u) { Node *ask, *colon; @@ -237,19 +281,111 @@ ternary(char *token, union tokenop u) ask->op = OASK; ask->type = *tp; ask->left = pop(); - 
ask->right = pop(); push(ask); } +static Node * +eval(char *tok) +{ + struct decoc *dp; + + do { + dp = &optbl[*tok]; + if (!dp->parse) + break; + (*dp->parse)(tok, dp->u); + } while (tok = strtok(NULL, "\t\n")); +} + +static int +nextline(void) +{ + char line[MAXLINE]; + size_t len; + int c; + void (*fun)(void); + +repeat: + ++lineno; + if (!fgets(line, sizeof(line), stdin)) + return 0; + if ((len = strlen(line)) == 0 || line[0] == '\n') + goto repeat; + if (line[len-1] != '\n') + error(ELNLINE); + line[len-1] = '\0'; + + c = *line; + eval(strtok(line, "\t\n")); + if ((fun = *optbl[c].eval) != NULL) + (*fun)(); + if (sp != stack) + error(ESTACKA); + return 1; +} + +static void +oreturn(char *token, union tokenop u) +{ + Node *np; + + np = newnode(); + np->op = *token; + eval(strtok(NULL, "\t\n")); + if (!empty()) + np->left = pop(); + push(np); +} + static void jump(char *token, union tokenop u) { + Node *np, *aux; + + np = newnode(); + np->op = *token; + eval(strtok(NULL, "\t\n")); + + if (*token != OJMP) + np->left = pop(); + aux = pop(); + np->u.sym = aux->u.sym; + delnode(aux); + push(np); +} + +static void +casetable(char *token, union tokenop u) +{ + Node *np, *aux; + + np = newnode(); + np->op = *token; + eval(strtok(NULL, "\t\n")); + np->left = pop(); + push(np); +} + +static void +odefault(char *token, union tokenop u) +{ + Node *np; + + np = newnode(); + np->op = *token; + eval(strtok(NULL, "\t\n")); + np->left = pop(); + push(np); +} + +static void +loop(char *token, union tokenop u) +{ Node *np; np = newnode(); np->op = *token; push(np); - injump = 1; } static void @@ -276,7 +412,7 @@ call(char *token, union tokenop u) np->right = par; } fun = newnode(); - fun->op = OCALL; + fun->op = *token++; fun->type = *gettype(token); fun->left = np; fun->right = par; @@ -308,27 +444,6 @@ endinit(char *token, union tokenop u) } static void -beginfun(void) -{ - if (curfun) - error(ESYNTAX); - memset(params, 0, sizeof(params)); - funpars = 0; - curfun = lastsym; - 
pushctx(); -} - -static void -endfun(void) -{ - if (!curfun) - error(ESYNTAX); - popctx(); - curfun = NULL; - funpars = -1; -} - -static void endpars(void) { if (!curfun || funpars == -1) @@ -359,7 +474,6 @@ aggregate(void) * symbol to store the name. */ sym = (Symbol *) tp; - lastaggreg = sym; sym->name = name; delnode(align); @@ -392,7 +506,7 @@ decl(Symbol *sym) case PRIVAT: case LOCAL: label(sym); - if (!ininit && (sym->type.flags & FUNF) == 0) + if (!ininit) allocdata(&sym->type); break; case AUTO: @@ -420,13 +534,25 @@ vardecl(void) np = pop(); sym = np->u.sym; + /* + * We have to free sym->name because in tentative declarations + * we can multiple declarations of the same symbol, and in this + * case our parser will allocate twice the memory + */ + free(sym->name); sym->name = name; sym->type = *tp; sym->kind = sclass; + if (ininit) sym->type.flags |= INITF; - lastsym = sym; - decl(sym); + + if ((tp->flags & FUNF) == 0) { + decl(sym); + } else { + curfun = sym; + label(sym); + } delnode(np); } @@ -439,16 +565,13 @@ flddecl(void) Type *tp; Symbol *sym; - if (!lastaggreg) - error(ESYNTAX); - off = pop(); name = pop(); tp = pop(); np = pop(); sym = np->u.sym; - sym->off = off->u.i; + sym->u.off = off->u.i; sym->name = name; sym->type = *tp; @@ -457,10 +580,34 @@ flddecl(void) } static void +labeldcl( void) +{ + Node *np; + Symbol *sym; + + np = pop(); + sym = np->u.sym; + delnode(np); + nextline(); + stmtp->label = sym; + sym->u.label = stmtp; + +} + +static void +addnode(Node *np) +{ + if (!curfun->u.label) + curfun->u.label = np; + else + stmtp->stmt = np; + stmtp = np; +} + +static void stmt(void) { - static Node *lastp; - Node *aux, *np; + Node *np; np = pop(); if (ininit) { @@ -468,53 +615,37 @@ stmt(void) deltree(np); return; } - if (injump) { - aux = np; - np = pop(); - np->left = aux; - injump = 0; - } - if (!stmtp) - stmtp = np; - else - lastp->stmt = np; - lastp = np; + addnode(np); } -void -parse(void) +static void +beginfun(void) { - char 
line[MAXLINE], *t; - size_t len; - int c; - struct decoc *dp; - void (*fun)(void); + memset(params, 0, sizeof(params)); + funpars = 0; + pushctx(); +} - for (;;) { - if (!fgets(line, sizeof(line), stdin)) - break; - if ((len = strlen(line)) == 0 || line[0] == '\n') - continue; - if (line[len-1] != '\n') - error(ELNLINE); - line[len-1] = '\0'; - - c = *line; - for (t = strtok(line, "\t\n"); t; t = strtok(NULL, "\t\n")) { - dp = &optbl[*t]; - if (!dp->parse) - break; - (*dp->parse)(t, dp->u); - } +static void +endfun(void) +{ + Node *np; - if ((fun = *optbl[c].eval) != NULL) - (*fun)(); - if (sp != stack) - error(ESTACKA); - if (c == '}') - break; - } + np = newnode(); + np->op = ONOP; + addnode(np); + curfun = NULL; + funpars = -1; + endf = 1; + popctx(); +} +void +parse(void) +{ + endf = 0; + while (!endf && nextline()) + /* nothing */; if (ferror(stdin)) error(EFERROR, strerror(errno)); } diff --git a/cc2/symbol.c b/cc2/symbol.c @@ -39,6 +39,7 @@ popctx(void) symtab[sym->id & NR_SYMHASH-1] = sym->h_next; freesym(sym); } + locals = NULL; } Symbol *