scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 5029b379b8193ead69618b3e306c9dcd2e1ab296
parent d56911aa4feb7fb2f32fa38f53c0d74ecf7ea4d8
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Sat, 23 Jan 2016 11:09:20 +0100

[cc2] Add parser of IR

This parser is far from complete and is not yet in a state meant to be shown,
but it gives us something concrete to start the discussion.

Diffstat:
Mcc2/Makefile | 3++-
Acc2/arch/amd64-sysv/arch.h | 5+++++
Acc2/arch/amd64-sysv/types.c | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acc2/arch/i386-sysv/arch.h | 5+++++
Acc2/arch/i386-sysv/types.c | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acc2/arch/z80/arch.h | 5+++++
Acc2/arch/z80/types.c | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mcc2/cc2.h | 135++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Mcc2/cgen.c | 1+
Mcc2/code.c | 1+
Mcc2/main.c | 1+
Acc2/node.c | 64++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Mcc2/optm.c | 1+
Mcc2/parser.c | 377+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
Mcc2/peep.c | 1+
Acc2/symbol.c | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
16 files changed, 914 insertions(+), 18 deletions(-)

diff --git a/cc2/Makefile b/cc2/Makefile @@ -2,7 +2,8 @@ include ../config.mk -OBJS = main.o parser.o code.o optm.o peep.o cgen.o +OBJS = main.o parser.o code.o optm.o peep.o cgen.o \ + symbol.o node.o arch/$(ARCH)/types.o all: cc2 diff --git a/cc2/arch/amd64-sysv/arch.h b/cc2/arch/amd64-sysv/arch.h @@ -0,0 +1,5 @@ + +#define TINT long long +#define TUINT unsigned long long +#define TFLOAT double + diff --git a/cc2/arch/amd64-sysv/types.c b/cc2/arch/amd64-sysv/types.c @@ -0,0 +1,89 @@ + +#include "arch.h" +#include "../../cc2.h" + + +Type int8type = { + .flags = SIGNF | INTF, + .size = 1, + .align = 1 +}; + +Type int16type = { + .flags = SIGNF | INTF, + .size = 2, + .align = 2 +}; + +Type int32type = { + .flags = SIGNF | INTF, + .size = 4, + .align = 4 +}; + +Type int64type = { + .flags = SIGNF | INTF, + .size = 8, + .align = 8 +}; + +Type uint8type = { + .flags = INTF, + .size = 1, + .align = 1 +}; + +Type uint16type = { + .flags = INTF, + .size = 2, + .align = 2 +}; + +Type uint32type = { + .flags = INTF, + .size = 4, + .align = 2 +}; + +Type uint64type = { + .flags = INTF, + .size = 8, + .align = 2 +}; + +Type ptrtype = { + .flags = INTF, + .size = 8, + .align = 8 +}; + +Type booltype = { + .flags = INTF, + .size = 1, + .align = 1 +}; + +Type float32type = { + .size = 4, + .align = 4 +}; + +Type float64type = { + .size = 8, + .align = 8 +}; + +Type float80type = { + .size = 10, + .align = 12 +}; + +Type voidtype = { + .size = 0, + .align = 0 +}; + +Type elipsistype = { + .size = 0, + .align = 0 +}; diff --git a/cc2/arch/i386-sysv/arch.h b/cc2/arch/i386-sysv/arch.h @@ -0,0 +1,5 @@ + +#define TINT long long +#define TUINT unsigned long long +#define TFLOAT double + diff --git a/cc2/arch/i386-sysv/types.c b/cc2/arch/i386-sysv/types.c @@ -0,0 +1,89 @@ + +#include "arch.h" +#include "../../cc2.h" + + +Type int8type = { + .flags = SIGNF | INTF, + .size = 1, + .align = 1 +}; + +Type int16type = { + .flags = SIGNF | INTF, + .size = 2, + .align = 2 +}; + +Type int32type 
= { + .flags = SIGNF | INTF, + .size = 4, + .align = 4 +}; + +Type int64type = { + .flags = SIGNF | INTF, + .size = 8, + .align = 8 +}; + +Type uint8type = { + .flags = INTF, + .size = 1, + .align = 1 +}; + +Type uint16type = { + .flags = INTF, + .size = 2, + .align = 2 +}; + +Type uint32type = { + .flags = INTF, + .size = 4, + .align = 2 +}; + +Type uint64type = { + .flags = INTF, + .size = 8, + .align = 2 +}; + +Type ptrtype = { + .flags = INTF, + .size = 4, + .align = 4 +}; + +Type booltype = { + .flags = INTF, + .size = 1, + .align = 1 +}; + +Type float32type = { + .size = 4, + .align = 4 +}; + +Type float64type = { + .size = 8, + .align = 8 +}; + +Type float80type = { + .size = 10, + .align = 4 +}; + +Type voidtype = { + .size = 0, + .align = 0 +}; + +Type elipsistype = { + .size = 0, + .align = 0 +}; diff --git a/cc2/arch/z80/arch.h b/cc2/arch/z80/arch.h @@ -0,0 +1,5 @@ + +#define TINT long long +#define TUINT unsigned long long +#define TFLOAT double + diff --git a/cc2/arch/z80/types.c b/cc2/arch/z80/types.c @@ -0,0 +1,89 @@ + +#include "arch.h" +#include "../../cc2.h" + + +Type int8type = { + .flags = SIGNF | INTF, + .size = 1, + .align = 1 +}; + +Type int16type = { + .flags = SIGNF | INTF, + .size = 2, + .align = 1 +}; + +Type int32type = { + .flags = SIGNF | INTF, + .size = 4, + .align = 1 +}; + +Type int64type = { + .flags = SIGNF | INTF, + .size = 8, + .align = 1 +}; + +Type uint8type = { + .flags = INTF, + .size = 1, + .align = 1 +}; + +Type uint16type = { + .flags = INTF, + .size = 2, + .align = 1 +}; + +Type uint32type = { + .flags = INTF, + .size = 4, + .align = 1 +}; + +Type uint64type = { + .flags = INTF, + .size = 8, + .align = 1 +}; + +Type ptrtype = { + .flags = INTF, + .size = 2, + .align = 1 +}; + +Type booltype = { + .flags = INTF, + .size = 1, + .align = 1 +}; + +Type float32type = { + .size = 4, + .align = 1 +}; + +Type float64type = { + .size = 4, + .align = 1 +}; + +Type float80type = { + .size = 4, + .align = 1 +}; + +Type voidtype = { 
+ .size = 0, + .align = 0 +}; + +Type elipsistype = { + .size = 0, + .align = 0 +}; diff --git a/cc2/cc2.h b/cc2/cc2.h @@ -1,14 +1,136 @@ +enum tflags { + SIGNF = 1, + INTF = 2, + DEFTYP = 4, + STRUCTF = 8, + UNIONF = 16, + FUNCF = 32, + ARYF = 64 +}; + +enum op { + /* types */ + ELLIPSIS = 'E', + INT8 = 'C', + INT16 = 'I', + INT32 = 'W', + INT64 = 'Q', + UINT8 = 'K', + UINT16 = 'N', + UINT32 = 'Z', + UINT64 = 'O', + POINTER = 'P', + FUNCTION = 'F', + VECTOR = 'V', + UNION = 'U', + STRUCT = 'S', + BOOL = 'B', + FLOAT = 'J', + DOUBLE = 'D', + LDOUBLE = 'H', + VOID = '0', + ONAME = '"', + /* kind of operand */ + NONE = 0, + MEM = 'M', + AUTO = 'A', + REG = 'R', + /* storage class */ + GLOB = 'G', + EXTRN = 'X', + PRIVAT = 'Y', + LOCAL = 'T', + MEMBER = 'M', + LABEL = 'L', + /* operands */ + OADD = '+', + OSUB = '-', + OMUL = '*', + OMOD = '%', + ODIV = '/', + OSHL = 'l', + OSHR = 'r', + OLT = '<', + OGT = '>', + OLE = '[', + OGE = ']', + OEQ = '=', + ONE = '!', + OBAND = '&', + OBOR = '|', + OBXOR = '^', + OCPL = '~', + OASSIG = ':', + ONEG = '_', + OCALL = 'c', + OPAR = 'p', + OFIELD = '.', + OCOMMA = ',', + OASK = '?', + OCOLON = ' ', + OADDR = '\'', + OAND = 'a', + OOR = 'b', + OPTR = '@', + OSYM = 'y', + OCAST = 'g', + OCONST = '#', + OSTRING = '"', + /*statements */ + OJMP = 'j', + ORET = 'r', + OBLOOP = 'b', + OELOOP = 'e', + OCASE = 'v', + ODEFAULT = 'f', + OTABLE = 't', + OSWITCH = 's', + OEPARS = '\\', + OSTMT = '\t' +}; + enum nerrors { + EIDOVER, /* identifier overflow */ + EOUTPAR, /* out pf params */ + ESYNTAX, /* syntax error */ + ESTACKA, /* stack unaligned */ + ESTACKO, /* stack overflow */ + ESTACKU, /* stack underflow */ ELNLINE, /* line too long */ EFERROR, /* error reading from file:%s*/ ENUMERR }; typedef struct node Node; +typedef struct type Type; +typedef struct symbol Symbol; + +struct type { + unsigned short size; + unsigned short align; + unsigned short flags; +}; + +struct symbol { + unsigned short id; + char *name; + Type type; + char 
kind; + union { + TUINT i; + char *s; + } u; + Symbol *next; + Symbol *h_next; +}; struct node { - unsigned char op; + char op; + Type type; + Symbol *sym; + Node *left, *right; + Node *stmt; }; /* main.c */ @@ -29,3 +151,14 @@ extern void peephole(void); /* code.c */ extern void writeout(void); + +/* node.c */ +extern void cleannodes(void); +extern void delnode(Node *np); +extern Node *newnode(void); + +/* symbol.c */ +extern Symbol *getsym(int id); +extern void popctx(void); +extern void pushctx(void); +extern void freesym(Symbol *sym); diff --git a/cc2/cgen.c b/cc2/cgen.c @@ -1,4 +1,5 @@ +#include "arch.h" #include "cc2.h" void diff --git a/cc2/code.c b/cc2/code.c @@ -1,4 +1,5 @@ +#include "arch.h" #include "cc2.h" void diff --git a/cc2/main.c b/cc2/main.c @@ -3,6 +3,7 @@ #include <stdio.h> #include <stdlib.h> +#include "arch.h" #include "cc2.h" #include "error.h" diff --git a/cc2/node.c b/cc2/node.c @@ -0,0 +1,64 @@ + +#include <stdlib.h> + +#include "../inc/cc.h" + +#include "arch.h" +#include "cc2.h" + +#define NSYMBOLS 32 + +int inhome; + +struct arena { + Node *mem; + struct arena *next; +}; +static struct arena *arena; +static Node *freep; + +Node * +newnode(void) +{ + struct arena *ap; + Node *np; + + if (!freep) { + ap = xmalloc(sizeof(*ap)); + ap->mem = xcalloc(NSYMBOLS, sizeof(Node)); + ap->next = arena; + arena = ap; + for (np = ap->mem; np < &ap->mem[NSYMBOLS-1]; ++np) + np->left = np+1; + np->left = NULL; + freep = np; + } + + np = freep; + freep = np->left; + + np->right = NULL; + np->left = NULL; + np->sym = NULL; + np->stmt = NULL; + return np; +} + +void +delnode(Node *np) +{ + np->left = freep; + freep = np; +} + +void +cleannodes(void) +{ + struct arena *ap, *next; + + for (ap = arena; ap; ap = next) { + next = ap; + free(ap->mem); + free(ap); + } +} diff --git a/cc2/optm.c b/cc2/optm.c @@ -1,4 +1,5 @@ +#include "arch.h" #include "cc2.h" void diff --git a/cc2/parser.c b/cc2/parser.c @@ -1,60 +1,405 @@ #include <errno.h> #include <stdio.h> 
+#include <stdlib.h> #include <string.h> +#include "../inc/cc.h" +#include "../inc/sizes.h" + +#include "arch.h" #include "cc2.h" -#define MAXLINE 200 +#define MAXLINE 200 +#define STACKSIZ 50 + +extern Type int8type, int16type, int32type, int64type, + uint8type, uint16type, uint32type, uint64type, + float32type, float64type, float80type, + booltype, + ptrtype, + voidtype, + elipsistype; + +Type funtype = { + .flags = DEFTYP | FUNCF +}; + +Type arrtype = { + .flags = DEFTYP | ARYF +}; + +Type uniontype = { + .flags = DEFTYP | UNIONF +}; + +Type strtype = { + .flags = DEFTYP | STRUCTF +}; + +union tokenop { + void *arg; + int op; +}; + +typedef void parsefun(char *, union tokenop); +static parsefun type, symbol, getname, unary, binary, ternary, call, + parameter, constant; + +typedef void evalfun(void); +static evalfun vardecl, beginfun, endfun, endpars, stmt, begininit, endinit; + +static struct decoc { + void (*eval)(void); + void (*parse)(char *token, union tokenop); + union tokenop u; +} optbl[] = { + [AUTO] = {vardecl, symbol, .u.op = AUTO}, + [REG] = {vardecl, symbol, .u.op = REG}, + [GLOB] = {vardecl, symbol, .u.op = MEM}, + [EXTRN] = {vardecl, symbol, .u.op = MEM}, + [PRIVAT] = {vardecl, symbol, .u.op = MEM}, + [LOCAL] = {vardecl, symbol, .u.op = MEM}, + [MEMBER] = {NULL, symbol}, + [LABEL] = {NULL, symbol}, + + [INT8] = {NULL, type, .u.arg = &int8type}, + [INT16] = {NULL, type, .u.arg = &int16type}, + [INT32] = {NULL, type, .u.arg = &int32type}, + [INT64] = {NULL, type, .u.arg = &int64type}, + [UINT8] = {NULL, type, .u.arg = &uint8type}, + [UINT16] = {NULL, type, .u.arg = &uint16type}, + [UINT32] = {NULL, type, .u.arg = &uint32type}, + [UINT64] = {NULL, type, .u.arg = &uint64type}, + [FLOAT] = {NULL, type, .u.arg = &float32type}, + [DOUBLE] = {NULL, type, .u.arg = &float64type}, + [LDOUBLE] = {NULL, type, .u.arg = &float80type}, + [VOID] = {NULL, type, .u.arg = &voidtype}, + [BOOL] = {NULL, type, .u.arg = &booltype}, + [POINTER] = {NULL, type, .u.arg = 
&ptrtype}, + [ELLIPSIS] = {NULL, type, .u.arg = &elipsistype}, + + [FUNCTION] = {NULL, type, .u.arg = &funtype}, + [VECTOR] = {NULL, NULL, .u.arg = &arrtype}, + [UNION] = {NULL, NULL, .u.arg = &uniontype}, + [STRUCT] = {NULL, NULL, .u.arg = &strtype}, + + [ONAME] = {NULL, getname}, + ['{'] = {beginfun}, + ['}'] = {endfun}, + ['('] = {begininit}, + [')'] = {endinit}, + [OEPARS] = {endpars}, + [OSTMT] = {stmt}, + + [OCPL] = {NULL, unary}, + [ONEG] = {NULL, unary}, + [OFIELD] = {NULL, unary}, + [OADDR] = {NULL, unary}, + [OAND] = {NULL, unary}, + [OOR] = {NULL, unary}, + [OPTR] = {NULL, unary}, + [OCAST] = {NULL, unary}, + [OPAR ] = {NULL, unary}, + + [OADD] = {NULL, binary}, + [OSUB] = {NULL, binary}, + [OMUL] = {NULL, binary}, + [OMOD] = {NULL, binary}, + [ODIV] = {NULL, binary}, + [OSHL] = {NULL, binary}, + [OSHR] = {NULL, binary}, + [OLT] = {NULL, binary}, + [OGT] = {NULL, binary}, + [OLE] = {NULL, binary}, + [OGE] = {NULL, binary}, + [OEQ] = {NULL, binary}, + [ONE] = {NULL, binary}, + [OBAND] = {NULL, binary}, + [OBOR] = {NULL, binary}, + [OBXOR] = {NULL, binary}, + [OASSIG] = {NULL, binary}, + [OCOMMA] = {NULL, binary}, + + [OASK] = {NULL, ternary}, + [OCALL] = {NULL, call}, + + [OCONST] = NULL, constant, + + [OCASE] = NULL, + [ODEFAULT] = NULL, + [OTABLE] = NULL, + [OSWITCH] = NULL +}; + +static void *stack[STACKSIZ], **sp = stack; +static Symbol *lastsym, *curfun; +static Symbol *params[NR_FUNPARAM]; +static int funpars = -1, sclass, callpars, ininit; +static Node *stmtp, *callp; static void -push(Node * np) +push(void *elem) { + if (sp == stack[STACKSIZ]) + error(ESTACKO); + *sp++ = elem; } -static Node * +static void * pop(void) { + if (sp == stack) + error(ESTACKU); + return *--sp; +} + +static unsigned short +newid(void) +{ + static unsigned short id; + + if (++id == 0) + error(EIDOVER); } static void -expr(char *tok) +type(char *token, union tokenop u) { + push(u.arg); } static void -stmt(Node *np) +getname(char *t, union tokenop u) { + push((*++t) ? 
xstrdup(t) : NULL); } static void -decl(char *tok) +symbol(char *token, union tokenop u) { + Node *np; + + sclass = *token++; + np = newnode(); + np->sym = getsym(atoi(token)); + np->op = u.op; + push(np); +} + +static Type * +gettype(char *token) +{ + struct decoc *dp; + + dp = &optbl[*token]; + if (!dp->parse) + error(ESYNTAX); + (*dp->parse)(token, dp->u); + return pop(); +} + +static void +constant(char *token, union tokenop u) +{ + static char letters[] = "0123456789ABCDEF"; + Node *np = newnode(); + Symbol *sym = getsym(0); + TUINT v; + unsigned c; + + np->sym = sym; + ++token; + if (*token == OSTRING) { + np->op = OSYM; + np->type = ptrtype; + sym->id = newid(); + sym->u.s = xstrdup(++token); + } else { + np->op = OCONST; + np->type = *gettype(token++); + for (v = 0; c = *token++; v += c) { + v <<= 4; + c = strchr(letters, c) - letters; + } + sym->u.i = v; + } +} + +static void +ternary(char *token, union tokenop u) +{ + Node *ask, *colon; + Type *tp; + + tp = gettype(++token); + + colon = newnode(); + colon->op = OCOLON; + colon->right = pop(); + colon->left = pop(); + + ask = newnode(); + ask->op = OASK; + ask->type = *tp; + ask->left = pop(); + ask->right = pop(); + push(ask); +} + +static void +unary(char *token, union tokenop u) +{ + Node *np = newnode(); + + np->op = *token++; + np->type = *gettype(token); + np->left = pop(); + np->right = NULL; + push(np); +} + +static void +call(char *token, union tokenop u) +{ + Node *np, *par, *fun; + + for (par = NULL;; par = np) { + np = pop(); + if (np->op != OPAR) + break; + np->right = par; + } + fun = newnode(); + fun->op = OCALL; + fun->type = *gettype(token); + fun->left = np; + fun->right = par; + push(fun); +} + +static void +binary(char *token, union tokenop u) +{ + Node *np = newnode(); + + np->op = *token++; + np->type = *gettype(token); + np->right = pop(); + np->left = pop(); + push(np); +} + +static void +begininit(void) +{ + inint = 1; +} + +static void +endinit(void) +{ + ininit = 0; +} + +static 
void +beginfun(void) +{ + if (curfun) + error(ESYNTAX); + memset(params, 0, sizeof(params)); + funpars = 0; + curfun = lastsym; + pushctx(); +} + +static void +endfun(void) +{ + if (!curfun) + error(ESYNTAX); + popctx(); + curfun = NULL; + funpars = -1; +} + +static void +endpars(void) +{ + if (!curfun || funpars == -1) + error(ESYNTAX); + funpars = -1; +} + +static void +vardecl(void) +{ + Type *tp; + Node *np; + Symbol *sym; + char *name; + + name = pop(); + tp = pop(); + np = pop(); + + sym = np->sym; + sym->name = name; + sym->type = *tp; + sym->kind = sclass; + lastsym = sym; + + if (funpars >= 0) { + if (funpars == NR_FUNPARAM) + error(EOUTPAR); + params[funpars++] = sym; + } + delnode(np); +} + +static void +stmt(void) +{ + static Node *lastp; + Node *np = pop(); + + if (!stmtp) + stmtp = np; + else + lastp->stmt = np; + lastp = np; } void parse(void) { - char line[MAXLINE]; + char line[MAXLINE], *t; size_t len; + int c; + struct decoc *dp; for (;;) { - if (fgets(line, sizeof(line), stdin)) + if (!fgets(line, sizeof(line), stdin)) break; if ((len = strlen(line)) == 0 || line[0] == '\n') continue; if (line[len-1] != '\n') error(ELNLINE); line[len-1] = '\0'; - switch (*line) { - case '\t': - expr(strtok(line, "\t")); - stmt(pop()); - break; - default: - decl(strtok(line, "\t")); - break; + + c = *line; + for (t = strtok(line, "\t\n"); t; t = strtok(NULL, "\t\n")) { + dp = &optbl[*t]; + if (!dp->parse) + break; + (*dp->parse)(t, dp->u); } + + (*optbl[c].eval)(); + if (sp != stack) + error(ESTACKA); + if (c == '}') + break; } if (ferror(stdin)) diff --git a/cc2/peep.c b/cc2/peep.c @@ -1,4 +1,5 @@ +#include "arch.h" #include "cc2.h" void diff --git a/cc2/symbol.c b/cc2/symbol.c @@ -0,0 +1,66 @@ + +#include <stdlib.h> +#include <string.h> + +#include "../inc/cc.h" + +#include "arch.h" +#include "cc2.h" + +#define NR_SYMHASH 64 + +static Symbol *symtab[NR_SYMHASH]; +static Symbol *locals; +static int infunction; + + +void +freesym(Symbol *sym) +{ + free(sym->name); 
+ free(sym); +} + +void +pushctx(void) +{ + infunction = 1; +} + +void +popctx(void) +{ + Symbol *sym, *next; + + infunction = 0; + for (sym = locals; sym; sym = next) { + next = sym->next; + symtab[sym->id & NR_SYMHASH-1] = sym->h_next; + freesym(sym); + } +} + +Symbol * +getsym(int id) +{ + Symbol **htab, *sym; + + htab = &symtab[id & NR_SYMHASH-1]; + for (sym = *htab; sym; sym = sym->h_next) { + if (sym->id > 0 && sym->id != id) + break; + } + if (!sym) { + sym = xcalloc(1, sizeof(*sym)); + sym->id = id; + if (!infunction) { + sym->next = NULL; + } else { + sym->next = locals; + locals = sym; + } + sym->h_next = *htab; + *htab = sym; + } + return sym; +}