scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

parser.c (14462B)


      1 static char sccsid[] = "@(#) ./cc2/parser.c";
      2 #include <errno.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include <cstd.h>
      8 #include "../inc/scc.h"
      9 
     10 #include "cc2.h"
     11 
     12 #define STACKSIZ     50
     13 
     14 extern Type int8type, int16type, int32type, int64type,
     15             uint8type, uint16type, uint32type, uint64type,
     16             float32type, float64type, float80type,
     17             booltype,
     18             ptrtype,
     19             voidtype,
     20             arg_type;
     21 
     22 Type funetype = {
     23 	.flags = FUNF | ELLIPS
     24 };
     25 
     26 Type funtype = {
     27 	.flags = FUNF
     28 };
     29 
     30 union tokenop {
     31 	void *arg;
     32 	unsigned op;
     33 };
     34 
     35 struct swtch {
     36 	int nr;
     37 	Node *first;
     38 	Node *last;
     39 };
     40 
     41 static struct swtch swtbl[NR_BLOCK], *swp = swtbl;
     42 static Symbol *lastfun;
     43 
     44 typedef void parsefun(char *, union tokenop);
     45 static parsefun type, symbol, getname, unary, binary, ternary, call,
     46                 constant, composed, binit, einit,
     47                 jump, oreturn, loop, assign,
     48                 ocase, bswitch, eswitch, builtin;
     49 
     50 typedef void evalfun(void);
     51 static evalfun vardecl, beginfun, endfun, endpars, stmt,
     52                array, aggregate, flddecl, labeldcl;
     53 
     54 static struct decoc {
     55 	void (*eval)(void);
     56 	void (*parse)(char *token, union tokenop);
     57 	union tokenop u;
     58 } optbl[] = {      /*  eval     parse           args */
     59 	['A']   = {  vardecl,  symbol, .u.op  =  SAUTO<<8 | OAUTO},
     60 	['R']   = {  vardecl,  symbol, .u.op  =   SREG<<8 |  OREG},
     61 	['G']   = {  vardecl,  symbol, .u.op  =  SGLOB<<8 |  OMEM},
     62 	['X']   = {  vardecl,  symbol, .u.op  = SEXTRN<<8 |  OMEM},
     63 	['Y']   = {  vardecl,  symbol, .u.op  =  SPRIV<<8 |  OMEM},
     64 	['T']   = {  vardecl,  symbol, .u.op  = SLOCAL<<8 |  OMEM},
     65 	['M']   = {  flddecl,  symbol, .u.op  =  SMEMB<<8 |  OMEM},
     66 	['L']   = { labeldcl,  symbol, .u.op  = SLABEL<<8 | OLABEL},
     67 
     68 	['C']   = {     NULL,    type, .u.arg =    &int8type},
     69 	['I']   = {     NULL,    type, .u.arg =   &int16type},
     70 	['W']   = {     NULL,    type, .u.arg =   &int32type},
     71 	['Q']   = {     NULL,    type, .u.arg =   &int64type},
     72 	['K']   = {     NULL,    type, .u.arg =   &uint8type},
     73 	['N']   = {     NULL,    type, .u.arg =  &uint16type},
     74 	['Z']   = {     NULL,    type, .u.arg =  &uint32type},
     75 	['O']   = {     NULL,    type, .u.arg =  &uint64type},
     76 	['J']   = {     NULL,    type, .u.arg = &float32type},
     77 	['D']   = {     NULL,    type, .u.arg = &float64type},
     78 	['H']   = {     NULL,    type, .u.arg = &float80type},
     79 	['0']   = {     NULL,    type, .u.arg =    &voidtype},
     80 	['B']   = {     NULL,    type, .u.arg =    &booltype},
     81 	['P']   = {     NULL,    type, .u.arg =     &ptrtype},
     82 	['E']   = {     NULL,    type, .u.arg =    &funetype},
     83 	['1']	= {     NULL,    type, .u.arg =    &arg_type},
     84 
     85 	['F']   = {     NULL,    type, .u.arg =     &funtype},
     86 	['V']   = {    array,composed,                     0},
     87 	['U']   = {aggregate,composed,                     0},
     88 	['S']   = {aggregate,composed,                     0},
     89 
     90 	['"']   = {     NULL, getname,                     0},
     91 	['{']   = { beginfun,    NULL,                     0},
     92 	['}']   = {   endfun,    NULL,                     0},
     93 	['(']   = {     NULL,   binit,                     0},
     94 	[')']   = {     NULL,   einit,                     0},
     95 	['\\']  = {  endpars,    NULL,                     0},
     96 	['\t']  = {     stmt,    NULL,                     0},
     97 
     98 	['~']   = {     NULL,   unary, .u.op =          OCPL},
     99 	['_']   = {     NULL,   unary, .u.op =         OSNEG},
    100 	['\'']  = {     NULL,   unary, .u.op =         OADDR},
    101 	['@']   = {     NULL,   unary, .u.op =          OPTR},
    102 	['g']   = {     NULL,   unary, .u.op =         OCAST},
    103 	['p']   = {     NULL,   unary, .u.op =          OPAR},
    104 	['n']   = {     NULL,   unary, .u.op =          ONEG},
    105 
    106 	['a']   = {     NULL,  binary, .u.op =          OAND},
    107 	['o']   = {     NULL,  binary, .u.op =           OOR},
    108 	['.']   = {     NULL,  binary, .u.op =        OFIELD},
    109 	['+']   = {     NULL,  binary, .u.op =          OADD},
    110 	['-']   = {     NULL,  binary, .u.op =          OSUB},
    111 	['*']   = {     NULL,  binary, .u.op =          OMUL},
    112 	['%']   = {     NULL,  binary, .u.op =          OMOD},
    113 	['/']   = {     NULL,  binary, .u.op =          ODIV},
    114 	['l']   = {     NULL,  binary, .u.op =          OSHL},
    115 	['r']   = {     NULL,  binary, .u.op =          OSHR},
    116 	['<']   = {     NULL,  binary, .u.op =           OLT},
    117 	['>']   = {     NULL,  binary, .u.op =           OGT},
    118 	['[']   = {     NULL,  binary, .u.op =           OLE},
    119 	[']']   = {     NULL,  binary, .u.op =           OGE},
    120 	['=']   = {     NULL,  binary, .u.op =           OEQ},
    121 	['!']   = {     NULL,  binary, .u.op =           ONE},
    122 	['&']   = {     NULL,  binary, .u.op =         OBAND},
    123 	['|']   = {     NULL,  binary, .u.op =          OBOR},
    124 	['^']   = {     NULL,  binary, .u.op =         OBXOR},
    125 	[',']   = {     NULL,  binary, .u.op =        OCOMMA},
    126 	['m']   = {     NULL,  builtin,.u.op =      OBUILTIN},
    127 
    128 	[':']   = {     NULL,  assign, .u.op =        OASSIG},
    129 	['?']   = {     NULL, ternary, .u.op =          OASK},
    130 	['c']   = {     NULL,    call, .u.op =         OCALL},
    131 	['z']   = {     NULL,    call, .u.op =        OCALLE},
    132 
    133 	['#']   = {     NULL,constant, .u.op =        OCONST},
    134 
    135 	['j']   = {     NULL,    jump, .u.op =          OJMP},
    136 	['y']   = {     NULL,    jump, .u.op =       OBRANCH},
    137 	['h']   = {     NULL, oreturn, .u.op =          ORET},
    138 	['i']   = {     NULL,    NULL, .u.op =          OINC},
    139 	['d']   = {     NULL,    NULL, .u.op =          ODEC},
    140 
    141 	['b']   = {     NULL,    loop, .u.op =        OBLOOP},
    142 	['e']   = {     NULL,    loop, .u.op =        OELOOP},
    143 
    144 	['v']   = {     NULL,   ocase, .u.op =         OCASE},
    145 	['f']   = {     NULL,   ocase, .u.op =      ODEFAULT},
    146 	['t']   = {     NULL, eswitch, .u.op =      OESWITCH},
    147 	['s']   = {     NULL, bswitch, .u.op =      OBSWITCH},
    148 };
    149 
    150 static int sclass, inpars, ininit, endf, lineno;
    151 static void *stack[STACKSIZ], **sp = stack;
    152 
    153 static Node *
    154 push(void *elem)
    155 {
    156 	if (sp == &stack[STACKSIZ])
    157 		error(ESTACKO);
    158 	return *sp++ = elem;
    159 }
    160 
    161 static void *
    162 pop(void)
    163 {
    164 	if (sp == stack)
    165 		error(ESTACKU);
    166 	return *--sp;
    167 }
    168 
    169 static int
    170 empty(void)
    171 {
    172 	return sp == stack;
    173 }
    174 
    175 static void
    176 type(char *token, union tokenop u)
    177 {
    178 	push(u.arg);
    179 }
    180 
    181 static void
    182 composed(char *token, union tokenop u)
    183 {
    184 	Symbol *sym;
    185 
    186 	sym = getsym(atoi(token+1));
    187 	push(&sym->type);
    188 }
    189 
    190 static void
    191 getname(char *t, union tokenop u)
    192 {
    193 	push((*++t) ? xstrdup(t) : NULL);
    194 }
    195 
    196 static void
    197 symbol(char *token, union tokenop u)
    198 {
    199 	Node *np = node(u.op & 0xFF);
    200 	Symbol *sym = getsym(atoi(token+1));
    201 
    202 	sclass = u.op >> 8;
    203 	np->u.sym = sym;
    204 	np->type = sym->type;
    205 	push(np);
    206 }
    207 
    208 static Type *
    209 gettype(char *token)
    210 {
    211 	struct decoc *dp;
    212 
    213 	dp = &optbl[*token];
    214 	if (!dp->parse)
    215 		error(ESYNTAX);
    216 	(*dp->parse)(token, dp->u);
    217 	return pop();
    218 }
    219 
    220 static void
    221 constant(char *token, union tokenop u)
    222 {
    223 	static char letters[] = "0123456789ABCDEF";
    224 	Node *np;
    225 	TUINT v;
    226 	unsigned c;
    227 
    228 	++token;
    229 	if (*token == '"') {
    230 		++token;
    231 		np = node(OSTRING);
    232 		np->type.flags = STRF;
    233 		np->type.size = strlen(token);
    234 		np->type.align = int8type.align;
    235 		np->u.s = xstrdup(token);
    236 	} else {
    237 		np = node(OCONST);
    238 		np->type = *gettype(token++);
    239 		for (v = 0; c = *token++; v += c) {
    240 			v <<= 4;
    241 			c = strchr(letters, c) - letters;
    242 		}
    243 		np->u.i = v;
    244 	}
    245 	push(np);
    246 }
    247 
    248 static void
    249 assign(char *token, union tokenop u)
    250 {
    251 	int subop;
    252 	Node *np = node(u.op);
    253 
    254 	switch (subop = *++token) {
    255 	case '+':
    256 	case '-':
    257 	case '*':
    258 	case '%':
    259 	case '/':
    260 	case 'l':
    261 	case 'r':
    262 	case '&':
    263 	case '|':
    264 	case '^':
    265 	case 'i':
    266 	case 'd':
    267 		++token;
    268 		subop = optbl[subop].u.op;
    269 		break;
    270 	default:
    271 		subop = 0;
    272 		break;
    273 	}
    274 
    275 	np->u.subop = subop;
    276 	np->type = *gettype(token);
    277 	np->right = pop();
    278 	np->left = pop();
    279 	push(np);
    280 }
    281 
    282 static void
    283 ternary(char *token, union tokenop u)
    284 {
    285 	Node *ask = node(OASK), *colon = node(OCOLON);
    286 	Type *tp = gettype(token+1);
    287 
    288 	colon->right = pop();
    289 	colon->left = pop();
    290 
    291 	ask->type = *tp;
    292 	ask->left = pop();
    293 	ask->right = colon;
    294 	push(ask);
    295 }
    296 
    297 static void
    298 eval(char *tok)
    299 {
    300 	struct decoc *dp;
    301 
    302 	do {
    303 		dp = &optbl[*tok];
    304 		if (!dp->parse)
    305 			break;
    306 		(*dp->parse)(tok, dp->u);
    307 	} while (tok = strtok(NULL, "\t\n"));
    308 }
    309 
    310 static int
    311 nextline(void)
    312 {
    313 	static char line[LINESIZ];
    314 	size_t len;
    315 	int c;
    316 	void (*fun)(void);
    317 
    318 repeat:
    319 	++lineno;
    320 	if (!fgets(line, sizeof(line), stdin))
    321 		return 0;
    322 	if ((len = strlen(line)) == 0 || line[0] == '\n')
    323 		goto repeat;
    324 	if (line[len-1] != '\n')
    325 		error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE);
    326 	line[len-1] = '\0';
    327 
    328 	c = *line;
    329 	eval(strtok(line, "\t\n"));
    330 	if ((fun = *optbl[c].eval) != NULL)
    331 		(*fun)();
    332 	if (sp != stack)
    333 		error(ESTACKA);
    334 	return 1;
    335 }
    336 
    337 static void
    338 oreturn(char *token, union tokenop u)
    339 {
    340 	Node *np = node(u.op);
    341 
    342 	if (token = strtok(NULL, "\t\n"))
    343 		eval(token);
    344 	if (!empty())
    345 		np->left = pop();
    346 	push(np);
    347 }
    348 
    349 /*
    350  * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with
    351  * the last switch table. It is a bit ugly to touch directly curstmt
    352  * here, but moving this function to node.c is worse, because we are
    353  * putting knowledge of how the text is parsed into the node
    354  * represtation module.
    355  */
    356 static void
    357 waft(Node *np)
    358 {
    359 	Node *lastcase, *next;;
    360 	struct swtch *cur;
    361 	extern Node *curstmt;
    362 
    363 	if (swp == swtbl)
    364 		error(EWTACKU);
    365 
    366 	cur = swp - 1;
    367 	lastcase = cur->last;
    368 	next = lastcase->next;
    369 
    370 	np->next = next;
    371 	np->prev = lastcase;
    372 
    373 	if (next)
    374 		next->prev = np;
    375 	lastcase->next = np;
    376 
    377 	if (curstmt == cur->last)
    378 		curstmt = np;
    379 	cur->last = np;
    380 	cur->nr++;
    381 }
    382 
    383 static void
    384 bswitch(char *token, union tokenop u)
    385 {
    386 	struct swtch *cur;
    387 	Node *np = node(u.op);
    388 
    389 	if (swp == &swtbl[NR_BLOCK])
    390 		error(EWTACKO);
    391 	cur = swp++;
    392 	cur->nr = 0;
    393 
    394 	eval(strtok(NULL, "\t\n"));
    395 	np->left = pop();
    396 
    397 	push(cur->first = cur->last = np);
    398 }
    399 
    400 static void
    401 eswitch(char *token, union tokenop u)
    402 {
    403 	struct swtch *cur;
    404 
    405 	if (swp == swtbl)
    406 		error(EWTACKU);
    407 	jump(token, u);
    408 	waft(pop());
    409 	cur = --swp;
    410 	cur->first->u.i = cur->nr;
    411 }
    412 
    413 static void
    414 ocase(char *token, union tokenop u)
    415 {
    416 	jump(token, u);
    417 	waft(pop());
    418 }
    419 
    420 static void
    421 jump(char *token, union tokenop u)
    422 {
    423 	Node *aux, *np = node(u.op);
    424 
    425 	eval(strtok(NULL, "\t\n"));
    426 
    427 	if (u.op == OBRANCH || u.op == OCASE)
    428 		np->left = pop();
    429 	aux = pop();
    430 	np->u.sym = aux->u.sym;
    431 	delnode(aux);
    432 	push(np);
    433 }
    434 
    435 static void
    436 loop(char *token, union tokenop u)
    437 {
    438 	push(node(u.op));
    439 }
    440 
    441 static void
    442 unary(char *token, union tokenop u)
    443 {
    444 	Node *np = node(u.op);
    445 
    446 	np->type = *gettype(token+1);
    447 	np->left = pop();
    448 	np->right = NULL;
    449 	push(np);
    450 }
    451 
    452 static void
    453 call(char *token, union tokenop u)
    454 {
    455 	Node *np, *par, *fun = node(u.op);
    456 
    457 	for (par = NULL;; par = np) {
    458 		np = pop();
    459 		if (np->op != OPAR)
    460 			break;
    461 		np->right = par;
    462 	}
    463 
    464 	fun->type = *gettype(token+1);
    465 	fun->left = np;
    466 	fun->right = par;
    467 	push(fun);
    468 }
    469 
    470 static void
    471 builtin(char *token, union tokenop u)
    472 {
    473 	Node *np = node(u.op);
    474 	char *name;
    475 	unsigned subop, nchilds;
    476 
    477 	np->type = *gettype(token+1);
    478 	name = pop();
    479 
    480 	if (!strcmp("__builtin_va_arg", name)) {
    481 		nchilds = 1;
    482 		subop = BVA_ARG;
    483 	} else if (!strcmp("__builtin_va_start", name)) {
    484 		nchilds = 2;
    485 		subop = BVA_START;
    486 	} else if (!strcmp("__builtin_va_end", name)) {
    487 		nchilds = 1;
    488 		subop = BVA_END;
    489 	} else if (!strcmp("__builtin_va_copy", name)) {
    490 		nchilds = 2;
    491 		subop = BVA_COPY;
    492 	} else {
    493 		error(EBBUILT);;
    494 	}
    495 
    496 	np->u.subop = subop;
    497 	np->right = (nchilds == 2) ? pop() : NULL;
    498 	np->left = (nchilds != 0) ? pop() : NULL;
    499 
    500 	free(name);
    501 	push(np);
    502 }
    503 
    504 static void
    505 binary(char *token, union tokenop u)
    506 {
    507 	Node *np = node(u.op);
    508 
    509 	np->type = *gettype(token+1);
    510 	np->right = pop();
    511 	np->left = pop();
    512 	push(np);
    513 }
    514 
    515 static void
    516 binit(char *token, union tokenop u)
    517 {
    518 	ininit = 1;
    519 }
    520 
    521 static void
    522 einit(char *token, union tokenop u)
    523 {
    524 	ininit = 0;
    525 	endinit();
    526 }
    527 
    528 static void
    529 endpars(void)
    530 {
    531 	if (!curfun || !inpars)
    532 		error(ESYNTAX);
    533 	inpars = 0;
    534 }
    535 
    536 static void
    537 aggregate(void)
    538 {
    539 	Node *align, *size;
    540 	char *name;
    541 	Type *tp;
    542 	Symbol *sym;
    543 
    544 	align = pop();
    545 	size = pop();
    546 	name = pop();
    547 	tp = pop();
    548 
    549 	tp->size = size->u.i;
    550 	tp->align = align->u.i;
    551 	tp->flags = AGGRF;
    552 	/*
    553 	 * type is the first field of Symbol so we can obtain the
    554 	 * address of the symbol from the address of the type.
    555 	 * We have to do this because composed returns the pointer
    556 	 * to the type, but in this function we also need the
    557 	 * symbol to store the name.
    558 	 */
    559 	sym = (Symbol *) tp;
    560 	sym->name = name;
    561 
    562 	delnode(align);
    563 	delnode(size);
    564 }
    565 
    566 static void
    567 array(void)
    568 {
    569 	Type *tp, *base;
    570 	Node *size;
    571 
    572 	size = pop();
    573 	base = pop();
    574 	tp = pop();
    575 	tp->size = size->u.i * base->size; /* FIXME check for overflow */
    576 	tp->align = base->align;
    577 
    578 	delnode(size);
    579 }
    580 
    581 static void
    582 decl(Symbol *sym)
    583 {
    584 	Type *tp = &sym->type;
    585 
    586 	if (tp->flags & FUNF) {
    587 		lastfun = sym;
    588 	} else {
    589 		switch (sym->kind) {
    590 		case SEXTRN:
    591 		case SGLOB:
    592 		case SPRIV:
    593 		case SLOCAL:
    594 			defglobal(sym);
    595 			break;
    596 		case SAUTO:
    597 		case SREG:
    598 			if (!curfun)
    599 				error(ESYNTAX);
    600 			((inpars) ? defpar : defvar)(sym);
    601 			break;
    602 		default:
    603 			abort();
    604 		}
    605 	}
    606 }
    607 
    608 static void
    609 vardecl(void)
    610 {
    611 	Type *tp, *rp;
    612 	Node *np;
    613 	Symbol *sym;
    614 	char *name;
    615 
    616 	name = pop();
    617 	tp = pop();
    618 	if (tp->flags & FUNF)
    619 		rp = pop();
    620 	np = pop();
    621 
    622 	sym = np->u.sym;
    623 	/*
    624 	 * We have to free sym->name because in tentative declarations
    625 	 * we can have multiple declarations of the same symbol, and in
    626 	 * this case our parser will allocate twice the memory
    627 	 */
    628 	free(sym->name);
    629 	sym->name = name;
    630 	sym->type = *tp;
    631 	if (tp->flags & FUNF)
    632 		sym->rtype = *rp;
    633 	sym->kind = sclass;
    634 
    635 	if (ininit)
    636 		sym->type.flags |= INITF;
    637 	decl(sym);
    638 	delnode(np);
    639 }
    640 
    641 static void
    642 flddecl(void)
    643 {
    644 	Node *off, *np;
    645 	char *name;
    646 	Type *tp;
    647 	Symbol *sym;
    648 
    649 	off = pop();
    650 	name = pop();
    651 	tp = pop();
    652 	np = pop();
    653 
    654 	sym = np->u.sym;
    655 	sym->u.off = off->u.i;
    656 	sym->name = name;
    657 	sym->type = *tp;
    658 
    659 	delnode(np);
    660 	delnode(off);
    661 }
    662 
    663 static void
    664 labeldcl(void)
    665 {
    666 	Node *np;
    667 	Symbol *sym;
    668 
    669 	np = pop();
    670 	np->op = ONOP;
    671 	sym = np->u.sym;
    672 	sym->kind = SLABEL;
    673 	sym->u.stmt = np;
    674 	np->label = sym;
    675 	addstmt(np, SETCUR);
    676 }
    677 
    678 static void
    679 stmt(void)
    680 {
    681 	Node *np;
    682 
    683 	if (empty())
    684 		return;
    685 	np = pop();
    686 	if (ininit) {
    687 		data(np);
    688 		deltree(np);
    689 		return;
    690 	}
    691 	addstmt(np, SETCUR);
    692 }
    693 
    694 static void
    695 beginfun(void)
    696 {
    697 	curfun = lastfun;
    698 	inpars = 1;
    699 	pushctx();
    700 	addstmt(node(OBFUN), SETCUR);
    701 }
    702 
    703 static void
    704 endfun(void)
    705 {
    706 	endf = 1;
    707 	addstmt(node(OEFUN), SETCUR);
    708 }
    709 
    710 void
    711 parse(void)
    712 {
    713 	cleannodes();  /* remove code of previous function */
    714 	popctx();  /* remove context of previous function */
    715 	curfun = NULL;
    716 	endf = 0;
    717 
    718 	while (!endf && nextline())
    719 		/* nothing */;
    720 	if (ferror(stdin))
    721 		error(EFERROR, strerror(errno));
    722 }