parser.c (14462B)
1 static char sccsid[] = "@(#) ./cc2/parser.c"; 2 #include <errno.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 7 #include <cstd.h> 8 #include "../inc/scc.h" 9 10 #include "cc2.h" 11 12 #define STACKSIZ 50 13 14 extern Type int8type, int16type, int32type, int64type, 15 uint8type, uint16type, uint32type, uint64type, 16 float32type, float64type, float80type, 17 booltype, 18 ptrtype, 19 voidtype, 20 arg_type; 21 22 Type funetype = { 23 .flags = FUNF | ELLIPS 24 }; 25 26 Type funtype = { 27 .flags = FUNF 28 }; 29 30 union tokenop { 31 void *arg; 32 unsigned op; 33 }; 34 35 struct swtch { 36 int nr; 37 Node *first; 38 Node *last; 39 }; 40 41 static struct swtch swtbl[NR_BLOCK], *swp = swtbl; 42 static Symbol *lastfun; 43 44 typedef void parsefun(char *, union tokenop); 45 static parsefun type, symbol, getname, unary, binary, ternary, call, 46 constant, composed, binit, einit, 47 jump, oreturn, loop, assign, 48 ocase, bswitch, eswitch, builtin; 49 50 typedef void evalfun(void); 51 static evalfun vardecl, beginfun, endfun, endpars, stmt, 52 array, aggregate, flddecl, labeldcl; 53 54 static struct decoc { 55 void (*eval)(void); 56 void (*parse)(char *token, union tokenop); 57 union tokenop u; 58 } optbl[] = { /* eval parse args */ 59 ['A'] = { vardecl, symbol, .u.op = SAUTO<<8 | OAUTO}, 60 ['R'] = { vardecl, symbol, .u.op = SREG<<8 | OREG}, 61 ['G'] = { vardecl, symbol, .u.op = SGLOB<<8 | OMEM}, 62 ['X'] = { vardecl, symbol, .u.op = SEXTRN<<8 | OMEM}, 63 ['Y'] = { vardecl, symbol, .u.op = SPRIV<<8 | OMEM}, 64 ['T'] = { vardecl, symbol, .u.op = SLOCAL<<8 | OMEM}, 65 ['M'] = { flddecl, symbol, .u.op = SMEMB<<8 | OMEM}, 66 ['L'] = { labeldcl, symbol, .u.op = SLABEL<<8 | OLABEL}, 67 68 ['C'] = { NULL, type, .u.arg = &int8type}, 69 ['I'] = { NULL, type, .u.arg = &int16type}, 70 ['W'] = { NULL, type, .u.arg = &int32type}, 71 ['Q'] = { NULL, type, .u.arg = &int64type}, 72 ['K'] = { NULL, type, .u.arg = &uint8type}, 73 ['N'] = { NULL, type, .u.arg = &uint16type}, 74 ['Z'] = { NULL, type, .u.arg = &uint32type}, 75 ['O'] = { NULL, type, .u.arg = &uint64type}, 76 ['J'] = { NULL, type, .u.arg = &float32type}, 77 ['D'] = { NULL, type, .u.arg = &float64type}, 78 ['H'] = { NULL, type, .u.arg = &float80type}, 79 ['0'] = { NULL, type, .u.arg = &voidtype}, 80 ['B'] = { NULL, type, .u.arg = &booltype}, 81 ['P'] = { NULL, type, .u.arg = &ptrtype}, 82 ['E'] = { NULL, type, .u.arg = &funetype}, 83 ['1'] = { NULL, type, .u.arg = &arg_type}, 84 85 ['F'] = { NULL, type, .u.arg = &funtype}, 86 ['V'] = { array,composed, 0}, 87 ['U'] = {aggregate,composed, 0}, 88 ['S'] = {aggregate,composed, 0}, 89 90 ['"'] = { NULL, getname, 0}, 91 ['{'] = { beginfun, NULL, 0}, 92 ['}'] = { endfun, NULL, 0}, 93 ['('] = { NULL, binit, 0}, 94 [')'] = { NULL, einit, 0}, 95 ['\\'] = { endpars, NULL, 0}, 96 ['\t'] = { stmt, NULL, 0}, 97 98 ['~'] = { NULL, unary, .u.op = OCPL}, 99 ['_'] = { NULL, unary, .u.op = OSNEG}, 100 ['\''] = { NULL, unary, .u.op = OADDR}, 101 ['@'] = { NULL, unary, .u.op = OPTR}, 102 ['g'] = { NULL, unary, .u.op = OCAST}, 103 ['p'] = { NULL, unary, .u.op = OPAR}, 104 ['n'] = { NULL, unary, .u.op = ONEG}, 105 106 ['a'] = { NULL, binary, .u.op = OAND}, 107 ['o'] = { NULL, binary, .u.op = OOR}, 108 ['.'] = { NULL, binary, .u.op = OFIELD}, 109 ['+'] = { NULL, binary, .u.op = OADD}, 110 ['-'] = { NULL, binary, .u.op = OSUB}, 111 ['*'] = { NULL, binary, .u.op = OMUL}, 112 ['%'] = { NULL, binary, .u.op = OMOD}, 113 ['/'] = { NULL, binary, .u.op = ODIV}, 114 ['l'] = { NULL, binary, .u.op = OSHL}, 115 ['r'] = { NULL, binary, .u.op = OSHR}, 116 ['<'] = { NULL, binary, .u.op = OLT}, 117 ['>'] = { NULL, binary, .u.op = OGT}, 118 ['['] = { NULL, binary, .u.op = OLE}, 119 [']'] = { NULL, binary, .u.op = OGE}, 120 ['='] = { NULL, binary, .u.op = OEQ}, 121 ['!'] = { NULL, binary, .u.op = ONE}, 122 ['&'] = { NULL, binary, .u.op = OBAND}, 123 ['|'] = { NULL, binary, .u.op = OBOR}, 124 ['^'] = { NULL, binary, .u.op = OBXOR}, 125 [','] = { NULL, binary, .u.op = OCOMMA}, 126 ['m'] = { NULL, builtin,.u.op = OBUILTIN}, 127 128 [':'] = { NULL, assign, .u.op = OASSIG}, 129 ['?'] = { NULL, ternary, .u.op = OASK}, 130 ['c'] = { NULL, call, .u.op = OCALL}, 131 ['z'] = { NULL, call, .u.op = OCALLE}, 132 133 ['#'] = { NULL,constant, .u.op = OCONST}, 134 135 ['j'] = { NULL, jump, .u.op = OJMP}, 136 ['y'] = { NULL, jump, .u.op = OBRANCH}, 137 ['h'] = { NULL, oreturn, .u.op = ORET}, 138 ['i'] = { NULL, NULL, .u.op = OINC}, 139 ['d'] = { NULL, NULL, .u.op = ODEC}, 140 141 ['b'] = { NULL, loop, .u.op = OBLOOP}, 142 ['e'] = { NULL, loop, .u.op = OELOOP}, 143 144 ['v'] = { NULL, ocase, .u.op = OCASE}, 145 ['f'] = { NULL, ocase, .u.op = ODEFAULT}, 146 ['t'] = { NULL, eswitch, .u.op = OESWITCH}, 147 ['s'] = { NULL, bswitch, .u.op = OBSWITCH}, 148 }; 149 150 static int sclass, inpars, ininit, endf, lineno; 151 static void *stack[STACKSIZ], **sp = stack; 152 153 static Node * 154 push(void *elem) 155 { 156 if (sp == &stack[STACKSIZ]) 157 error(ESTACKO); 158 return *sp++ = elem; 159 } 160 161 static void * 162 pop(void) 163 { 164 if (sp == stack) 165 error(ESTACKU); 166 return *--sp; 167 } 168 169 static int 170 empty(void) 171 { 172 return sp == stack; 173 } 174 175 static void 176 type(char *token, union tokenop u) 177 { 178 push(u.arg); 179 } 180 181 static void 182 composed(char *token, union tokenop u) 183 { 184 Symbol *sym; 185 186 sym = getsym(atoi(token+1)); 187 push(&sym->type); 188 } 189 190 static void 191 getname(char *t, union tokenop u) 192 { 193 push((*++t) ? xstrdup(t) : NULL); 194 } 195 196 static void 197 symbol(char *token, union tokenop u) 198 { 199 Node *np = node(u.op & 0xFF); 200 Symbol *sym = getsym(atoi(token+1)); 201 202 sclass = u.op >> 8; 203 np->u.sym = sym; 204 np->type = sym->type; 205 push(np); 206 } 207 208 static Type * 209 gettype(char *token) 210 { 211 struct decoc *dp; 212 213 dp = &optbl[*token]; 214 if (!dp->parse) 215 error(ESYNTAX); 216 (*dp->parse)(token, dp->u); 217 return pop(); 218 } 219 220 static void 221 constant(char *token, union tokenop u) 222 { 223 static char letters[] = "0123456789ABCDEF"; 224 Node *np; 225 TUINT v; 226 unsigned c; 227 228 ++token; 229 if (*token == '"') { 230 ++token; 231 np = node(OSTRING); 232 np->type.flags = STRF; 233 np->type.size = strlen(token); 234 np->type.align = int8type.align; 235 np->u.s = xstrdup(token); 236 } else { 237 np = node(OCONST); 238 np->type = *gettype(token++); 239 for (v = 0; c = *token++; v += c) { 240 v <<= 4; 241 c = strchr(letters, c) - letters; 242 } 243 np->u.i = v; 244 } 245 push(np); 246 } 247 248 static void 249 assign(char *token, union tokenop u) 250 { 251 int subop; 252 Node *np = node(u.op); 253 254 switch (subop = *++token) { 255 case '+': 256 case '-': 257 case '*': 258 case '%': 259 case '/': 260 case 'l': 261 case 'r': 262 case '&': 263 case '|': 264 case '^': 265 case 'i': 266 case 'd': 267 ++token; 268 subop = optbl[subop].u.op; 269 break; 270 default: 271 subop = 0; 272 break; 273 } 274 275 np->u.subop = subop; 276 np->type = *gettype(token); 277 np->right = pop(); 278 np->left = pop(); 279 push(np); 280 } 281 282 static void 283 ternary(char *token, union tokenop u) 284 { 285 Node *ask = node(OASK), *colon = node(OCOLON); 286 Type *tp = gettype(token+1); 287 288 colon->right = pop(); 289 colon->left = pop(); 290 291 ask->type = *tp; 292 ask->left = pop(); 293 ask->right = colon; 294 push(ask); 295 } 296 297 static void 298 eval(char *tok) 299 { 300 struct decoc *dp; 301 302 do { 303 dp = &optbl[*tok]; 304 if (!dp->parse) 305 break; 306 (*dp->parse)(tok, dp->u); 307 } while (tok = strtok(NULL, "\t\n")); 308 } 309 310 static int 311 nextline(void) 312 { 313 static char line[LINESIZ]; 314 size_t len; 315 int c; 316 void (*fun)(void); 317 318 repeat: 319 ++lineno; 320 if (!fgets(line, sizeof(line), stdin)) 321 return 0; 322 if ((len = strlen(line)) == 0 || line[0] == '\n') 323 goto repeat; 324 if (line[len-1] != '\n') 325 error(len < sizeof(line)-1 ? ELNBLNE : ELNLINE); 326 line[len-1] = '\0'; 327 328 c = *line; 329 eval(strtok(line, "\t\n")); 330 if ((fun = *optbl[c].eval) != NULL) 331 (*fun)(); 332 if (sp != stack) 333 error(ESTACKA); 334 return 1; 335 } 336 337 static void 338 oreturn(char *token, union tokenop u) 339 { 340 Node *np = node(u.op); 341 342 if (token = strtok(NULL, "\t\n")) 343 eval(token); 344 if (!empty()) 345 np->left = pop(); 346 push(np); 347 } 348 349 /* 350 * Move np (which is a OCASE/ODEFAULT/OESWITCH) to be contigous with 351 * the last switch table. It is a bit ugly to touch directly curstmt 352 * here, but moving this function to node.c is worse, because we are 353 * putting knowledge of how the text is parsed into the node 354 * represtation module. 355 */ 356 static void 357 waft(Node *np) 358 { 359 Node *lastcase, *next;; 360 struct swtch *cur; 361 extern Node *curstmt; 362 363 if (swp == swtbl) 364 error(EWTACKU); 365 366 cur = swp - 1; 367 lastcase = cur->last; 368 next = lastcase->next; 369 370 np->next = next; 371 np->prev = lastcase; 372 373 if (next) 374 next->prev = np; 375 lastcase->next = np; 376 377 if (curstmt == cur->last) 378 curstmt = np; 379 cur->last = np; 380 cur->nr++; 381 } 382 383 static void 384 bswitch(char *token, union tokenop u) 385 { 386 struct swtch *cur; 387 Node *np = node(u.op); 388 389 if (swp == &swtbl[NR_BLOCK]) 390 error(EWTACKO); 391 cur = swp++; 392 cur->nr = 0; 393 394 eval(strtok(NULL, "\t\n")); 395 np->left = pop(); 396 397 push(cur->first = cur->last = np); 398 } 399 400 static void 401 eswitch(char *token, union tokenop u) 402 { 403 struct swtch *cur; 404 405 if (swp == swtbl) 406 error(EWTACKU); 407 jump(token, u); 408 waft(pop()); 409 cur = --swp; 410 cur->first->u.i = cur->nr; 411 } 412 413 static void 414 ocase(char *token, union tokenop u) 415 { 416 jump(token, u); 417 waft(pop()); 418 } 419 420 static void 421 jump(char *token, union tokenop u) 422 { 423 Node *aux, *np = node(u.op); 424 425 eval(strtok(NULL, "\t\n")); 426 427 if (u.op == OBRANCH || u.op == OCASE) 428 np->left = pop(); 429 aux = pop(); 430 np->u.sym = aux->u.sym; 431 delnode(aux); 432 push(np); 433 } 434 435 static void 436 loop(char *token, union tokenop u) 437 { 438 push(node(u.op)); 439 } 440 441 static void 442 unary(char *token, union tokenop u) 443 { 444 Node *np = node(u.op); 445 446 np->type = *gettype(token+1); 447 np->left = pop(); 448 np->right = NULL; 449 push(np); 450 } 451 452 static void 453 call(char *token, union tokenop u) 454 { 455 Node *np, *par, *fun = node(u.op); 456 457 for (par = NULL;; par = np) { 458 np = pop(); 459 if (np->op != OPAR) 460 break; 461 np->right = par; 462 } 463 464 fun->type = *gettype(token+1); 465 fun->left = np; 466 fun->right = par; 467 push(fun); 468 } 469 470 static void 471 builtin(char *token, union tokenop u) 472 { 473 Node *np = node(u.op); 474 char *name; 475 unsigned subop, nchilds; 476 477 np->type = *gettype(token+1); 478 name = pop(); 479 480 if (!strcmp("__builtin_va_arg", name)) { 481 nchilds = 1; 482 subop = BVA_ARG; 483 } else if (!strcmp("__builtin_va_start", name)) { 484 nchilds = 2; 485 subop = BVA_START; 486 } else if (!strcmp("__builtin_va_end", name)) { 487 nchilds = 1; 488 subop = BVA_END; 489 } else if (!strcmp("__builtin_va_copy", name)) { 490 nchilds = 2; 491 subop = BVA_COPY; 492 } else { 493 error(EBBUILT);; 494 } 495 496 np->u.subop = subop; 497 np->right = (nchilds == 2) ? pop() : NULL; 498 np->left = (nchilds != 0) ? pop() : NULL; 499 500 free(name); 501 push(np); 502 } 503 504 static void 505 binary(char *token, union tokenop u) 506 { 507 Node *np = node(u.op); 508 509 np->type = *gettype(token+1); 510 np->right = pop(); 511 np->left = pop(); 512 push(np); 513 } 514 515 static void 516 binit(char *token, union tokenop u) 517 { 518 ininit = 1; 519 } 520 521 static void 522 einit(char *token, union tokenop u) 523 { 524 ininit = 0; 525 endinit(); 526 } 527 528 static void 529 endpars(void) 530 { 531 if (!curfun || !inpars) 532 error(ESYNTAX); 533 inpars = 0; 534 } 535 536 static void 537 aggregate(void) 538 { 539 Node *align, *size; 540 char *name; 541 Type *tp; 542 Symbol *sym; 543 544 align = pop(); 545 size = pop(); 546 name = pop(); 547 tp = pop(); 548 549 tp->size = size->u.i; 550 tp->align = align->u.i; 551 tp->flags = AGGRF; 552 /* 553 * type is the first field of Symbol so we can obtain the 554 * address of the symbol from the address of the type. 555 * We have to do this because composed returns the pointer 556 * to the type, but in this function we also need the 557 * symbol to store the name. 558 */ 559 sym = (Symbol *) tp; 560 sym->name = name; 561 562 delnode(align); 563 delnode(size); 564 } 565 566 static void 567 array(void) 568 { 569 Type *tp, *base; 570 Node *size; 571 572 size = pop(); 573 base = pop(); 574 tp = pop(); 575 tp->size = size->u.i * base->size; /* FIXME check for overflow */ 576 tp->align = base->align; 577 578 delnode(size); 579 } 580 581 static void 582 decl(Symbol *sym) 583 { 584 Type *tp = &sym->type; 585 586 if (tp->flags & FUNF) { 587 lastfun = sym; 588 } else { 589 switch (sym->kind) { 590 case SEXTRN: 591 case SGLOB: 592 case SPRIV: 593 case SLOCAL: 594 defglobal(sym); 595 break; 596 case SAUTO: 597 case SREG: 598 if (!curfun) 599 error(ESYNTAX); 600 ((inpars) ? defpar : defvar)(sym); 601 break; 602 default: 603 abort(); 604 } 605 } 606 } 607 608 static void 609 vardecl(void) 610 { 611 Type *tp, *rp; 612 Node *np; 613 Symbol *sym; 614 char *name; 615 616 name = pop(); 617 tp = pop(); 618 if (tp->flags & FUNF) 619 rp = pop(); 620 np = pop(); 621 622 sym = np->u.sym; 623 /* 624 * We have to free sym->name because in tentative declarations 625 * we can have multiple declarations of the same symbol, and in 626 * this case our parser will allocate twice the memory 627 */ 628 free(sym->name); 629 sym->name = name; 630 sym->type = *tp; 631 if (tp->flags & FUNF) 632 sym->rtype = *rp; 633 sym->kind = sclass; 634 635 if (ininit) 636 sym->type.flags |= INITF; 637 decl(sym); 638 delnode(np); 639 } 640 641 static void 642 flddecl(void) 643 { 644 Node *off, *np; 645 char *name; 646 Type *tp; 647 Symbol *sym; 648 649 off = pop(); 650 name = pop(); 651 tp = pop(); 652 np = pop(); 653 654 sym = np->u.sym; 655 sym->u.off = off->u.i; 656 sym->name = name; 657 sym->type = *tp; 658 659 delnode(np); 660 delnode(off); 661 } 662 663 static void 664 labeldcl(void) 665 { 666 Node *np; 667 Symbol *sym; 668 669 np = pop(); 670 np->op = ONOP; 671 sym = np->u.sym; 672 sym->kind = SLABEL; 673 sym->u.stmt = np; 674 np->label = sym; 675 addstmt(np, SETCUR); 676 } 677 678 static void 679 stmt(void) 680 { 681 Node *np; 682 683 if (empty()) 684 return; 685 np = pop(); 686 if (ininit) { 687 data(np); 688 deltree(np); 689 return; 690 } 691 addstmt(np, SETCUR); 692 } 693 694 static void 695 beginfun(void) 696 { 697 curfun = lastfun; 698 inpars = 1; 699 pushctx(); 700 addstmt(node(OBFUN), SETCUR); 701 } 702 703 static void 704 endfun(void) 705 { 706 endf = 1; 707 addstmt(node(OEFUN), SETCUR); 708 } 709 710 void 711 parse(void) 712 { 713 cleannodes(); /* remove code of previous function */ 714 popctx(); /* remove context of previous function */ 715 curfun = NULL; 716 endf = 0; 717 718 while (!endf && nextline()) 719 /* nothing */; 720 if (ferror(stdin)) 721 error(EFERROR, strerror(errno)); 722 }