scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 0fba7162733650be3f0dab5c60ec4513b1775201
parent a05cda6f2efb61fe4b42e43a2ee76f80492a4e27
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Tue, 26 May 2015 22:14:51 +0200

Rewrite Input system

The Input system had a lot of problems and bugs, and it was very badly written.
This version is still slow, but it is working now.

Diffstat:
Mcc1/cc1.h | 9++++++---
Mcc1/cpp.c | 63++++++++++++++++++++++++++++++++-------------------------------
Mcc1/lex.c | 166++++++++++++++++++++++++++++++++++++++++++++++++-------------------------------
Mcc1/main.c | 5++---
4 files changed, 141 insertions(+), 102 deletions(-)

diff --git a/cc1/cc1.h b/cc1/cc1.h @@ -256,6 +256,7 @@ extern Type *mktype(Type *tp, unsigned op, short nelem, void *data); /* symbol.c */ extern void dumpstab(char *msg); extern Symbol *lookup(unsigned ns); +extern Symbol *nextsym(Symbol *sym, unsigned ns); extern Symbol *install(unsigned ns); extern Symbol *newsym(unsigned ns); extern void pushctx(void), popctx(void); @@ -278,10 +279,10 @@ extern char *getfname(void); extern unsigned short getfline(void); extern void setfname(char *name); extern void setfline(unsigned short line); -extern char *addinput(char *fname, Symbol *sym); +extern bool addinput(char *fname, Symbol *sym, char *str); extern void setnamespace(int ns); extern void setsafe(int type); -extern void initcpp(void); +extern void ilex(char *fname); #define accept(t) ((yytoken == (t)) ? next() : 0) /* code.c */ @@ -296,7 +297,8 @@ extern void freetree(Node *np); extern Node *expr(void), *negate(Node *np); /* cpp.c */ -extern bool preprocessor(char *s); +extern void icpp(void); +extern bool cpp(char *s); extern bool expand(Symbol *sym); /* @@ -307,6 +309,7 @@ extern char yytext[]; extern unsigned yytoken; extern unsigned short yylen; extern int cppoff; +extern unsigned cppctx; extern Type *voidtype, *pvoidtype, *booltype, *uchartype, *chartype, diff --git a/cc1/cpp.c b/cc1/cpp.c @@ -13,13 +13,12 @@ /* TODO: preprocessor error must not rise recover */ -static char *argp; +static char *argp, *macroname; static unsigned arglen; -static unsigned numif; -static Symbol *lastmacro; static Symbol *symline, *symfile; +static unsigned char ifstatus[NR_COND]; -unsigned char ifstatus[NR_COND]; +unsigned cppctx; static Symbol * defmacro(char *s) @@ -33,7 +32,7 @@ defmacro(char *s) } void -initcpp(void) +icpp(void) { static char sdate[17], stime[14]; struct tm *tm; @@ -109,11 +108,11 @@ nextcpp(void) next(); if (yytoken == EOFTOK) { error("unterminated argument list invoking macro \"%s\"", - lastmacro->name); + macroname); } if (yylen + 1 > arglen) { error("argument 
overflow invoking macro \"%s\"", - lastmacro->name); + macroname); } memcpy(argp, yytext, yylen); argp += yylen; @@ -177,10 +176,10 @@ parsepars(char *buffer, char **listp, int nargs) } if (n == NR_MACROARG) - error("too much parameters in macro \"%s\"", lastmacro->name); + error("too much parameters in macro \"%s\"", macroname); if (n != nargs) { error("macro \"%s\" passed %d arguments, but it takes %d", - lastmacro->name, n, nargs); + macroname, n, nargs); } return 1; } @@ -193,7 +192,7 @@ parsepars(char *buffer, char **listp, int nargs) * is the macro definition, where @dd@ indicates the * parameter number dd */ -#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+5) +#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2) bool expand(Symbol *sym) { @@ -203,20 +202,18 @@ expand(Symbol *sym) if (sym == symfile) { sprintf(buffer, "\"%s\"", getfname()); - strcpy(addinput(NULL, symfile), buffer); - return 1; + goto add_macro; } if (sym == symline) { sprintf(buffer, "%d", getfline()); - strcpy(addinput(NULL, symline), buffer); - return 1; + goto add_macro; } - lastmacro = sym; + macroname = sym->name; if (!parsepars(buffer, arglist, atoi(s))) return 0; - bp = addinput(NULL, sym); + bp = buffer; len = INPUTSIZ-1; for (s += 3; c = *s; ++s) { if (c != '@') { @@ -237,10 +234,12 @@ expand(Symbol *sym) } } *bp = '\0'; +add_macro: + addinput(NULL, sym, buffer); return 1; expansion_too_long: - error("expansion of macro \"%s\" is too long", lastmacro->name); + error("expansion of macro \"%s\" is too long", macroname); } #undef BUFSIZE @@ -382,7 +381,7 @@ define(char *s) static void include(char *s) { - char **bp, delim, c, *p, *file, buff[FILENAME_MAX]; + char **bp, delim, c, *p, *file, path[FILENAME_MAX]; char *sysinclude[] = { PREFIX"/include/", PREFIX"/local/include/", @@ -402,7 +401,7 @@ include(char *s) if (!string(&s, &file, delim)) goto bad_include; cleanup(s); - if (delim == '"' && addinput(file, NULL)) + if (delim == '"' && 
addinput(file, NULL, NULL)) return; filelen = strlen(file); @@ -410,9 +409,9 @@ include(char *s) dirlen = strlen(*bp); if (dirlen + filelen > FILENAME_MAX) continue; - memcpy(buff, *bp, dirlen); - memcpy(buff+dirlen, file, filelen); - if (addinput(buff, NULL)) + memcpy(path, *bp, dirlen); + memcpy(path+dirlen, file, filelen); + if (addinput(path, NULL, NULL)) return; } error("included file '%s' not found", file); @@ -472,13 +471,15 @@ static void ifclause(char *s, int isdef) { Symbol *sym; - unsigned n = numif++; + unsigned n = cppctx++; - if (numif == NR_COND-1) + if (cppctx == NR_COND-1) error("too much nesting levels of conditional inclusion"); - if (!iden(&s)) - error("#ifdef clause must have an identifier as parameter"); + if (!iden(&s)) { + error("no macro name given in #%s directive", + (isdef) ? "ifdef" : "ifndef"); + } cleanup(s); sym = lookup(NS_CPP); @@ -501,10 +502,10 @@ ifndef(char *s) static void endif(char *s) { - if (numif == 0) + if (cppctx == 0) error("#endif without #if"); cleanup(s); - if (!ifstatus[--numif]) + if (!ifstatus[--cppctx]) --cppoff; } @@ -513,10 +514,10 @@ elseclause(char *s) { struct ifstatus *ip; - if (numif == 0) + if (cppctx == 0) error("#else without #if"); cleanup(s); - cppoff += (ifstatus[numif-1] ^= 1) ? -1 : 1; + cppoff += (ifstatus[cppctx-1] ^= 1) ? 
-1 : 1; } static void @@ -532,7 +533,7 @@ undef(char *s) } bool -preprocessor(char *s) +cpp(char *s) { static struct { char *name; diff --git a/cc1/lex.c b/cc1/lex.c @@ -1,17 +1,16 @@ +#include <ctype.h> #include <errno.h> #include <inttypes.h> #include <setjmp.h> #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <ctype.h> #include "../inc/sizes.h" #include "../inc/cc.h" #include "cc1.h" - typedef struct input Input; struct input { @@ -30,62 +29,103 @@ unsigned short yylen; int cppoff; static unsigned lex_ns = NS_IDEN; -static int safe, eof, incomment; +static int safe, eof; static Input *input; -char * -addinput(char *fname, Symbol *sym) +static void +allocinput(char *fname, FILE *fp, char *buff) { Input *ip; + + ip = xmalloc(sizeof(Input)); + ip->fname = fname; + ip->next = input; + ip->macro = NULL; + ip->begin = ip->line = buff; + ip->nline = (fp) ? 0 : input->nline; + ip->fp = fp; + input = ip; +} + +void +ilex(char *fname) +{ FILE *fp; - unsigned short nline = 0; - /* TODO: Add a field in input to see easier which is the case - where we are */ + /* + * we can use static file names because this Input is not going + * to be freed ever + */ + if (!fname) { + fp = stdin; + fname = "<stdin>"; + } else { + if ((fp = fopen(fname, "r")) == NULL) + die("error opening output:%s", strerror(errno)); + fname = fname; + } + allocinput(fname, fp, xmalloc(INPUTSIZ)); + *input->begin = '\0'; +} + +bool +addinput(char *fname, Symbol *sym, char *str) +{ + FILE *fp; + char flags = 0; if (fname) { + /* + * this call comes from an include clause, so we reuse + * the buffer from the calling Input + */ if ((fp = fopen(fname, "r")) == NULL) - return NULL; + return 0; fname = xstrdup(fname); - } else if (!input) { - fp = stdin; - fname = xstrdup("<stdin>"); + str = input->line; + *str = '\0'; } else { + /* + * This call comes from a macro expansion, so we have + * to duplicate the input string because it is the + * expansion of the macro in a temporal buffer + */ 
fname = input->fname; - nline = input->nline; fp = NULL; + str = xstrdup(str); } - - ip = xmalloc(sizeof(Input)); - ip->fname = fname; - ip->next = input; - ip->macro = sym; - ip->begin = ip->p = ip->line = xmalloc(INPUTSIZ); - *ip->begin = '\0'; - ip->nline = nline; - ip->fp = fp; - input = ip; - return input->line; + allocinput(fname, fp, str); + input->macro = sym; + return 1; } static void delinput(void) { - Input *ip = input; - FILE *fp = ip->fp; + Input *ip; - if (!ip->next) - eof = 1; - if (fp) { - if (fclose(fp)) - die("error reading from input file '%s'", ip->fname); - if (eof) +repeat: + if (input->fp) { + /* include input */ + if (fclose(input->fp)) + die("error reading from input file '%s'", input->fname); + if (!input->next) { + eof = 1; return; - free(ip->fname); + } + free(input->fname); + } else { + /* macro input */ + free(input->line); } - input = ip->next; - free(ip->line); + ip = input; + input = input->next; free(ip); + + if (*input->begin != '\0') + return; + if (!input->fp) + goto repeat; } void @@ -120,13 +160,10 @@ readchar(void) FILE *fp; repeat: - while (!input->fp || (feof(input->fp) && !eof)) + if (feof(input->fp)) delinput(); - if (eof) { - if (incomment) - error("unterminated comment"); + if (eof) return '\0'; - } fp = input->fp; if ((c = getc(fp)) == '\\') { @@ -135,7 +172,7 @@ repeat: ungetc(c, fp); c = '\\'; } else if (c == EOF) { - goto repeat; + c = '\n'; } else if (c == '\n' && ++input->nline == 0) { die("error:input file '%s' too long", getfname()); } @@ -143,31 +180,32 @@ repeat: } static void -comment(char c) -{ - /* TODO: Ensure that incomment == 0 after a recovery */ - incomment = 1; - if (c == '*') { - for (;;) { - while (readchar() != '*') - /* nothing */; +comment(char type) +{ + if (type == '*') { + while (!eof) { + while (readchar() != '*' && !eof) + /* nothing */ if (readchar() == '/') break; } } else { - while (readchar() != '\n') + while (readchar() != '\n' && !eof) /* nothing */; } - incomment = 0; + if (eof) + 
error("unterminated comment"); } -static void +static bool readline(void) { char *bp, *lim; char c, peekc = 0; - lim = input->line + INPUTSIZ; + if (eof) + return 0; + lim = &input->line[INPUTSIZ-1]; for (bp = input->line; bp != lim; *bp++ = c) { c = (peekc) ? peekc : readchar(); peekc = 0; @@ -180,13 +218,14 @@ readline(void) c = '/'; } else { comment(c); - c = ' '; + break; } } if (bp == lim) error("line %u too big in file '%s'", getfline(), getfname()); *bp = '\0'; + return 1; } bool @@ -195,19 +234,14 @@ moreinput(void) char *p; repeat: - if (eof) - return 0; - while (*input->begin == '\0' && !input->fp) { + if (!input->fp) delinput(); - if (*input->begin) - return 1; - } - - *(p = input->line) = '\0'; - readline(); + if (!readline()) + return 0; + p = input->line; while (isspace(*p)) ++p; - if (*p == '\0' || preprocessor(p) || cppoff) + if (*p == '\0' || cpp(p) || cppoff) goto repeat; input->p = input->begin = p; return 1; @@ -549,6 +583,8 @@ next(void) repeat: skipspaces(); if (eof) { + if (cppctx) + error("#endif expected"); strcpy(yytext, "<EOF>"); return yytoken = EOFTOK; } @@ -624,7 +660,7 @@ discard(void) goto jump; break; } - if (!moreinput()) + if (c == '\0' && !moreinput()) exit(-1); } jump: diff --git a/cc1/main.c b/cc1/main.c @@ -63,11 +63,10 @@ main(int argc, char *argv[]) if (argc > 1) usage(); - initcpp(); + icpp(); ikeywords(); + ilex(*argv); - if (!addinput(*argv, NULL)) - die("error opening input file '%s'", *argv); for (next(); yytoken != EOFTOK; extdecl()) /* nothing */;