scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit e1b218ec5e6cc2745a43c5244bf6c2c0481b3e07
parent dbacd2db0387ac4dbd726aa48b7a5bf765d69d4e
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Fri, 17 Jul 2015 19:30:37 +0200

Rewrite and simplify the embedded preprocessor

The preprocessor dealt with macro expansions in the same way as
includes. This was done as a first attempt to avoid
recursion, but it only avoided direct recursion. Implementing the
algorithm described in ANSI C is really complex, and the idea
of the embedded preprocessor was to have a fast and simple
preprocessor, so it is better to drop any attempt to
detect recursion. This simplifies the input/output functions
a lot.
This version of the preprocessor uses the lexer of the compiler,
so the code is not as hardcore as the original, which did all
the parsing directly with char pointers.

Diffstat:
Mcc1/cc1.h | 38+++++++++++++++++++++++++++++++-------
Mcc1/cpp.c | 651+++++++++++++++++++++++++++++++++----------------------------------------------
Mcc1/error.c | 5+++--
Mcc1/lex.c | 209++++++++++++++++++++++++++++---------------------------------------------------
Mcc1/symbol.c | 32+++++++++++++++++++++++++-------
5 files changed, 409 insertions(+), 526 deletions(-)

diff --git a/cc1/cc1.h b/cc1/cc1.h @@ -11,6 +11,8 @@ typedef struct type Type; typedef struct symbol Symbol; typedef struct caselist Caselist; typedef struct node Node; +typedef struct input Input; + struct type { unsigned char op; /* type builder operator */ @@ -70,6 +72,14 @@ struct yystype { unsigned char token; }; +struct input { + char *fname; + void *fp; + char *line, *begin, *p; + struct input *next; + unsigned short nline; +}; + /* * Definition of enumerations */ @@ -96,6 +106,7 @@ enum { NS_LABEL, NS_CPP, NS_KEYWORD, + NS_CPPCLAUSES, NS_STRUCTS }; @@ -111,6 +122,13 @@ enum { ISEXTERN =128 }; + +/* lexer mode, compiler or preprocessor directive */ +enum { + CCMODE, + CPPMODE +}; + /* input tokens */ enum tokens { TQUALIFIER = 128, @@ -177,6 +195,14 @@ enum tokens { CONTINUE, BREAK, RETURN, + DEFINE, + INCLUDE, + LINE, + PRAGMA, + ERROR, + IFDEF, + IFNDEF, + UNDEF, EOFTOK }; @@ -275,11 +301,7 @@ extern unsigned next(void); extern bool moreinput(void); extern void expect(unsigned tok); extern void discard(void); -extern char *getfname(void); -extern unsigned short getfline(void); -extern void setfname(char *name); -extern void setfline(unsigned short line); -extern bool addinput(char *fname, Symbol *sym, char *str); +extern bool addinput(char *fname); extern void setnamespace(int ns); extern void setsafe(int type); extern void ilex(char *fname); @@ -298,8 +320,8 @@ extern Node *expr(void), *negate(Node *np); /* cpp.c */ extern void icpp(void); -extern bool cpp(char *s); -extern int expand(Symbol *sym); +extern bool cpp(void); +extern bool expand(char *begin, Symbol *sym); /* * Definition of global variables @@ -310,6 +332,8 @@ extern unsigned yytoken; extern unsigned short yylen; extern int cppoff, disexpand; extern unsigned cppctx; +extern Input *input; +extern int lexmode; extern Type *voidtype, *pvoidtype, *booltype, *uchartype, *chartype, diff --git a/cc1/cpp.c b/cc1/cpp.c @@ -19,7 +19,7 @@ static char *argp, *macroname; static unsigned arglen; static 
Symbol *symline, *symfile; static unsigned char ifstatus[NR_COND]; -static int paramerr; +static Type *charptype; unsigned cppctx; int disexpand; @@ -56,87 +56,26 @@ icpp(void) symfile = defmacro("__FILE__"); } -static bool -iden(char **str) -{ - char c, *bp, *s = *str; - - if (!isalpha(c = *s) && c != '_') - return 0; - for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = c) { - if ((c = *s) == '\0' || !isalnum(c) && c != '_') - break; - ++s; - } - if (bp == &yytext[IDENTSIZ]) { - printerr("identifier too long in preprocessor"); - return 0; - } - *bp = '\0'; - - while (isspace(*s)) - ++s; - - *str = s; - return 1; -} - -static bool -string(char **input, char **str, char delim) -{ - char c, *s = *input; - - if (str) - *str = s; - - while ((c = *s) && c != delim) - ++s; - if (c == '\0') - return 0; - *s++ = '\0'; - *input = s; - - return 1; -} - -static void -cleanup(char *s) -{ - while (isspace(*s)) - ++s; - if (*s != '\0') - printerr("trailing characters after preprocessor directive"); -} - static void nextcpp(void) { - next(); - if (yytoken == EOFTOK) { - printerr("unterminated argument list invoking macro \"%s\"", - macroname); - goto mark_error; - } - if (yylen + 1 > arglen) { - printerr("argument overflow invoking macro \"%s\"", - macroname); - goto mark_error; - } - memcpy(argp, yytext, yylen); - argp += yylen; - *argp++ = ' '; - arglen -= yylen + 1; - return; - -mark_error: - paramerr = 1; - yytoken = 0; + next(); + if (yytoken == EOFTOK) + error("unterminated argument list invoking macro \"%s\"", + macroname); + if (yylen + 1 > arglen) + error("argument overflow invoking macro \"%s\"", + macroname); + memcpy(argp, yytext, yylen); + argp += yylen; + *argp++ = ' '; + arglen -= yylen + 1; } static void paren(void) { - while (!paramerr) { + for (;;) { nextcpp(); switch (yytoken) { case ')': @@ -151,7 +90,7 @@ paren(void) static void parameter(void) { - while (!paramerr) { + for (;;) { nextcpp(); switch (yytoken) { case ')': @@ -172,90 +111,55 @@ parsepars(char 
*buffer, char **listp, int nargs) int n; if (nargs == -1) - return 1; - - if (ahead() != '(') + return -1; + if (ahead() != '(' && nargs > 0) return 0; disexpand = 1; next(); - paramerr = n = 0; + n = 0; argp = buffer; arglen = INPUTSIZ; - if (ahead() != ')') { + if (yytoken != ')') { do { *listp++ = argp; parameter(); - } while (!paramerr && ++n < NR_MACROARG && yytoken == ','); + } while (++n < NR_MACROARG && yytoken == ','); } + if (yytoken != ')') + error("incorrect macro function alike invocation"); disexpand = 0; - if (paramerr) - return -1; - if (n == NR_MACROARG) { - printerr("too much parameters in macro \"%s\"", macroname); - return -1; - } + if (n == NR_MACROARG) + error("too much parameters in macro \"%s\"", macroname); if (n != nargs) { - printerr("macro \"%s\" passed %d arguments, but it takes %d", + error("macro \"%s\" passed %d arguments, but it takes %d", macroname, n, nargs); - return -1; } return 1; } -/* - * sym->u.s is a string with the following format: - * dd#string - * where dd is the number of arguments of the macro - * (-1 if it is a macro without arguments), and string - * is the macro definition, where @dd@ indicates the - * parameter number dd - */ -#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? 
INPUTSIZ : FILENAME_MAX+2) -int -expand(Symbol *sym) +static void +copymacro(char *bp, char *s, size_t bufsiz, char *arglist[]) { - unsigned len; - int r, n; - char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE]; - char prevc, c, *bp, *lim, *arg, *s = sym->u.s; + char prevc, c, *arg; - fprintf(stderr, "macro %s:%s\n", sym->name, sym->u.s); - if (sym == symfile) { - sprintf(buffer, "\"%s\"", getfname()); - goto add_macro; - } - if (sym == symline) { - sprintf(buffer, "%d", getfline()); - goto add_macro; - } - - macroname = sym->name; - if ((r = parsepars(arguments, arglist, atoi(s))) < 1) - return r; - - for (n = 0; n < atoi(s); ++n) - fprintf(stderr, "PAR%d:%s\n", n, arglist[n]); - - len = INPUTSIZ-1; - bp = buffer; - for (prevc = '\0', s += 3; c = *s; prevc = c, ++s) { + for (prevc = '\0'; c = *s; prevc = c, ++s) { if (c != '@') { if (c == '#') continue; - if (len-- == 0) + if (bufsiz-- == 0) goto expansion_too_long; *bp++ = c; } else { - unsigned size; + size_t size; if (prevc == '#') - len -= 2; + bufsiz -= 2; arg = arglist[atoi(++s)]; size = strlen(arg); - if (size > len) + if (size > bufsiz) goto expansion_too_long; if (prevc == '#') *bp++ = '"'; @@ -263,190 +167,179 @@ expand(Symbol *sym) bp += size; if (prevc == '#') *bp++ = '"'; - len -= size; + bufsiz -= size; s += 2; } } - *bp = '\0'; - fprintf(stderr, "macro expanded:%s\n", buffer); -add_macro: - addinput(NULL, sym, buffer); - return 1; + *bp = '\0'; + + return; expansion_too_long: - printerr("expansion of macro \"%s\" is too long", macroname); - return -1; + error("expansion of macro \"%s\" is too long", macroname); } -#undef BUFSIZE -/* - * Parse an argument list (par0, par1, ...) and creates - * an array with pointers to all the arguments in the - * list - */ -static char * -parseargs(char *s, char *args[NR_MACROARG], int *nargs) +#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? 
INPUTSIZ : FILENAME_MAX+2) +bool +expand(char *begin, Symbol *sym) { - int n; size_t len; - char *endp, c; + int n; + char *s = sym->u.s; + char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE]; - n = -1; - if (*s != '(') - goto set_nargs; - n = 0; - while (isspace(*s++)) - /* nothing */; - if (*s == ')') - goto set_nargs; - - for (n = 1; n <= NR_MACROARG; ++n) { - while (isspace(*s)) - ++s; - if (!isalpha(*s) && *s != '_') { - printerr("macro arguments must be identifiers"); - return NULL; - } - for (endp = s+1; isalnum(*endp) || *endp == '_'; ++endp) - /* nothing */; - if ((len = endp - s) > IDENTSIZ) { - printerr("macro argument too long"); - return NULL; - } - *args++ = s; - for (s = endp; isspace(*s); ++s) - *s = '\0'; - c = *s; - *s++ = '\0'; - if (c == ')') - break; - if (c == ',') { - continue; - } else { - printerr("macro parameters must be comma-separated"); - return NULL; - } + fprintf(stderr, "macro '%s':%s\n", sym->name, sym->u.s); + if (sym == symfile) { + sprintf(buffer, "\"%s\"", input->fname); + goto print_subs; } - if (n > NR_MACROARG) { - printerr("too much parameters in macro"); - return NULL; + if (sym == symline) { + sprintf(buffer, "%d", input->line); + goto print_subs; } -set_nargs: - *nargs = n; - return s; + macroname = sym->name; + if (!parsepars(arguments, arglist, atoi(s))) + return 0; + for (n = 0; n < atoi(s); ++n) + fprintf(stderr, "PAR%d:%s\n", n, arglist[n]); + + copymacro(buffer, s+3, INPUTSIZ-1, arglist); + +print_subs: + fprintf(stderr, "macro '%s' expanded to :'%s'\n", macroname, buffer); + len = strlen(buffer); + + /* cut macro invocation */ + memmove(begin, input->p, input->p - begin); + memmove(begin + len, begin, len); + + /* paste macro expansion */ + memcpy(begin, buffer, len); + input->p = input->begin = begin; + + return 1; } +#undef BUFSIZE -/* - * Copy a string define, and substitute formal arguments of the - * macro into strings in the form @XX@, where XX is the position - * of the argument in the argument 
list. - */ -static bool -copydefine(char *s, char *args[], char *buff, int bufsiz, int nargs) +static int +getpars(Symbol *args[NR_MACROARG]) { - int n; - size_t ncopy; - char arroba[6], *p, **bp, c, prevc; - - for (prevc = '\0'; c = *s++; prevc = c) { - if (!isalpha(c) && c != '_' || nargs < 1) { - if (bufsiz-- == 0) - goto too_long; - if (prevc == '#') - goto bad_stringer; - *buff++ = c; - if (c != '#') - continue; - while (isspace(*++s)) - /* nothing */; + int n = -1; + char *err; + + if (!accept('(')) + return n; + ++n; + if (accept(')')) + return n; + + do { + if (n == NR_MACROARG) { + err = "too much parameters in macro"; + goto popctx_and_error; } - /* found an identifier, is it one of the macro arguments? */ - for (p = s; isalnum(c = *p) || c == '_'; ++p) - /* nothing */; - ncopy = p - --s; - bp = args; - for (n = 0; n < nargs; ++n) { - if (strncmp(s, *bp++, ncopy)) - continue; - sprintf(arroba, "@%02d@", n); - s = arroba, ncopy = 4; - break; + if (yytoken != IDEN) { + err = "macro arguments must be identifiers"; + goto popctx_and_error; } - if (n == nargs && prevc == '#') - goto bad_stringer; - if ((bufsiz -= ncopy) < 0) - goto too_long; - memcpy(buff, s, ncopy); - buff += ncopy, s = p; - } - if (bufsiz == 0) - goto too_long; - *buff = '\0'; - return 1; + args[n++] = yylval.sym; + next(); + } while (accept(',')); + expect(')'); -bad_stringer: - printerr("'#' is not followed by a macro parameter"); - return 0; -too_long: - printerr("macro definition too long"); - return 0; + return n; + +popctx_and_error: + popctx(); + error(err); } -static char * -mkdefine(char *s) +static void +getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) { - int nargs; - char *args[NR_MACROARG], buff[LINESIZ+1]; - - if ((s = parseargs(s, args, &nargs)) == NULL) - return NULL; - sprintf(buff, "%02d#", nargs); + Symbol **argp; + char *err; + size_t len; + int prevc = 0, ispar; + + for (;;) { + ispar = 0; + if (yytoken == IDEN) { + for (argp = args; argp < 
&args[nargs]; ++argp) { + if (*argp == yylval.sym) + break; + } + if (argp != &args[nargs]) { + sprintf(yytext, "@%02d@", argp - args); + ispar = 1; + } + } + if (prevc == '#' && !ispar) + goto bad_stringer; + if (yytoken == EOFTOK) + break; - while (isspace(*s)) - ++s; + if ((len = strlen(yytext)) >= bufsiz) { + err = "too long macro"; + goto popctx_and_error; + } + memcpy(bp, yytext, len); + bp += len; + bufsiz -= len; + if ((prevc = yytoken) != '#') { + bufsiz; + *bp++ = ' '; + } + next(); + } + *bp = '\0'; + return; - if (*s == '\0') - buff[0] = '\0'; - else if (!copydefine(s, args, buff+3, LINESIZ-3, nargs)) - return NULL; - return xstrdup(buff); +bad_stringer: + err = "'#' is not followed by a macro parameter"; +popctx_and_error: + popctx(); + error(err); } static void -define(char *s) +define(void) { - char *t; - Symbol *sym; + Symbol *sym,*args[NR_MACROARG]; + char buff[LINESIZ+1]; + int n; if (cppoff) return; - if (!iden(&s)) { - printerr("#define must have an identifier as parameter"); - return; - } - - for (t = s + strlen(s) + 1; isspace(*--t); *t = '\0') - /* nothing */; - if ((s = mkdefine(s)) == NULL) - return; - - sym = lookup(NS_CPP); + if (yytoken != IDEN) + error("macro names must be identifiers"); + sym = yylval.sym; if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) { warn("'%s' redefined", yytext); free(sym->u.s); + } else if (sym->ns != NS_CPP) { + sym = lookup(NS_CPP); } sym->flags |= ISDEFINED; - sym->ns = NS_CPP; - sym->ctx = UCHAR_MAX; - sym->u.s = s; + + pushctx(); + + next(); + n = getpars(args); + sprintf(buff, "%02d#", n); + getdefs(args, n, buff+3, LINESIZ-3); + sym->u.s = xstrdup(buff); + fprintf(stderr, "Defining macro '%s'='%s'\n", sym->name, buff); + + popctx(); } static void -include(char *s) +include(void) { - char **bp, delim, c, *p, *file, path[FILENAME_MAX]; - char *sysinclude[] = { + char **bp, *p, file[FILENAME_MAX], path[FILENAME_MAX]; + static char *sysinclude[] = { PREFIX"/include/", PREFIX"/local/include/", NULL @@ 
-455,193 +348,199 @@ include(char *s) if (cppoff) return; - if ((c = *s++) == '>') - delim = '>'; - else if (c == '"') - delim = '"'; - else - goto bad_include; - - if (!string(&s, &file, delim)) + switch (*yytext) { + case '<': + if ((p = strchr(input->begin, '>')) == NULL) + goto bad_include; + *p = '\0'; + if (p - input->begin >= FILENAME_MAX) + goto too_long; + strcpy(file, input->begin); + input->begin = input->p = p+1; + next(); + break; + case '"': + if ((p = strchr(yytext + 1, '"')) == NULL) + goto bad_include; + *p = '\0'; + if (p - yytext + 1 >= FILENAME_MAX) + goto too_long; + strcpy(file, yytext + 1); + next(); + if (addinput(file)) + return; + break; + default: goto bad_include; - if (delim == '"' && addinput(file, NULL, NULL)) - return; + } filelen = strlen(file); for (bp = sysinclude; *bp; ++bp) { dirlen = strlen(*bp); - if (dirlen + filelen > FILENAME_MAX) + if (dirlen + filelen > FILENAME_MAX-1) continue; memcpy(path, *bp, dirlen); memcpy(path+dirlen, file, filelen); - if (addinput(path, NULL, NULL)) + if (addinput(path)) break; } if (*bp) - printerr("included file '%s' not found", file); - cleanup(s); + error("included file '%s' not found", file); + return; bad_include: - printerr("#include expects \"FILENAME\" or <FILENAME>"); + error("#include expects \"FILENAME\" or <FILENAME>"); +too_long: + error("#include FILENAME too long"); } static void -line(char *s) +line(void) { - char *file; + char *file, *p; + Type *tp; long n; if (cppoff) return; - if ((n = strtol(s, &s, 10)) <= 0 || n > USHRT_MAX) { - printerr("first parameter of #line is not a positive integer"); + if ((n = strtol(input->p, &input->p, 10)) <= 0 || n > USHRT_MAX) + error("first parameter of #line is not a positive integer"); + + if (yytoken != CONSTANT || yylval.sym->type != inttype) + error("first parameter of #line is not a positive integer"); + + input->nline = yylval.sym->u.i; + next(); + if (yytoken == EOFTOK) return; - } - switch (*s) { - case ' ': - case '\t': - while 
(isspace(*s)) - ++s; - if (*s == '\0') - goto end_string; - if (*s++ != '"' && !string(&s, &file, '"')) - goto bad_file; - setfname(file); - case '\0': - end_string: - setfline(n-1); - break;; - default: - bad_file: - printerr("second parameter of #line is not a valid filename"); - break; - } - cleanup(s); + tp = yylval.sym->type; + if (yytoken != CONSTANT || tp->op != ARY && tp->type != chartype) + error("second parameter of #line is not a valid filename"); + free(input->fname); + input->fname = xstrdup(yylval.sym->u.s); } static void -pragma(char *s) +pragma(void) { if (cppoff) return; + /* TODO: discard input */ } static void -usererr(char *s) +usererr(void) { if (cppoff) return; - printerr("#error %s", s); - exit(1); + printerr("#error %s", input->p); + /* TODO: discard input */ } static void -ifclause(char *s, int isdef) +ifclause(int isdef) { Symbol *sym; - unsigned n = cppctx++; - - if (cppctx == NR_COND-1) { - printerr("too much nesting levels of conditional inclusion"); - return; - } - if (!iden(&s)) { - printerr("no macro name given in #%s directive", - (isdef) ? "ifdef" : "ifndef"); - return; + unsigned n; + + if (cppctx == NR_COND-1) + error("too much nesting levels of conditional inclusion"); + n = cppctx++ + if (yytoken != IDEN) { + error("no macro name given in #%s directive", + (isdef) ? 
"ifdef" : "ifndef"); } + sym = lookup(NS_CPP); + next(); if (!(ifstatus[n] = (sym->flags & ISDEFINED) != 0 == isdef)) ++cppoff; - cleanup(s); } static void -ifdef(char *s) +ifdef(void) { - ifclause(s, 1); + ifclause(1); } static void -ifndef(char *s) +ifndef(void) { - ifclause(s, 0); + ifclause(0); } static void -endif(char *s) +endif(void) { - if (cppctx == 0) { - printerr("#endif without #if"); - return; - } + if (cppctx == 0) + error("#endif without #if"); + if (!ifstatus[--cppctx]) --cppoff; - cleanup(s); } static void -elseclause(char *s) +elseclause(void) { struct ifstatus *ip; - if (cppctx == 0) { - printerr("#else without #ifdef/ifndef"); - return; - } + if (cppctx == 0) + error("#else without #ifdef/ifndef"); + cppoff += (ifstatus[cppctx-1] ^= 1) ? -1 : 1; - cleanup(s); } static void -undef(char *s) +undef(void) { Symbol *sym; - if (!iden(&s)) { - printerr("no macro name given in #undef directive"); + if (cppoff) + return; + if (yytoken != IDEN) { + error("no macro name given in #undef directive"); return; } sym = lookup(NS_CPP); sym->flags &= ~ISDEFINED; - cleanup(s); } bool -cpp(char *s) +cpp(void) { static struct { - char *name; - void (*fun)(char *); - } *bp, cmds[] = { - "define", define, - "include", include, - "ifdef", ifdef, - "ifndef", ifndef, - "endif", endif, - "else", elseclause, - "undef", undef, - "line", line, - "pragma", pragma, - "error", usererr, - NULL, NULL + uint8_t tok; + void (*fun)(void); + } *bp, clauses [] = { + {DEFINE, define}, + {INCLUDE, include}, + {LINE, line}, + {IFDEF, ifdef}, + {IFNDEF, ifndef}, + {ELSE, elseclause}, + {UNDEF, undef}, + {PRAGMA, pragma}, + {ERROR, usererr}, + {0, NULL} }; - if (*s++ != '#') + if (*input->p != '#') return 0; - while (isspace(*s)) - ++s; - if (!iden(&s)) - goto incorrect; - for (bp = cmds; bp->name; ++bp) { - if (strcmp(bp->name, yytext)) - continue; - (*bp->fun)(s); - return 1; - } -incorrect: - printerr("invalid preprocessor directive #%s", yytext); + ++input->p; + lexmode = CPPMODE; + 
setnamespace(NS_CPPCLAUSES); + next(); + for (bp = clauses; bp->tok && bp->tok != yytoken; ++bp) + /* nothing */; + if (!bp->tok) + error("incorrect preprocessor directive"); + next(); + (*bp->fun)(); + + if (yytoken != EOFTOK && !cppoff) + error("trailing characters after preprocessor directive"); + lexmode = CCMODE; return 1; } diff --git a/cc1/error.c b/cc1/error.c @@ -19,8 +19,9 @@ warn_helper(int flag, char *fmt, va_list va) return; if (flag < 0) failure = 1; - fprintf(stderr, "%s:%s:%u: ", - (flag < 0) ? "error" : "warning", getfname(), getfline()); + fprintf(stderr, "%s:%u: %s: ", + input->fname, input->nline, + (flag < 0) ? "error" : "warning"); vfprintf(stderr, fmt, va); putc('\n', stderr); if (flag < 0 && nerrors++ == MAXERRNUM) { diff --git a/cc1/lex.c b/cc1/lex.c @@ -11,38 +11,27 @@ #include "../inc/cc.h" #include "cc1.h" -typedef struct input Input; - -struct input { - char *fname; - unsigned short nline; - FILE *fp; - char *line, *begin, *p; - Symbol *macro; - struct input *next; -}; - unsigned yytoken; struct yystype yylval; char yytext[STRINGSIZ+3]; unsigned short yylen; int cppoff; +int lexmode = CCMODE; static unsigned lex_ns = NS_IDEN; static int safe, eof; -static Input *input; +Input *input; static void -allocinput(char *fname, FILE *fp, char *buff) +allocinput(char *fname, FILE *fp) { Input *ip; ip = xmalloc(sizeof(Input)); - ip->fname = fname; + ip->fname = xstrdup(fname); + ip->p = ip->begin = ip->line = xmalloc(INPUTSIZ); + ip->nline = 0; ip->next = input; - ip->macro = NULL; - ip->begin = ip->line = buff; - ip->nline = (fp) ? 
0 : input->nline; ip->fp = fp; input = ip; } @@ -52,10 +41,6 @@ ilex(char *fname) { FILE *fp; - /* - * we can use static file names because this Input is not going - * to be freed ever - */ if (!fname) { fp = stdin; fname = "<stdin>"; @@ -64,93 +49,42 @@ ilex(char *fname) die("error opening output:%s", strerror(errno)); fname = fname; } - allocinput(fname, fp, xmalloc(INPUTSIZ)); + allocinput(fname, fp); *input->begin = '\0'; } bool -addinput(char *fname, Symbol *sym, char *str) +addinput(char *fname) { FILE *fp; - char flags = 0; - if (fname) { - /* - * this call comes from an include clause, so we reuse - * the buffer from the calling Input - */ - if ((fp = fopen(fname, "r")) == NULL) - return 0; - fname = xstrdup(fname); - str = input->line; - *str = '\0'; - } else { - /* - * This call comes from a macro expansion, so we have - * to duplicate the input string because it is the - * expansion of the macro in a temporal buffer - */ - fname = input->fname; - fp = NULL; - str = xstrdup(str); - } - allocinput(fname, fp, str); - input->macro = sym; + if ((fp = fopen(fname, "r")) == NULL) + return 0; + allocinput(fname, fp); return 1; } static void delinput(void) { - Input *ip; + Input *ip = input; -repeat: - if (input->fp) { - /* include input */ - if (fclose(input->fp)) - die("error reading from input file '%s'", input->fname); - if (!input->next) { - eof = 1; - return; - } - free(input->fname); - } else { - /* macro input */ - free(input->line); - } - ip = input; - input = input->next; - free(ip); - - if (*input->begin != '\0') + if (!ip->next) + eof = 1; + if (fclose(ip->fp)) + die("error reading from input file '%s'", ip->fname); + if (eof) return; - if (!input->fp) - goto repeat; -} - -void -setfname(char *name) -{ - free(input->fname); - input->fname = xstrdup(name); -} - -char * -getfname(void) -{ - return input->fname; + input = ip->next; + free(ip->fname); + free(ip->line); } -void -setfline(unsigned short line) -{ - input->nline = line; -} - -unsigned short 
-getfline(void) +static void +newline(void) { - return input->nline; + if (++input->nline == 0) + die("error:input file '%s' too long", input->fname); } static char @@ -160,22 +94,25 @@ readchar(void) FILE *fp; repeat: - if (feof(input->fp)) - delinput(); - if (eof) - return '\0'; fp = input->fp; - if ((c = getc(fp)) == '\\') { - if ((c = getc(fp)) == '\n') + switch (c = getc(fp)) { + case EOF: + c = '\0'; + break; + case '\\': + if ((c = getc(fp)) == '\n') { + newline(); goto repeat; + } ungetc(c, fp); c = '\\'; - } else if (c == EOF) { - c = '\n'; - } else if (c == '\n' && ++input->nline == 0) { - die("error:input file '%s' too long", getfname()); + break; + case '\n': + newline(); + break; } + return c; } @@ -203,10 +140,17 @@ readline(void) char *bp, *lim; char c, peekc = 0; +repeat: + input->begin = input->p = input->line; + *input->line = '\0'; if (eof) return 0; + if (feof(input->fp)) { + delinput(); + goto repeat; + } lim = &input->line[INPUTSIZ-1]; - for (bp = input->line; bp != lim; *bp++ = c) { + for (bp = input->line; bp < lim; *bp++ = c) { c = (peekc) ? 
peekc : readchar(); peekc = 0; if (c == '\n' || c == '\0') @@ -222,8 +166,10 @@ readline(void) } } - if (bp == lim) - error("line %u too big in file '%s'", getfline(), getfname()); + if (bp == lim) { + error("line %u too big in file '%s'", + input->line, input->fname); + } *bp = '\0'; return 1; } @@ -231,24 +177,18 @@ readline(void) bool moreinput(void) { - char *p; - repeat: - if (!input->fp) - delinput(); - if (*input->begin) - return 1; if (!readline()) return 0; - p = input->line; - while (isspace(*p)) - ++p; - if (*p == '\0' || cpp(p) || cppoff) { + while (isspace(*input->p)) + ++input->p; + input->begin = input->p; + if (*input->p == '\0' || cpp() || cppoff) { *input->begin = '\0'; goto repeat; } - input->p = input->begin = p; + input->begin = input->p; return 1; } @@ -403,16 +343,17 @@ string(void) *bp++ = '"'; repeat: - for (++input->p; (c = *input->p) != '\0' && c != '"'; ++input->p) { + for (++input->p; (c = *input->p) != '"'; ++input->p) { + if (c == '\0') + error("missing terminating '\"' character"); if (c == '\\') c = escape(); if (bp == &yytext[STRINGSIZ+1]) error("string too long"); *bp++ = c; } - if (c == '\0') - error("missing terminating '\"' character"); - input->begin = input->p + 1; + + input->begin = ++input->p; if (ahead() == '"') goto repeat; *bp = '\0'; @@ -430,15 +371,16 @@ static unsigned iden(void) { Symbol *sym; - char *p, *t, c; + char *p, *begin; - for (p = input->p; isalnum(*p) || *p == '_'; ++p) + begin = input->p; + for (p = begin; isalnum(*p) || *p == '_'; ++p) /* nothing */; input->p = p; tok2str(); yylval.sym = sym = lookup(lex_ns); if (sym->ns == NS_CPP) { - if (!disexpand && sym != input->macro && expand(sym)) + if (!disexpand && expand(begin, sym)) return next(); /* * it is not a correct macro call, so try to find @@ -554,17 +496,15 @@ setnamespace(int ns) static void skipspaces(void) { - char *p; - repeat: - for (p = input->begin; isspace(*p); ++p) - /* nothing */; - if (*p == '\0') { + while (isspace(*input->p)) + 
++input->p; + if (*input->p == '\0' && lexmode != CPPMODE) { if (!moreinput()) return; goto repeat; } - input->begin = input->p = p; + input->begin = input->p; } unsigned @@ -573,14 +513,15 @@ next(void) char c; skipspaces(); - if (eof) { - if (cppctx) - error("#endif expected"); + c = *input->begin; + if ((eof || lexmode == CPPMODE) && c == '\0') { strcpy(yytext, "<EOF>"); - return yytoken = EOFTOK; + if (cppctx && eof) + error("#endif expected"); + yytoken = EOFTOK; + goto exit; } - c = *input->begin; if (isalpha(c) || c == '_') yytoken = iden(); else if (isdigit(c)) @@ -592,8 +533,8 @@ next(void) else yytoken = operator(); - fputs(yytext, stderr); - putc('\n', stderr); +exit: + fprintf(stderr, "%s\n", yytext); lex_ns = NS_IDEN; return yytoken; } diff --git a/cc1/symbol.c b/cc1/symbol.c @@ -191,7 +191,7 @@ ikeywords(void) static struct { char *str; unsigned char token, value; - } *bp, buff[] = { + } *bp, keywords[] = { {"auto", SCLASS, AUTO}, {"break", BREAK, BREAK}, {"_Bool", TYPE, BOOL}, @@ -227,14 +227,32 @@ ikeywords(void) {"volatile", TQUALIFIER, VOLATILE}, {"while", WHILE, WHILE}, {NULL, 0, 0}, - }; + }, cppclauses[] = { + {"define", DEFINE, DEFINE}, + {"include", INCLUDE, INCLUDE}, + {"line", LINE, LINE}, + {"ifdef", IFDEF, IFDEF}, + {"else", ELSE, ELSE}, + {"ifndef", IFNDEF, IFNDEF}, + {"undef", UNDEF, UNDEF}, + {"pragma", PRAGMA, PRAGMA}, + {"error", ERROR, ERROR} + }, *list[] = { + keywords, + cppclauses, + NULL + }, **lp; Symbol *sym; + int ns = NS_KEYWORD; - for (bp = buff; bp->str; ++bp) { - strcpy(yytext, bp->str); - sym = lookup(NS_KEYWORD); - sym->token = bp->token; - sym->u.token = bp->value; + for (lp = list; *lp; ++lp) { + for (bp = *lp; bp->str; ++bp) { + strcpy(yytext, bp->str); + sym = lookup(ns); + sym->token = bp->token; + sym->u.token = bp->value; + } + ns = NS_CPPCLAUSES; } globalcnt = 0; }