scc

simple C compiler
git clone git://git.2f30.org/scc
Log | Files | Refs | README | LICENSE

commit 4e05c8d89c382a02992de4fad24c034d333bec2c
parent d0b5bbca0d530653369c8cc94214fe44cc2bfad1
Author: Roberto E. Vargas Caballero <k0ga@shike2.com>
Date:   Fri, 17 Jul 2015 19:36:19 +0200

Merge remote-tracking branch 'kcc/master'

Diffstat:
MREADME | 33+++++++++++++++++----------------
Mcc1/cc1.h | 38+++++++++++++++++++++++++++++++-------
Mcc1/cpp.c | 651+++++++++++++++++++++++++++++++++----------------------------------------------
Mcc1/error.c | 5+++--
Mcc1/lex.c | 209++++++++++++++++++++++++++++---------------------------------------------------
Mcc1/symbol.c | 32+++++++++++++++++++++++++-------
6 files changed, 426 insertions(+), 542 deletions(-)

diff --git a/README b/README @@ -7,12 +7,12 @@ of optimizations to the programmer. After a lot of years seeing compilers for z80 I notice that it is very hard for a C compiler generates a good code for a 8 bit processor like -z80, with a expensive indirect addressing mode, so the best solution +z80, with an expensive indirect addressing mode, so the best solution is to not try that compiler optimize the code and use C as a macro macro assembler. -In order to get this last, we need a compiler that does exactly the -things we want to do. For example is a variable is register it MUST be +In order to get this target, we need a compiler that does exactly the +things programmer want to do. For example is a variable is register it MUST be register and fails in other case. If a variable is automatic try to realize operations directly with the stack (for example use ADD A,(IX+4), instead of allocate the variable into register add and store @@ -20,7 +20,7 @@ again in memory). So if you declare an automatic variable you are a big bullshit or you need it for recursion (static variables are your friends). -This is the reason why I begin to develop this compiler, and I hope +This is the reason why I began to develop this compiler, and I hope it will be useful for you. Changes from standard C @@ -37,7 +37,7 @@ time): - const: The definition of const is not clear in the standard. If a const value is modified then the behaviour is implementation defined. It seems that it was defined more in order to can - allocate variables in ROM that for the error detection. This + allocate variables in ROM than for the error detection. This implememtation will not warn about these modifications and the code will use them as normal variables (standard specifies that a diagnosis message must be printed). @@ -47,17 +47,18 @@ time): variable', which of course depend of the king of optimizations applied to the variable. This qualifier was added to the standard to can deal with longjmp (local variables that are not volatile - have undefined state), but this can achieved with special pragma - values, and for memory mapped registers or variables whose - value is modified asynchronous. In the first case, this is a - a non portable code by definition (depend of the register mapped), - so it is better to deal with it using another solution (compiler - extensions or directly assembler), and in the second case it - generated a lot of problems with moderm processors out of order - and multiprocesor, where not hold the value in a register is - good enough (it is needed a explicit memory barrier). - - - restricted: This qualifer can be only applied to pointers, to + have undefined state), and for memory mapped registers or variables + whose value is modified asynchronous but this can achieved with + special pragma values. + In the first case, this is a a non portable code by definition + (depend of the register mapped), so it is better to deal with + it using another solution (compiler extensions or directly + assembler), and in the second case it generated a lot of + problems with moderm processors out of order and multiprocesor, + where not hold the value in a register is good enough (it is + needed a explicit memory barrier). + + - restrict: This qualifer can be only applied to pointers, to mark that the pointed object has no other alias. This qualifer was introduced to can fix some performance problems in numerical algorithm, where FORTRAN can achieve a better performance (and diff --git a/cc1/cc1.h b/cc1/cc1.h @@ -11,6 +11,8 @@ typedef struct type Type; typedef struct symbol Symbol; typedef struct caselist Caselist; typedef struct node Node; +typedef struct input Input; + struct type { unsigned char op; /* type builder operator */ @@ -70,6 +72,14 @@ struct yystype { unsigned char token; }; +struct input { + char *fname; + void *fp; + char *line, *begin, *p; + struct input *next; + unsigned short nline; +}; + /* * Definition of enumerations */ @@ -96,6 +106,7 @@ enum { NS_LABEL, NS_CPP, NS_KEYWORD, + NS_CPPCLAUSES, NS_STRUCTS }; @@ -111,6 +122,13 @@ enum { ISEXTERN =128 }; + +/* lexer mode, compiler or preprocessor directive */ +enum { + CCMODE, + CPPMODE +}; + /* input tokens */ enum tokens { TQUALIFIER = 128, @@ -177,6 +195,14 @@ enum tokens { CONTINUE, BREAK, RETURN, + DEFINE, + INCLUDE, + LINE, + PRAGMA, + ERROR, + IFDEF, + IFNDEF, + UNDEF, EOFTOK }; @@ -275,11 +301,7 @@ extern unsigned next(void); extern bool moreinput(void); extern void expect(unsigned tok); extern void discard(void); -extern char *getfname(void); -extern unsigned short getfline(void); -extern void setfname(char *name); -extern void setfline(unsigned short line); -extern bool addinput(char *fname, Symbol *sym, char *str); +extern bool addinput(char *fname); extern void setnamespace(int ns); extern void setsafe(int type); extern void ilex(char *fname); @@ -298,8 +320,8 @@ extern Node *expr(void), *negate(Node *np); /* cpp.c */ extern void icpp(void); -extern bool cpp(char *s); -extern int expand(Symbol *sym); +extern bool cpp(void); +extern bool expand(char *begin, Symbol *sym); /* * Definition of global variables @@ -310,6 +332,8 @@ extern unsigned yytoken; extern unsigned short yylen; extern int cppoff, disexpand; extern unsigned cppctx; +extern Input *input; +extern int lexmode; extern Type *voidtype, *pvoidtype, *booltype, *uchartype, *chartype, diff --git a/cc1/cpp.c b/cc1/cpp.c @@ -19,7 +19,7 @@ static char *argp, *macroname; static unsigned arglen; static Symbol *symline, *symfile; static unsigned char ifstatus[NR_COND]; -static int paramerr; +static Type *charptype; unsigned cppctx; int disexpand; @@ -56,87 +56,26 @@ icpp(void) symfile = defmacro("__FILE__"); } -static bool -iden(char **str) -{ - char c, *bp, *s = *str; - - if (!isalpha(c = *s) && c != '_') - return 0; - for (bp = yytext; bp < &yytext[IDENTSIZ]; *bp++ = c) { - if ((c = *s) == '\0' || !isalnum(c) && c != '_') - break; - ++s; - } - if (bp == &yytext[IDENTSIZ]) { - printerr("identifier too long in preprocessor"); - return 0; - } - *bp = '\0'; - - while (isspace(*s)) - ++s; - - *str = s; - return 1; -} - -static bool -string(char **input, char **str, char delim) -{ - char c, *s = *input; - - if (str) - *str = s; - - while ((c = *s) && c != delim) - ++s; - if (c == '\0') - return 0; - *s++ = '\0'; - *input = s; - - return 1; -} - -static void -cleanup(char *s) -{ - while (isspace(*s)) - ++s; - if (*s != '\0') - printerr("trailing characters after preprocessor directive"); -} - static void nextcpp(void) { - next(); - if (yytoken == EOFTOK) { - printerr("unterminated argument list invoking macro \"%s\"", - macroname); - goto mark_error; - } - if (yylen + 1 > arglen) { - printerr("argument overflow invoking macro \"%s\"", - macroname); - goto mark_error; - } - memcpy(argp, yytext, yylen); - argp += yylen; - *argp++ = ' '; - arglen -= yylen + 1; - return; - -mark_error: - paramerr = 1; - yytoken = 0; + next(); + if (yytoken == EOFTOK) + error("unterminated argument list invoking macro \"%s\"", + macroname); + if (yylen + 1 > arglen) + error("argument overflow invoking macro \"%s\"", + macroname); + memcpy(argp, yytext, yylen); + argp += yylen; + *argp++ = ' '; + arglen -= yylen + 1; } static void paren(void) { - while (!paramerr) { + for (;;) { nextcpp(); switch (yytoken) { case ')': @@ -151,7 +90,7 @@ paren(void) static void parameter(void) { - while (!paramerr) { + for (;;) { nextcpp(); switch (yytoken) { case ')': @@ -172,90 +111,55 @@ parsepars(char *buffer, char **listp, int nargs) int n; if (nargs == -1) - return 1; - - if (ahead() != '(') + return -1; + if (ahead() != '(' && nargs > 0) return 0; disexpand = 1; next(); - paramerr = n = 0; + n = 0; argp = buffer; arglen = INPUTSIZ; - if (ahead() != ')') { + if (yytoken != ')') { do { *listp++ = argp; parameter(); - } while (!paramerr && ++n < NR_MACROARG && yytoken == ','); + } while (++n < NR_MACROARG && yytoken == ','); } + if (yytoken != ')') + error("incorrect macro function alike invocation"); disexpand = 0; - if (paramerr) - return -1; - if (n == NR_MACROARG) { - printerr("too much parameters in macro \"%s\"", macroname); - return -1; - } + if (n == NR_MACROARG) + error("too much parameters in macro \"%s\"", macroname); if (n != nargs) { - printerr("macro \"%s\" passed %d arguments, but it takes %d", + error("macro \"%s\" passed %d arguments, but it takes %d", macroname, n, nargs); - return -1; } return 1; } -/* - * sym->u.s is a string with the following format: - * dd#string - * where dd is the number of arguments of the macro - * (-1 if it is a macro without arguments), and string - * is the macro definition, where @dd@ indicates the - * parameter number dd - */ -#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2) -int -expand(Symbol *sym) +static void +copymacro(char *bp, char *s, size_t bufsiz, char *arglist[]) { - unsigned len; - int r, n; - char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE]; - char prevc, c, *bp, *lim, *arg, *s = sym->u.s; + char prevc, c, *arg; - fprintf(stderr, "macro %s:%s\n", sym->name, sym->u.s); - if (sym == symfile) { - sprintf(buffer, "\"%s\"", getfname()); - goto add_macro; - } - if (sym == symline) { - sprintf(buffer, "%d", getfline()); - goto add_macro; - } - - macroname = sym->name; - if ((r = parsepars(arguments, arglist, atoi(s))) < 1) - return r; - - for (n = 0; n < atoi(s); ++n) - fprintf(stderr, "PAR%d:%s\n", n, arglist[n]); - - len = INPUTSIZ-1; - bp = buffer; - for (prevc = '\0', s += 3; c = *s; prevc = c, ++s) { + for (prevc = '\0'; c = *s; prevc = c, ++s) { if (c != '@') { if (c == '#') continue; - if (len-- == 0) + if (bufsiz-- == 0) goto expansion_too_long; *bp++ = c; } else { - unsigned size; + size_t size; if (prevc == '#') - len -= 2; + bufsiz -= 2; arg = arglist[atoi(++s)]; size = strlen(arg); - if (size > len) + if (size > bufsiz) goto expansion_too_long; if (prevc == '#') *bp++ = '"'; @@ -263,190 +167,179 @@ expand(Symbol *sym) bp += size; if (prevc == '#') *bp++ = '"'; - len -= size; + bufsiz -= size; s += 2; } } - *bp = '\0'; - fprintf(stderr, "macro expanded:%s\n", buffer); -add_macro: - addinput(NULL, sym, buffer); - return 1; + *bp = '\0'; + + return; expansion_too_long: - printerr("expansion of macro \"%s\" is too long", macroname); - return -1; + error("expansion of macro \"%s\" is too long", macroname); } -#undef BUFSIZE -/* - * Parse an argument list (par0, par1, ...) and creates - * an array with pointers to all the arguments in the - * list - */ -static char * -parseargs(char *s, char *args[NR_MACROARG], int *nargs) +#define BUFSIZE ((INPUTSIZ > FILENAME_MAX+2) ? INPUTSIZ : FILENAME_MAX+2) +bool +expand(char *begin, Symbol *sym) { - int n; size_t len; - char *endp, c; + int n; + char *s = sym->u.s; + char *arglist[NR_MACROARG], arguments[INPUTSIZ], buffer[BUFSIZE]; - n = -1; - if (*s != '(') - goto set_nargs; - n = 0; - while (isspace(*s++)) - /* nothing */; - if (*s == ')') - goto set_nargs; - - for (n = 1; n <= NR_MACROARG; ++n) { - while (isspace(*s)) - ++s; - if (!isalpha(*s) && *s != '_') { - printerr("macro arguments must be identifiers"); - return NULL; - } - for (endp = s+1; isalnum(*endp) || *endp == '_'; ++endp) - /* nothing */; - if ((len = endp - s) > IDENTSIZ) { - printerr("macro argument too long"); - return NULL; - } - *args++ = s; - for (s = endp; isspace(*s); ++s) - *s = '\0'; - c = *s; - *s++ = '\0'; - if (c == ')') - break; - if (c == ',') { - continue; - } else { - printerr("macro parameters must be comma-separated"); - return NULL; - } + fprintf(stderr, "macro '%s':%s\n", sym->name, sym->u.s); + if (sym == symfile) { + sprintf(buffer, "\"%s\"", input->fname); + goto print_subs; } - if (n > NR_MACROARG) { - printerr("too much parameters in macro"); - return NULL; + if (sym == symline) { + sprintf(buffer, "%d", input->line); + goto print_subs; } -set_nargs: - *nargs = n; - return s; + macroname = sym->name; + if (!parsepars(arguments, arglist, atoi(s))) + return 0; + for (n = 0; n < atoi(s); ++n) + fprintf(stderr, "PAR%d:%s\n", n, arglist[n]); + + copymacro(buffer, s+3, INPUTSIZ-1, arglist); + +print_subs: + fprintf(stderr, "macro '%s' expanded to :'%s'\n", macroname, buffer); + len = strlen(buffer); + + /* cut macro invocation */ + memmove(begin, input->p, input->p - begin); + memmove(begin + len, begin, len); + + /* paste macro expansion */ + memcpy(begin, buffer, len); + input->p = input->begin = begin; + + return 1; } +#undef BUFSIZE -/* - * Copy a string define, and substitute formal arguments of the - * macro into strings in the form @XX@, where XX is the position - * of the argument in the argument list. - */ -static bool -copydefine(char *s, char *args[], char *buff, int bufsiz, int nargs) +static int +getpars(Symbol *args[NR_MACROARG]) { - int n; - size_t ncopy; - char arroba[6], *p, **bp, c, prevc; - - for (prevc = '\0'; c = *s++; prevc = c) { - if (!isalpha(c) && c != '_' || nargs < 1) { - if (bufsiz-- == 0) - goto too_long; - if (prevc == '#') - goto bad_stringer; - *buff++ = c; - if (c != '#') - continue; - while (isspace(*++s)) - /* nothing */; + int n = -1; + char *err; + + if (!accept('(')) + return n; + ++n; + if (accept(')')) + return n; + + do { + if (n == NR_MACROARG) { + err = "too much parameters in macro"; + goto popctx_and_error; } - /* found an identifier, is it one of the macro arguments? */ - for (p = s; isalnum(c = *p) || c == '_'; ++p) - /* nothing */; - ncopy = p - --s; - bp = args; - for (n = 0; n < nargs; ++n) { - if (strncmp(s, *bp++, ncopy)) - continue; - sprintf(arroba, "@%02d@", n); - s = arroba, ncopy = 4; - break; + if (yytoken != IDEN) { + err = "macro arguments must be identifiers"; + goto popctx_and_error; } - if (n == nargs && prevc == '#') - goto bad_stringer; - if ((bufsiz -= ncopy) < 0) - goto too_long; - memcpy(buff, s, ncopy); - buff += ncopy, s = p; - } - if (bufsiz == 0) - goto too_long; - *buff = '\0'; - return 1; + args[n++] = yylval.sym; + next(); + } while (accept(',')); + expect(')'); -bad_stringer: - printerr("'#' is not followed by a macro parameter"); - return 0; -too_long: - printerr("macro definition too long"); - return 0; + return n; + +popctx_and_error: + popctx(); + error(err); } -static char * -mkdefine(char *s) +static void +getdefs(Symbol *args[NR_MACROARG], int nargs, char *bp, size_t bufsiz) { - int nargs; - char *args[NR_MACROARG], buff[LINESIZ+1]; - - if ((s = parseargs(s, args, &nargs)) == NULL) - return NULL; - sprintf(buff, "%02d#", nargs); + Symbol **argp; + char *err; + size_t len; + int prevc = 0, ispar; + + for (;;) { + ispar = 0; + if (yytoken == IDEN) { + for (argp = args; argp < &args[nargs]; ++argp) { + if (*argp == yylval.sym) + break; + } + if (argp != &args[nargs]) { + sprintf(yytext, "@%02d@", argp - args); + ispar = 1; + } + } + if (prevc == '#' && !ispar) + goto bad_stringer; + if (yytoken == EOFTOK) + break; - while (isspace(*s)) - ++s; + if ((len = strlen(yytext)) >= bufsiz) { + err = "too long macro"; + goto popctx_and_error; + } + memcpy(bp, yytext, len); + bp += len; + bufsiz -= len; + if ((prevc = yytoken) != '#') { + bufsiz; + *bp++ = ' '; + } + next(); + } + *bp = '\0'; + return; - if (*s == '\0') - buff[0] = '\0'; - else if (!copydefine(s, args, buff+3, LINESIZ-3, nargs)) - return NULL; - return xstrdup(buff); +bad_stringer: + err = "'#' is not followed by a macro parameter"; +popctx_and_error: + popctx(); + error(err); } static void -define(char *s) +define(void) { - char *t; - Symbol *sym; + Symbol *sym,*args[NR_MACROARG]; + char buff[LINESIZ+1]; + int n; if (cppoff) return; - if (!iden(&s)) { - printerr("#define must have an identifier as parameter"); - return; - } - - for (t = s + strlen(s) + 1; isspace(*--t); *t = '\0') - /* nothing */; - if ((s = mkdefine(s)) == NULL) - return; - - sym = lookup(NS_CPP); + if (yytoken != IDEN) + error("macro names must be identifiers"); + sym = yylval.sym; if ((sym->flags & ISDEFINED) && sym->ns == NS_CPP) { warn("'%s' redefined", yytext); free(sym->u.s); + } else if (sym->ns != NS_CPP) { + sym = lookup(NS_CPP); } sym->flags |= ISDEFINED; - sym->ns = NS_CPP; - sym->ctx = UCHAR_MAX; - sym->u.s = s; + + pushctx(); + + next(); + n = getpars(args); + sprintf(buff, "%02d#", n); + getdefs(args, n, buff+3, LINESIZ-3); + sym->u.s = xstrdup(buff); + fprintf(stderr, "Defining macro '%s'='%s'\n", sym->name, buff); + + popctx(); } static void -include(char *s) +include(void) { - char **bp, delim, c, *p, *file, path[FILENAME_MAX]; - char *sysinclude[] = { + char **bp, *p, file[FILENAME_MAX], path[FILENAME_MAX]; + static char *sysinclude[] = { PREFIX"/include/", PREFIX"/local/include/", NULL @@ -455,193 +348,199 @@ include(char *s) if (cppoff) return; - if ((c = *s++) == '>') - delim = '>'; - else if (c == '"') - delim = '"'; - else - goto bad_include; - - if (!string(&s, &file, delim)) + switch (*yytext) { + case '<': + if ((p = strchr(input->begin, '>')) == NULL) + goto bad_include; + *p = '\0'; + if (p - input->begin >= FILENAME_MAX) + goto too_long; + strcpy(file, input->begin); + input->begin = input->p = p+1; + next(); + break; + case '"': + if ((p = strchr(yytext + 1, '"')) == NULL) + goto bad_include; + *p = '\0'; + if (p - yytext + 1 >= FILENAME_MAX) + goto too_long; + strcpy(file, yytext + 1); + next(); + if (addinput(file)) + return; + break; + default: goto bad_include; - if (delim == '"' && addinput(file, NULL, NULL)) - return; + } filelen = strlen(file); for (bp = sysinclude; *bp; ++bp) { dirlen = strlen(*bp); - if (dirlen + filelen > FILENAME_MAX) + if (dirlen + filelen > FILENAME_MAX-1) continue; memcpy(path, *bp, dirlen); memcpy(path+dirlen, file, filelen); - if (addinput(path, NULL, NULL)) + if (addinput(path)) break; } if (*bp) - printerr("included file '%s' not found", file); - cleanup(s); + error("included file '%s' not found", file); + return; bad_include: - printerr("#include expects \"FILENAME\" or <FILENAME>"); + error("#include expects \"FILENAME\" or <FILENAME>"); +too_long: + error("#include FILENAME too long"); } static void -line(char *s) +line(void) { - char *file; + char *file, *p; + Type *tp; long n; if (cppoff) return; - if ((n = strtol(s, &s, 10)) <= 0 || n > USHRT_MAX) { - printerr("first parameter of #line is not a positive integer"); + if ((n = strtol(input->p, &input->p, 10)) <= 0 || n > USHRT_MAX) + error("first parameter of #line is not a positive integer"); + + if (yytoken != CONSTANT || yylval.sym->type != inttype) + error("first parameter of #line is not a positive integer"); + + input->nline = yylval.sym->u.i; + next(); + if (yytoken == EOFTOK) return; - } - switch (*s) { - case ' ': - case '\t': - while (isspace(*s)) - ++s; - if (*s == '\0') - goto end_string; - if (*s++ != '"' && !string(&s, &file, '"')) - goto bad_file; - setfname(file); - case '\0': - end_string: - setfline(n-1); - break;; - default: - bad_file: - printerr("second parameter of #line is not a valid filename"); - break; - } - cleanup(s); + tp = yylval.sym->type; + if (yytoken != CONSTANT || tp->op != ARY && tp->type != chartype) + error("second parameter of #line is not a valid filename"); + free(input->fname); + input->fname = xstrdup(yylval.sym->u.s); } static void -pragma(char *s) +pragma(void) { if (cppoff) return; + /* TODO: discard input */ } static void -usererr(char *s) +usererr(void) { if (cppoff) return; - printerr("#error %s", s); - exit(1); + printerr("#error %s", input->p); + /* TODO: discard input */ } static void -ifclause(char *s, int isdef) +ifclause(int isdef) { Symbol *sym; - unsigned n = cppctx++; - - if (cppctx == NR_COND-1) { - printerr("too much nesting levels of conditional inclusion"); - return; - } - if (!iden(&s)) { - printerr("no macro name given in #%s directive", - (isdef) ? "ifdef" : "ifndef"); - return; + unsigned n; + + if (cppctx == NR_COND-1) + error("too much nesting levels of conditional inclusion"); + n = cppctx++ + if (yytoken != IDEN) { + error("no macro name given in #%s directive", + (isdef) ? "ifdef" : "ifndef"); } + sym = lookup(NS_CPP); + next(); if (!(ifstatus[n] = (sym->flags & ISDEFINED) != 0 == isdef)) ++cppoff; - cleanup(s); } static void -ifdef(char *s) +ifdef(void) { - ifclause(s, 1); + ifclause(1); } static void -ifndef(char *s) +ifndef(void) { - ifclause(s, 0); + ifclause(0); } static void -endif(char *s) +endif(void) { - if (cppctx == 0) { - printerr("#endif without #if"); - return; - } + if (cppctx == 0) + error("#endif without #if"); + if (!ifstatus[--cppctx]) --cppoff; - cleanup(s); } static void -elseclause(char *s) +elseclause(void) { struct ifstatus *ip; - if (cppctx == 0) { - printerr("#else without #ifdef/ifndef"); - return; - } + if (cppctx == 0) + error("#else without #ifdef/ifndef"); + cppoff += (ifstatus[cppctx-1] ^= 1) ? -1 : 1; - cleanup(s); } static void -undef(char *s) +undef(void) { Symbol *sym; - if (!iden(&s)) { - printerr("no macro name given in #undef directive"); + if (cppoff) + return; + if (yytoken != IDEN) { + error("no macro name given in #undef directive"); return; } sym = lookup(NS_CPP); sym->flags &= ~ISDEFINED; - cleanup(s); } bool -cpp(char *s) +cpp(void) { static struct { - char *name; - void (*fun)(char *); - } *bp, cmds[] = { - "define", define, - "include", include, - "ifdef", ifdef, - "ifndef", ifndef, - "endif", endif, - "else", elseclause, - "undef", undef, - "line", line, - "pragma", pragma, - "error", usererr, - NULL, NULL + uint8_t tok; + void (*fun)(void); + } *bp, clauses [] = { + {DEFINE, define}, + {INCLUDE, include}, + {LINE, line}, + {IFDEF, ifdef}, + {IFNDEF, ifndef}, + {ELSE, elseclause}, + {UNDEF, undef}, + {PRAGMA, pragma}, + {ERROR, usererr}, + {0, NULL} }; - if (*s++ != '#') + if (*input->p != '#') return 0; - while (isspace(*s)) - ++s; - if (!iden(&s)) - goto incorrect; - for (bp = cmds; bp->name; ++bp) { - if (strcmp(bp->name, yytext)) - continue; - (*bp->fun)(s); - return 1; - } -incorrect: - printerr("invalid preprocessor directive #%s", yytext); + ++input->p; + lexmode = CPPMODE; + setnamespace(NS_CPPCLAUSES); + next(); + for (bp = clauses; bp->tok && bp->tok != yytoken; ++bp) + /* nothing */; + if (!bp->tok) + error("incorrect preprocessor directive"); + next(); + (*bp->fun)(); + + if (yytoken != EOFTOK && !cppoff) + error("trailing characters after preprocessor directive"); + lexmode = CCMODE; return 1; } diff --git a/cc1/error.c b/cc1/error.c @@ -19,8 +19,9 @@ warn_helper(int flag, char *fmt, va_list va) return; if (flag < 0) failure = 1; - fprintf(stderr, "%s:%s:%u: ", - (flag < 0) ? "error" : "warning", getfname(), getfline()); + fprintf(stderr, "%s:%u: %s: ", + input->fname, input->nline, + (flag < 0) ? "error" : "warning"); vfprintf(stderr, fmt, va); putc('\n', stderr); if (flag < 0 && nerrors++ == MAXERRNUM) { diff --git a/cc1/lex.c b/cc1/lex.c @@ -11,38 +11,27 @@ #include "../inc/cc.h" #include "cc1.h" -typedef struct input Input; - -struct input { - char *fname; - unsigned short nline; - FILE *fp; - char *line, *begin, *p; - Symbol *macro; - struct input *next; -}; - unsigned yytoken; struct yystype yylval; char yytext[STRINGSIZ+3]; unsigned short yylen; int cppoff; +int lexmode = CCMODE; static unsigned lex_ns = NS_IDEN; static int safe, eof; -static Input *input; +Input *input; static void -allocinput(char *fname, FILE *fp, char *buff) +allocinput(char *fname, FILE *fp) { Input *ip; ip = xmalloc(sizeof(Input)); - ip->fname = fname; + ip->fname = xstrdup(fname); + ip->p = ip->begin = ip->line = xmalloc(INPUTSIZ); + ip->nline = 0; ip->next = input; - ip->macro = NULL; - ip->begin = ip->line = buff; - ip->nline = (fp) ? 0 : input->nline; ip->fp = fp; input = ip; } @@ -52,10 +41,6 @@ ilex(char *fname) { FILE *fp; - /* - * we can use static file names because this Input is not going - * to be freed ever - */ if (!fname) { fp = stdin; fname = "<stdin>"; @@ -64,93 +49,42 @@ ilex(char *fname) die("error opening output:%s", strerror(errno)); fname = fname; } - allocinput(fname, fp, xmalloc(INPUTSIZ)); + allocinput(fname, fp); *input->begin = '\0'; } bool -addinput(char *fname, Symbol *sym, char *str) +addinput(char *fname) { FILE *fp; - char flags = 0; - if (fname) { - /* - * this call comes from an include clause, so we reuse - * the buffer from the calling Input - */ - if ((fp = fopen(fname, "r")) == NULL) - return 0; - fname = xstrdup(fname); - str = input->line; - *str = '\0'; - } else { - /* - * This call comes from a macro expansion, so we have - * to duplicate the input string because it is the - * expansion of the macro in a temporal buffer - */ - fname = input->fname; - fp = NULL; - str = xstrdup(str); - } - allocinput(fname, fp, str); - input->macro = sym; + if ((fp = fopen(fname, "r")) == NULL) + return 0; + allocinput(fname, fp); return 1; } static void delinput(void) { - Input *ip; + Input *ip = input; -repeat: - if (input->fp) { - /* include input */ - if (fclose(input->fp)) - die("error reading from input file '%s'", input->fname); - if (!input->next) { - eof = 1; - return; - } - free(input->fname); - } else { - /* macro input */ - free(input->line); - } - ip = input; - input = input->next; - free(ip); - - if (*input->begin != '\0') + if (!ip->next) + eof = 1; + if (fclose(ip->fp)) + die("error reading from input file '%s'", ip->fname); + if (eof) return; - if (!input->fp) - goto repeat; -} - -void -setfname(char *name) -{ - free(input->fname); - input->fname = xstrdup(name); -} - -char * -getfname(void) -{ - return input->fname; + input = ip->next; + free(ip->fname); + free(ip->line); } -void -setfline(unsigned short line) -{ - input->nline = line; -} - -unsigned short -getfline(void) +static void +newline(void) { - return input->nline; + if (++input->nline == 0) + die("error:input file '%s' too long", input->fname); } static char @@ -160,22 +94,25 @@ readchar(void) FILE *fp; repeat: - if (feof(input->fp)) - delinput(); - if (eof) - return '\0'; fp = input->fp; - if ((c = getc(fp)) == '\\') { - if ((c = getc(fp)) == '\n') + switch (c = getc(fp)) { + case EOF: + c = '\0'; + break; + case '\\': + if ((c = getc(fp)) == '\n') { + newline(); goto repeat; + } ungetc(c, fp); c = '\\'; - } else if (c == EOF) { - c = '\n'; - } else if (c == '\n' && ++input->nline == 0) { - die("error:input file '%s' too long", getfname()); + break; + case '\n': + newline(); + break; } + return c; } @@ -203,10 +140,17 @@ readline(void) char *bp, *lim; char c, peekc = 0; +repeat: + input->begin = input->p = input->line; + *input->line = '\0'; if (eof) return 0; + if (feof(input->fp)) { + delinput(); + goto repeat; + } lim = &input->line[INPUTSIZ-1]; - for (bp = input->line; bp != lim; *bp++ = c) { + for (bp = input->line; bp < lim; *bp++ = c) { c = (peekc) ? peekc : readchar(); peekc = 0; if (c == '\n' || c == '\0') @@ -222,8 +166,10 @@ readline(void) } } - if (bp == lim) - error("line %u too big in file '%s'", getfline(), getfname()); + if (bp == lim) { + error("line %u too big in file '%s'", + input->line, input->fname); + } *bp = '\0'; return 1; } @@ -231,24 +177,18 @@ readline(void) bool moreinput(void) { - char *p; - repeat: - if (!input->fp) - delinput(); - if (*input->begin) - return 1; if (!readline()) return 0; - p = input->line; - while (isspace(*p)) - ++p; - if (*p == '\0' || cpp(p) || cppoff) { + while (isspace(*input->p)) + ++input->p; + input->begin = input->p; + if (*input->p == '\0' || cpp() || cppoff) { *input->begin = '\0'; goto repeat; } - input->p = input->begin = p; + input->begin = input->p; return 1; } @@ -403,16 +343,17 @@ string(void) *bp++ = '"'; repeat: - for (++input->p; (c = *input->p) != '\0' && c != '"'; ++input->p) { + for (++input->p; (c = *input->p) != '"'; ++input->p) { + if (c == '\0') + error("missing terminating '\"' character"); if (c == '\\') c = escape(); if (bp == &yytext[STRINGSIZ+1]) error("string too long"); *bp++ = c; } - if (c == '\0') - error("missing terminating '\"' character"); - input->begin = input->p + 1; + + input->begin = ++input->p; if (ahead() == '"') goto repeat; *bp = '\0'; @@ -430,15 +371,16 @@ static unsigned iden(void) { Symbol *sym; - char *p, *t, c; + char *p, *begin; - for (p = input->p; isalnum(*p) || *p == '_'; ++p) + begin = input->p; + for (p = begin; isalnum(*p) || *p == '_'; ++p) /* nothing */; input->p = p; tok2str(); yylval.sym = sym = lookup(lex_ns); if (sym->ns == NS_CPP) { - if (!disexpand && sym != input->macro && expand(sym)) + if (!disexpand && expand(begin, sym)) return next(); /* * it is not a correct macro call, so try to find @@ -554,17 +496,15 @@ setnamespace(int ns) static void skipspaces(void) { - char *p; - repeat: - for (p = input->begin; isspace(*p); ++p) - /* nothing */; - if (*p == '\0') { + while (isspace(*input->p)) + ++input->p; + if (*input->p == '\0' && lexmode != CPPMODE) { if (!moreinput()) return; goto repeat; } - input->begin = input->p = p; + input->begin = input->p; } unsigned @@ -573,14 +513,15 @@ next(void) char c; skipspaces(); - if (eof) { - if (cppctx) - error("#endif expected"); + c = *input->begin; + if ((eof || lexmode == CPPMODE) && c == '\0') { strcpy(yytext, "<EOF>"); - return yytoken = EOFTOK; + if (cppctx && eof) + error("#endif expected"); + yytoken = EOFTOK; + goto exit; } - c = *input->begin; if (isalpha(c) || c == '_') yytoken = iden(); else if (isdigit(c)) @@ -592,8 +533,8 @@ next(void) else yytoken = operator(); - fputs(yytext, stderr); - putc('\n', stderr); +exit: + fprintf(stderr, "%s\n", yytext); lex_ns = NS_IDEN; return yytoken; } diff --git a/cc1/symbol.c b/cc1/symbol.c @@ -191,7 +191,7 @@ ikeywords(void) static struct { char *str; unsigned char token, value; - } *bp, buff[] = { + } *bp, keywords[] = { {"auto", SCLASS, AUTO}, {"break", BREAK, BREAK}, {"_Bool", TYPE, BOOL}, @@ -227,14 +227,32 @@ ikeywords(void) {"volatile", TQUALIFIER, VOLATILE}, {"while", WHILE, WHILE}, {NULL, 0, 0}, - }; + }, cppclauses[] = { + {"define", DEFINE, DEFINE}, + {"include", INCLUDE, INCLUDE}, + {"line", LINE, LINE}, + {"ifdef", IFDEF, IFDEF}, + {"else", ELSE, ELSE}, + {"ifndef", IFNDEF, IFNDEF}, + {"undef", UNDEF, UNDEF}, + {"pragma", PRAGMA, PRAGMA}, + {"error", ERROR, ERROR} + }, *list[] = { + keywords, + cppclauses, + NULL + }, **lp; Symbol *sym; + int ns = NS_KEYWORD; - for (bp = buff; bp->str; ++bp) { - strcpy(yytext, bp->str); - sym = lookup(NS_KEYWORD); - sym->token = bp->token; - sym->u.token = bp->value; + for (lp = list; *lp; ++lp) { + for (bp = *lp; bp->str; ++bp) { + strcpy(yytext, bp->str); + sym = lookup(ns); + sym->token = bp->token; + sym->u.token = bp->value; + } + ns = NS_CPPCLAUSES; } globalcnt = 0; }