iris

small scheme interpreter
git clone git://git.2f30.org/iris
Log | Files | Refs | LICENSE

commit 957be47ca3edb0e5d2365360a479d91190db0fc2
parent 60d4c2eb9824194b7c1b4a4dda537080f6e6ad25
Author: sin <sin@2f30.org>
Date:   Sun, 11 May 2014 14:39:38 +0100

Re-organize the lexer

Still some things left to do.

Diffstat:
M lexer.c | 203 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M lexer.h | 19 +++++++------------
M repl.c | 45 +++++++++------------------------------------
3 files changed, 114 insertions(+), 153 deletions(-)

diff --git a/lexer.c b/lexer.c @@ -1,5 +1,6 @@ /* See LICENSE file for copyright and license details. */ #include <ctype.h> +#include <stdio.h> #include <stdlib.h> #include <string.h> #include "lexer.h" @@ -23,14 +24,14 @@ enum state { State_Illegal_Input, }; -struct lexerctx { - enum state state; - int ready; - const char *s; - const char *e; -}; +#define MAXTOKSIZE 256 -int +static enum state state; +static char *s; +static char *e; +static char buf[MAXTOKSIZE]; + +static int delim(int c) { return (c == ' ' || c == '\t' || c == '\n' || @@ -48,185 +49,192 @@ extractstring(struct tok *t) return strndup(t->s, t->e - t->s); } -void -freelexer(struct lexerctx *ctx) -{ - free(ctx); -} - struct tok -gettok(struct lexerctx *ctx, const char *buf, size_t len) +gettok(FILE *in) { struct tok tok; char *tmp; + int c; - if (ctx->ready == 0) { - ctx->s = buf; - ctx->e = buf; - ctx->ready = 1; - } - ctx->state = State_Se; - while (ctx->e < &buf[len]) { - switch (ctx->state) { +again: + state = State_Se; + s = e = buf; + while ((c = getc(in)) != EOF) { + *e = c; + switch (state) { case State_Se: - if (*ctx->e == ' ' || *ctx->e == '\t' || - *ctx->e == '\n') - break; - if (isalpha(*ctx->e) != 0) - ctx->state = State_Identifier; - else if (*ctx->e == '#') - ctx->state = State_Probable_Boolean; - else if (isdigit(*ctx->e) != 0) - ctx->state = State_Number; - else if (*ctx->e == '-' || *ctx->e == '+') - ctx->state = State_Signed_Number; - else if (*ctx->e == '"') - ctx->state = State_Probable_String; - else if (*ctx->e == '(') - ctx->state = State_Lparen; - else if (*ctx->e == ')') - ctx->state = State_Rparen; - else if (*ctx->e == '\'') - ctx->state = State_Quote; - else if (*ctx->e == '.') - ctx->state = State_Dot; - else if (*ctx->e == ';') - ctx->state = State_Comment; + if (*e == ' ' || *e == '\t' || *e == '\n') + goto again; + if (isalpha(*e) != 0) + state = State_Identifier; + else if (*e == '#') + state = State_Probable_Boolean; + else if (isdigit(*e) != 0) + state = 
State_Number; + else if (*e == '-' || *e == '+') + state = State_Signed_Number; + else if (*e == '"') + state = State_Probable_String; + else if (*e == '(') + state = State_Lparen; + else if (*e == ')') + state = State_Rparen; + else if (*e == '\'') + state = State_Quote; + else if (*e == '.') + state = State_Dot; + else if (*e == ';') + state = State_Comment; else - ctx->state = State_Illegal_Input; - if (ctx->state != State_Se) - ctx->s = ctx->e; + state = State_Illegal_Input; break; case State_Identifier: - if (delim(*ctx->e) != 0) { + if (delim(*e) != 0) { tok.type = Identifier; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; } - if (isalnum(*ctx->e) == 0) { + if (isalnum(*e) == 0) { tok.type = Error; tok.s = "malformed identifier"; tok.e = NULL; + ungetc(*e, in); return tok; } break; case State_Probable_Boolean: - if (*ctx->e == 'f' || *ctx->e == 't') - ctx->state = State_Boolean; - else if (*ctx->e == '\\') - ctx->state = State_Probable_Character; + if (*e == 'f' || *e == 't') + state = State_Boolean; + else if (*e == '\\') + state = State_Probable_Character; else { tok.type = Error; tok.s = "not a boolean or a character"; tok.e = NULL; + ungetc(*e, in); return tok; } break; case State_Boolean: - if (delim(*ctx->e) != 0) { + if (delim(*e) != 0) { tok.type = Boolean; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; } tok.type = Error; tok.s = "missing delimiter after boolean"; tok.e = NULL; + ungetc(*e, in); return tok; case State_Number: - if (delim(*ctx->e) != 0) { + if (delim(*e) != 0) { tok.type = Number; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; } - if (isdigit(*ctx->e) == 0) { + if (isdigit(*e) == 0) { tok.type = Error; tok.s = "not a number"; tok.e = NULL; + ungetc(*e, in); return tok; } break; case State_Signed_Number: - if (isdigit(*ctx->e) == 0) { + if (isdigit(*e) == 0) { tok.type = Error; tok.s = "not a number"; 
tok.e = NULL; + ungetc(*e, in); return tok; } - ctx->state = State_Number; + state = State_Number; break; case State_Probable_Character: - if (isalpha(*ctx->e) != 0) { - ctx->state = State_Character; + if (isalpha(*e) != 0) { + state = State_Character; } else { tok.type = Error; tok.s = "expected character constant"; tok.e = NULL; + ungetc(*e, in); return tok; } break; case State_Character: - if (delim(*ctx->e) != 0) { + if (delim(*e) != 0) { tok.type = Character; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; } tok.type = Error; tok.s = "missing delimiter after character constant"; tok.e = NULL; + ungetc(*e, in); return tok; - break; case State_Probable_String: - if (*ctx->e == '"') - ctx->state = String; + if (*e == '"') + state = String; break; case State_String: tok.type = String; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; case State_Lparen: tok.type = Lparen; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; case State_Rparen: tok.type = Rparen; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; case State_Quote: tok.type = Quote; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; case State_Dot: tok.type = Dot; - tok.s = ctx->s; - tok.e = ctx->e; + tok.s = s; + tok.e = e; + ungetc(*e, in); return tok; case State_Comment: - tmp = strchr(ctx->s, '\n'); + tmp = strchr(s, '\n'); if (tmp) { - ctx->state = State_Se; - ctx->s = tmp; + state = State_Se; + s = tmp; } break; case State_Illegal_Input: tok.type = Error; tok.s = "illegal input"; tok.e = NULL; + ungetc(*e, in); return tok; default: tok.type = Error; tok.s = "internal lex error"; tok.e = NULL; + ungetc(*e, in); return tok; } - ctx->e++; + e++; } tok.type = Eof; tok.s = "reached the end-of-file"; @@ -234,24 +242,9 @@ gettok(struct lexerctx *ctx, const char *buf, size_t len) return tok; } 
-struct lexerctx * +int initlexer(void) { - struct lexerctx *ctx; - - ctx = malloc(sizeof(*ctx)); - if (!ctx) - return NULL; - ctx->state = State_Se; - ctx->ready = 0; - ctx->s = NULL; - ctx->e = NULL; - return ctx; -} - -void -resetlexer(struct lexerctx *ctx) -{ - ctx->ready = 0; - ctx->state = State_Se; + state = State_Se; + return 0; } diff --git a/lexer.h b/lexer.h @@ -1,5 +1,5 @@ /* See LICENSE file for copyright and license details. */ -typedef enum { +enum toktype { Error = -2, Eof = -1, Identifier, @@ -11,19 +11,14 @@ typedef enum { Rparen, Quote, Dot -} toktype; +}; struct tok { - toktype type; - const char *s; - const char *e; + enum toktype type; + char *s; + char *e; }; -struct lexerctx; - -int delim(int); char *extractstring(struct tok *); -void freelexer(struct lexerctx *ctx); -struct tok gettok(struct lexerctx *, const char *, size_t); -struct lexerctx *initlexer(void); -void resetlexer(struct lexerctx *); +struct tok gettok(FILE *fp); +int initlexer(void); diff --git a/repl.c b/repl.c @@ -1,52 +1,25 @@ /* See LICENSE file for copyright and license details. 
*/ #include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <unistd.h> #include "lexer.h" #include "debug.h" -#include "util.h" static void -lexertest(char *buf) +lexertest(FILE *in) { struct tok tok; - struct lexerctx *ctx; - ctx = initlexer(); - if (ctx) { - resetlexer(ctx); - do { - tok = gettok(ctx, buf, strlen(buf)); - printtok(&tok); - putchar('\n'); - } while (tok.type != Eof && tok.type != Error); - freelexer(ctx); - } + initlexer(); + do { + tok = gettok(in); + printtok(&tok); + putchar('\n'); + fflush(stdout); + } while (tok.type != Eof && tok.type != Error); } int main(void) { - char *buf = NULL; - size_t sz = 0; - int interactive = isatty(fileno(stdin)); - - if (interactive == 1) - puts("Welcome to iris, use ^C to quit"); - do { - if (interactive == 1) { - printf("> "); - fflush(stdout); - } - if (afgets(&buf, &sz, stdin)) - lexertest(buf); - if (ferror(stdin)) { - fprintf(stderr, "input error\n"); - return EXIT_FAILURE; - } - fflush(stdout); - } while (interactive == 1); - free(buf); + lexertest(stdin); return 0; }