iris

small scheme interpreter
git clone git://git.2f30.org/iris
Log | Files | Refs | LICENSE

commit dc38f21d1a6cfb851b8ac452f0134bbb2223574e
Author: sin <sin@2f30.org>
Date:   Fri,  9 May 2014 13:40:24 +0100

Initial commit

Diffstat:
A lexer.c | 246 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lexer.h |  60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A repl.c  |  32 ++++++++++++++++++++++++++++++++
3 files changed, 338 insertions(+), 0 deletions(-)

diff --git a/lexer.c b/lexer.c
@@ -0,0 +1,246 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Build a token of the given type. A NULL e marks s as a NUL-terminated
+ * message rather than a span of the input buffer.
+ */
+static struct tok
+mktok(toktype type, const char *s, const char *e)
+{
+	struct tok tok;
+
+	tok.type = type;
+	tok.s = s;
+	tok.e = e;
+	return tok;
+}
+
+/* Return non-zero if c ends a token: space, tab, newline or a parenthesis. */
+int
+delim(int c)
+{
+	return (c == ' ' || c == '\t' || c == '\n' ||
+	        c == '(' || c == ')');
+}
+
+/*
+ * Return a newly allocated copy of the token text, or NULL if the token has
+ * no text or the allocation fails. The caller must free() the result.
+ */
+char *
+extractstring(struct tok *tok)
+{
+	if (tok->s == NULL)
+		return NULL;
+	if (tok->e == NULL)
+		return strdup(tok->s);
+	return strndup(tok->s, tok->e - tok->s);
+}
+
+/*
+ * Return the next token of buf[0..len). ctx keeps the scan position between
+ * calls; call resetlexer() on it before passing a new buffer. The text of
+ * Error and Eof tokens is a static message (e == NULL); any other token
+ * points into buf.
+ */
+struct tok
+gettok(struct lexerctx *ctx, const char *buf, size_t len)
+{
+	const char *end = buf + len;
+	const char *nl;
+	state state = State_Se;
+	int c;
+
+	if (ctx->ready == 0) {
+		ctx->s = buf;
+		ctx->e = buf;
+		ctx->ready = 1;
+	}
+
+	while (ctx->e < end) {
+		/* the <ctype.h> functions require an unsigned char value */
+		c = (unsigned char)*ctx->e;
+		switch (state) {
+		case State_Se:
+			/* whitespace only separates tokens */
+			if (isspace(c) != 0)
+				break;
+			if (isalpha(c) != 0)
+				state = State_Identifier;
+			else if (c == '#')
+				state = State_Probable_Boolean;
+			else if (isdigit(c) != 0)
+				state = State_Number;
+			else if (c == '"')
+				state = State_Probable_String;
+			else if (c == '(')
+				state = State_Lparen;
+			else if (c == ')')
+				state = State_Rparen;
+			else if (c == '\'')
+				state = State_Quote;
+			else if (c == '.')
+				state = State_Dot;
+			else if (c == ';')
+				state = State_Comment;
+			else
+				state = State_Unknown_Token;
+			ctx->s = ctx->e;
+			break;
+		case State_Identifier:
+			if (delim(c) != 0)
+				return mktok(Identifier, ctx->s, ctx->e);
+			if (isalnum(c) == 0)
+				return mktok(Error, "malformed identifier", NULL);
+			break;
+		case State_Probable_Boolean:
+			if (c == 'f' || c == 't')
+				state = State_Boolean;
+			else if (c == '\\')
+				state = State_Probable_Character;
+			else
+				return mktok(Error, "not a boolean or a character", NULL);
+			break;
+		case State_Boolean:
+			if (delim(c) != 0)
+				return mktok(Boolean, ctx->s, ctx->e);
+			return mktok(Error, "missing delimiter after boolean", NULL);
+		case State_Number:
+			if (delim(c) != 0)
+				return mktok(Number, ctx->s, ctx->e);
+			if (isdigit(c) == 0)
+				return mktok(Error, "not a number", NULL);
+			break;
+		case State_Probable_Character:
+			if (isalpha(c) == 0)
+				return mktok(Error, "expected character constant", NULL);
+			state = State_Character;
+			break;
+		case State_Character:
+			if (delim(c) != 0)
+				return mktok(Character, ctx->s, ctx->e);
+			return mktok(Error, "missing delimiter after character constant", NULL);
+		case State_Probable_String:
+			/* closing quote: the token is returned on the next step */
+			if (c == '"')
+				state = State_String;
+			break;
+		case State_String:
+			return mktok(String, ctx->s, ctx->e);
+		case State_Lparen:
+			return mktok(Lparen, ctx->s, ctx->e);
+		case State_Rparen:
+			return mktok(Rparen, ctx->s, ctx->e);
+		case State_Quote:
+			/* e is NULL because the text is a message, not part of buf */
+			return mktok(Error, "quote lexing not implemented yet", NULL);
+		case State_Dot:
+			return mktok(Dot, ctx->s, ctx->e);
+		case State_Comment:
+			/* skip to the newline; the increment below steps over it */
+			nl = memchr(ctx->e, '\n', (size_t)(end - ctx->e));
+			if (nl == NULL) {
+				ctx->e = end;
+				continue;
+			}
+			ctx->e = nl;
+			state = State_Se;
+			break;
+		case State_Unknown_Token:
+			return mktok(Error, "unknown token", NULL);
+		default:
+			return mktok(Error, "internal lex error", NULL);
+		}
+		ctx->e++;
+	}
+
+	/* the buffer ended: return a token that was complete but still pending */
+	switch (state) {
+	case State_Identifier:
+		return mktok(Identifier, ctx->s, ctx->e);
+	case State_Boolean:
+		return mktok(Boolean, ctx->s, ctx->e);
+	case State_Number:
+		return mktok(Number, ctx->s, ctx->e);
+	case State_Character:
+		return mktok(Character, ctx->s, ctx->e);
+	case State_String:
+		return mktok(String, ctx->s, ctx->e);
+	case State_Lparen:
+		return mktok(Lparen, ctx->s, ctx->e);
+	case State_Rparen:
+		return mktok(Rparen, ctx->s, ctx->e);
+	case State_Dot:
+		return mktok(Dot, ctx->s, ctx->e);
+	case State_Quote:
+		return mktok(Error, "quote lexing not implemented yet", NULL);
+	case State_Unknown_Token:
+		return mktok(Error, "unknown token", NULL);
+	default:
+		return mktok(Eof, "reached the end-of-file", NULL);
+	}
+}
+
+/*
+ * Print the token type and text to stdout without a trailing newline.
+ * Prints nothing if the token text cannot be extracted.
+ */
+void
+printtok(struct tok *tok)
+{
+	char *s;
+
+	s = extractstring(tok);
+	if (!s)
+		return;
+	switch (tok->type) {
+	case Error:
+		printf("Error: '%s'", s);
+		break;
+	case Eof:
+		printf("Eof");
+		break;
+	case Identifier:
+		printf("Identifier: '%s'", s);
+		break;
+	case Boolean:
+		printf("Boolean: '%s'", s);
+		break;
+	case Number:
+		printf("Number: '%s'", s);
+		break;
+	case Character:
+		printf("Character: '%s'", s);
+		break;
+	case String:
+		printf("String: '%s'", s);
+		break;
+	case Lparen:
+		printf("Lparen: '%s'", s);
+		break;
+	case Rparen:
+		printf("Rparen: '%s'", s);
+		break;
+	case Quote:
+		printf("Quote: '%s'", s);
+		break;
+	case Dot:
+		printf("Dot: '%s'", s);
+		break;
+	default:
+		printf("Unknown token type: %d", tok->type);
+		break;
+	}
+	free(s);
+}
+
+/* Make the next gettok() call start scanning at the beginning of its buffer. */
+void
+resetlexer(struct lexerctx *ctx)
+{
+	ctx->ready = 0;
+}
diff --git a/lexer.h b/lexer.h
@@ -0,0 +1,60 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stddef.h>
+
+/* Token types reported by gettok(). */
+typedef enum {
+	Error = -2,
+	Eof = -1,
+	Identifier,
+	Boolean,
+	Number,
+	Character,
+	String,
+	Lparen,
+	Rparen,
+	Quote,
+	Dot
+} toktype;
+
+/* States of the gettok() state machine. */
+typedef enum {
+	State_Se,
+	State_Identifier,
+	State_Probable_Boolean,
+	State_Boolean,
+	State_Number,
+	State_Probable_Character,
+	State_Character,
+	State_Probable_String,
+	State_String,
+	State_Lparen,
+	State_Rparen,
+	State_Quote,
+	State_Dot,
+	State_Comment,
+	State_Unknown_Token,
+} state;
+
+/* A token; its text is [s, e), or the NUL-terminated s when e is NULL. */
+struct tok {
+	toktype type;
+	const char *s;
+	const char *e;
+};
+
+/* Scan position kept between gettok() calls on the same buffer. */
+struct lexerctx {
+	int ready;
+	const char *s;
+	const char *e;
+};
+
+int delim(int);
+char *extractstring(struct tok *);
+struct tok gettok(struct lexerctx *, const char *, size_t);
+void printtok(struct tok *);
+void resetlexer(struct lexerctx *);
+
+#endif
diff --git a/repl.c b/repl.c
@@ -0,0 +1,32 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/* Read lines from stdin and print the tokens of each one until end of input. */
+int
+main(void)
+{
+	char buf[BUFSIZ];
+	struct tok tok;
+	struct lexerctx ctx;
+
+	for (;;) {
+		printf("> ");
+		fflush(stdout);
+		if (fgets(buf, sizeof(buf), stdin) == NULL)
+			break;
+		resetlexer(&ctx);
+		do {
+			tok = gettok(&ctx, buf, strlen(buf));
+			printtok(&tok);
+			putchar('\n');
+		} while (tok.type != Eof && tok.type != Error);
+	}
+	/* fgets() returns NULL both on a read error and at end of input */
+	if (ferror(stdin)) {
+		fprintf(stderr, "input error\n");
+		return EXIT_FAILURE;
+	}
+	return EXIT_SUCCESS;
+}