Initial commit - iris - small scheme interpreter

commit dc38f21d1a6cfb851b8ac452f0134bbb2223574e
Author: sin <sin@2f30.org>
Date:   Fri,  9 May 2014 13:40:24 +0100

Initial commit

Diffstat:
A lexer.c  | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A lexer.h  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
A repl.c  | 31 +++++++++++++++++++++++++++++++

3 files changed, 325 insertions(+), 0 deletions(-)
diff --git a/lexer.c b/lexer.c
@@ -0,0 +1,245 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Report whether c ends a token: a blank, a tab, a newline or either
+ * parenthesis.  Returns 1 for a delimiter and 0 for anything else.
+ */
+int
+delim(int c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '(':
+	case ')':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Copy the text of a token into freshly allocated storage.
+ *
+ * A token without a start pointer yields NULL.  A token without an end
+ * pointer is treated as a NUL-terminated string and duplicated whole;
+ * otherwise the bytes in [tok->s, tok->e) are duplicated.  The copy is
+ * made with strdup()/strndup(), so it can also be NULL when allocation
+ * fails, and the caller releases it with free().
+ */
+char *
+extractstring(struct tok *tok)
+{
+	const char *start = tok->s;
+	const char *stop;
+
+	if (start == NULL)
+		return NULL;
+
+	stop = tok->e;
+	return stop != NULL ? strndup(start, stop - start) : strdup(start);
+}
+
+/*
+ * Assemble a token value.  For ordinary tokens s and e bound the lexeme
+ * inside the input buffer (e is one past its last byte); for Error and
+ * Eof tokens s is a NUL-terminated message and e is NULL.
+ */
+static struct tok
+mktok(toktype type, const char *s, const char *e)
+{
+	struct tok tok;
+
+	tok.type = type;
+	tok.s = s;
+	tok.e = e;
+	return tok;
+}
+
+/*
+ * Return the next token found in buf[0..len).
+ *
+ * ctx carries the scan position between calls.  On the first call after
+ * resetlexer() the position is set to the start of buf; later calls
+ * resume at ctx->e, so the same buf and len must be passed until the
+ * context is reset.  Every call restarts the state machine in State_Se.
+ *
+ * Whitespace between tokens and ';' comments (up to the end of the line)
+ * are skipped.  A token's s/e pointers refer into buf and stay valid only
+ * as long as buf does.  Error tokens carry a static message in s with e
+ * set to NULL and leave ctx->e on the offending byte.  Eof is returned
+ * once the buffer is exhausted.
+ */
+struct tok
+gettok(struct lexerctx *ctx, const char *buf, size_t len)
+{
+	const char *end = buf + len;
+	const char *nl;
+	state st = State_Se;
+	int c;
+
+	if (ctx->ready == 0) {
+		ctx->s = buf;
+		ctx->e = buf;
+		ctx->ready = 1;
+	}
+
+	while (ctx->e < end) {
+		/* <ctype.h> functions need an unsigned char value, not a
+		 * possibly negative plain char */
+		c = (unsigned char)*ctx->e;
+
+		switch (st) {
+		case State_Se:
+			if (isalpha(c))
+				st = State_Identifier;
+			else if (c == '#')
+				st = State_Probable_Boolean;
+			else if (isdigit(c))
+				st = State_Number;
+			else if (c == '"')
+				st = State_Probable_String;
+			else if (c == '(')
+				st = State_Lparen;
+			else if (c == ')')
+				st = State_Rparen;
+			else if (c == '\'')
+				st = State_Quote;
+			else if (c == '.')
+				st = State_Dot;
+			else if (c == ';')
+				st = State_Comment;
+			else if (isspace(c))
+				st = State_Se;	/* skip blanks between tokens */
+			else
+				st = State_Unknown_Token;
+			/* remember where the token (if any) begins */
+			if (st != State_Se)
+				ctx->s = ctx->e;
+			break;
+		case State_Identifier:
+			if (delim(c))
+				return mktok(Identifier, ctx->s, ctx->e);
+			if (!isalnum(c))
+				return mktok(Error, "malformed identifier", NULL);
+			break;
+		case State_Probable_Boolean:
+			if (c == 'f' || c == 't')
+				st = State_Boolean;
+			else if (c == '\\')
+				st = State_Probable_Character;
+			else
+				return mktok(Error,
+				    "not a boolean or a character", NULL);
+			break;
+		case State_Boolean:
+			if (delim(c))
+				return mktok(Boolean, ctx->s, ctx->e);
+			return mktok(Error, "missing delimiter after boolean",
+			    NULL);
+		case State_Number:
+			if (delim(c))
+				return mktok(Number, ctx->s, ctx->e);
+			if (!isdigit(c))
+				return mktok(Error, "not a number", NULL);
+			break;
+		case State_Probable_Character:
+			if (!isalpha(c))
+				return mktok(Error,
+				    "expected character constant", NULL);
+			st = State_Character;
+			break;
+		case State_Character:
+			if (delim(c))
+				return mktok(Character, ctx->s, ctx->e);
+			return mktok(Error,
+			    "missing delimiter after character constant", NULL);
+		case State_Probable_String:
+			/*
+			 * This must be the lexer state State_String, not the
+			 * token type String: the latter has the same numeric
+			 * value as State_Number.
+			 */
+			if (c == '"')
+				st = State_String;
+			break;
+		case State_String:
+			return mktok(String, ctx->s, ctx->e);
+		case State_Lparen:
+			return mktok(Lparen, ctx->s, ctx->e);
+		case State_Rparen:
+			return mktok(Rparen, ctx->s, ctx->e);
+		case State_Quote:
+			/* e stays NULL: s is a message, not buffer text */
+			return mktok(Error, "quote lexing not implemented yet",
+			    NULL);
+		case State_Dot:
+			return mktok(Dot, ctx->s, ctx->e);
+		case State_Comment:
+			/* jump to the end of the line, never past the buffer */
+			nl = memchr(ctx->e, '\n', (size_t)(end - ctx->e));
+			if (nl == NULL) {
+				/* comment runs to the end of the input; skip
+				 * the increment below so e stays in range */
+				ctx->e = end;
+				continue;
+			}
+			ctx->e = nl;
+			st = State_Se;
+			break;
+		case State_Unknown_Token:
+			return mktok(Error, "unknown token", NULL);
+		default:
+			return mktok(Error, "internal lex error", NULL);
+		}
+		ctx->e++;
+	}
+
+	/*
+	 * The buffer ran out.  Treat its end as a delimiter so that a token
+	 * touching the last byte is still reported; ctx->e now sits at the
+	 * end, so the following call falls straight through to Eof.
+	 * Unfinished constructs (an open string, a lone '#') still yield Eof.
+	 */
+	switch (st) {
+	case State_Identifier:
+		return mktok(Identifier, ctx->s, ctx->e);
+	case State_Boolean:
+		return mktok(Boolean, ctx->s, ctx->e);
+	case State_Number:
+		return mktok(Number, ctx->s, ctx->e);
+	case State_Character:
+		return mktok(Character, ctx->s, ctx->e);
+	case State_String:
+		return mktok(String, ctx->s, ctx->e);
+	case State_Lparen:
+		return mktok(Lparen, ctx->s, ctx->e);
+	case State_Rparen:
+		return mktok(Rparen, ctx->s, ctx->e);
+	case State_Dot:
+		return mktok(Dot, ctx->s, ctx->e);
+	case State_Quote:
+		return mktok(Error, "quote lexing not implemented yet", NULL);
+	case State_Unknown_Token:
+		return mktok(Error, "unknown token", NULL);
+	default:
+		break;
+	}
+
+	return mktok(Eof, "reached the end-of-file", NULL);
+}
+
+/*
+ * Write a one-line description of a token to standard output, without a
+ * trailing newline.  The token text is obtained through extractstring();
+ * when that yields NULL nothing at all is printed.
+ */
+void
+printtok(struct tok *tok)
+{
+	char *text = extractstring(tok);
+	toktype kind;
+
+	if (text == NULL)
+		return;
+
+	kind = tok->type;
+	if (kind == Error)
+		printf("Error: '%s'", text);
+	else if (kind == Eof)
+		printf("Eof");
+	else if (kind == Identifier)
+		printf("Identifier: '%s'", text);
+	else if (kind == Boolean)
+		printf("Boolean: '%s'", text);
+	else if (kind == Number)
+		printf("Number: '%s'", text);
+	else if (kind == Character)
+		printf("Character: '%s'", text);
+	else if (kind == String)
+		printf("String: '%s'", text);
+	else if (kind == Lparen)
+		printf("Lparen: '%s'", text);
+	else if (kind == Rparen)
+		printf("Rparen: '%s'", text);
+	else if (kind == Quote)
+		printf("Quote: '%s'", text);
+	else if (kind == Dot)
+		printf("Dot: '%s'", text);
+	else
+		printf("Unknown token type: %d", tok->type);
+
+	/* the copy belongs to us, see extractstring() */
+	free(text);
+}
+
+/*
+ * Mark the context as not ready.  The next gettok() call then points
+ * both ctx->s and ctx->e at the start of the buffer it is given, so
+ * scanning begins again from the first byte.
+ */
+void
+resetlexer(struct lexerctx *ctx)
+{
+	ctx->ready = 0;
+}
diff --git a/lexer.h b/lexer.h
@@ -0,0 +1,49 @@
+/*
+ * Kinds of token returned by gettok().  The two negative values are
+ * out-of-band results: their token carries a message instead of a
+ * lexeme.  The remaining values count up from 0.
+ */
+typedef enum {
+	Error = -2,	/* lexing failed; tok.s is a message */
+	Eof = -1,	/* input buffer exhausted; tok.s is a message */
+	Identifier,
+	Boolean,
+	Number,
+	Character,
+	String,
+	Lparen,
+	Rparen,
+	Quote,		/* gettok() currently reports an Error for ' instead */
+	Dot
+} toktype;
+
+/*
+ * States of the lexer's state machine in gettok().
+ *
+ * These constants start at 0 just like the non-negative toktype
+ * constants, so the two sets overlap numerically (for example
+ * State_Number and String share a value).  Using a toktype name where a
+ * state is meant compiles without complaint; keep the State_ prefix.
+ */
+typedef enum {
+	State_Se,			/* start state, between tokens */
+	State_Identifier,
+	State_Probable_Boolean,		/* seen '#': boolean or character */
+	State_Boolean,
+	State_Number,
+	State_Probable_Character,	/* seen '#' and a backslash */
+	State_Character,
+	State_Probable_String,		/* inside "...", closing quote not seen */
+	State_String,			/* closing quote seen */
+	State_Lparen,
+	State_Rparen,
+	State_Quote,
+	State_Dot,
+	State_Comment,			/* after ';' */
+	State_Unknown_Token,
+} state;
+
+/*
+ * One token.  For ordinary tokens s and e point into the buffer that was
+ * handed to gettok().  For Error and Eof tokens s is a NUL-terminated
+ * message; extractstring() copies s as a whole string when e is NULL.
+ */
+struct tok {
+	toktype type;
+	const char *s;	/* first byte of the lexeme, or a message */
+	const char *e;	/* one past the last byte, or NULL for messages */
+};
+
+/*
+ * Scan position kept between gettok() calls.  Only `ready` needs a
+ * defined value before first use (see resetlexer()); gettok() fills in
+ * the pointers itself while `ready` is 0.
+ */
+struct lexerctx {
+	int ready;	/* 0 until gettok() has initialised s and e */
+	const char *s;	/* start of the token being scanned */
+	const char *e;	/* current scan position */
+};
+
+/* Nonzero when the character is a blank, tab, newline or parenthesis. */
+int delim(int);
+/* Allocated copy of a token's text (free() it), or NULL. */
+char *extractstring(struct tok *);
+/* Next token from the given buffer of the given length. */
+struct tok gettok(struct lexerctx *, const char *, size_t);
+/* Print a description of the token to standard output. */
+void printtok(struct tok *);
+/* Make the next gettok() call start from the beginning of its buffer. */
+void resetlexer(struct lexerctx *);
diff --git a/repl.c b/repl.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Read-eval-print loop stub: prompt, read one line from standard input,
+ * and print every token the lexer finds in it, one per line, stopping at
+ * the first Eof or Error token.
+ *
+ * Returns EXIT_SUCCESS when standard input reaches end-of-file and
+ * EXIT_FAILURE after a read error.
+ */
+int
+main(void)
+{
+	char buf[BUFSIZ];
+	struct tok tok;
+	struct lexerctx ctx;
+
+	for (;;) {
+		printf("> ");
+		fflush(stdout);
+
+		if (fgets(buf, sizeof(buf), stdin) == NULL) {
+			if (ferror(stdin)) {
+				fprintf(stderr, "input error\n");
+				return EXIT_FAILURE;
+			}
+			/*
+			 * Plain end of input: leave, otherwise the loop would
+			 * spin forever printing prompts.
+			 */
+			return EXIT_SUCCESS;
+		}
+
+		/* each line is lexed from its first byte */
+		resetlexer(&ctx);
+		do {
+			tok = gettok(&ctx, buf, strlen(buf));
+			printtok(&tok);
+			putchar('\n');
+		} while (tok.type != Eof && tok.type != Error);
+	}
+}

	iris small scheme interpreter
	git clone git://git.2f30.org/iris
	Log | Files | Refs | LICENSE

A	lexer.c	|	245	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A	lexer.h	|	49	+++++++++++++++++++++++++++++++++++++++++++++++++
A	repl.c	|	31	+++++++++++++++++++++++++++++++