commit dc38f21d1a6cfb851b8ac452f0134bbb2223574e
Author: sin <sin@2f30.org>
Date: Fri, 9 May 2014 13:40:24 +0100
Initial commit
Diffstat:
A | lexer.c | | | 245 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | lexer.h | | | 49 | +++++++++++++++++++++++++++++++++++++++++++++++++ |
A | repl.c | | | 31 | +++++++++++++++++++++++++++++++ |
3 files changed, 325 insertions(+), 0 deletions(-)
diff --git a/lexer.c b/lexer.c
@@ -0,0 +1,245 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Tell whether c ends a token.
+ *
+ * Returns 1 for a space, a tab, a newline or either parenthesis and
+ * 0 for every other value.
+ */
+int
+delim(int c)
+{
+	switch (c) {
+	case ' ':
+	case '\t':
+	case '\n':
+	case '(':
+	case ')':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Copy the text of a token into freshly allocated memory.
+ *
+ * When tok->e is NULL, tok->s is taken to be a NUL-terminated string and
+ * is duplicated whole; otherwise the bytes from tok->s up to, but not
+ * including, tok->e are duplicated.
+ *
+ * Returns NULL when tok->s is NULL or when the allocation fails. The
+ * caller releases the result with free().
+ */
+char *
+extractstring(struct tok *tok)
+{
+	const char *start = tok->s;
+	const char *stop = tok->e;
+
+	if (start == NULL)
+		return NULL;
+	return stop != NULL ? strndup(start, stop - start) : strdup(start);
+}
+
+/* Build a token of the given type covering the input range [s, e). */
+static struct tok
+mktok(toktype type, const char *s, const char *e)
+{
+	struct tok tok;
+
+	tok.type = type;
+	tok.s = s;
+	tok.e = e;
+	return tok;
+}
+
+/*
+ * Build an Error token. msg is a NUL-terminated string, so the end
+ * pointer stays NULL and extractstring() duplicates the message whole.
+ */
+static struct tok
+errtok(const char *msg)
+{
+	return mktok(Error, msg, NULL);
+}
+
+/*
+ * Return the next token found in buf[0 .. len).
+ *
+ * ctx keeps the scan position between calls. The first call after
+ * resetlexer() starts at buf; every later call continues where the
+ * previous one stopped, so the same buf and len must be passed until the
+ * context is reset.
+ *
+ * For a real token, tok.s and tok.e point into buf and delimit the
+ * half-open range [s, e); strings keep their surrounding quotes. For
+ * Error and Eof, tok.s is a static message and tok.e is NULL.
+ *
+ * An Error does not advance past the offending input; callers should
+ * stop and reset the context rather than call again.
+ */
+struct tok
+gettok(struct lexerctx *ctx, const char *buf, size_t len)
+{
+	const char *end = buf + len;
+	const char *nl;
+	state st = State_Se;
+	int c;
+
+	if (ctx->ready == 0) {
+		ctx->s = buf;
+		ctx->e = buf;
+		ctx->ready = 1;
+	}
+
+	while (ctx->e < end) {
+		/* <ctype.h> functions need an unsigned char value */
+		c = (unsigned char)*ctx->e;
+		switch (st) {
+		case State_Se:
+			/* blanks between tokens carry no meaning: skip */
+			if (c == ' ' || c == '\t' || c == '\n')
+				break;
+			if (isalpha(c) != 0)
+				st = State_Identifier;
+			else if (c == '#')
+				st = State_Probable_Boolean;
+			else if (isdigit(c) != 0)
+				st = State_Number;
+			else if (c == '"')
+				st = State_Probable_String;
+			else if (c == '(')
+				st = State_Lparen;
+			else if (c == ')')
+				st = State_Rparen;
+			else if (c == '\'')
+				st = State_Quote;
+			else if (c == '.')
+				st = State_Dot;
+			else if (c == ';')
+				st = State_Comment;
+			else
+				st = State_Unknown_Token;
+			/* a token (or a comment) starts here */
+			ctx->s = ctx->e;
+			break;
+		case State_Identifier:
+			if (delim(c) != 0)
+				return mktok(Identifier, ctx->s, ctx->e);
+			if (isalnum(c) == 0)
+				return errtok("malformed identifier");
+			break;
+		case State_Probable_Boolean:
+			if (c == 'f' || c == 't')
+				st = State_Boolean;
+			else if (c == '\\')
+				st = State_Probable_Character;
+			else
+				return errtok("not a boolean or a character");
+			break;
+		case State_Boolean:
+			if (delim(c) != 0)
+				return mktok(Boolean, ctx->s, ctx->e);
+			return errtok("missing delimiter after boolean");
+		case State_Number:
+			if (delim(c) != 0)
+				return mktok(Number, ctx->s, ctx->e);
+			if (isdigit(c) == 0)
+				return errtok("not a number");
+			break;
+		case State_Probable_Character:
+			if (isalpha(c) == 0)
+				return errtok("expected character constant");
+			st = State_Character;
+			break;
+		case State_Character:
+			if (delim(c) != 0)
+				return mktok(Character, ctx->s, ctx->e);
+			return errtok("missing delimiter after character constant");
+		case State_Probable_String:
+			/* State_String, not the token kind String */
+			if (c == '"')
+				st = State_String;
+			break;
+		case State_String:
+			return mktok(String, ctx->s, ctx->e);
+		case State_Lparen:
+			return mktok(Lparen, ctx->s, ctx->e);
+		case State_Rparen:
+			return mktok(Rparen, ctx->s, ctx->e);
+		case State_Quote:
+			return errtok("quote lexing not implemented yet");
+		case State_Dot:
+			return mktok(Dot, ctx->s, ctx->e);
+		case State_Comment:
+			/*
+			 * Jump to the end of the line. The search is bounded
+			 * by len because buf need not be NUL-terminated.
+			 */
+			nl = memchr(ctx->e, '\n', (size_t)(end - ctx->e));
+			if (nl == NULL) {
+				/* the comment runs to the end of the input */
+				ctx->e = end;
+				continue;
+			}
+			ctx->e = nl;	/* the increment below steps over it */
+			st = State_Se;
+			break;
+		case State_Unknown_Token:
+			return errtok("unknown token");
+		default:
+			return errtok("internal lex error");
+		}
+		ctx->e++;
+	}
+
+	/*
+	 * The input ran out. A token that was only waiting for its
+	 * delimiter is complete, so hand it out instead of dropping it;
+	 * ctx->e now sits at the end, so the next call reports Eof.
+	 * A half-read token is reported as the same error it would have
+	 * produced had more input followed.
+	 */
+	switch (st) {
+	case State_Identifier:
+		return mktok(Identifier, ctx->s, ctx->e);
+	case State_Boolean:
+		return mktok(Boolean, ctx->s, ctx->e);
+	case State_Number:
+		return mktok(Number, ctx->s, ctx->e);
+	case State_Character:
+		return mktok(Character, ctx->s, ctx->e);
+	case State_String:
+		return mktok(String, ctx->s, ctx->e);
+	case State_Lparen:
+		return mktok(Lparen, ctx->s, ctx->e);
+	case State_Rparen:
+		return mktok(Rparen, ctx->s, ctx->e);
+	case State_Dot:
+		return mktok(Dot, ctx->s, ctx->e);
+	case State_Probable_Boolean:
+		return errtok("not a boolean or a character");
+	case State_Probable_Character:
+		return errtok("expected character constant");
+	case State_Probable_String:
+		return errtok("unterminated string");
+	case State_Quote:
+		return errtok("quote lexing not implemented yet");
+	case State_Unknown_Token:
+		return errtok("unknown token");
+	default:
+		break;
+	}
+
+	return mktok(Eof, "reached the end-of-file", NULL);
+}
+
+/*
+ * Write a description of tok to stdout, without a trailing newline.
+ *
+ * The token text comes from extractstring(); when that yields NULL
+ * nothing at all is printed.
+ */
+void
+printtok(struct tok *tok)
+{
+	char *text = extractstring(tok);
+	toktype kind;
+
+	if (text == NULL)
+		return;
+
+	kind = tok->type;
+	if (kind == Error)
+		printf("Error: '%s'", text);
+	else if (kind == Eof)
+		printf("Eof");
+	else if (kind == Identifier)
+		printf("Identifier: '%s'", text);
+	else if (kind == Boolean)
+		printf("Boolean: '%s'", text);
+	else if (kind == Number)
+		printf("Number: '%s'", text);
+	else if (kind == Character)
+		printf("Character: '%s'", text);
+	else if (kind == String)
+		printf("String: '%s'", text);
+	else if (kind == Lparen)
+		printf("Lparen: '%s'", text);
+	else if (kind == Rparen)
+		printf("Rparen: '%s'", text);
+	else if (kind == Quote)
+		printf("Quote: '%s'", text);
+	else if (kind == Dot)
+		printf("Dot: '%s'", text);
+	else
+		printf("Unknown token type: %d", tok->type);
+
+	/* extractstring() handed over an allocated copy */
+	free(text);
+}
+
+/*
+ * Put ctx back into its initial state so that the next gettok() call
+ * starts scanning at the beginning of the buffer it is given.
+ *
+ * The saved positions are cleared as well: they point into the previous
+ * buffer, which the caller may already have reused or released.
+ */
+void
+resetlexer(struct lexerctx *ctx)
+{
+	ctx->ready = 0;
+	ctx->s = NULL;
+	ctx->e = NULL;
+}
diff --git a/lexer.h b/lexer.h
@@ -0,0 +1,49 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <stddef.h>	/* size_t */
+
+/*
+ * Kinds of token handed out by gettok(). The two negative values are
+ * not real tokens: they report a lexing error and the end of the input.
+ */
+typedef enum {
+	Error = -2,
+	Eof = -1,
+	Identifier,
+	Boolean,
+	Number,
+	Character,
+	String,
+	Lparen,
+	Rparen,
+	Quote,
+	Dot
+} toktype;
+
+/*
+ * States of the scanner inside gettok(). Every call starts in State_Se,
+ * which classifies the next character and picks one of the other states.
+ */
+typedef enum {
+	State_Se,
+	State_Identifier,
+	State_Probable_Boolean,
+	State_Boolean,
+	State_Number,
+	State_Probable_Character,
+	State_Character,
+	State_Probable_String,
+	State_String,
+	State_Lparen,
+	State_Rparen,
+	State_Quote,
+	State_Dot,
+	State_Comment,
+	State_Unknown_Token,
+} state;
+
+/*
+ * A token. s points at its first character; e points one past its last
+ * character, or is NULL when s is a NUL-terminated string (this is how
+ * extractstring() tells the two cases apart).
+ */
+struct tok {
+	toktype type;
+	const char *s;
+	const char *e;
+};
+
+/*
+ * Scan position kept between gettok() calls.
+ * ready is 0 until gettok() has picked up a buffer, s marks where the
+ * current token started and e is the next character to examine.
+ */
+struct lexerctx {
+	int ready;
+	const char *s;
+	const char *e;
+};
+
+/* Non-zero if c is a space, tab, newline or parenthesis. */
+int delim(int c);
+/* Allocated copy of the token text, or NULL; release with free(). */
+char *extractstring(struct tok *tok);
+/* Next token of buf[0 .. len), continuing from the position in ctx. */
+struct tok gettok(struct lexerctx *ctx, const char *buf, size_t len);
+/* Print a description of tok to stdout, without a newline. */
+void printtok(struct tok *tok);
+/* Make the next gettok() call start from the beginning of its buffer. */
+void resetlexer(struct lexerctx *ctx);
+
+#endif /* LEXER_H */
diff --git a/repl.c b/repl.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lexer.h"
+
+/*
+ * Read lines from stdin and print the tokens found on each one.
+ *
+ * Lexing of a line stops at the first Eof or Error token. The program
+ * ends with EXIT_SUCCESS when stdin reaches end-of-file and with
+ * EXIT_FAILURE when reading fails.
+ */
+int
+main(void)
+{
+	char buf[BUFSIZ];
+	struct tok tok;
+	struct lexerctx ctx;
+
+	for (;;) {
+		printf("> ");
+		fflush(stdout);
+		/*
+		 * NULL means end-of-file or a read error; looping on would
+		 * print the prompt forever.
+		 */
+		if (fgets(buf, sizeof(buf), stdin) == NULL)
+			break;
+		resetlexer(&ctx);
+		do {
+			tok = gettok(&ctx, buf, strlen(buf));
+			printtok(&tok);
+			putchar('\n');
+		} while (tok.type != Eof && tok.type != Error);
+	}
+
+	if (ferror(stdin)) {
+		fprintf(stderr, "input error\n");
+		return EXIT_FAILURE;
+	}
+	/* finish the line that holds the last prompt */
+	putchar('\n');
+	return EXIT_SUCCESS;
+}