commit 957be47ca3edb0e5d2365360a479d91190db0fc2
parent 60d4c2eb9824194b7c1b4a4dda537080f6e6ad25
Author: sin <sin@2f30.org>
Date: Sun, 11 May 2014 14:39:38 +0100
Re-organize the lexer
Still some things left to do.
Diffstat:
M lexer.c | 203 ++++++++++++++++++++++++++++++++++++++-----------------------------------------
M lexer.h |  19 +++++++------------
M repl.c  |  45 +++++++++------------------------------------
3 files changed, 114 insertions(+), 153 deletions(-)
diff --git a/lexer.c b/lexer.c
@@ -1,5 +1,6 @@
/* See LICENSE file for copyright and license details. */
#include <ctype.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lexer.h"
@@ -23,14 +24,14 @@ enum state {
State_Illegal_Input,
};
-struct lexerctx {
- enum state state;
- int ready;
- const char *s;
- const char *e;
-};
+#define MAXTOKSIZE 256
-int
+static enum state state;
+static char *s;
+static char *e;
+static char buf[MAXTOKSIZE];
+
+static int
delim(int c)
{
return (c == ' ' || c == '\t' || c == '\n' ||
@@ -48,185 +49,192 @@ extractstring(struct tok *t)
return strndup(t->s, t->e - t->s);
}
-void
-freelexer(struct lexerctx *ctx)
-{
- free(ctx);
-}
-
struct tok
-gettok(struct lexerctx *ctx, const char *buf, size_t len)
+gettok(FILE *in)
{
struct tok tok;
char *tmp;
+ int c;
- if (ctx->ready == 0) {
- ctx->s = buf;
- ctx->e = buf;
- ctx->ready = 1;
- }
- ctx->state = State_Se;
- while (ctx->e < &buf[len]) {
- switch (ctx->state) {
+again:
+ state = State_Se;
+ s = e = buf;
+ while ((c = getc(in)) != EOF) {
+ *e = c;
+ switch (state) {
case State_Se:
- if (*ctx->e == ' ' || *ctx->e == '\t' ||
- *ctx->e == '\n')
- break;
- if (isalpha(*ctx->e) != 0)
- ctx->state = State_Identifier;
- else if (*ctx->e == '#')
- ctx->state = State_Probable_Boolean;
- else if (isdigit(*ctx->e) != 0)
- ctx->state = State_Number;
- else if (*ctx->e == '-' || *ctx->e == '+')
- ctx->state = State_Signed_Number;
- else if (*ctx->e == '"')
- ctx->state = State_Probable_String;
- else if (*ctx->e == '(')
- ctx->state = State_Lparen;
- else if (*ctx->e == ')')
- ctx->state = State_Rparen;
- else if (*ctx->e == '\'')
- ctx->state = State_Quote;
- else if (*ctx->e == '.')
- ctx->state = State_Dot;
- else if (*ctx->e == ';')
- ctx->state = State_Comment;
+ if (*e == ' ' || *e == '\t' || *e == '\n')
+ goto again;
+ if (isalpha(*e) != 0)
+ state = State_Identifier;
+ else if (*e == '#')
+ state = State_Probable_Boolean;
+ else if (isdigit(*e) != 0)
+ state = State_Number;
+ else if (*e == '-' || *e == '+')
+ state = State_Signed_Number;
+ else if (*e == '"')
+ state = State_Probable_String;
+ else if (*e == '(')
+ state = State_Lparen;
+ else if (*e == ')')
+ state = State_Rparen;
+ else if (*e == '\'')
+ state = State_Quote;
+ else if (*e == '.')
+ state = State_Dot;
+ else if (*e == ';')
+ state = State_Comment;
else
- ctx->state = State_Illegal_Input;
- if (ctx->state != State_Se)
- ctx->s = ctx->e;
+ state = State_Illegal_Input;
break;
case State_Identifier:
- if (delim(*ctx->e) != 0) {
+ if (delim(*e) != 0) {
tok.type = Identifier;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
}
- if (isalnum(*ctx->e) == 0) {
+ if (isalnum(*e) == 0) {
tok.type = Error;
tok.s = "malformed identifier";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
}
break;
case State_Probable_Boolean:
- if (*ctx->e == 'f' || *ctx->e == 't')
- ctx->state = State_Boolean;
- else if (*ctx->e == '\\')
- ctx->state = State_Probable_Character;
+ if (*e == 'f' || *e == 't')
+ state = State_Boolean;
+ else if (*e == '\\')
+ state = State_Probable_Character;
else {
tok.type = Error;
tok.s = "not a boolean or a character";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
}
break;
case State_Boolean:
- if (delim(*ctx->e) != 0) {
+ if (delim(*e) != 0) {
tok.type = Boolean;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
}
tok.type = Error;
tok.s = "missing delimiter after boolean";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
case State_Number:
- if (delim(*ctx->e) != 0) {
+ if (delim(*e) != 0) {
tok.type = Number;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
}
- if (isdigit(*ctx->e) == 0) {
+ if (isdigit(*e) == 0) {
tok.type = Error;
tok.s = "not a number";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
}
break;
case State_Signed_Number:
- if (isdigit(*ctx->e) == 0) {
+ if (isdigit(*e) == 0) {
tok.type = Error;
tok.s = "not a number";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
}
- ctx->state = State_Number;
+ state = State_Number;
break;
case State_Probable_Character:
- if (isalpha(*ctx->e) != 0) {
- ctx->state = State_Character;
+ if (isalpha(*e) != 0) {
+ state = State_Character;
} else {
tok.type = Error;
tok.s = "expected character constant";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
}
break;
case State_Character:
- if (delim(*ctx->e) != 0) {
+ if (delim(*e) != 0) {
tok.type = Character;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
}
tok.type = Error;
tok.s = "missing delimiter after character constant";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
- break;
case State_Probable_String:
- if (*ctx->e == '"')
- ctx->state = String;
+ if (*e == '"')
+ state = String;
break;
case State_String:
tok.type = String;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
case State_Lparen:
tok.type = Lparen;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
case State_Rparen:
tok.type = Rparen;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
case State_Quote:
tok.type = Quote;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
case State_Dot:
tok.type = Dot;
- tok.s = ctx->s;
- tok.e = ctx->e;
+ tok.s = s;
+ tok.e = e;
+ ungetc(*e, in);
return tok;
case State_Comment:
- tmp = strchr(ctx->s, '\n');
+ tmp = strchr(s, '\n');
if (tmp) {
- ctx->state = State_Se;
- ctx->s = tmp;
+ state = State_Se;
+ s = tmp;
}
break;
case State_Illegal_Input:
tok.type = Error;
tok.s = "illegal input";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
default:
tok.type = Error;
tok.s = "internal lex error";
tok.e = NULL;
+ ungetc(*e, in);
return tok;
}
- ctx->e++;
+ e++;
}
tok.type = Eof;
tok.s = "reached the end-of-file";
@@ -234,24 +242,9 @@ gettok(struct lexerctx *ctx, const char *buf, size_t len)
return tok;
}
-struct lexerctx *
+int
initlexer(void)
{
- struct lexerctx *ctx;
-
- ctx = malloc(sizeof(*ctx));
- if (!ctx)
- return NULL;
- ctx->state = State_Se;
- ctx->ready = 0;
- ctx->s = NULL;
- ctx->e = NULL;
- return ctx;
-}
-
-void
-resetlexer(struct lexerctx *ctx)
-{
- ctx->ready = 0;
- ctx->state = State_Se;
+ state = State_Se;
+ return 0;
}
diff --git a/lexer.h b/lexer.h
@@ -1,5 +1,5 @@
/* See LICENSE file for copyright and license details. */
-typedef enum {
+enum toktype {
Error = -2,
Eof = -1,
Identifier,
@@ -11,19 +11,14 @@ typedef enum {
Rparen,
Quote,
Dot
-} toktype;
+};
struct tok {
- toktype type;
- const char *s;
- const char *e;
+ enum toktype type;
+ char *s;
+ char *e;
};
-struct lexerctx;
-
-int delim(int);
char *extractstring(struct tok *);
-void freelexer(struct lexerctx *ctx);
-struct tok gettok(struct lexerctx *, const char *, size_t);
-struct lexerctx *initlexer(void);
-void resetlexer(struct lexerctx *);
+struct tok gettok(FILE *fp);
+int initlexer(void);
diff --git a/repl.c b/repl.c
@@ -1,52 +1,25 @@
/* See LICENSE file for copyright and license details. */
#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
#include "lexer.h"
#include "debug.h"
-#include "util.h"
static void
-lexertest(char *buf)
+lexertest(FILE *in)
{
struct tok tok;
- struct lexerctx *ctx;
- ctx = initlexer();
- if (ctx) {
- resetlexer(ctx);
- do {
- tok = gettok(ctx, buf, strlen(buf));
- printtok(&tok);
- putchar('\n');
- } while (tok.type != Eof && tok.type != Error);
- freelexer(ctx);
- }
+ initlexer();
+ do {
+ tok = gettok(in);
+ printtok(&tok);
+ putchar('\n');
+ fflush(stdout);
+ } while (tok.type != Eof && tok.type != Error);
}
int
main(void)
{
- char *buf = NULL;
- size_t sz = 0;
- int interactive = isatty(fileno(stdin));
-
- if (interactive == 1)
- puts("Welcome to iris, use ^C to quit");
- do {
- if (interactive == 1) {
- printf("> ");
- fflush(stdout);
- }
- if (afgets(&buf, &sz, stdin))
- lexertest(buf);
- if (ferror(stdin)) {
- fprintf(stderr, "input error\n");
- return EXIT_FAILURE;
- }
- fflush(stdout);
- } while (interactive == 1);
- free(buf);
+ lexertest(stdin);
return 0;
}