iris

small scheme interpreter
git clone git://git.2f30.org/iris
Log | Files | Refs | LICENSE

lexer.c (4646B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <ctype.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 #include "lexer.h"
      7 
/* Capacity, in bytes, of the static buffer that holds the text of the token being read. */
#define MAXTOKSIZE 256
      9 
/*
 * States of the lexer's finite state machine.  gettok() starts every
 * token in SSe and moves between states one input character at a time.
 */
enum state {
	SSe,			/* initial state: no token character seen yet */
	SIdentifier,		/* inside an identifier (started with a letter) */
	SProbable_Boolean,	/* seen '#': a boolean or a character may follow */
	SBoolean,		/* seen "#f" or "#t": a delimiter must follow */
	SInteger,		/* inside a run of digits */
	SSigned_Integer,	/* seen '+' or '-': a digit must follow */
	SProbable_Character,	/* seen "#\": a letter must follow */
	SCharacter,		/* seen "#\" and a letter: a delimiter must follow */
	SProbable_String,	/* after the opening '"', waiting for the closing '"' */
	SString,		/* closing '"' seen: string is complete */
	SLparen,		/* seen '(' */
	SRparen,		/* seen ')' */
	SQuote,			/* seen '\'' */
	SDot,			/* seen '.' */
	SComment,		/* after ';', skipping until end of line */
	SIllegal_Input,		/* seen a character that cannot start any token */
};
     28 
/* Lexer state shared by gettok() and puttok(). */
static enum state state;	/* current state of the state machine */
static char *s;			/* start of the current token inside buf */
static char *e;			/* position in buf where the next character is stored */
static char buf[MAXTOKSIZE];	/* text of the token currently being read */
static struct tok lasttok;	/* token saved by puttok() */
static int pendingtok;		/* 1 when lasttok must be returned by the next gettok() */
     35 
     36 static int
     37 delim(int c)
     38 {
     39 	return (c == ' ' || c == '\t' || c == '\n' ||
     40 		c == '(' || c == ')');
     41 }
     42 
     43 struct tok
     44 gettok(FILE *in)
     45 {
     46 	struct tok tok;
     47 	int c;
     48 
     49 	/* if we pushed back a token, return it now */
     50 	if (pendingtok == 1) {
     51 		pendingtok = 0;
     52 		return lasttok;
     53 	}
     54 again:
     55 	state = SSe;
     56 	s = e = buf;
     57 	while ((c = getc(in)) != EOF && e < &buf[MAXTOKSIZE]) {
     58 		*e = c;
     59 		switch (state) {
     60 		case SSe:
     61 			if (*e == ' ' || *e == '\t' || *e == '\n')
     62 				goto again;
     63 			if (isalpha(*e) != 0)
     64 				state = SIdentifier;
     65 			else if (*e == '#')
     66 				state = SProbable_Boolean;
     67 			else if (isdigit(*e) != 0)
     68 				state = SInteger;
     69 			else if (*e == '-' || *e == '+')
     70 				state = SSigned_Integer;
     71 			else if (*e == '"')
     72 				state = SProbable_String;
     73 			else if (*e == '(')
     74 				state = SLparen;
     75 			else if (*e == ')')
     76 				state = SRparen;
     77 			else if (*e == '\'')
     78 				state = SQuote;
     79 			else if (*e == '.')
     80 				state = SDot;
     81 			else if (*e == ';')
     82 				state = SComment;
     83 			else
     84 				state = SIllegal_Input;
     85 			break;
     86 		case SIdentifier:
     87 			if (delim(*e) != 0) {
     88 				tok.type = TIdentifier;
     89 				tok.s = s;
     90 				tok.e = e;
     91 				ungetc(*e, in);
     92 				return tok;
     93 			}
     94 			if (isalnum(*e) == 0) {
     95 				tok.type = TError;
     96 				tok.s = "malformed identifier";
     97 				tok.e = NULL;
     98 				return tok;
     99 			}
    100 			break;
    101 		case SProbable_Boolean:
    102 			if (*e == 'f' || *e == 't')
    103 				state = SBoolean;
    104 			else if (*e == '\\')
    105 				state = SProbable_Character;
    106 			else {
    107 				tok.type = TError;
    108 				tok.s = "not a boolean or a character";
    109 				tok.e = NULL;
    110 				return tok;
    111 			}
    112 			break;
    113 		case SBoolean:
    114 			if (delim(*e) != 0) {
    115 				tok.type = TBoolean;
    116 				tok.s = s;
    117 				tok.e = e;
    118 				ungetc(*e, in);
    119 				return tok;
    120 			}
    121 			tok.type = TError;
    122 			tok.s = "missing delimiter after boolean";
    123 			tok.e = NULL;
    124 			return tok;
    125 		case SInteger:
    126 			if (delim(*e) != 0) {
    127 				tok.type = TInteger;
    128 				tok.s = s;
    129 				tok.e = e;
    130 				ungetc(*e, in);
    131 				return tok;
    132 			}
    133 			if (isdigit(*e) == 0) {
    134 				tok.type = TError;
    135 				tok.s = "not a integer";
    136 				tok.e = NULL;
    137 				return tok;
    138 			}
    139 			break;
    140 		case SSigned_Integer:
    141 			if (isdigit(*e) == 0) {
    142 				tok.type = TError;
    143 				tok.s = "not a integer";
    144 				tok.e = NULL;
    145 				return tok;
    146 			}
    147 			state = SInteger;
    148 			break;
    149 		case SProbable_Character:
    150 			if (isalpha(*e) != 0) {
    151 				state = SCharacter;
    152 			} else {
    153 				tok.type = TError;
    154 				tok.s = "expected character constant";
    155 				tok.e = NULL;
    156 				return tok;
    157 			}
    158 			break;
    159 		case SCharacter:
    160 			if (delim(*e) != 0) {
    161 				tok.type = TCharacter;
    162 				tok.s = s;
    163 				tok.e = e;
    164 				ungetc(*e, in);
    165 				return tok;
    166 			}
    167 			tok.type = TError;
    168 			tok.s = "missing delimiter after character constant";
    169 			tok.e = NULL;
    170 			return tok;
    171 		case SProbable_String:
    172 			if (*e == '"')
    173 				state = SString;
    174 			break;
    175 		case SString:
    176 			tok.type = TString;
    177 			tok.s = s;
    178 			tok.e = e;
    179 			ungetc(*e, in);
    180 			return tok;
    181 		case SLparen:
    182 			tok.type = TLparen;
    183 			tok.s = s;
    184 			tok.e = e;
    185 			ungetc(*e, in);
    186 			return tok;
    187 		case SRparen:
    188 			tok.type = TRparen;
    189 			tok.s = s;
    190 			tok.e = e;
    191 			ungetc(*e, in);
    192 			return tok;
    193 		case SQuote:
    194 			tok.type = TQuote;
    195 			tok.s = s;
    196 			tok.e = e;
    197 			ungetc(*e, in);
    198 			return tok;
    199 		case SDot:
    200 			tok.type = TDot;
    201 			tok.s = s;
    202 			tok.e = e;
    203 			ungetc(*e, in);
    204 			return tok;
    205 		case SComment:
    206 			if (*e == '\n')
    207 				goto again;
    208 			break;
    209 		case SIllegal_Input:
    210 			tok.type = TError;
    211 			tok.s = "illegal input";
    212 			tok.e = NULL;
    213 			return tok;
    214 		default:
    215 			tok.type = TError;
    216 			tok.s = "internal lex error";
    217 			tok.e = NULL;
    218 			return tok;
    219 		}
    220 		e++;
    221 	}
    222 	if (e == &buf[MAXTOKSIZE]) {
    223 		tok.type = TError;
    224 		tok.s = "reached the maximum token size";
    225 		tok.e = NULL;
    226 		return tok;
    227 	}
    228 	if (state != SSe) {
    229 		tok.type = TError;
    230 		tok.s = "unexpected EOF encountered";
    231 		tok.e = NULL;
    232 		return tok;
    233 	}
    234 	tok.type = TEof;
    235 	tok.s = "reached end-of-file";
    236 	tok.e = NULL;
    237 	return tok;
    238 }
    239 
    240 char *
    241 lexeme(struct tok *t)
    242 {
    243 	if (t->s == NULL)
    244 		return NULL;
    245 	/* In this case, t->s is null-terminated */
    246 	if (t->e == NULL)
    247 		return strdup(t->s);
    248 	return strndup(t->s, t->e - t->s);
    249 }
    250 
    251 void
    252 puttok(struct tok t)
    253 {
    254 	lasttok = t;
    255 	pendingtok = 1;
    256 }