lexer.c (4646B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <ctype.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 #include <string.h> 6 #include "lexer.h" 7 8 #define MAXTOKSIZE 256 9 10 enum state { 11 SSe, 12 SIdentifier, 13 SProbable_Boolean, 14 SBoolean, 15 SInteger, 16 SSigned_Integer, 17 SProbable_Character, 18 SCharacter, 19 SProbable_String, 20 SString, 21 SLparen, 22 SRparen, 23 SQuote, 24 SDot, 25 SComment, 26 SIllegal_Input, 27 }; 28 29 static enum state state; 30 static char *s; 31 static char *e; 32 static char buf[MAXTOKSIZE]; 33 static struct tok lasttok; 34 static int pendingtok; 35 36 static int 37 delim(int c) 38 { 39 return (c == ' ' || c == '\t' || c == '\n' || 40 c == '(' || c == ')'); 41 } 42 43 struct tok 44 gettok(FILE *in) 45 { 46 struct tok tok; 47 int c; 48 49 /* if we pushed back a token, return it now */ 50 if (pendingtok == 1) { 51 pendingtok = 0; 52 return lasttok; 53 } 54 again: 55 state = SSe; 56 s = e = buf; 57 while ((c = getc(in)) != EOF && e < &buf[MAXTOKSIZE]) { 58 *e = c; 59 switch (state) { 60 case SSe: 61 if (*e == ' ' || *e == '\t' || *e == '\n') 62 goto again; 63 if (isalpha(*e) != 0) 64 state = SIdentifier; 65 else if (*e == '#') 66 state = SProbable_Boolean; 67 else if (isdigit(*e) != 0) 68 state = SInteger; 69 else if (*e == '-' || *e == '+') 70 state = SSigned_Integer; 71 else if (*e == '"') 72 state = SProbable_String; 73 else if (*e == '(') 74 state = SLparen; 75 else if (*e == ')') 76 state = SRparen; 77 else if (*e == '\'') 78 state = SQuote; 79 else if (*e == '.') 80 state = SDot; 81 else if (*e == ';') 82 state = SComment; 83 else 84 state = SIllegal_Input; 85 break; 86 case SIdentifier: 87 if (delim(*e) != 0) { 88 tok.type = TIdentifier; 89 tok.s = s; 90 tok.e = e; 91 ungetc(*e, in); 92 return tok; 93 } 94 if (isalnum(*e) == 0) { 95 tok.type = TError; 96 tok.s = "malformed identifier"; 97 tok.e = NULL; 98 return tok; 99 } 100 break; 101 case SProbable_Boolean: 102 if (*e == 'f' || *e == 't') 103 state = SBoolean; 104 else if (*e == '\\') 105 state = SProbable_Character; 106 else { 107 tok.type = TError; 108 tok.s = "not a boolean or a character"; 109 tok.e = NULL; 110 return tok; 111 } 112 break; 113 case SBoolean: 114 if (delim(*e) != 0) { 115 tok.type = TBoolean; 116 tok.s = s; 117 tok.e = e; 118 ungetc(*e, in); 119 return tok; 120 } 121 tok.type = TError; 122 tok.s = "missing delimiter after boolean"; 123 tok.e = NULL; 124 return tok; 125 case SInteger: 126 if (delim(*e) != 0) { 127 tok.type = TInteger; 128 tok.s = s; 129 tok.e = e; 130 ungetc(*e, in); 131 return tok; 132 } 133 if (isdigit(*e) == 0) { 134 tok.type = TError; 135 tok.s = "not a integer"; 136 tok.e = NULL; 137 return tok; 138 } 139 break; 140 case SSigned_Integer: 141 if (isdigit(*e) == 0) { 142 tok.type = TError; 143 tok.s = "not a integer"; 144 tok.e = NULL; 145 return tok; 146 } 147 state = SInteger; 148 break; 149 case SProbable_Character: 150 if (isalpha(*e) != 0) { 151 state = SCharacter; 152 } else { 153 tok.type = TError; 154 tok.s = "expected character constant"; 155 tok.e = NULL; 156 return tok; 157 } 158 break; 159 case SCharacter: 160 if (delim(*e) != 0) { 161 tok.type = TCharacter; 162 tok.s = s; 163 tok.e = e; 164 ungetc(*e, in); 165 return tok; 166 } 167 tok.type = TError; 168 tok.s = "missing delimiter after character constant"; 169 tok.e = NULL; 170 return tok; 171 case SProbable_String: 172 if (*e == '"') 173 state = SString; 174 break; 175 case SString: 176 tok.type = TString; 177 tok.s = s; 178 tok.e = e; 179 ungetc(*e, in); 180 return tok; 181 case SLparen: 182 tok.type = TLparen; 183 tok.s = s; 184 tok.e = e; 185 ungetc(*e, in); 186 return tok; 187 case SRparen: 188 tok.type = TRparen; 189 tok.s = s; 190 tok.e = e; 191 ungetc(*e, in); 192 return tok; 193 case SQuote: 194 tok.type = TQuote; 195 tok.s = s; 196 tok.e = e; 197 ungetc(*e, in); 198 return tok; 199 case SDot: 200 tok.type = TDot; 201 tok.s = s; 202 tok.e = e; 203 ungetc(*e, in); 204 return tok; 205 case SComment: 206 if (*e == '\n') 207 goto again; 208 break; 209 case SIllegal_Input: 210 tok.type = TError; 211 tok.s = "illegal input"; 212 tok.e = NULL; 213 return tok; 214 default: 215 tok.type = TError; 216 tok.s = "internal lex error"; 217 tok.e = NULL; 218 return tok; 219 } 220 e++; 221 } 222 if (e == &buf[MAXTOKSIZE]) { 223 tok.type = TError; 224 tok.s = "reached the maximum token size"; 225 tok.e = NULL; 226 return tok; 227 } 228 if (state != SSe) { 229 tok.type = TError; 230 tok.s = "unexpected EOF encountered"; 231 tok.e = NULL; 232 return tok; 233 } 234 tok.type = TEof; 235 tok.s = "reached end-of-file"; 236 tok.e = NULL; 237 return tok; 238 } 239 240 char * 241 lexeme(struct tok *t) 242 { 243 if (t->s == NULL) 244 return NULL; 245 /* In this case, t->s is null-terminated */ 246 if (t->e == NULL) 247 return strdup(t->s); 248 return strndup(t->s, t->e - t->s); 249 } 250 251 void 252 puttok(struct tok t) 253 { 254 lasttok = t; 255 pendingtok = 1; 256 }