fatbase

portable OpenBSD tools
git clone git://git.2f30.org/fatbase
Log | Files | Refs

lalr.c (11006B)


      1 /*	$OpenBSD: lalr.c,v 1.17 2014/05/17 15:19:17 chl Exp $	*/
      2 /*	$NetBSD: lalr.c,v 1.4 1996/03/19 03:21:33 jtc Exp $	*/
      3 
      4 /*
      5  * Copyright (c) 1989 The Regents of the University of California.
      6  * All rights reserved.
      7  *
      8  * This code is derived from software contributed to Berkeley by
      9  * Robert Paul Corbett.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. Neither the name of the University nor the names of its contributors
     20  *    may be used to endorse or promote products derived from this software
     21  *    without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     24  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     26  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     27  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     28  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     29  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     30  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     31  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     32  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     33  * SUCH DAMAGE.
     34  */
     35 
     36 #include "defs.h"
     37 
     38 typedef struct shorts {
     39 	struct shorts *next;
     40 	short value;
     41 } shorts;
     42 
     43 int tokensetsize;
     44 short *lookaheads;
     45 short *LAruleno;
     46 unsigned *LA;
     47 short *accessing_symbol;
     48 core **state_table;
     49 shifts **shift_table;
     50 reductions **reduction_table;
     51 short *goto_map;
     52 short *from_state;
     53 short *to_state;
     54 
     55 short **transpose();
     56 void set_state_table(void);
     57 void set_accessing_symbol(void);
     58 void set_shift_table(void);
     59 void set_reduction_table(void);
     60 void set_maxrhs(void);
     61 void initialize_LA(void);
     62 void set_goto_map(void);
     63 void initialize_F(void);
     64 void build_relations(void);
     65 void compute_FOLLOWS(void);
     66 void compute_lookaheads(void);
     67 int map_goto(int, int);
     68 void digraph(short **);
     69 void add_lookback_edge(int, int, int);
     70 void traverse(int);
     71 
     72 static int infinity;
     73 static int maxrhs;
     74 static int ngotos;
     75 static unsigned *F;
     76 static short **includes;
     77 static shorts **lookback;
     78 static short **R;
     79 static short *INDEX;
     80 static short *VERTICES;
     81 static int top;
     82 
     83 void
     84 lalr(void)
     85 {
     86 	tokensetsize = WORDSIZE(ntokens);
     87 
     88 	set_state_table();
     89 	set_accessing_symbol();
     90 	set_shift_table();
     91 	set_reduction_table();
     92 	set_maxrhs();
     93 	initialize_LA();
     94 	set_goto_map();
     95 	initialize_F();
     96 	build_relations();
     97 	compute_FOLLOWS();
     98 	compute_lookaheads();
     99 	free_derives();
    100 	free_nullable();
    101 }
    102 
    103 
    104 void
    105 set_state_table(void)
    106 {
    107 	core *sp;
    108 
    109 	state_table = NEW2(nstates, core *);
    110 	for (sp = first_state; sp; sp = sp->next)
    111 		state_table[sp->number] = sp;
    112 }
    113 
    114 
    115 void
    116 set_accessing_symbol(void)
    117 {
    118 	core *sp;
    119 
    120 	accessing_symbol = NEW2(nstates, short);
    121 	for (sp = first_state; sp; sp = sp->next)
    122 		accessing_symbol[sp->number] = sp->accessing_symbol;
    123 }
    124 
    125 
    126 void
    127 set_shift_table(void)
    128 {
    129 	shifts *sp;
    130 
    131 	shift_table = NEW2(nstates, shifts *);
    132 	for (sp = first_shift; sp; sp = sp->next)
    133 		shift_table[sp->number] = sp;
    134 }
    135 
    136 
    137 void
    138 set_reduction_table(void)
    139 {
    140 	reductions *rp;
    141 
    142 	reduction_table = NEW2(nstates, reductions *);
    143 	for (rp = first_reduction; rp; rp = rp->next)
    144 		reduction_table[rp->number] = rp;
    145 }
    146 
    147 
    148 void
    149 set_maxrhs(void)
    150 {
    151 	short *itemp, *item_end;
    152 	int length, max;
    153 
    154 	length = 0;
    155 	max = 0;
    156 	item_end = ritem + nitems;
    157 	for (itemp = ritem; itemp < item_end; itemp++) {
    158 		if (*itemp >= 0) {
    159 			length++;
    160 		} else {
    161 			if (length > max) max = length;
    162 			length = 0;
    163 		}
    164 	}
    165 
    166 	maxrhs = max;
    167 }
    168 
    169 
    170 void
    171 initialize_LA(void)
    172 {
    173 	int i, j, k;
    174 	reductions *rp;
    175 
    176 	lookaheads = NEW2(nstates + 1, short);
    177 
    178 	k = 0;
    179 	for (i = 0; i < nstates; i++) {
    180 		lookaheads[i] = k;
    181 		rp = reduction_table[i];
    182 		if (rp)
    183 			k += rp->nreds;
    184 	}
    185 	lookaheads[nstates] = k;
    186 
    187 	LA = NEW2(k * tokensetsize, unsigned);
    188 	LAruleno = NEW2(k, short);
    189 	lookback = NEW2(k, shorts *);
    190 
    191 	k = 0;
    192 	for (i = 0; i < nstates; i++) {
    193 		rp = reduction_table[i];
    194 		if (rp) {
    195 			for (j = 0; j < rp->nreds; j++) {
    196 				LAruleno[k] = rp->rules[j];
    197 				k++;
    198 			}
    199 		}
    200 	}
    201 }
    202 
    203 void
    204 set_goto_map(void)
    205 {
    206 	shifts *sp;
    207 	int i, k, symbol;
    208 	int state1, state2;
    209 	short *temp_map;
    210 
    211 	goto_map = NEW2(nvars + 1, short) - ntokens;
    212 	temp_map = NEW2(nvars + 1, short) - ntokens;
    213 
    214 	ngotos = 0;
    215 	for (sp = first_shift; sp; sp = sp->next) {
    216 		for (i = sp->nshifts - 1; i >= 0; i--) {
    217 			symbol = accessing_symbol[sp->shift[i]];
    218 
    219 			if (ISTOKEN(symbol)) break;
    220 
    221 			if (ngotos == MAXSHORT)
    222 				fatal("too many gotos");
    223 
    224 			ngotos++;
    225 			goto_map[symbol]++;
    226 		}
    227 	}
    228 
    229 	k = 0;
    230 	for (i = ntokens; i < nsyms; i++) {
    231 		temp_map[i] = k;
    232 		k += goto_map[i];
    233 	}
    234 
    235 	for (i = ntokens; i < nsyms; i++)
    236 		goto_map[i] = temp_map[i];
    237 
    238 	goto_map[nsyms] = ngotos;
    239 	temp_map[nsyms] = ngotos;
    240 
    241 	from_state = NEW2(ngotos, short);
    242 	to_state = NEW2(ngotos, short);
    243 
    244 	for (sp = first_shift; sp; sp = sp->next) {
    245 		state1 = sp->number;
    246 		for (i = sp->nshifts - 1; i >= 0; i--) {
    247 			state2 = sp->shift[i];
    248 			symbol = accessing_symbol[state2];
    249 
    250 			if (ISTOKEN(symbol)) break;
    251 
    252 			k = temp_map[symbol]++;
    253 			from_state[k] = state1;
    254 			to_state[k] = state2;
    255 		}
    256 	}
    257 
    258 	free(temp_map + ntokens);
    259 }
    260 
    261 
    262 
    263 /*  Map_goto maps a state/symbol pair into its numeric representation.	*/
    264 
    265 int
    266 map_goto(int state, int symbol)
    267 {
    268 	int high, low, middle, s;
    269 
    270 	low = goto_map[symbol];
    271 	high = goto_map[symbol + 1];
    272 
    273 	for (;;) {
    274 		assert(low <= high);
    275 		middle = (low + high) >> 1;
    276 		s = from_state[middle];
    277 		if (s == state)
    278 			return (middle);
    279 		else if (s < state)
    280 			low = middle + 1;
    281 		else
    282 			high = middle - 1;
    283 	}
    284 }
    285 
    286 
    287 void
    288 initialize_F(void)
    289 {
    290 	int i, j, k;
    291 	shifts *sp;
    292 	short *edge, *rp, **reads;
    293 	unsigned int *rowp;
    294 	int nedges, stateno, symbol, nwords;
    295 
    296 	nwords = ngotos * tokensetsize;
    297 	F = NEW2(nwords, unsigned);
    298 
    299 	reads = NEW2(ngotos, short *);
    300 	edge = NEW2(ngotos + 1, short);
    301 	nedges = 0;
    302 
    303 	rowp = F;
    304 	for (i = 0; i < ngotos; i++) {
    305 		stateno = to_state[i];
    306 		sp = shift_table[stateno];
    307 
    308 		if (sp) {
    309 			k = sp->nshifts;
    310 
    311 			for (j = 0; j < k; j++) {
    312 				symbol = accessing_symbol[sp->shift[j]];
    313 				if (ISVAR(symbol))
    314 					break;
    315 				SETBIT(rowp, symbol);
    316 			}
    317 
    318 			for (; j < k; j++) {
    319 				symbol = accessing_symbol[sp->shift[j]];
    320 				if (nullable[symbol])
    321 					edge[nedges++] = map_goto(stateno, symbol);
    322 			}
    323 			
    324 			if (nedges) {
    325 				reads[i] = rp = NEW2(nedges + 1, short);
    326 
    327 				for (j = 0; j < nedges; j++)
    328 					rp[j] = edge[j];
    329 
    330 				rp[nedges] = -1;
    331 				nedges = 0;
    332 			}
    333 		}
    334 
    335 		rowp += tokensetsize;
    336 	}
    337 
    338 	SETBIT(F, 0);
    339 	digraph(reads);
    340 
    341 	for (i = 0; i < ngotos; i++) {
    342 		if (reads[i])
    343 			free(reads[i]);
    344 	}
    345 
    346 	free(reads);
    347 	free(edge);
    348 }
    349 
    350 
    351 void
    352 build_relations(void)
    353 {
    354 	int i, j, k;
    355 	short *rulep, *rp;
    356 	shifts *sp;
    357 	int length, nedges, done;
    358 	int state1, stateno, symbol1, symbol2;
    359 	short *shortp, *edge, *states;
    360 	short **new_includes;
    361 
    362 	includes = NEW2(ngotos, short *);
    363 	edge = NEW2(ngotos + 1, short);
    364 	states = NEW2(maxrhs + 1, short);
    365 
    366 	for (i = 0; i < ngotos; i++) {
    367 		nedges = 0;
    368 		state1 = from_state[i];
    369 		symbol1 = accessing_symbol[to_state[i]];
    370 
    371 		for (rulep = derives[symbol1]; *rulep >= 0; rulep++) {
    372 			length = 1;
    373 			states[0] = state1;
    374 			stateno = state1;
    375 
    376 			for (rp = ritem + rrhs[*rulep]; *rp >= 0; rp++) {
    377 				symbol2 = *rp;
    378 				sp = shift_table[stateno];
    379 				k = sp->nshifts;
    380 
    381 				for (j = 0; j < k; j++) {
    382 					stateno = sp->shift[j];
    383 					if (accessing_symbol[stateno] == symbol2)
    384 						break;
    385 				}
    386 
    387 				states[length++] = stateno;
    388 			}
    389 
    390 			add_lookback_edge(stateno, *rulep, i);
    391 
    392 			length--;
    393 			done = 0;
    394 			while (!done) {
    395 				done = 1;
    396 				rp--;
    397 				if (ISVAR(*rp)) {
    398 					stateno = states[--length];
    399 					edge[nedges++] = map_goto(stateno, *rp);
    400 					if (nullable[*rp] && length > 0)
    401 						done = 0;
    402 				}
    403 			}
    404 		}
    405 
    406 		if (nedges) {
    407 			includes[i] = shortp = NEW2(nedges + 1, short);
    408 			for (j = 0; j < nedges; j++)
    409 				shortp[j] = edge[j];
    410 			shortp[nedges] = -1;
    411 		}
    412 	}
    413 
    414 	new_includes = transpose(includes, ngotos);
    415 
    416 	for (i = 0; i < ngotos; i++)
    417 		if (includes[i])
    418 			free(includes[i]);
    419 
    420 	free(includes);
    421 
    422 	includes = new_includes;
    423 
    424 	free(edge);
    425 	free(states);
    426 }
    427 
    428 void
    429 add_lookback_edge(int stateno, int ruleno, int gotono)
    430 {
    431 	int i, k, found;
    432 	shorts *sp;
    433 
    434 	i = lookaheads[stateno];
    435 	k = lookaheads[stateno + 1];
    436 	found = 0;
    437 	while (!found && i < k) {
    438 		if (LAruleno[i] == ruleno)
    439 			found = 1;
    440 		else
    441 			++i;
    442 	}
    443 	assert(found);
    444 
    445 	sp = NEW(shorts);
    446 	sp->next = lookback[i];
    447 	sp->value = gotono;
    448 	lookback[i] = sp;
    449 }
    450 
    451 
    452 
    453 short **
    454 transpose(short **R, int n)
    455 {
    456 	short **new_R, **temp_R, *nedges, *sp;
    457 	int i, k;
    458 
    459 	nedges = NEW2(n, short);
    460 
    461 	for (i = 0; i < n; i++) {
    462 		sp = R[i];
    463 		if (sp) {
    464 			while (*sp >= 0)
    465 				nedges[*sp++]++;
    466 		}
    467 	}
    468 
    469 	new_R = NEW2(n, short *);
    470 	temp_R = NEW2(n, short *);
    471 
    472 	for (i = 0; i < n; i++) {
    473 		k = nedges[i];
    474 		if (k > 0) {
    475 			sp = NEW2(k + 1, short);
    476 			new_R[i] = sp;
    477 			temp_R[i] = sp;
    478 			sp[k] = -1;
    479 		}
    480 	}
    481 
    482 	free(nedges);
    483 
    484 	for (i = 0; i < n; i++) {
    485 		sp = R[i];
    486 		if (sp) {
    487 			while (*sp >= 0)
    488 				*temp_R[*sp++]++ = i;
    489 		}
    490 	}
    491 
    492 	free(temp_R);
    493 
    494 	return (new_R);
    495 }
    496 
    497 
    498 void
    499 compute_FOLLOWS(void)
    500 {
    501 	digraph(includes);
    502 }
    503 
    504 void
    505 compute_lookaheads(void)
    506 {
    507 	int i, n;
    508 	unsigned int *fp1, *fp2, *fp3;
    509 	shorts *sp, *next;
    510 	unsigned int *rowp;
    511 
    512 	rowp = LA;
    513 	n = lookaheads[nstates];
    514 	for (i = 0; i < n; i++) {
    515 		fp3 = rowp + tokensetsize;
    516 		for (sp = lookback[i]; sp; sp = sp->next) {
    517 			fp1 = rowp;
    518 			fp2 = F + tokensetsize * sp->value;
    519 			while (fp1 < fp3)
    520 				*fp1++ |= *fp2++;
    521 		}
    522 		rowp = fp3;
    523 	}
    524 
    525 	for (i = 0; i < n; i++)
    526 		for (sp = lookback[i]; sp; sp = next) {
    527 			next = sp->next;
    528 			free(sp);
    529 		}
    530 
    531 	free(lookback);
    532 	free(F);
    533 }
    534 
    535 void
    536 digraph(short **relation)
    537 {
    538 	int i;
    539 
    540 	infinity = ngotos + 2;
    541 	INDEX = NEW2(ngotos + 1, short);
    542 	VERTICES = NEW2(ngotos + 1, short);
    543 	top = 0;
    544 	R = relation;
    545 
    546 	memset(INDEX, 0, ngotos * sizeof(short));
    547 	for (i = 0; i < ngotos; i++)
    548 		if (R[i])
    549 			traverse(i);
    550 
    551 	free(INDEX);
    552 	free(VERTICES);
    553 }
    554 
    555 
    556 void
    557 traverse(int i)
    558 {
    559 	unsigned int *base, *fp1, *fp2, *fp3;
    560 	int j, height;
    561 	short *rp;
    562 
    563 	VERTICES[++top] = i;
    564 	INDEX[i] = height = top;
    565 
    566 	base = F + i * tokensetsize;
    567 	fp3 = base + tokensetsize;
    568 
    569 	rp = R[i];
    570 	if (rp) {
    571 		while ((j = *rp++) >= 0) {
    572 			if (INDEX[j] == 0)
    573 				traverse(j);
    574 
    575 			if (INDEX[i] > INDEX[j])
    576 				INDEX[i] = INDEX[j];
    577 
    578 			fp1 = base;
    579 			fp2 = F + j * tokensetsize;
    580 
    581 			while (fp1 < fp3)
    582 				*fp1++ |= *fp2++;
    583 		}
    584 	}
    585 
    586 	if (INDEX[i] == height) {
    587 		for (;;) {
    588 			j = VERTICES[top--];
    589 			INDEX[j] = infinity;
    590 
    591 			if (i == j)
    592 				break;
    593 
    594 			fp1 = base;
    595 			fp2 = F + j * tokensetsize;
    596 
    597 			while (fp1 < fp3)
    598 				*fp2++ = *fp1++;
    599 		}
    600 	}
    601 }