fatbase

portable OpenBSD tools
git clone git@git.2f30.org/fatbase.git
Log | Files | Refs

dfa.c (25910B)


      1 /*	$OpenBSD: dfa.c,v 1.6 2003/06/04 17:34:44 millert Exp $	*/
      2 
      3 /* dfa - DFA construction routines */
      4 
      5 /*-
      6  * Copyright (c) 1990 The Regents of the University of California.
      7  * All rights reserved.
      8  *
      9  * This code is derived from software contributed to Berkeley by
     10  * Vern Paxson.
     11  * 
     12  * The United States Government has rights in this work pursuant
     13  * to contract no. DE-AC03-76SF00098 between the United States
     14  * Department of Energy and the University of California.
     15  *
     16  * Redistribution and use in source and binary forms, with or without
     17  * modification, are permitted provided that the following conditions
     18  * are met:
     19  *
     20  * 1. Redistributions of source code must retain the above copyright
     21  *    notice, this list of conditions and the following disclaimer.
     22  * 2. Redistributions in binary form must reproduce the above copyright
     23  *    notice, this list of conditions and the following disclaimer in the
     24  *    documentation and/or other materials provided with the distribution.
     25  *
     26  * Neither the name of the University nor the names of its contributors
     27  * may be used to endorse or promote products derived from this software
     28  * without specific prior written permission.
     29  *
     30  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
     31  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
     32  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     33  * PURPOSE.
     34  */
     35 
     36 /* $Header: /cvs/src/usr.bin/lex/dfa.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */
     37 
     38 #include "flexdef.h"
     39 
     40 
     41 /* declare functions that have forward references */
     42 
     43 void dump_associated_rules PROTO((FILE*, int));
     44 void dump_transitions PROTO((FILE*, int[]));
     45 void sympartition PROTO((int[], int, int[], int[]));
     46 int symfollowset PROTO((int[], int, int, int[]));
     47 
     48 
     49 /* check_for_backing_up - check a DFA state for backing up
     50  *
     51  * synopsis
     52  *     void check_for_backing_up( int ds, int state[numecs] );
     53  *
     54  * ds is the number of the state to check and state[] is its out-transitions,
     55  * indexed by equivalence class.
     56  */
     57 
     58 void check_for_backing_up( ds, state )
     59 int ds;
     60 int state[];
     61 	{
     62 	if ( (reject && ! dfaacc[ds].dfaacc_set) ||
     63 	     (! reject && ! dfaacc[ds].dfaacc_state) )
     64 		{ /* state is non-accepting */
     65 		++num_backing_up;
     66 
     67 		if ( backing_up_report )
     68 			{
     69 			fprintf( backing_up_file,
     70 				_( "State #%d is non-accepting -\n" ), ds );
     71 
     72 			/* identify the state */
     73 			dump_associated_rules( backing_up_file, ds );
     74 
     75 			/* Now identify it further using the out- and
     76 			 * jam-transitions.
     77 			 */
     78 			dump_transitions( backing_up_file, state );
     79 
     80 			putc( '\n', backing_up_file );
     81 			}
     82 		}
     83 	}
     84 
     85 
     86 /* check_trailing_context - check to see if NFA state set constitutes
     87  *                          "dangerous" trailing context
     88  *
     89  * synopsis
     90  *    void check_trailing_context( int nfa_states[num_states+1], int num_states,
     91  *				int accset[nacc+1], int nacc );
     92  *
     93  * NOTES
     94  *  Trailing context is "dangerous" if both the head and the trailing
     95  *  part are of variable size \and/ there's a DFA state which contains
     96  *  both an accepting state for the head part of the rule and NFA states
     97  *  which occur after the beginning of the trailing context.
     98  *
     99  *  When such a rule is matched, it's impossible to tell if having been
    100  *  in the DFA state indicates the beginning of the trailing context or
    101  *  further-along scanning of the pattern.  In these cases, a warning
    102  *  message is issued.
    103  *
    104  *    nfa_states[1 .. num_states] is the list of NFA states in the DFA.
    105  *    accset[1 .. nacc] is the list of accepting numbers for the DFA state.
    106  */
    107 
    108 void check_trailing_context( nfa_states, num_states, accset, nacc )
    109 int *nfa_states, num_states;
    110 int *accset;
    111 int nacc;
    112 	{
    113 	int i, j;
    114 
    115 	for ( i = 1; i <= num_states; ++i )
    116 		{
    117 		int ns = nfa_states[i];
    118 		int type = state_type[ns];
    119 		int ar = assoc_rule[ns];
    120 
    121 		if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE )
    122 			{ /* do nothing */
    123 			}
    124 
    125 		else if ( type == STATE_TRAILING_CONTEXT )
    126 			{
    127 			/* Potential trouble.  Scan set of accepting numbers
    128 			 * for the one marking the end of the "head".  We
    129 			 * assume that this looping will be fairly cheap
    130 			 * since it's rare that an accepting number set
    131 			 * is large.
    132 			 */
    133 			for ( j = 1; j <= nacc; ++j )
    134 				if ( accset[j] & YY_TRAILING_HEAD_MASK )
    135 					{
    136 					line_warning(
    137 					_( "dangerous trailing context" ),
    138 						rule_linenum[ar] );
    139 					return;
    140 					}
    141 			}
    142 		}
    143 	}
    144 
    145 
    146 /* dump_associated_rules - list the rules associated with a DFA state
    147  *
    148  * Goes through the set of NFA states associated with the DFA and
    149  * extracts the first MAX_ASSOC_RULES unique rules, sorts them,
    150  * and writes a report to the given file.
    151  */
    152 
    153 void dump_associated_rules( file, ds )
    154 FILE *file;
    155 int ds;
    156 	{
    157 	int i, j;
    158 	int num_associated_rules = 0;
    159 	int rule_set[MAX_ASSOC_RULES + 1];
    160 	int *dset = dss[ds];
    161 	int size = dfasiz[ds];
    162 
    163 	for ( i = 1; i <= size; ++i )
    164 		{
    165 		int rule_num = rule_linenum[assoc_rule[dset[i]]];
    166 
    167 		for ( j = 1; j <= num_associated_rules; ++j )
    168 			if ( rule_num == rule_set[j] )
    169 				break;
    170 
    171 		if ( j > num_associated_rules )
    172 			{ /* new rule */
    173 			if ( num_associated_rules < MAX_ASSOC_RULES )
    174 				rule_set[++num_associated_rules] = rule_num;
    175 			}
    176 		}
    177 
    178 	bubble( rule_set, num_associated_rules );
    179 
    180 	fprintf( file, _( " associated rule line numbers:" ) );
    181 
    182 	for ( i = 1; i <= num_associated_rules; ++i )
    183 		{
    184 		if ( i % 8 == 1 )
    185 			putc( '\n', file );
    186 
    187 		fprintf( file, "\t%d", rule_set[i] );
    188 		}
    189 
    190 	putc( '\n', file );
    191 	}
    192 
    193 
    194 /* dump_transitions - list the transitions associated with a DFA state
    195  *
    196  * synopsis
    197  *     dump_transitions( FILE *file, int state[numecs] );
    198  *
    199  * Goes through the set of out-transitions and lists them in human-readable
    200  * form (i.e., not as equivalence classes); also lists jam transitions
    201  * (i.e., all those which are not out-transitions, plus EOF).  The dump
    202  * is done to the given file.
    203  */
    204 
    205 void dump_transitions( file, state )
    206 FILE *file;
    207 int state[];
    208 	{
    209 	int i, ec;
    210 	int out_char_set[CSIZE];
    211 
    212 	for ( i = 0; i < csize; ++i )
    213 		{
    214 		ec = ABS( ecgroup[i] );
    215 		out_char_set[i] = state[ec];
    216 		}
    217 
    218 	fprintf( file, _( " out-transitions: " ) );
    219 
    220 	list_character_set( file, out_char_set );
    221 
    222 	/* now invert the members of the set to get the jam transitions */
    223 	for ( i = 0; i < csize; ++i )
    224 		out_char_set[i] = ! out_char_set[i];
    225 
    226 	fprintf( file, _( "\n jam-transitions: EOF " ) );
    227 
    228 	list_character_set( file, out_char_set );
    229 
    230 	putc( '\n', file );
    231 	}
    232 
    233 
    234 /* epsclosure - construct the epsilon closure of a set of ndfa states
    235  *
    236  * synopsis
    237  *    int *epsclosure( int t[num_states], int *numstates_addr,
    238  *			int accset[num_rules+1], int *nacc_addr,
    239  *			int *hashval_addr );
    240  *
    241  * NOTES
    242  *  The epsilon closure is the set of all states reachable by an arbitrary
    243  *  number of epsilon transitions, which themselves do not have epsilon
    244  *  transitions going out, unioned with the set of states which have non-null
    245  *  accepting numbers.  t is an array of size numstates of nfa state numbers.
    246  *  Upon return, t holds the epsilon closure and *numstates_addr is updated.
    247  *  accset holds a list of the accepting numbers, and the size of accset is
    248  *  given by *nacc_addr.  t may be subjected to reallocation if it is not
    249  *  large enough to hold the epsilon closure.
    250  *
    251  *  hashval is the hash value for the dfa corresponding to the state set.
    252  */
    253 
    254 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr )
    255 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr;
    256 	{
    257 	int stkpos, ns, tsp;
    258 	int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum;
    259 	int stkend, nstate;
    260 	static int did_stk_init = false, *stk; 
    261 
    262 #define MARK_STATE(state) \
    263 trans1[state] = trans1[state] - MARKER_DIFFERENCE;
    264 
    265 #define IS_MARKED(state) (trans1[state] < 0)
    266 
    267 #define UNMARK_STATE(state) \
    268 trans1[state] = trans1[state] + MARKER_DIFFERENCE;
    269 
    270 #define CHECK_ACCEPT(state) \
    271 { \
    272 nfaccnum = accptnum[state]; \
    273 if ( nfaccnum != NIL ) \
    274 accset[++nacc] = nfaccnum; \
    275 }
    276 
    277 #define DO_REALLOCATION \
    278 { \
    279 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \
    280 ++num_reallocs; \
    281 t = reallocate_integer_array( t, current_max_dfa_size ); \
    282 stk = reallocate_integer_array( stk, current_max_dfa_size ); \
    283 } \
    284 
    285 #define PUT_ON_STACK(state) \
    286 { \
    287 if ( ++stkend >= current_max_dfa_size ) \
    288 DO_REALLOCATION \
    289 stk[stkend] = state; \
    290 MARK_STATE(state) \
    291 }
    292 
    293 #define ADD_STATE(state) \
    294 { \
    295 if ( ++numstates >= current_max_dfa_size ) \
    296 DO_REALLOCATION \
    297 t[numstates] = state; \
    298 hashval += state; \
    299 }
    300 
    301 #define STACK_STATE(state) \
    302 { \
    303 PUT_ON_STACK(state) \
    304 CHECK_ACCEPT(state) \
    305 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \
    306 ADD_STATE(state) \
    307 }
    308 
    309 
    310 	if ( ! did_stk_init )
    311 		{
    312 		stk = allocate_integer_array( current_max_dfa_size );
    313 		did_stk_init = true;
    314 		}
    315 
    316 	nacc = stkend = hashval = 0;
    317 
    318 	for ( nstate = 1; nstate <= numstates; ++nstate )
    319 		{
    320 		ns = t[nstate];
    321 
    322 		/* The state could be marked if we've already pushed it onto
    323 		 * the stack.
    324 		 */
    325 		if ( ! IS_MARKED(ns) )
    326 			{
    327 			PUT_ON_STACK(ns)
    328 			CHECK_ACCEPT(ns)
    329 			hashval += ns;
    330 			}
    331 		}
    332 
    333 	for ( stkpos = 1; stkpos <= stkend; ++stkpos )
    334 		{
    335 		ns = stk[stkpos];
    336 		transsym = transchar[ns];
    337 
    338 		if ( transsym == SYM_EPSILON )
    339 			{
    340 			tsp = trans1[ns] + MARKER_DIFFERENCE;
    341 
    342 			if ( tsp != NO_TRANSITION )
    343 				{
    344 				if ( ! IS_MARKED(tsp) )
    345 					STACK_STATE(tsp)
    346 
    347 				tsp = trans2[ns];
    348 
    349 				if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) )
    350 					STACK_STATE(tsp)
    351 				}
    352 			}
    353 		}
    354 
    355 	/* Clear out "visit" markers. */
    356 
    357 	for ( stkpos = 1; stkpos <= stkend; ++stkpos )
    358 		{
    359 		if ( IS_MARKED(stk[stkpos]) )
    360 			UNMARK_STATE(stk[stkpos])
    361 		else
    362 			flexfatal(
    363 			_( "consistency check failed in epsclosure()" ) );
    364 		}
    365 
    366 	*ns_addr = numstates;
    367 	*hv_addr = hashval;
    368 	*nacc_addr = nacc;
    369 
    370 	return t;
    371 	}
    372 
    373 
    374 /* increase_max_dfas - increase the maximum number of DFAs */
    375 
    376 void increase_max_dfas()
    377 	{
    378 	current_max_dfas += MAX_DFAS_INCREMENT;
    379 
    380 	++num_reallocs;
    381 
    382 	base = reallocate_integer_array( base, current_max_dfas );
    383 	def = reallocate_integer_array( def, current_max_dfas );
    384 	dfasiz = reallocate_integer_array( dfasiz, current_max_dfas );
    385 	accsiz = reallocate_integer_array( accsiz, current_max_dfas );
    386 	dhash = reallocate_integer_array( dhash, current_max_dfas );
    387 	dss = reallocate_int_ptr_array( dss, current_max_dfas );
    388 	dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas );
    389 
    390 	if ( nultrans )
    391 		nultrans =
    392 			reallocate_integer_array( nultrans, current_max_dfas );
    393 	}
    394 
    395 
    396 /* ntod - convert an ndfa to a dfa
    397  *
    398  * Creates the dfa corresponding to the ndfa we've constructed.  The
    399  * dfa starts out in state #1.
    400  */
    401 
    402 void ntod()
    403 	{
    404 	int *accset, ds, nacc, newds;
    405 	int sym, hashval, numstates, dsize;
    406 	int num_full_table_rows;	/* used only for -f */
    407 	int *nset, *dset;
    408 	int targptr, totaltrans, i, comstate, comfreq, targ;
    409 	int symlist[CSIZE + 1];
    410 	int num_start_states;
    411 	int todo_head, todo_next;
    412 
    413 	/* Note that the following are indexed by *equivalence classes*
    414 	 * and not by characters.  Since equivalence classes are indexed
    415 	 * beginning with 1, even if the scanner accepts NUL's, this
    416 	 * means that (since every character is potentially in its own
    417 	 * equivalence class) these arrays must have room for indices
    418 	 * from 1 to CSIZE, so their size must be CSIZE + 1.
    419 	 */
    420 	int duplist[CSIZE + 1], state[CSIZE + 1];
    421 	int targfreq[CSIZE + 1], targstate[CSIZE + 1];
    422 
    423 	accset = allocate_integer_array( num_rules + 1 );
    424 	nset = allocate_integer_array( current_max_dfa_size );
    425 
    426 	/* The "todo" queue is represented by the head, which is the DFA
    427 	 * state currently being processed, and the "next", which is the
    428 	 * next DFA state number available (not in use).  We depend on the
    429 	 * fact that snstods() returns DFA's \in increasing order/, and thus
    430 	 * need only know the bounds of the dfas to be processed.
    431 	 */
    432 	todo_head = todo_next = 0;
    433 
    434 	for ( i = 0; i <= csize; ++i )
    435 		{
    436 		duplist[i] = NIL;
    437 		symlist[i] = false;
    438 		}
    439 
    440 	for ( i = 0; i <= num_rules; ++i )
    441 		accset[i] = NIL;
    442 
    443 	if ( trace )
    444 		{
    445 		dumpnfa( scset[1] );
    446 		fputs( _( "\n\nDFA Dump:\n\n" ), stderr );
    447 		}
    448 
    449 	inittbl();
    450 
    451 	/* Check to see whether we should build a separate table for
    452 	 * transitions on NUL characters.  We don't do this for full-speed
    453 	 * (-F) scanners, since for them we don't have a simple state
    454 	 * number lying around with which to index the table.  We also
    455 	 * don't bother doing it for scanners unless (1) NUL is in its own
    456 	 * equivalence class (indicated by a positive value of
    457 	 * ecgroup[NUL]), (2) NUL's equivalence class is the last
    458 	 * equivalence class, and (3) the number of equivalence classes is
    459 	 * the same as the number of characters.  This latter case comes
    460 	 * about when useecs is false or when it's true but every character
    461 	 * still manages to land in its own class (unlikely, but it's
    462 	 * cheap to check for).  If all these things are true then the
    463 	 * character code needed to represent NUL's equivalence class for
    464 	 * indexing the tables is going to take one more bit than the
    465 	 * number of characters, and therefore we won't be assured of
    466 	 * being able to fit it into a YY_CHAR variable.  This rules out
    467 	 * storing the transitions in a compressed table, since the code
    468 	 * for interpreting them uses a YY_CHAR variable (perhaps it
    469 	 * should just use an integer, though; this is worth pondering ...
    470 	 * ###).
    471 	 *
    472 	 * Finally, for full tables, we want the number of entries in the
    473 	 * table to be a power of two so the array references go fast (it
    474 	 * will just take a shift to compute the major index).  If
    475 	 * encoding NUL's transitions in the table will spoil this, we
    476 	 * give it its own table (note that this will be the case if we're
    477 	 * not using equivalence classes).
    478 	 */
    479 
    480 	/* Note that the test for ecgroup[0] == numecs below accomplishes
    481 	 * both (1) and (2) above
    482 	 */
    483 	if ( ! fullspd && ecgroup[0] == numecs )
    484 		{
    485 		/* NUL is alone in its equivalence class, which is the
    486 		 * last one.
    487 		 */
    488 		int use_NUL_table = (numecs == csize);
    489 
    490 		if ( fulltbl && ! use_NUL_table )
    491 			{
    492 			/* We still may want to use the table if numecs
    493 			 * is a power of 2.
    494 			 */
    495 			int power_of_two;
    496 
    497 			for ( power_of_two = 1; power_of_two <= csize;
    498 			      power_of_two *= 2 )
    499 				if ( numecs == power_of_two )
    500 					{
    501 					use_NUL_table = true;
    502 					break;
    503 					}
    504 			}
    505 
    506 		if ( use_NUL_table )
    507 			nultrans = allocate_integer_array( current_max_dfas );
    508 
    509 		/* From now on, nultrans != nil indicates that we're
    510 		 * saving null transitions for later, separate encoding.
    511 		 */
    512 		}
    513 
    514 
    515 	if ( fullspd )
    516 		{
    517 		for ( i = 0; i <= numecs; ++i )
    518 			state[i] = 0;
    519 
    520 		place_state( state, 0, 0 );
    521 		dfaacc[0].dfaacc_state = 0;
    522 		}
    523 
    524 	else if ( fulltbl )
    525 		{
    526 		if ( nultrans )
    527 			/* We won't be including NUL's transitions in the
    528 			 * table, so build it for entries from 0 .. numecs - 1.
    529 			 */
    530 			num_full_table_rows = numecs;
    531 
    532 		else
    533 			/* Take into account the fact that we'll be including
    534 			 * the NUL entries in the transition table.  Build it
    535 			 * from 0 .. numecs.
    536 			 */
    537 			num_full_table_rows = numecs + 1;
    538 
    539 		/* Unless -Ca, declare it "short" because it's a real
    540 		 * long-shot that that won't be large enough.
    541 		 */
    542 		out_str_dec( "static yyconst %s yy_nxt[][%d] =\n    {\n",
    543 			/* '}' so vi doesn't get too confused */
    544 			long_align ? "long" : "short", num_full_table_rows );
    545 
    546 		outn( "    {" );
    547 
    548 		/* Generate 0 entries for state #0. */
    549 		for ( i = 0; i < num_full_table_rows; ++i )
    550 			mk2data( 0 );
    551 
    552 		dataflush();
    553 		outn( "    },\n" );
    554 		}
    555 
    556 	/* Create the first states. */
    557 
    558 	num_start_states = lastsc * 2;
    559 
    560 	for ( i = 1; i <= num_start_states; ++i )
    561 		{
    562 		numstates = 1;
    563 
    564 		/* For each start condition, make one state for the case when
    565 		 * we're at the beginning of the line (the '^' operator) and
    566 		 * one for the case when we're not.
    567 		 */
    568 		if ( i % 2 == 1 )
    569 			nset[numstates] = scset[(i / 2) + 1];
    570 		else
    571 			nset[numstates] =
    572 				mkbranch( scbol[i / 2], scset[i / 2] );
    573 
    574 		nset = epsclosure( nset, &numstates, accset, &nacc, &hashval );
    575 
    576 		if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) )
    577 			{
    578 			numas += nacc;
    579 			totnst += numstates;
    580 			++todo_next;
    581 
    582 			if ( variable_trailing_context_rules && nacc > 0 )
    583 				check_trailing_context( nset, numstates,
    584 							accset, nacc );
    585 			}
    586 		}
    587 
    588 	if ( ! fullspd )
    589 		{
    590 		if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) )
    591 			flexfatal(
    592 			_( "could not create unique end-of-buffer state" ) );
    593 
    594 		++numas;
    595 		++num_start_states;
    596 		++todo_next;
    597 		}
    598 
    599 	while ( todo_head < todo_next )
    600 		{
    601 		targptr = 0;
    602 		totaltrans = 0;
    603 
    604 		for ( i = 1; i <= numecs; ++i )
    605 			state[i] = 0;
    606 
    607 		ds = ++todo_head;
    608 
    609 		dset = dss[ds];
    610 		dsize = dfasiz[ds];
    611 
    612 		if ( trace )
    613 			fprintf( stderr, _( "state # %d:\n" ), ds );
    614 
    615 		sympartition( dset, dsize, symlist, duplist );
    616 
    617 		for ( sym = 1; sym <= numecs; ++sym )
    618 			{
    619 			if ( symlist[sym] )
    620 				{
    621 				symlist[sym] = 0;
    622 
    623 				if ( duplist[sym] == NIL )
    624 					{
    625 					/* Symbol has unique out-transitions. */
    626 					numstates = symfollowset( dset, dsize,
    627 								sym, nset );
    628 					nset = epsclosure( nset, &numstates,
    629 						accset, &nacc, &hashval );
    630 
    631 					if ( snstods( nset, numstates, accset,
    632 						nacc, hashval, &newds ) )
    633 						{
    634 						totnst = totnst + numstates;
    635 						++todo_next;
    636 						numas += nacc;
    637 
    638 						if (
    639 					variable_trailing_context_rules &&
    640 							nacc > 0 )
    641 							check_trailing_context(
    642 								nset, numstates,
    643 								accset, nacc );
    644 						}
    645 
    646 					state[sym] = newds;
    647 
    648 					if ( trace )
    649 						fprintf( stderr, "\t%d\t%d\n",
    650 							sym, newds );
    651 
    652 					targfreq[++targptr] = 1;
    653 					targstate[targptr] = newds;
    654 					++numuniq;
    655 					}
    656 
    657 				else
    658 					{
    659 					/* sym's equivalence class has the same
    660 					 * transitions as duplist(sym)'s
    661 					 * equivalence class.
    662 					 */
    663 					targ = state[duplist[sym]];
    664 					state[sym] = targ;
    665 
    666 					if ( trace )
    667 						fprintf( stderr, "\t%d\t%d\n",
    668 							sym, targ );
    669 
    670 					/* Update frequency count for
    671 					 * destination state.
    672 					 */
    673 
    674 					i = 0;
    675 					while ( targstate[++i] != targ )
    676 						;
    677 
    678 					++targfreq[i];
    679 					++numdup;
    680 					}
    681 
    682 				++totaltrans;
    683 				duplist[sym] = NIL;
    684 				}
    685 			}
    686 
    687 		if ( caseins && ! useecs )
    688 			{
    689 			int j;
    690 
    691 			for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j )
    692 				{
    693 				if ( state[i] == 0 && state[j] != 0 )
    694 					/* We're adding a transition. */
    695 					++totaltrans;
    696 
    697 				else if ( state[i] != 0 && state[j] == 0 )
    698 					/* We're taking away a transition. */
    699 					--totaltrans;
    700 
    701 				state[i] = state[j];
    702 				}
    703 			}
    704 
    705 		numsnpairs += totaltrans;
    706 
    707 		if ( ds > num_start_states )
    708 			check_for_backing_up( ds, state );
    709 
    710 		if ( nultrans )
    711 			{
    712 			nultrans[ds] = state[NUL_ec];
    713 			state[NUL_ec] = 0;	/* remove transition */
    714 			}
    715 
    716 		if ( fulltbl )
    717 			{
    718 			outn( "    {" );
    719 
    720 			/* Supply array's 0-element. */
    721 			if ( ds == end_of_buffer_state )
    722 				mk2data( -end_of_buffer_state );
    723 			else
    724 				mk2data( end_of_buffer_state );
    725 
    726 			for ( i = 1; i < num_full_table_rows; ++i )
    727 				/* Jams are marked by negative of state
    728 				 * number.
    729 				 */
    730 				mk2data( state[i] ? state[i] : -ds );
    731 
    732 			dataflush();
    733 			outn( "    },\n" );
    734 			}
    735 
    736 		else if ( fullspd )
    737 			place_state( state, ds, totaltrans );
    738 
    739 		else if ( ds == end_of_buffer_state )
    740 			/* Special case this state to make sure it does what
    741 			 * it's supposed to, i.e., jam on end-of-buffer.
    742 			 */
    743 			stack1( ds, 0, 0, JAMSTATE );
    744 
    745 		else /* normal, compressed state */
    746 			{
    747 			/* Determine which destination state is the most
    748 			 * common, and how many transitions to it there are.
    749 			 */
    750 
    751 			comfreq = 0;
    752 			comstate = 0;
    753 
    754 			for ( i = 1; i <= targptr; ++i )
    755 				if ( targfreq[i] > comfreq )
    756 					{
    757 					comfreq = targfreq[i];
    758 					comstate = targstate[i];
    759 					}
    760 
    761 			bldtbl( state, ds, totaltrans, comstate, comfreq );
    762 			}
    763 		}
    764 
    765 	if ( fulltbl )
    766 		dataend();
    767 
    768 	else if ( ! fullspd )
    769 		{
    770 		cmptmps();  /* create compressed template entries */
    771 
    772 		/* Create tables for all the states with only one
    773 		 * out-transition.
    774 		 */
    775 		while ( onesp > 0 )
    776 			{
    777 			mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp],
    778 			onedef[onesp] );
    779 			--onesp;
    780 			}
    781 
    782 		mkdeftbl();
    783 		}
    784 
    785 	flex_free( (void *) accset );
    786 	flex_free( (void *) nset );
    787 	}
    788 
    789 
    790 /* snstods - converts a set of ndfa states into a dfa state
    791  *
    792  * synopsis
    793  *    is_new_state = snstods( int sns[numstates], int numstates,
    794  *				int accset[num_rules+1], int nacc,
    795  *				int hashval, int *newds_addr );
    796  *
    797  * On return, the dfa state number is in newds.
    798  */
    799 
    800 int snstods( sns, numstates, accset, nacc, hashval, newds_addr )
    801 int sns[], numstates, accset[], nacc, hashval, *newds_addr;
    802 	{
    803 	int didsort = 0;
    804 	int i, j;
    805 	int newds, *oldsns;
    806 
    807 	for ( i = 1; i <= lastdfa; ++i )
    808 		if ( hashval == dhash[i] )
    809 			{
    810 			if ( numstates == dfasiz[i] )
    811 				{
    812 				oldsns = dss[i];
    813 
    814 				if ( ! didsort )
    815 					{
    816 					/* We sort the states in sns so we
    817 					 * can compare it to oldsns quickly.
    818 					 * We use bubble because there probably
    819 					 * aren't very many states.
    820 					 */
    821 					bubble( sns, numstates );
    822 					didsort = 1;
    823 					}
    824 
    825 				for ( j = 1; j <= numstates; ++j )
    826 					if ( sns[j] != oldsns[j] )
    827 						break;
    828 
    829 				if ( j > numstates )
    830 					{
    831 					++dfaeql;
    832 					*newds_addr = i;
    833 					return 0;
    834 					}
    835 
    836 				++hshcol;
    837 				}
    838 
    839 			else
    840 				++hshsave;
    841 			}
    842 
    843 	/* Make a new dfa. */
    844 
    845 	if ( ++lastdfa >= current_max_dfas )
    846 		increase_max_dfas();
    847 
    848 	newds = lastdfa;
    849 
    850 	dss[newds] = allocate_integer_array( numstates + 1 );
    851 
    852 	/* If we haven't already sorted the states in sns, we do so now,
    853 	 * so that future comparisons with it can be made quickly.
    854 	 */
    855 
    856 	if ( ! didsort )
    857 		bubble( sns, numstates );
    858 
    859 	for ( i = 1; i <= numstates; ++i )
    860 		dss[newds][i] = sns[i];
    861 
    862 	dfasiz[newds] = numstates;
    863 	dhash[newds] = hashval;
    864 
    865 	if ( nacc == 0 )
    866 		{
    867 		if ( reject )
    868 			dfaacc[newds].dfaacc_set = (int *) 0;
    869 		else
    870 			dfaacc[newds].dfaacc_state = 0;
    871 
    872 		accsiz[newds] = 0;
    873 		}
    874 
    875 	else if ( reject )
    876 		{
    877 		/* We sort the accepting set in increasing order so the
    878 		 * disambiguating rule that the first rule listed is considered
    879 		 * match in the event of ties will work.  We use a bubble
    880 		 * sort since the list is probably quite small.
    881 		 */
    882 
    883 		bubble( accset, nacc );
    884 
    885 		dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 );
    886 
    887 		/* Save the accepting set for later */
    888 		for ( i = 1; i <= nacc; ++i )
    889 			{
    890 			dfaacc[newds].dfaacc_set[i] = accset[i];
    891 
    892 			if ( accset[i] <= num_rules )
    893 				/* Who knows, perhaps a REJECT can yield
    894 				 * this rule.
    895 				 */
    896 				rule_useful[accset[i]] = true;
    897 			}
    898 
    899 		accsiz[newds] = nacc;
    900 		}
    901 
    902 	else
    903 		{
    904 		/* Find lowest numbered rule so the disambiguating rule
    905 		 * will work.
    906 		 */
    907 		j = num_rules + 1;
    908 
    909 		for ( i = 1; i <= nacc; ++i )
    910 			if ( accset[i] < j )
    911 				j = accset[i];
    912 
    913 		dfaacc[newds].dfaacc_state = j;
    914 
    915 		if ( j <= num_rules )
    916 			rule_useful[j] = true;
    917 		}
    918 
    919 	*newds_addr = newds;
    920 
    921 	return 1;
    922 	}
    923 
    924 
    925 /* symfollowset - follow the symbol transitions one step
    926  *
    927  * synopsis
    928  *    numstates = symfollowset( int ds[current_max_dfa_size], int dsize,
    929  *				int transsym, int nset[current_max_dfa_size] );
    930  */
    931 
    932 int symfollowset( ds, dsize, transsym, nset )
    933 int ds[], dsize, transsym, nset[];
    934 	{
    935 	int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist;
    936 
    937 	numstates = 0;
    938 
    939 	for ( i = 1; i <= dsize; ++i )
    940 		{ /* for each nfa state ns in the state set of ds */
    941 		ns = ds[i];
    942 		sym = transchar[ns];
    943 		tsp = trans1[ns];
    944 
    945 		if ( sym < 0 )
    946 			{ /* it's a character class */
    947 			sym = -sym;
    948 			ccllist = cclmap[sym];
    949 			lenccl = ccllen[sym];
    950 
    951 			if ( cclng[sym] )
    952 				{
    953 				for ( j = 0; j < lenccl; ++j )
    954 					{
    955 					/* Loop through negated character
    956 					 * class.
    957 					 */
    958 					ch = ccltbl[ccllist + j];
    959 
    960 					if ( ch == 0 )
    961 						ch = NUL_ec;
    962 
    963 					if ( ch > transsym )
    964 						/* Transsym isn't in negated
    965 						 * ccl.
    966 						 */
    967 						break;
    968 
    969 					else if ( ch == transsym )
    970 						/* next 2 */ goto bottom;
    971 					}
    972 
    973 				/* Didn't find transsym in ccl. */
    974 				nset[++numstates] = tsp;
    975 				}
    976 
    977 			else
    978 				for ( j = 0; j < lenccl; ++j )
    979 					{
    980 					ch = ccltbl[ccllist + j];
    981 
    982 					if ( ch == 0 )
    983 						ch = NUL_ec;
    984 
    985 					if ( ch > transsym )
    986 						break;
    987 					else if ( ch == transsym )
    988 						{
    989 						nset[++numstates] = tsp;
    990 						break;
    991 						}
    992 					}
    993 			}
    994 
    995 		else if ( sym >= 'A' && sym <= 'Z' && caseins )
    996 			flexfatal(
    997 			_( "consistency check failed in symfollowset" ) );
    998 
    999 		else if ( sym == SYM_EPSILON )
   1000 			{ /* do nothing */
   1001 			}
   1002 
   1003 		else if ( ABS( ecgroup[sym] ) == transsym )
   1004 			nset[++numstates] = tsp;
   1005 
   1006 		bottom: ;
   1007 		}
   1008 
   1009 	return numstates;
   1010 	}
   1011 
   1012 
   1013 /* sympartition - partition characters with same out-transitions
   1014  *
   1015  * synopsis
   1016  *    sympartition( int ds[current_max_dfa_size], int numstates,
   1017  *			int symlist[numecs], int duplist[numecs] );
   1018  */
   1019 
   1020 void sympartition( ds, numstates, symlist, duplist )
   1021 int ds[], numstates;
   1022 int symlist[], duplist[];
   1023 	{
   1024 	int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich;
   1025 
   1026 	/* Partitioning is done by creating equivalence classes for those
   1027 	 * characters which have out-transitions from the given state.  Thus
   1028 	 * we are really creating equivalence classes of equivalence classes.
   1029 	 */
   1030 
   1031 	for ( i = 1; i <= numecs; ++i )
   1032 		{ /* initialize equivalence class list */
   1033 		duplist[i] = i - 1;
   1034 		dupfwd[i] = i + 1;
   1035 		}
   1036 
   1037 	duplist[1] = NIL;
   1038 	dupfwd[numecs] = NIL;
   1039 
   1040 	for ( i = 1; i <= numstates; ++i )
   1041 		{
   1042 		ns = ds[i];
   1043 		tch = transchar[ns];
   1044 
   1045 		if ( tch != SYM_EPSILON )
   1046 			{
   1047 			if ( tch < -lastccl || tch >= csize )
   1048 				{
   1049 				flexfatal(
   1050 		_( "bad transition character detected in sympartition()" ) );
   1051 				}
   1052 
   1053 			if ( tch >= 0 )
   1054 				{ /* character transition */
   1055 				int ec = ecgroup[tch];
   1056 
   1057 				mkechar( ec, dupfwd, duplist );
   1058 				symlist[ec] = 1;
   1059 				}
   1060 
   1061 			else
   1062 				{ /* character class */
   1063 				tch = -tch;
   1064 
   1065 				lenccl = ccllen[tch];
   1066 				cclp = cclmap[tch];
   1067 				mkeccl( ccltbl + cclp, lenccl, dupfwd,
   1068 					duplist, numecs, NUL_ec );
   1069 
   1070 				if ( cclng[tch] )
   1071 					{
   1072 					j = 0;
   1073 
   1074 					for ( k = 0; k < lenccl; ++k )
   1075 						{
   1076 						ich = ccltbl[cclp + k];
   1077 
   1078 						if ( ich == 0 )
   1079 							ich = NUL_ec;
   1080 
   1081 						for ( ++j; j < ich; ++j )
   1082 							symlist[j] = 1;
   1083 						}
   1084 
   1085 					for ( ++j; j <= numecs; ++j )
   1086 						symlist[j] = 1;
   1087 					}
   1088 
   1089 				else
   1090 					for ( k = 0; k < lenccl; ++k )
   1091 						{
   1092 						ich = ccltbl[cclp + k];
   1093 
   1094 						if ( ich == 0 )
   1095 							ich = NUL_ec;
   1096 
   1097 						symlist[ich] = 1;
   1098 						}
   1099 				}
   1100 			}
   1101 		}
   1102 	}