dfa.c (25910B)
1 /* $OpenBSD: dfa.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */ 2 3 /* dfa - DFA construction routines */ 4 5 /*- 6 * Copyright (c) 1990 The Regents of the University of California. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to Berkeley by 10 * Vern Paxson. 11 * 12 * The United States Government has rights in this work pursuant 13 * to contract no. DE-AC03-76SF00098 between the United States 14 * Department of Energy and the University of California. 15 * 16 * Redistribution and use in source and binary forms, with or without 17 * modification, are permitted provided that the following conditions 18 * are met: 19 * 20 * 1. Redistributions of source code must retain the above copyright 21 * notice, this list of conditions and the following disclaimer. 22 * 2. Redistributions in binary form must reproduce the above copyright 23 * notice, this list of conditions and the following disclaimer in the 24 * documentation and/or other materials provided with the distribution. 25 * 26 * Neither the name of the University nor the names of its contributors 27 * may be used to endorse or promote products derived from this software 28 * without specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 31 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 32 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 33 * PURPOSE. 34 */ 35 36 /* $Header: /cvs/src/usr.bin/lex/dfa.c,v 1.6 2003/06/04 17:34:44 millert Exp $ */ 37 38 #include "flexdef.h" 39 40 41 /* declare functions that have forward references */ 42 43 void dump_associated_rules PROTO((FILE*, int)); 44 void dump_transitions PROTO((FILE*, int[])); 45 void sympartition PROTO((int[], int, int[], int[])); 46 int symfollowset PROTO((int[], int, int, int[])); 47 48 49 /* check_for_backing_up - check a DFA state for backing up 50 * 51 * synopsis 52 * void check_for_backing_up( int ds, int state[numecs] ); 53 * 54 * ds is the number of the state to check and state[] is its out-transitions, 55 * indexed by equivalence class. 56 */ 57 58 void check_for_backing_up( ds, state ) 59 int ds; 60 int state[]; 61 { 62 if ( (reject && ! dfaacc[ds].dfaacc_set) || 63 (! reject && ! dfaacc[ds].dfaacc_state) ) 64 { /* state is non-accepting */ 65 ++num_backing_up; 66 67 if ( backing_up_report ) 68 { 69 fprintf( backing_up_file, 70 _( "State #%d is non-accepting -\n" ), ds ); 71 72 /* identify the state */ 73 dump_associated_rules( backing_up_file, ds ); 74 75 /* Now identify it further using the out- and 76 * jam-transitions. 77 */ 78 dump_transitions( backing_up_file, state ); 79 80 putc( '\n', backing_up_file ); 81 } 82 } 83 } 84 85 86 /* check_trailing_context - check to see if NFA state set constitutes 87 * "dangerous" trailing context 88 * 89 * synopsis 90 * void check_trailing_context( int nfa_states[num_states+1], int num_states, 91 * int accset[nacc+1], int nacc ); 92 * 93 * NOTES 94 * Trailing context is "dangerous" if both the head and the trailing 95 * part are of variable size \and/ there's a DFA state which contains 96 * both an accepting state for the head part of the rule and NFA states 97 * which occur after the beginning of the trailing context. 98 * 99 * When such a rule is matched, it's impossible to tell if having been 100 * in the DFA state indicates the beginning of the trailing context or 101 * further-along scanning of the pattern. In these cases, a warning 102 * message is issued. 103 * 104 * nfa_states[1 .. num_states] is the list of NFA states in the DFA. 105 * accset[1 .. nacc] is the list of accepting numbers for the DFA state. 106 */ 107 108 void check_trailing_context( nfa_states, num_states, accset, nacc ) 109 int *nfa_states, num_states; 110 int *accset; 111 int nacc; 112 { 113 int i, j; 114 115 for ( i = 1; i <= num_states; ++i ) 116 { 117 int ns = nfa_states[i]; 118 int type = state_type[ns]; 119 int ar = assoc_rule[ns]; 120 121 if ( type == STATE_NORMAL || rule_type[ar] != RULE_VARIABLE ) 122 { /* do nothing */ 123 } 124 125 else if ( type == STATE_TRAILING_CONTEXT ) 126 { 127 /* Potential trouble. Scan set of accepting numbers 128 * for the one marking the end of the "head". We 129 * assume that this looping will be fairly cheap 130 * since it's rare that an accepting number set 131 * is large. 132 */ 133 for ( j = 1; j <= nacc; ++j ) 134 if ( accset[j] & YY_TRAILING_HEAD_MASK ) 135 { 136 line_warning( 137 _( "dangerous trailing context" ), 138 rule_linenum[ar] ); 139 return; 140 } 141 } 142 } 143 } 144 145 146 /* dump_associated_rules - list the rules associated with a DFA state 147 * 148 * Goes through the set of NFA states associated with the DFA and 149 * extracts the first MAX_ASSOC_RULES unique rules, sorts them, 150 * and writes a report to the given file. 151 */ 152 153 void dump_associated_rules( file, ds ) 154 FILE *file; 155 int ds; 156 { 157 int i, j; 158 int num_associated_rules = 0; 159 int rule_set[MAX_ASSOC_RULES + 1]; 160 int *dset = dss[ds]; 161 int size = dfasiz[ds]; 162 163 for ( i = 1; i <= size; ++i ) 164 { 165 int rule_num = rule_linenum[assoc_rule[dset[i]]]; 166 167 for ( j = 1; j <= num_associated_rules; ++j ) 168 if ( rule_num == rule_set[j] ) 169 break; 170 171 if ( j > num_associated_rules ) 172 { /* new rule */ 173 if ( num_associated_rules < MAX_ASSOC_RULES ) 174 rule_set[++num_associated_rules] = rule_num; 175 } 176 } 177 178 bubble( rule_set, num_associated_rules ); 179 180 fprintf( file, _( " associated rule line numbers:" ) ); 181 182 for ( i = 1; i <= num_associated_rules; ++i ) 183 { 184 if ( i % 8 == 1 ) 185 putc( '\n', file ); 186 187 fprintf( file, "\t%d", rule_set[i] ); 188 } 189 190 putc( '\n', file ); 191 } 192 193 194 /* dump_transitions - list the transitions associated with a DFA state 195 * 196 * synopsis 197 * dump_transitions( FILE *file, int state[numecs] ); 198 * 199 * Goes through the set of out-transitions and lists them in human-readable 200 * form (i.e., not as equivalence classes); also lists jam transitions 201 * (i.e., all those which are not out-transitions, plus EOF). The dump 202 * is done to the given file. 203 */ 204 205 void dump_transitions( file, state ) 206 FILE *file; 207 int state[]; 208 { 209 int i, ec; 210 int out_char_set[CSIZE]; 211 212 for ( i = 0; i < csize; ++i ) 213 { 214 ec = ABS( ecgroup[i] ); 215 out_char_set[i] = state[ec]; 216 } 217 218 fprintf( file, _( " out-transitions: " ) ); 219 220 list_character_set( file, out_char_set ); 221 222 /* now invert the members of the set to get the jam transitions */ 223 for ( i = 0; i < csize; ++i ) 224 out_char_set[i] = ! out_char_set[i]; 225 226 fprintf( file, _( "\n jam-transitions: EOF " ) ); 227 228 list_character_set( file, out_char_set ); 229 230 putc( '\n', file ); 231 } 232 233 234 /* epsclosure - construct the epsilon closure of a set of ndfa states 235 * 236 * synopsis 237 * int *epsclosure( int t[num_states], int *numstates_addr, 238 * int accset[num_rules+1], int *nacc_addr, 239 * int *hashval_addr ); 240 * 241 * NOTES 242 * The epsilon closure is the set of all states reachable by an arbitrary 243 * number of epsilon transitions, which themselves do not have epsilon 244 * transitions going out, unioned with the set of states which have non-null 245 * accepting numbers. t is an array of size numstates of nfa state numbers. 246 * Upon return, t holds the epsilon closure and *numstates_addr is updated. 247 * accset holds a list of the accepting numbers, and the size of accset is 248 * given by *nacc_addr. t may be subjected to reallocation if it is not 249 * large enough to hold the epsilon closure. 250 * 251 * hashval is the hash value for the dfa corresponding to the state set. 252 */ 253 254 int *epsclosure( t, ns_addr, accset, nacc_addr, hv_addr ) 255 int *t, *ns_addr, accset[], *nacc_addr, *hv_addr; 256 { 257 int stkpos, ns, tsp; 258 int numstates = *ns_addr, nacc, hashval, transsym, nfaccnum; 259 int stkend, nstate; 260 static int did_stk_init = false, *stk; 261 262 #define MARK_STATE(state) \ 263 trans1[state] = trans1[state] - MARKER_DIFFERENCE; 264 265 #define IS_MARKED(state) (trans1[state] < 0) 266 267 #define UNMARK_STATE(state) \ 268 trans1[state] = trans1[state] + MARKER_DIFFERENCE; 269 270 #define CHECK_ACCEPT(state) \ 271 { \ 272 nfaccnum = accptnum[state]; \ 273 if ( nfaccnum != NIL ) \ 274 accset[++nacc] = nfaccnum; \ 275 } 276 277 #define DO_REALLOCATION \ 278 { \ 279 current_max_dfa_size += MAX_DFA_SIZE_INCREMENT; \ 280 ++num_reallocs; \ 281 t = reallocate_integer_array( t, current_max_dfa_size ); \ 282 stk = reallocate_integer_array( stk, current_max_dfa_size ); \ 283 } \ 284 285 #define PUT_ON_STACK(state) \ 286 { \ 287 if ( ++stkend >= current_max_dfa_size ) \ 288 DO_REALLOCATION \ 289 stk[stkend] = state; \ 290 MARK_STATE(state) \ 291 } 292 293 #define ADD_STATE(state) \ 294 { \ 295 if ( ++numstates >= current_max_dfa_size ) \ 296 DO_REALLOCATION \ 297 t[numstates] = state; \ 298 hashval += state; \ 299 } 300 301 #define STACK_STATE(state) \ 302 { \ 303 PUT_ON_STACK(state) \ 304 CHECK_ACCEPT(state) \ 305 if ( nfaccnum != NIL || transchar[state] != SYM_EPSILON ) \ 306 ADD_STATE(state) \ 307 } 308 309 310 if ( ! did_stk_init ) 311 { 312 stk = allocate_integer_array( current_max_dfa_size ); 313 did_stk_init = true; 314 } 315 316 nacc = stkend = hashval = 0; 317 318 for ( nstate = 1; nstate <= numstates; ++nstate ) 319 { 320 ns = t[nstate]; 321 322 /* The state could be marked if we've already pushed it onto 323 * the stack. 324 */ 325 if ( ! IS_MARKED(ns) ) 326 { 327 PUT_ON_STACK(ns) 328 CHECK_ACCEPT(ns) 329 hashval += ns; 330 } 331 } 332 333 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 334 { 335 ns = stk[stkpos]; 336 transsym = transchar[ns]; 337 338 if ( transsym == SYM_EPSILON ) 339 { 340 tsp = trans1[ns] + MARKER_DIFFERENCE; 341 342 if ( tsp != NO_TRANSITION ) 343 { 344 if ( ! IS_MARKED(tsp) ) 345 STACK_STATE(tsp) 346 347 tsp = trans2[ns]; 348 349 if ( tsp != NO_TRANSITION && ! IS_MARKED(tsp) ) 350 STACK_STATE(tsp) 351 } 352 } 353 } 354 355 /* Clear out "visit" markers. */ 356 357 for ( stkpos = 1; stkpos <= stkend; ++stkpos ) 358 { 359 if ( IS_MARKED(stk[stkpos]) ) 360 UNMARK_STATE(stk[stkpos]) 361 else 362 flexfatal( 363 _( "consistency check failed in epsclosure()" ) ); 364 } 365 366 *ns_addr = numstates; 367 *hv_addr = hashval; 368 *nacc_addr = nacc; 369 370 return t; 371 } 372 373 374 /* increase_max_dfas - increase the maximum number of DFAs */ 375 376 void increase_max_dfas() 377 { 378 current_max_dfas += MAX_DFAS_INCREMENT; 379 380 ++num_reallocs; 381 382 base = reallocate_integer_array( base, current_max_dfas ); 383 def = reallocate_integer_array( def, current_max_dfas ); 384 dfasiz = reallocate_integer_array( dfasiz, current_max_dfas ); 385 accsiz = reallocate_integer_array( accsiz, current_max_dfas ); 386 dhash = reallocate_integer_array( dhash, current_max_dfas ); 387 dss = reallocate_int_ptr_array( dss, current_max_dfas ); 388 dfaacc = reallocate_dfaacc_union( dfaacc, current_max_dfas ); 389 390 if ( nultrans ) 391 nultrans = 392 reallocate_integer_array( nultrans, current_max_dfas ); 393 } 394 395 396 /* ntod - convert an ndfa to a dfa 397 * 398 * Creates the dfa corresponding to the ndfa we've constructed. The 399 * dfa starts out in state #1. 400 */ 401 402 void ntod() 403 { 404 int *accset, ds, nacc, newds; 405 int sym, hashval, numstates, dsize; 406 int num_full_table_rows; /* used only for -f */ 407 int *nset, *dset; 408 int targptr, totaltrans, i, comstate, comfreq, targ; 409 int symlist[CSIZE + 1]; 410 int num_start_states; 411 int todo_head, todo_next; 412 413 /* Note that the following are indexed by *equivalence classes* 414 * and not by characters. Since equivalence classes are indexed 415 * beginning with 1, even if the scanner accepts NUL's, this 416 * means that (since every character is potentially in its own 417 * equivalence class) these arrays must have room for indices 418 * from 1 to CSIZE, so their size must be CSIZE + 1. 419 */ 420 int duplist[CSIZE + 1], state[CSIZE + 1]; 421 int targfreq[CSIZE + 1], targstate[CSIZE + 1]; 422 423 accset = allocate_integer_array( num_rules + 1 ); 424 nset = allocate_integer_array( current_max_dfa_size ); 425 426 /* The "todo" queue is represented by the head, which is the DFA 427 * state currently being processed, and the "next", which is the 428 * next DFA state number available (not in use). We depend on the 429 * fact that snstods() returns DFA's \in increasing order/, and thus 430 * need only know the bounds of the dfas to be processed. 431 */ 432 todo_head = todo_next = 0; 433 434 for ( i = 0; i <= csize; ++i ) 435 { 436 duplist[i] = NIL; 437 symlist[i] = false; 438 } 439 440 for ( i = 0; i <= num_rules; ++i ) 441 accset[i] = NIL; 442 443 if ( trace ) 444 { 445 dumpnfa( scset[1] ); 446 fputs( _( "\n\nDFA Dump:\n\n" ), stderr ); 447 } 448 449 inittbl(); 450 451 /* Check to see whether we should build a separate table for 452 * transitions on NUL characters. We don't do this for full-speed 453 * (-F) scanners, since for them we don't have a simple state 454 * number lying around with which to index the table. We also 455 * don't bother doing it for scanners unless (1) NUL is in its own 456 * equivalence class (indicated by a positive value of 457 * ecgroup[NUL]), (2) NUL's equivalence class is the last 458 * equivalence class, and (3) the number of equivalence classes is 459 * the same as the number of characters. This latter case comes 460 * about when useecs is false or when it's true but every character 461 * still manages to land in its own class (unlikely, but it's 462 * cheap to check for). If all these things are true then the 463 * character code needed to represent NUL's equivalence class for 464 * indexing the tables is going to take one more bit than the 465 * number of characters, and therefore we won't be assured of 466 * being able to fit it into a YY_CHAR variable. This rules out 467 * storing the transitions in a compressed table, since the code 468 * for interpreting them uses a YY_CHAR variable (perhaps it 469 * should just use an integer, though; this is worth pondering ... 470 * ###). 471 * 472 * Finally, for full tables, we want the number of entries in the 473 * table to be a power of two so the array references go fast (it 474 * will just take a shift to compute the major index). If 475 * encoding NUL's transitions in the table will spoil this, we 476 * give it its own table (note that this will be the case if we're 477 * not using equivalence classes). 478 */ 479 480 /* Note that the test for ecgroup[0] == numecs below accomplishes 481 * both (1) and (2) above 482 */ 483 if ( ! fullspd && ecgroup[0] == numecs ) 484 { 485 /* NUL is alone in its equivalence class, which is the 486 * last one. 487 */ 488 int use_NUL_table = (numecs == csize); 489 490 if ( fulltbl && ! use_NUL_table ) 491 { 492 /* We still may want to use the table if numecs 493 * is a power of 2. 494 */ 495 int power_of_two; 496 497 for ( power_of_two = 1; power_of_two <= csize; 498 power_of_two *= 2 ) 499 if ( numecs == power_of_two ) 500 { 501 use_NUL_table = true; 502 break; 503 } 504 } 505 506 if ( use_NUL_table ) 507 nultrans = allocate_integer_array( current_max_dfas ); 508 509 /* From now on, nultrans != nil indicates that we're 510 * saving null transitions for later, separate encoding. 511 */ 512 } 513 514 515 if ( fullspd ) 516 { 517 for ( i = 0; i <= numecs; ++i ) 518 state[i] = 0; 519 520 place_state( state, 0, 0 ); 521 dfaacc[0].dfaacc_state = 0; 522 } 523 524 else if ( fulltbl ) 525 { 526 if ( nultrans ) 527 /* We won't be including NUL's transitions in the 528 * table, so build it for entries from 0 .. numecs - 1. 529 */ 530 num_full_table_rows = numecs; 531 532 else 533 /* Take into account the fact that we'll be including 534 * the NUL entries in the transition table. Build it 535 * from 0 .. numecs. 536 */ 537 num_full_table_rows = numecs + 1; 538 539 /* Unless -Ca, declare it "short" because it's a real 540 * long-shot that that won't be large enough. 541 */ 542 out_str_dec( "static yyconst %s yy_nxt[][%d] =\n {\n", 543 /* '}' so vi doesn't get too confused */ 544 long_align ? "long" : "short", num_full_table_rows ); 545 546 outn( " {" ); 547 548 /* Generate 0 entries for state #0. */ 549 for ( i = 0; i < num_full_table_rows; ++i ) 550 mk2data( 0 ); 551 552 dataflush(); 553 outn( " },\n" ); 554 } 555 556 /* Create the first states. */ 557 558 num_start_states = lastsc * 2; 559 560 for ( i = 1; i <= num_start_states; ++i ) 561 { 562 numstates = 1; 563 564 /* For each start condition, make one state for the case when 565 * we're at the beginning of the line (the '^' operator) and 566 * one for the case when we're not. 567 */ 568 if ( i % 2 == 1 ) 569 nset[numstates] = scset[(i / 2) + 1]; 570 else 571 nset[numstates] = 572 mkbranch( scbol[i / 2], scset[i / 2] ); 573 574 nset = epsclosure( nset, &numstates, accset, &nacc, &hashval ); 575 576 if ( snstods( nset, numstates, accset, nacc, hashval, &ds ) ) 577 { 578 numas += nacc; 579 totnst += numstates; 580 ++todo_next; 581 582 if ( variable_trailing_context_rules && nacc > 0 ) 583 check_trailing_context( nset, numstates, 584 accset, nacc ); 585 } 586 } 587 588 if ( ! fullspd ) 589 { 590 if ( ! snstods( nset, 0, accset, 0, 0, &end_of_buffer_state ) ) 591 flexfatal( 592 _( "could not create unique end-of-buffer state" ) ); 593 594 ++numas; 595 ++num_start_states; 596 ++todo_next; 597 } 598 599 while ( todo_head < todo_next ) 600 { 601 targptr = 0; 602 totaltrans = 0; 603 604 for ( i = 1; i <= numecs; ++i ) 605 state[i] = 0; 606 607 ds = ++todo_head; 608 609 dset = dss[ds]; 610 dsize = dfasiz[ds]; 611 612 if ( trace ) 613 fprintf( stderr, _( "state # %d:\n" ), ds ); 614 615 sympartition( dset, dsize, symlist, duplist ); 616 617 for ( sym = 1; sym <= numecs; ++sym ) 618 { 619 if ( symlist[sym] ) 620 { 621 symlist[sym] = 0; 622 623 if ( duplist[sym] == NIL ) 624 { 625 /* Symbol has unique out-transitions. */ 626 numstates = symfollowset( dset, dsize, 627 sym, nset ); 628 nset = epsclosure( nset, &numstates, 629 accset, &nacc, &hashval ); 630 631 if ( snstods( nset, numstates, accset, 632 nacc, hashval, &newds ) ) 633 { 634 totnst = totnst + numstates; 635 ++todo_next; 636 numas += nacc; 637 638 if ( 639 variable_trailing_context_rules && 640 nacc > 0 ) 641 check_trailing_context( 642 nset, numstates, 643 accset, nacc ); 644 } 645 646 state[sym] = newds; 647 648 if ( trace ) 649 fprintf( stderr, "\t%d\t%d\n", 650 sym, newds ); 651 652 targfreq[++targptr] = 1; 653 targstate[targptr] = newds; 654 ++numuniq; 655 } 656 657 else 658 { 659 /* sym's equivalence class has the same 660 * transitions as duplist(sym)'s 661 * equivalence class. 662 */ 663 targ = state[duplist[sym]]; 664 state[sym] = targ; 665 666 if ( trace ) 667 fprintf( stderr, "\t%d\t%d\n", 668 sym, targ ); 669 670 /* Update frequency count for 671 * destination state. 672 */ 673 674 i = 0; 675 while ( targstate[++i] != targ ) 676 ; 677 678 ++targfreq[i]; 679 ++numdup; 680 } 681 682 ++totaltrans; 683 duplist[sym] = NIL; 684 } 685 } 686 687 if ( caseins && ! useecs ) 688 { 689 int j; 690 691 for ( i = 'A', j = 'a'; i <= 'Z'; ++i, ++j ) 692 { 693 if ( state[i] == 0 && state[j] != 0 ) 694 /* We're adding a transition. */ 695 ++totaltrans; 696 697 else if ( state[i] != 0 && state[j] == 0 ) 698 /* We're taking away a transition. */ 699 --totaltrans; 700 701 state[i] = state[j]; 702 } 703 } 704 705 numsnpairs += totaltrans; 706 707 if ( ds > num_start_states ) 708 check_for_backing_up( ds, state ); 709 710 if ( nultrans ) 711 { 712 nultrans[ds] = state[NUL_ec]; 713 state[NUL_ec] = 0; /* remove transition */ 714 } 715 716 if ( fulltbl ) 717 { 718 outn( " {" ); 719 720 /* Supply array's 0-element. */ 721 if ( ds == end_of_buffer_state ) 722 mk2data( -end_of_buffer_state ); 723 else 724 mk2data( end_of_buffer_state ); 725 726 for ( i = 1; i < num_full_table_rows; ++i ) 727 /* Jams are marked by negative of state 728 * number. 729 */ 730 mk2data( state[i] ? state[i] : -ds ); 731 732 dataflush(); 733 outn( " },\n" ); 734 } 735 736 else if ( fullspd ) 737 place_state( state, ds, totaltrans ); 738 739 else if ( ds == end_of_buffer_state ) 740 /* Special case this state to make sure it does what 741 * it's supposed to, i.e., jam on end-of-buffer. 742 */ 743 stack1( ds, 0, 0, JAMSTATE ); 744 745 else /* normal, compressed state */ 746 { 747 /* Determine which destination state is the most 748 * common, and how many transitions to it there are. 749 */ 750 751 comfreq = 0; 752 comstate = 0; 753 754 for ( i = 1; i <= targptr; ++i ) 755 if ( targfreq[i] > comfreq ) 756 { 757 comfreq = targfreq[i]; 758 comstate = targstate[i]; 759 } 760 761 bldtbl( state, ds, totaltrans, comstate, comfreq ); 762 } 763 } 764 765 if ( fulltbl ) 766 dataend(); 767 768 else if ( ! fullspd ) 769 { 770 cmptmps(); /* create compressed template entries */ 771 772 /* Create tables for all the states with only one 773 * out-transition. 774 */ 775 while ( onesp > 0 ) 776 { 777 mk1tbl( onestate[onesp], onesym[onesp], onenext[onesp], 778 onedef[onesp] ); 779 --onesp; 780 } 781 782 mkdeftbl(); 783 } 784 785 flex_free( (void *) accset ); 786 flex_free( (void *) nset ); 787 } 788 789 790 /* snstods - converts a set of ndfa states into a dfa state 791 * 792 * synopsis 793 * is_new_state = snstods( int sns[numstates], int numstates, 794 * int accset[num_rules+1], int nacc, 795 * int hashval, int *newds_addr ); 796 * 797 * On return, the dfa state number is in newds. 798 */ 799 800 int snstods( sns, numstates, accset, nacc, hashval, newds_addr ) 801 int sns[], numstates, accset[], nacc, hashval, *newds_addr; 802 { 803 int didsort = 0; 804 int i, j; 805 int newds, *oldsns; 806 807 for ( i = 1; i <= lastdfa; ++i ) 808 if ( hashval == dhash[i] ) 809 { 810 if ( numstates == dfasiz[i] ) 811 { 812 oldsns = dss[i]; 813 814 if ( ! didsort ) 815 { 816 /* We sort the states in sns so we 817 * can compare it to oldsns quickly. 818 * We use bubble because there probably 819 * aren't very many states. 820 */ 821 bubble( sns, numstates ); 822 didsort = 1; 823 } 824 825 for ( j = 1; j <= numstates; ++j ) 826 if ( sns[j] != oldsns[j] ) 827 break; 828 829 if ( j > numstates ) 830 { 831 ++dfaeql; 832 *newds_addr = i; 833 return 0; 834 } 835 836 ++hshcol; 837 } 838 839 else 840 ++hshsave; 841 } 842 843 /* Make a new dfa. */ 844 845 if ( ++lastdfa >= current_max_dfas ) 846 increase_max_dfas(); 847 848 newds = lastdfa; 849 850 dss[newds] = allocate_integer_array( numstates + 1 ); 851 852 /* If we haven't already sorted the states in sns, we do so now, 853 * so that future comparisons with it can be made quickly. 854 */ 855 856 if ( ! didsort ) 857 bubble( sns, numstates ); 858 859 for ( i = 1; i <= numstates; ++i ) 860 dss[newds][i] = sns[i]; 861 862 dfasiz[newds] = numstates; 863 dhash[newds] = hashval; 864 865 if ( nacc == 0 ) 866 { 867 if ( reject ) 868 dfaacc[newds].dfaacc_set = (int *) 0; 869 else 870 dfaacc[newds].dfaacc_state = 0; 871 872 accsiz[newds] = 0; 873 } 874 875 else if ( reject ) 876 { 877 /* We sort the accepting set in increasing order so the 878 * disambiguating rule that the first rule listed is considered 879 * match in the event of ties will work. We use a bubble 880 * sort since the list is probably quite small. 881 */ 882 883 bubble( accset, nacc ); 884 885 dfaacc[newds].dfaacc_set = allocate_integer_array( nacc + 1 ); 886 887 /* Save the accepting set for later */ 888 for ( i = 1; i <= nacc; ++i ) 889 { 890 dfaacc[newds].dfaacc_set[i] = accset[i]; 891 892 if ( accset[i] <= num_rules ) 893 /* Who knows, perhaps a REJECT can yield 894 * this rule. 895 */ 896 rule_useful[accset[i]] = true; 897 } 898 899 accsiz[newds] = nacc; 900 } 901 902 else 903 { 904 /* Find lowest numbered rule so the disambiguating rule 905 * will work. 906 */ 907 j = num_rules + 1; 908 909 for ( i = 1; i <= nacc; ++i ) 910 if ( accset[i] < j ) 911 j = accset[i]; 912 913 dfaacc[newds].dfaacc_state = j; 914 915 if ( j <= num_rules ) 916 rule_useful[j] = true; 917 } 918 919 *newds_addr = newds; 920 921 return 1; 922 } 923 924 925 /* symfollowset - follow the symbol transitions one step 926 * 927 * synopsis 928 * numstates = symfollowset( int ds[current_max_dfa_size], int dsize, 929 * int transsym, int nset[current_max_dfa_size] ); 930 */ 931 932 int symfollowset( ds, dsize, transsym, nset ) 933 int ds[], dsize, transsym, nset[]; 934 { 935 int ns, tsp, sym, i, j, lenccl, ch, numstates, ccllist; 936 937 numstates = 0; 938 939 for ( i = 1; i <= dsize; ++i ) 940 { /* for each nfa state ns in the state set of ds */ 941 ns = ds[i]; 942 sym = transchar[ns]; 943 tsp = trans1[ns]; 944 945 if ( sym < 0 ) 946 { /* it's a character class */ 947 sym = -sym; 948 ccllist = cclmap[sym]; 949 lenccl = ccllen[sym]; 950 951 if ( cclng[sym] ) 952 { 953 for ( j = 0; j < lenccl; ++j ) 954 { 955 /* Loop through negated character 956 * class. 957 */ 958 ch = ccltbl[ccllist + j]; 959 960 if ( ch == 0 ) 961 ch = NUL_ec; 962 963 if ( ch > transsym ) 964 /* Transsym isn't in negated 965 * ccl. 966 */ 967 break; 968 969 else if ( ch == transsym ) 970 /* next 2 */ goto bottom; 971 } 972 973 /* Didn't find transsym in ccl. */ 974 nset[++numstates] = tsp; 975 } 976 977 else 978 for ( j = 0; j < lenccl; ++j ) 979 { 980 ch = ccltbl[ccllist + j]; 981 982 if ( ch == 0 ) 983 ch = NUL_ec; 984 985 if ( ch > transsym ) 986 break; 987 else if ( ch == transsym ) 988 { 989 nset[++numstates] = tsp; 990 break; 991 } 992 } 993 } 994 995 else if ( sym >= 'A' && sym <= 'Z' && caseins ) 996 flexfatal( 997 _( "consistency check failed in symfollowset" ) ); 998 999 else if ( sym == SYM_EPSILON ) 1000 { /* do nothing */ 1001 } 1002 1003 else if ( ABS( ecgroup[sym] ) == transsym ) 1004 nset[++numstates] = tsp; 1005 1006 bottom: ; 1007 } 1008 1009 return numstates; 1010 } 1011 1012 1013 /* sympartition - partition characters with same out-transitions 1014 * 1015 * synopsis 1016 * sympartition( int ds[current_max_dfa_size], int numstates, 1017 * int symlist[numecs], int duplist[numecs] ); 1018 */ 1019 1020 void sympartition( ds, numstates, symlist, duplist ) 1021 int ds[], numstates; 1022 int symlist[], duplist[]; 1023 { 1024 int tch, i, j, k, ns, dupfwd[CSIZE + 1], lenccl, cclp, ich; 1025 1026 /* Partitioning is done by creating equivalence classes for those 1027 * characters which have out-transitions from the given state. Thus 1028 * we are really creating equivalence classes of equivalence classes. 1029 */ 1030 1031 for ( i = 1; i <= numecs; ++i ) 1032 { /* initialize equivalence class list */ 1033 duplist[i] = i - 1; 1034 dupfwd[i] = i + 1; 1035 } 1036 1037 duplist[1] = NIL; 1038 dupfwd[numecs] = NIL; 1039 1040 for ( i = 1; i <= numstates; ++i ) 1041 { 1042 ns = ds[i]; 1043 tch = transchar[ns]; 1044 1045 if ( tch != SYM_EPSILON ) 1046 { 1047 if ( tch < -lastccl || tch >= csize ) 1048 { 1049 flexfatal( 1050 _( "bad transition character detected in sympartition()" ) ); 1051 } 1052 1053 if ( tch >= 0 ) 1054 { /* character transition */ 1055 int ec = ecgroup[tch]; 1056 1057 mkechar( ec, dupfwd, duplist ); 1058 symlist[ec] = 1; 1059 } 1060 1061 else 1062 { /* character class */ 1063 tch = -tch; 1064 1065 lenccl = ccllen[tch]; 1066 cclp = cclmap[tch]; 1067 mkeccl( ccltbl + cclp, lenccl, dupfwd, 1068 duplist, numecs, NUL_ec ); 1069 1070 if ( cclng[tch] ) 1071 { 1072 j = 0; 1073 1074 for ( k = 0; k < lenccl; ++k ) 1075 { 1076 ich = ccltbl[cclp + k]; 1077 1078 if ( ich == 0 ) 1079 ich = NUL_ec; 1080 1081 for ( ++j; j < ich; ++j ) 1082 symlist[j] = 1; 1083 } 1084 1085 for ( ++j; j <= numecs; ++j ) 1086 symlist[j] = 1; 1087 } 1088 1089 else 1090 for ( k = 0; k < lenccl; ++k ) 1091 { 1092 ich = ccltbl[cclp + k]; 1093 1094 if ( ich == 0 ) 1095 ich = NUL_ec; 1096 1097 symlist[ich] = 1; 1098 } 1099 } 1100 } 1101 } 1102 }