parse.y (17941B)
1 /* $OpenBSD: parse.y,v 1.8 2003/06/04 17:34:44 millert Exp $ */ 2 3 /* parse.y - parser for flex input */ 4 5 %token CHAR NUMBER SECTEND SCDECL XSCDECL NAME PREVCCL EOF_OP 6 %token OPTION_OP OPT_OUTFILE OPT_PREFIX OPT_YYCLASS 7 8 %token CCE_ALNUM CCE_ALPHA CCE_BLANK CCE_CNTRL CCE_DIGIT CCE_GRAPH 9 %token CCE_LOWER CCE_PRINT CCE_PUNCT CCE_SPACE CCE_UPPER CCE_XDIGIT 10 11 %{ 12 /*- 13 * Copyright (c) 1990 The Regents of the University of California. 14 * All rights reserved. 15 * 16 * This code is derived from software contributed to Berkeley by 17 * Vern Paxson. 18 * 19 * The United States Government has rights in this work pursuant 20 * to contract no. DE-AC03-76SF00098 between the United States 21 * Department of Energy and the University of California. 22 * 23 * Redistribution and use in source and binary forms, with or without 24 * modification, are permitted provided that the following conditions 25 * are met: 26 * 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 33 * Neither the name of the University nor the names of its contributors 34 * may be used to endorse or promote products derived from this software 35 * without specific prior written permission. 36 * 37 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 38 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 39 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 40 * PURPOSE. 41 */ 42 43 /* $Header: /cvs/src/usr.bin/lex/parse.y,v 1.8 2003/06/04 17:34:44 millert Exp $ */ 44 45 46 /* Some versions of bison are broken in that they use alloca() but don't 47 * declare it properly. The following is the patented (just kidding!) 48 * #ifdef chud to fix the problem, courtesy of Francois Pinard. 49 */ 50 #ifdef YYBISON 51 /* AIX requires this to be the first thing in the file. What a piece. */ 52 # ifdef _AIX 53 #pragma alloca 54 # endif 55 #endif 56 57 #include "flexdef.h" 58 59 /* The remainder of the alloca() cruft has to come after including flexdef.h, 60 * so HAVE_ALLOCA_H is (possibly) defined. 61 */ 62 #ifdef YYBISON 63 # ifdef __GNUC__ 64 # ifndef alloca 65 # define alloca __builtin_alloca 66 # endif 67 # else 68 # if HAVE_ALLOCA_H 69 # include <alloca.h> 70 # else 71 # ifdef __hpux 72 void *alloca (); 73 # else 74 # ifdef __TURBOC__ 75 # include <malloc.h> 76 # else 77 char *alloca (); 78 # endif 79 # endif 80 # endif 81 # endif 82 #endif 83 84 /* Bletch, ^^^^ that was ugly! */ 85 86 87 int pat, scnum, eps, headcnt, trailcnt, anyccl, lastchar, i, rulelen; 88 int trlcontxt, xcluflg, currccl, cclsorted, varlength, variable_trail_rule; 89 90 int *scon_stk; 91 int scon_stk_ptr; 92 93 static int madeany = false; /* whether we've made the '.' character class */ 94 int previous_continued_action; /* whether the previous rule's action was '|' */ 95 96 /* Expand a POSIX character class expression. */ 97 #define CCL_EXPR(func) \ 98 { \ 99 int c; \ 100 for ( c = 0; c < csize; ++c ) \ 101 if ( isascii(c) && func(c) ) \ 102 ccladd( currccl, c ); \ 103 } 104 105 /* While POSIX defines isblank(), it's not ANSI C. */ 106 #define IS_BLANK(c) ((c) == ' ' || (c) == '\t') 107 108 /* On some over-ambitious machines, such as DEC Alpha's, the default 109 * token type is "long" instead of "int"; this leads to problems with 110 * declaring yylval in flexdef.h. But so far, all the yacc's I've seen 111 * wrap their definitions of YYSTYPE with "#ifndef YYSTYPE"'s, so the 112 * following should ensure that the default token type is "int". 113 */ 114 #define YYSTYPE int 115 116 %} 117 118 %% 119 goal : initlex sect1 sect1end sect2 initforrule 120 { /* add default rule */ 121 int def_rule; 122 123 pat = cclinit(); 124 cclnegate( pat ); 125 126 def_rule = mkstate( -pat ); 127 128 /* Remember the number of the default rule so we 129 * don't generate "can't match" warnings for it. 130 */ 131 default_rule = num_rules; 132 133 finish_rule( def_rule, false, 0, 0 ); 134 135 for ( i = 1; i <= lastsc; ++i ) 136 scset[i] = mkbranch( scset[i], def_rule ); 137 138 if ( spprdflt ) 139 add_action( 140 "YY_FATAL_ERROR( \"flex scanner jammed\" )" ); 141 else 142 add_action( "ECHO" ); 143 144 add_action( ";\n\tYY_BREAK\n" ); 145 } 146 ; 147 148 initlex : 149 { /* initialize for processing rules */ 150 151 /* Create default DFA start condition. */ 152 scinstal( "INITIAL", false ); 153 } 154 ; 155 156 sect1 : sect1 startconddecl namelist1 157 | sect1 options 158 | 159 | error 160 { synerr( "unknown error processing section 1" ); } 161 ; 162 163 sect1end : SECTEND 164 { 165 check_options(); 166 scon_stk = allocate_integer_array( lastsc + 1 ); 167 scon_stk_ptr = 0; 168 } 169 ; 170 171 startconddecl : SCDECL 172 { xcluflg = false; } 173 174 | XSCDECL 175 { xcluflg = true; } 176 ; 177 178 namelist1 : namelist1 NAME 179 { scinstal( nmstr, xcluflg ); } 180 181 | NAME 182 { scinstal( nmstr, xcluflg ); } 183 184 | error 185 { synerr( "bad start condition list" ); } 186 ; 187 188 options : OPTION_OP optionlist 189 ; 190 191 optionlist : optionlist option 192 | 193 ; 194 195 option : OPT_OUTFILE '=' NAME 196 { 197 outfilename = copy_string( nmstr ); 198 did_outfilename = 1; 199 } 200 | OPT_PREFIX '=' NAME 201 { prefix = copy_string( nmstr ); } 202 | OPT_YYCLASS '=' NAME 203 { yyclass = copy_string( nmstr ); } 204 ; 205 206 sect2 : sect2 scon initforrule flexrule '\n' 207 { scon_stk_ptr = $2; } 208 | sect2 scon '{' sect2 '}' 209 { scon_stk_ptr = $2; } 210 | 211 ; 212 213 initforrule : 214 { 215 /* Initialize for a parse of one rule. */ 216 trlcontxt = variable_trail_rule = varlength = false; 217 trailcnt = headcnt = rulelen = 0; 218 current_state_type = STATE_NORMAL; 219 previous_continued_action = continued_action; 220 in_rule = true; 221 222 new_rule(); 223 } 224 ; 225 226 flexrule : '^' rule 227 { 228 pat = $2; 229 finish_rule( pat, variable_trail_rule, 230 headcnt, trailcnt ); 231 232 if ( scon_stk_ptr > 0 ) 233 { 234 for ( i = 1; i <= scon_stk_ptr; ++i ) 235 scbol[scon_stk[i]] = 236 mkbranch( scbol[scon_stk[i]], 237 pat ); 238 } 239 240 else 241 { 242 /* Add to all non-exclusive start conditions, 243 * including the default (0) start condition. 244 */ 245 246 for ( i = 1; i <= lastsc; ++i ) 247 if ( ! scxclu[i] ) 248 scbol[i] = mkbranch( scbol[i], 249 pat ); 250 } 251 252 if ( ! bol_needed ) 253 { 254 bol_needed = true; 255 256 if ( performance_report > 1 ) 257 pinpoint_message( 258 "'^' operator results in sub-optimal performance" ); 259 } 260 } 261 262 | rule 263 { 264 pat = $1; 265 finish_rule( pat, variable_trail_rule, 266 headcnt, trailcnt ); 267 268 if ( scon_stk_ptr > 0 ) 269 { 270 for ( i = 1; i <= scon_stk_ptr; ++i ) 271 scset[scon_stk[i]] = 272 mkbranch( scset[scon_stk[i]], 273 pat ); 274 } 275 276 else 277 { 278 for ( i = 1; i <= lastsc; ++i ) 279 if ( ! scxclu[i] ) 280 scset[i] = 281 mkbranch( scset[i], 282 pat ); 283 } 284 } 285 286 | EOF_OP 287 { 288 if ( scon_stk_ptr > 0 ) 289 build_eof_action(); 290 291 else 292 { 293 /* This EOF applies to all start conditions 294 * which don't already have EOF actions. 295 */ 296 for ( i = 1; i <= lastsc; ++i ) 297 if ( ! sceof[i] ) 298 scon_stk[++scon_stk_ptr] = i; 299 300 if ( scon_stk_ptr == 0 ) 301 warn( 302 "all start conditions already have <<EOF>> rules" ); 303 304 else 305 build_eof_action(); 306 } 307 } 308 309 | error 310 { synerr( "unrecognized rule" ); } 311 ; 312 313 scon_stk_ptr : 314 { $$ = scon_stk_ptr; } 315 ; 316 317 scon : '<' scon_stk_ptr namelist2 '>' 318 { $$ = $2; } 319 320 | '<' '*' '>' 321 { 322 $$ = scon_stk_ptr; 323 324 for ( i = 1; i <= lastsc; ++i ) 325 { 326 int j; 327 328 for ( j = 1; j <= scon_stk_ptr; ++j ) 329 if ( scon_stk[j] == i ) 330 break; 331 332 if ( j > scon_stk_ptr ) 333 scon_stk[++scon_stk_ptr] = i; 334 } 335 } 336 337 | 338 { $$ = scon_stk_ptr; } 339 ; 340 341 namelist2 : namelist2 ',' sconname 342 343 | sconname 344 345 | error 346 { synerr( "bad start condition list" ); } 347 ; 348 349 sconname : NAME 350 { 351 if ( (scnum = sclookup( nmstr )) == 0 ) 352 format_pinpoint_message( 353 "undeclared start condition %s", 354 nmstr ); 355 else 356 { 357 for ( i = 1; i <= scon_stk_ptr; ++i ) 358 if ( scon_stk[i] == scnum ) 359 { 360 format_warn( 361 "<%s> specified twice", 362 scname[scnum] ); 363 break; 364 } 365 366 if ( i > scon_stk_ptr ) 367 scon_stk[++scon_stk_ptr] = scnum; 368 } 369 } 370 ; 371 372 rule : re2 re 373 { 374 if ( transchar[lastst[$2]] != SYM_EPSILON ) 375 /* Provide final transition \now/ so it 376 * will be marked as a trailing context 377 * state. 378 */ 379 $2 = link_machines( $2, 380 mkstate( SYM_EPSILON ) ); 381 382 mark_beginning_as_normal( $2 ); 383 current_state_type = STATE_NORMAL; 384 385 if ( previous_continued_action ) 386 { 387 /* We need to treat this as variable trailing 388 * context so that the backup does not happen 389 * in the action but before the action switch 390 * statement. If the backup happens in the 391 * action, then the rules "falling into" this 392 * one's action will *also* do the backup, 393 * erroneously. 394 */ 395 if ( ! varlength || headcnt != 0 ) 396 warn( 397 "trailing context made variable due to preceding '|' action" ); 398 399 /* Mark as variable. */ 400 varlength = true; 401 headcnt = 0; 402 } 403 404 if ( lex_compat || (varlength && headcnt == 0) ) 405 { /* variable trailing context rule */ 406 /* Mark the first part of the rule as the 407 * accepting "head" part of a trailing 408 * context rule. 409 * 410 * By the way, we didn't do this at the 411 * beginning of this production because back 412 * then current_state_type was set up for a 413 * trail rule, and add_accept() can create 414 * a new state ... 415 */ 416 add_accept( $1, 417 num_rules | YY_TRAILING_HEAD_MASK ); 418 variable_trail_rule = true; 419 } 420 421 else 422 trailcnt = rulelen; 423 424 $$ = link_machines( $1, $2 ); 425 } 426 427 | re2 re '$' 428 { synerr( "trailing context used twice" ); } 429 430 | re '$' 431 { 432 headcnt = 0; 433 trailcnt = 1; 434 rulelen = 1; 435 varlength = false; 436 437 current_state_type = STATE_TRAILING_CONTEXT; 438 439 if ( trlcontxt ) 440 { 441 synerr( "trailing context used twice" ); 442 $$ = mkstate( SYM_EPSILON ); 443 } 444 445 else if ( previous_continued_action ) 446 { 447 /* See the comment in the rule for "re2 re" 448 * above. 449 */ 450 warn( 451 "trailing context made variable due to preceding '|' action" ); 452 453 varlength = true; 454 } 455 456 if ( lex_compat || varlength ) 457 { 458 /* Again, see the comment in the rule for 459 * "re2 re" above. 460 */ 461 add_accept( $1, 462 num_rules | YY_TRAILING_HEAD_MASK ); 463 variable_trail_rule = true; 464 } 465 466 trlcontxt = true; 467 468 eps = mkstate( SYM_EPSILON ); 469 $$ = link_machines( $1, 470 link_machines( eps, mkstate( '\n' ) ) ); 471 } 472 473 | re 474 { 475 $$ = $1; 476 477 if ( trlcontxt ) 478 { 479 if ( lex_compat || (varlength && headcnt == 0) ) 480 /* Both head and trail are 481 * variable-length. 482 */ 483 variable_trail_rule = true; 484 else 485 trailcnt = rulelen; 486 } 487 } 488 ; 489 490 491 re : re '|' series 492 { 493 varlength = true; 494 $$ = mkor( $1, $3 ); 495 } 496 497 | series 498 { $$ = $1; } 499 ; 500 501 502 re2 : re '/' 503 { 504 /* This rule is written separately so the 505 * reduction will occur before the trailing 506 * series is parsed. 507 */ 508 509 if ( trlcontxt ) 510 synerr( "trailing context used twice" ); 511 else 512 trlcontxt = true; 513 514 if ( varlength ) 515 /* We hope the trailing context is 516 * fixed-length. 517 */ 518 varlength = false; 519 else 520 headcnt = rulelen; 521 522 rulelen = 0; 523 524 current_state_type = STATE_TRAILING_CONTEXT; 525 $$ = $1; 526 } 527 ; 528 529 series : series singleton 530 { 531 /* This is where concatenation of adjacent patterns 532 * gets done. 533 */ 534 $$ = link_machines( $1, $2 ); 535 } 536 537 | singleton 538 { $$ = $1; } 539 ; 540 541 singleton : singleton '*' 542 { 543 varlength = true; 544 545 $$ = mkclos( $1 ); 546 } 547 548 | singleton '+' 549 { 550 varlength = true; 551 $$ = mkposcl( $1 ); 552 } 553 554 | singleton '?' 555 { 556 varlength = true; 557 $$ = mkopt( $1 ); 558 } 559 560 | singleton '{' NUMBER ',' NUMBER '}' 561 { 562 varlength = true; 563 564 if ( $3 > $5 || $3 < 0 ) 565 { 566 synerr( "bad iteration values" ); 567 $$ = $1; 568 } 569 else 570 { 571 if ( $3 == 0 ) 572 { 573 if ( $5 <= 0 ) 574 { 575 synerr( 576 "bad iteration values" ); 577 $$ = $1; 578 } 579 else 580 $$ = mkopt( 581 mkrep( $1, 1, $5 ) ); 582 } 583 else 584 $$ = mkrep( $1, $3, $5 ); 585 } 586 } 587 588 | singleton '{' NUMBER ',' '}' 589 { 590 varlength = true; 591 592 if ( $3 <= 0 ) 593 { 594 synerr( "iteration value must be positive" ); 595 $$ = $1; 596 } 597 598 else 599 $$ = mkrep( $1, $3, INFINITY ); 600 } 601 602 | singleton '{' NUMBER '}' 603 { 604 /* The singleton could be something like "(foo)", 605 * in which case we have no idea what its length 606 * is, so we punt here. 607 */ 608 varlength = true; 609 610 if ( $3 <= 0 ) 611 { 612 synerr( "iteration value must be positive" ); 613 $$ = $1; 614 } 615 616 else 617 $$ = link_machines( $1, 618 copysingl( $1, $3 - 1 ) ); 619 } 620 621 | '.' 622 { 623 if ( ! madeany ) 624 { 625 /* Create the '.' character class. */ 626 anyccl = cclinit(); 627 ccladd( anyccl, '\n' ); 628 cclnegate( anyccl ); 629 630 if ( useecs ) 631 mkeccl( ccltbl + cclmap[anyccl], 632 ccllen[anyccl], nextecm, 633 ecgroup, csize, csize ); 634 635 madeany = true; 636 } 637 638 ++rulelen; 639 640 $$ = mkstate( -anyccl ); 641 } 642 643 | fullccl 644 { 645 if ( ! cclsorted ) 646 /* Sort characters for fast searching. We 647 * use a shell sort since this list could 648 * be large. 649 */ 650 cshell( ccltbl + cclmap[$1], ccllen[$1], true ); 651 652 if ( useecs ) 653 mkeccl( ccltbl + cclmap[$1], ccllen[$1], 654 nextecm, ecgroup, csize, csize ); 655 656 ++rulelen; 657 658 $$ = mkstate( -$1 ); 659 } 660 661 | PREVCCL 662 { 663 ++rulelen; 664 665 $$ = mkstate( -$1 ); 666 } 667 668 | '"' string '"' 669 { $$ = $2; } 670 671 | '(' re ')' 672 { $$ = $2; } 673 674 | CHAR 675 { 676 ++rulelen; 677 678 if ( caseins && $1 >= 'A' && $1 <= 'Z' ) 679 $1 = clower( $1 ); 680 681 $$ = mkstate( $1 ); 682 } 683 ; 684 685 fullccl : '[' ccl ']' 686 { $$ = $2; } 687 688 | '[' '^' ccl ']' 689 { 690 cclnegate( $3 ); 691 $$ = $3; 692 } 693 ; 694 695 ccl : ccl CHAR '-' CHAR 696 { 697 if ( caseins ) 698 { 699 if ( $2 >= 'A' && $2 <= 'Z' ) 700 $2 = clower( $2 ); 701 if ( $4 >= 'A' && $4 <= 'Z' ) 702 $4 = clower( $4 ); 703 } 704 705 if ( $2 > $4 ) 706 synerr( "negative range in character class" ); 707 708 else 709 { 710 for ( i = $2; i <= $4; ++i ) 711 ccladd( $1, i ); 712 713 /* Keep track if this ccl is staying in 714 * alphabetical order. 715 */ 716 cclsorted = cclsorted && ($2 > lastchar); 717 lastchar = $4; 718 } 719 720 $$ = $1; 721 } 722 723 | ccl CHAR 724 { 725 if ( caseins && $2 >= 'A' && $2 <= 'Z' ) 726 $2 = clower( $2 ); 727 728 ccladd( $1, $2 ); 729 cclsorted = cclsorted && ($2 > lastchar); 730 lastchar = $2; 731 $$ = $1; 732 } 733 734 | ccl ccl_expr 735 { 736 /* Too hard to properly maintain cclsorted. */ 737 cclsorted = false; 738 $$ = $1; 739 } 740 741 | 742 { 743 cclsorted = true; 744 lastchar = 0; 745 currccl = $$ = cclinit(); 746 } 747 ; 748 749 ccl_expr: CCE_ALNUM { CCL_EXPR(isalnum) } 750 | CCE_ALPHA { CCL_EXPR(isalpha) } 751 | CCE_BLANK { CCL_EXPR(IS_BLANK) } 752 | CCE_CNTRL { CCL_EXPR(iscntrl) } 753 | CCE_DIGIT { CCL_EXPR(isdigit) } 754 | CCE_GRAPH { CCL_EXPR(isgraph) } 755 | CCE_LOWER { CCL_EXPR(islower) } 756 | CCE_PRINT { CCL_EXPR(isprint) } 757 | CCE_PUNCT { CCL_EXPR(ispunct) } 758 | CCE_SPACE { CCL_EXPR(isspace) } 759 | CCE_UPPER { 760 if ( caseins ) 761 CCL_EXPR(islower) 762 else 763 CCL_EXPR(isupper) 764 } 765 | CCE_XDIGIT { CCL_EXPR(isxdigit) } 766 ; 767 768 string : string CHAR 769 { 770 if ( caseins && $2 >= 'A' && $2 <= 'Z' ) 771 $2 = clower( $2 ); 772 773 ++rulelen; 774 775 $$ = link_machines( $1, mkstate( $2 ) ); 776 } 777 778 | 779 { $$ = mkstate( SYM_EPSILON ); } 780 ; 781 782 %% 783 784 785 /* build_eof_action - build the "<<EOF>>" action for the active start 786 * conditions 787 */ 788 789 void build_eof_action() 790 { 791 register int i; 792 char action_text[MAXLINE]; 793 794 for ( i = 1; i <= scon_stk_ptr; ++i ) 795 { 796 if ( sceof[scon_stk[i]] ) 797 format_pinpoint_message( 798 "multiple <<EOF>> rules for start condition %s", 799 scname[scon_stk[i]] ); 800 801 else 802 { 803 sceof[scon_stk[i]] = true; 804 snprintf( action_text, sizeof action_text, 805 "case YY_STATE_EOF(%s):\n", 806 scname[scon_stk[i]] ); 807 add_action( action_text ); 808 } 809 } 810 811 line_directive_out( (FILE *) 0, 1 ); 812 813 /* This isn't a normal rule after all - don't count it as 814 * such, so we don't have any holes in the rule numbering 815 * (which make generating "rule can never match" warnings 816 * more difficult. 817 */ 818 --num_rules; 819 ++num_eof_rules; 820 } 821 822 823 /* format_synerr - write out formatted syntax error */ 824 825 void format_synerr( msg, arg ) 826 char msg[], arg[]; 827 { 828 char errmsg[MAXLINE]; 829 830 (void) snprintf( errmsg, sizeof errmsg, msg, arg ); 831 synerr( errmsg ); 832 } 833 834 835 /* synerr - report a syntax error */ 836 837 void synerr( str ) 838 char str[]; 839 { 840 syntaxerror = true; 841 pinpoint_message( str ); 842 } 843 844 845 /* format_warn - write out formatted warning */ 846 847 void format_warn( msg, arg ) 848 char msg[], arg[]; 849 { 850 char warn_msg[MAXLINE]; 851 852 (void) snprintf( warn_msg, sizeof warn_msg, msg, arg ); 853 warn( warn_msg ); 854 } 855 856 857 /* warn - report a warning, unless -w was given */ 858 859 void warn( str ) 860 char str[]; 861 { 862 line_warning( str, linenum ); 863 } 864 865 /* format_pinpoint_message - write out a message formatted with one string, 866 * pinpointing its location 867 */ 868 869 void format_pinpoint_message( msg, arg ) 870 char msg[], arg[]; 871 { 872 char errmsg[MAXLINE]; 873 874 (void) snprintf( errmsg, sizeof errmsg, msg, arg ); 875 pinpoint_message( errmsg ); 876 } 877 878 879 /* pinpoint_message - write out a message, pinpointing its location */ 880 881 void pinpoint_message( str ) 882 char str[]; 883 { 884 line_pinpoint( str, linenum ); 885 } 886 887 888 /* line_warning - report a warning at a given line, unless -w was given */ 889 890 void line_warning( str, line ) 891 char str[]; 892 int line; 893 { 894 char warning[MAXLINE]; 895 896 if ( ! nowarn ) 897 { 898 snprintf( warning, sizeof warning, "warning, %s", str ); 899 line_pinpoint( warning, line ); 900 } 901 } 902 903 904 /* line_pinpoint - write out a message, pinpointing it at the given line */ 905 906 void line_pinpoint( str, line ) 907 char str[]; 908 int line; 909 { 910 fprintf( stderr, "\"%s\", line %d: %s\n", infilename, line, str ); 911 } 912 913 914 /* yyerror - eat up an error message from the parser; 915 * currently, messages are ignore 916 */ 917 918 void yyerror( msg ) 919 char msg[]; 920 { 921 }