scan.l (15320B)
/*	$OpenBSD: scan.l,v 1.9 2006/12/06 05:03:29 ray Exp $	*/

/* scan.l - scanner for flex input */

%{
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Vern Paxson.
 *
 * The United States Government has rights in this work pursuant
 * to contract no. DE-AC03-76SF00098 between the United States
 * Department of Energy and the University of California.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * Neither the name of the University nor the names of its contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE.
 */

/* $Header: /cvs/src/usr.bin/lex/scan.l,v 1.9 2006/12/06 05:03:29 ray Exp $ */

#include "flexdef.h"
#include "ytab.h"

/* Append the current match to the action text being collected. */
#define ACTION_ECHO add_action( yytext )

/* Emit a definition of `def' only when `should_define' is non-zero. */
#define ACTION_IFDEF(def, should_define) \
	{ \
	if ( should_define ) \
		action_define( def, 1 ); \
	}

#define MARK_END_OF_PROLOG mark_prolog();

/* The generated scanner function is named flexscan. */
#define YY_DECL \
	int flexscan()

/* Hand the first matched character to the parser as a CHAR token. */
#define RETURNCHAR \
	yylval = (unsigned char) yytext[0]; \
	return CHAR;

/* Copy the match (bounded by sizeof nmstr) and return a NAME token. */
#define RETURNNAME \
	strlcpy( nmstr, yytext, sizeof nmstr); \
	return NAME;

/* Push str[start..] back onto the input, last character first.
 * Relies on the int `i' declared at the top of the rules section.
 */
#define PUT_BACK_STRING(str, start) \
	for ( i = strlen( str ) - 1; i >= start; --i ) \
		unput((str)[i])

/* The scanner is caseless, so these two helpers re-check the case of
 * the matched keyword before recording that the feature is used.
 */
#define CHECK_REJECT(str) \
	if ( all_upper( str ) ) \
		reject = true;

#define CHECK_YYMORE(str) \
	if ( all_lower( str ) ) \
		yymore_used = true;
%}

%option caseless nodefault outfile="scan.c" stack noyy_top_state
%option nostdinit

%x SECT2 SECT2PROLOG SECT3 CODEBLOCK PICKUPDEF SC CARETISBOL NUM QUOTE
%x FIRSTCCL CCL ACTION RECOVER COMMENT ACTION_STRING PERCENT_BRACE_ACTION
%x OPTION LINEDIR

WS		[[:blank:]]+
OPTWS		[[:blank:]]*
NOT_WS		[^[:blank:]\n]

NL		\r?\n

NAME		([[:alpha:]_][[:alnum:]_-]*)
NOT_NAME	[^[:alpha:]_*\n]+

SCNAME		{NAME}

ESCSEQ		(\\([^\n]|[0-7]{1,3}|x[[:xdigit:]]{1,2}))

FIRST_CCL_CHAR	([^\\\n]|{ESCSEQ})
CCL_CHAR	([^\\\n\]]|{ESCSEQ})
CCL_EXPR	("[:"[[:alpha:]]+":]")

LEXOPT		[aceknopr]

%%
	static int bracelevel, didadef, indented_code;
	static int doing_rule_action = false;
	static int option_sense;

	int doing_codeblock = false;
	int i;
	Char nmdef[MAXLINE], myesc();


<INITIAL>{
	^{WS}		indented_code = true; BEGIN(CODEBLOCK);
	^"/*"		ACTION_ECHO; yy_push_state( COMMENT );
	^#{OPTWS}line{WS}	yy_push_state( LINEDIR );
	^"%s"{NAME}?	return SCDECL;
	^"%x"{NAME}?	return XSCDECL;
	^"%{".*{NL}	{
			++linenum;
			line_directive_out( (FILE *) 0, 1 );
			indented_code = false;
			BEGIN(CODEBLOCK);
			}

	{WS}		/* discard */

	^"%%".*		{
			sectnum = 2;
			bracelevel = 0;
			mark_defs1();
			line_directive_out( (FILE *) 0, 1 );
			BEGIN(SECT2PROLOG);
			return SECTEND;
			}

	^"%pointer".*{NL}	yytext_is_array = false; ++linenum;
	^"%array".*{NL}		yytext_is_array = true; ++linenum;

	^"%option"	BEGIN(OPTION); return OPTION_OP;

	^"%"{LEXOPT}{OPTWS}[[:digit:]]*{OPTWS}{NL}	++linenum; /* ignore */
	^"%"{LEXOPT}{WS}.*{NL}	++linenum;	/* ignore */

	^"%"[^sxaceknopr{}].*	synerr( _( "unrecognized '%' directive" ) );

	^{NAME}		{
			strlcpy( nmstr, yytext, sizeof nmstr );
			didadef = false;
			BEGIN(PICKUPDEF);
			}

	{SCNAME}	RETURNNAME;
	^{OPTWS}{NL}	++linenum; /* allows blank lines in section 1 */
	{OPTWS}{NL}	ACTION_ECHO; ++linenum; /* maybe end of comment line */
}


	/* Body of a C comment: echoed as-is until the closing delimiter. */
<COMMENT>{
	"*/"		ACTION_ECHO; yy_pop_state();
	"*"		ACTION_ECHO;
	[^*\n]+		ACTION_ECHO;
	[^*\n]*{NL}	++linenum; ACTION_ECHO;
}

	/* Rest of a #line directive: picks up line number and file name. */
<LINEDIR>{
	\n		yy_pop_state();
	[[:digit:]]+	linenum = myctoi( yytext );

	\"[^"\n]*\"	{
			flex_free( (void *) infilename );
			infilename = copy_string( yytext + 1 );
			infilename[strlen( infilename ) - 1] = '\0';
			}
	.		/* ignore spurious characters */
}

	/* Section 1 code, from a percent-brace block or an indented line. */
<CODEBLOCK>{
	^"%}".*{NL}	++linenum; BEGIN(INITIAL);

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;

	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( indented_code )
				BEGIN(INITIAL);
			}
}


	/* Definition text following a name at the start of a line. */
<PICKUPDEF>{
	{WS}		/* separates name and definition */

	{NOT_WS}.*	{
			strlcpy( (char *) nmdef, yytext, sizeof nmdef);

			/* Skip trailing whitespace. */
			for ( i = strlen( (char *) nmdef ) - 1;
			      i >= 0 && (nmdef[i] == ' ' || nmdef[i] == '\t');
			      --i )
				;

			nmdef[i + 1] = '\0';

			ndinstal( nmstr, nmdef );
			didadef = true;
			}

	{NL}		{
			if ( ! didadef )
				synerr( _( "incomplete name definition" ) );
			BEGIN(INITIAL);
			++linenum;
			}
}


	/* Arguments of a %option directive. */
<OPTION>{
	{NL}		++linenum; BEGIN(INITIAL);
	{WS}		option_sense = true;

	"="		return '=';

	no		option_sense = ! option_sense;

	7bit		csize = option_sense ? 128 : 256;
	8bit		csize = option_sense ? 256 : 128;

	align		long_align = option_sense;
	always-interactive	{
			action_define( "YY_ALWAYS_INTERACTIVE", option_sense );
			}
	array		yytext_is_array = option_sense;
	backup		backing_up_report = option_sense;
	batch		interactive = ! option_sense;
	"c++"		C_plus_plus = option_sense;
	caseful|case-sensitive		caseins = ! option_sense;
	caseless|case-insensitive	caseins = option_sense;
	debug		ddebug = option_sense;
	default		spprdflt = ! option_sense;
	ecs		useecs = option_sense;
	fast		{
			useecs = usemecs = false;
			use_read = fullspd = true;
			}
	full		{
			useecs = usemecs = false;
			use_read = fulltbl = true;
			}
	input		ACTION_IFDEF("YY_NO_INPUT", ! option_sense);
	interactive	interactive = option_sense;
	lex-compat	lex_compat = option_sense;
	main		{
			action_define( "YY_MAIN", option_sense );
			do_yywrap = ! option_sense;
			}
	meta-ecs	usemecs = option_sense;
	never-interactive	{
			action_define( "YY_NEVER_INTERACTIVE", option_sense );
			}
	perf-report	performance_report += option_sense ? 1 : -1;
	pointer		yytext_is_array = ! option_sense;
	read		use_read = option_sense;
	reject		reject_really_used = option_sense;
	stack		action_define( "YY_STACK_USED", option_sense );
	stdinit		do_stdinit = option_sense;
	stdout		use_stdout = option_sense;
	unput		ACTION_IFDEF("YY_NO_UNPUT", ! option_sense);
	verbose		printstats = option_sense;
	warn		nowarn = ! option_sense;
	yylineno	do_yylineno = option_sense;
	yymore		yymore_really_used = option_sense;
	yywrap		do_yywrap = option_sense;

	yy_push_state	ACTION_IFDEF("YY_NO_PUSH_STATE", ! option_sense);
	yy_pop_state	ACTION_IFDEF("YY_NO_POP_STATE", ! option_sense);
	yy_top_state	ACTION_IFDEF("YY_NO_TOP_STATE", ! option_sense);

	yy_scan_buffer	ACTION_IFDEF("YY_NO_SCAN_BUFFER", ! option_sense);
	yy_scan_bytes	ACTION_IFDEF("YY_NO_SCAN_BYTES", ! option_sense);
	yy_scan_string	ACTION_IFDEF("YY_NO_SCAN_STRING", ! option_sense);

	outfile		return OPT_OUTFILE;
	prefix		return OPT_PREFIX;
	yyclass		return OPT_YYCLASS;

	\"[^"\n]*\"	{
			strlcpy( nmstr, yytext + 1, sizeof nmstr);
			if (nmstr[strlen(nmstr) - 1] == '"')
				nmstr[strlen(nmstr) - 1] = '\0';
			return NAME;
			}

	(([a-mo-z]|n[a-np-z])[[:alpha:]\-+]*)|.	{
			format_synerr( _( "unrecognized %%option: %s" ),
				yytext );
			BEGIN(RECOVER);
			}
}

<RECOVER>.*{NL}		++linenum; BEGIN(INITIAL);


	/* Code between the first section separator and the first rule. */
<SECT2PROLOG>{
	^"%{".*	++bracelevel; yyless( 2 );	/* eat only %{ */
	^"%}".*	--bracelevel; yyless( 2 );	/* eat only %} */

	^{WS}.*	ACTION_ECHO;	/* indented code in prolog */

	^{NOT_WS}.*	{	/* non-indented code */
			if ( bracelevel <= 0 )
				{ /* not in %{ ... %} */
				yyless( 0 );	/* put it all back */
				yy_set_bol( 1 );
				mark_prolog();
				BEGIN(SECT2);
				}
			else
				ACTION_ECHO;
			}

	.*		ACTION_ECHO;
	{NL}	++linenum; ACTION_ECHO;

	<<EOF>>		{
			mark_prolog();
			sectnum = 0;
			yyterminate(); /* to stop the parser */
			}
}

	/* Section 2: rule patterns. */
<SECT2>{
	^{OPTWS}{NL}	++linenum; /* allow blank lines in section 2 */

	^{OPTWS}"%{"	{
			indented_code = false;
			doing_codeblock = true;
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);
			}

	^{OPTWS}"<"	BEGIN(SC); return '<';
	^{OPTWS}"^"	return '^';
	\"		BEGIN(QUOTE); return '"';
	"{"/[[:digit:]]	BEGIN(NUM); return '{';
	"$"/([[:blank:]]|{NL})	return '$';

	{WS}"%{"		{
			bracelevel = 1;
			BEGIN(PERCENT_BRACE_ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}
	{WS}"|".*{NL}	continued_action = true; ++linenum; return '\n';

	^{WS}"/*"	{
			yyless( yyleng - 2 );	/* put back '/', '*' */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			}

	^{WS}		/* allow indented rules */

	{WS}		{
			/* This rule is separate from the one below because
			 * otherwise we get variable trailing context, so
			 * we can't build the scanner using -{f,F}.
			 */
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	{OPTWS}{NL}	{
			bracelevel = 0;
			continued_action = false;
			BEGIN(ACTION);
			unput( '\n' );	/* so <ACTION> sees it */

			if ( in_rule )
				{
				doing_rule_action = true;
				in_rule = false;
				return '\n';
				}
			}

	^{OPTWS}"<<EOF>>"	|
	"<<EOF>>"	return EOF_OP;

	^"%%".*		{
			sectnum = 3;
			BEGIN(SECT3);
			yyterminate(); /* to stop the parser */
			}

	"["({FIRST_CCL_CHAR}|{CCL_EXPR})({CCL_CHAR}|{CCL_EXPR})*	{
			int cclval;

			strlcpy( nmstr, yytext, sizeof nmstr);

			/* Check to see if we've already encountered this
			 * ccl.
			 */
			if ( (cclval = ccllookup( (Char *) nmstr )) != 0 )
				{
				if ( input() != ']' )
					synerr( _( "bad character class" ) );

				yylval = cclval;
				++cclreuse;
				return PREVCCL;
				}
			else
				{
				/* We fudge a bit.  We know that this ccl will
				 * soon be numbered as lastccl + 1 by cclinit.
				 */
				cclinstal( (Char *) nmstr, lastccl + 1 );

				/* Push back everything but the leading bracket
				 * so the ccl can be rescanned.
				 */
				yyless( 1 );

				BEGIN(FIRSTCCL);
				return '[';
				}
			}

	"{"{NAME}"}"	{
			register Char *nmdefptr;
			Char *ndlookup();

			strlcpy( nmstr, yytext + 1, sizeof nmstr );

			/* Chop the trailing brace.  Index by the length of
			 * the copy rather than by yyleng: strlcpy bounds the
			 * copy to sizeof nmstr, so for an over-long name
			 * yyleng - 2 would fall outside the buffer.
			 */
			if ( nmstr[strlen( nmstr ) - 1] == '}' )
				nmstr[strlen( nmstr ) - 1] = '\0';

			if ( (nmdefptr = ndlookup( nmstr )) == 0 )
				format_synerr(
					_( "undefined definition {%s}" ),
						nmstr );

			else
				{ /* push back name surrounded by ()'s */
				int len = strlen( (char *) nmdefptr );

				if ( lex_compat || nmdefptr[0] == '^' ||
				     (len > 0 && nmdefptr[len - 1] == '$') )
					{ /* don't use ()'s after all */
					PUT_BACK_STRING((char *) nmdefptr, 0);

					if ( nmdefptr[0] == '^' )
						BEGIN(CARETISBOL);
					}

				else
					{
					unput(')');
					PUT_BACK_STRING((char *) nmdefptr, 0);
					unput('(');
					}
				}
			}

	[/|*+?.(){}]	return (unsigned char) yytext[0];
	.		RETURNCHAR;
}


	/* Start-condition list between angle brackets. */
<SC>{
	[,*]		return (unsigned char) yytext[0];
	">"		BEGIN(SECT2); return '>';
	">"/^		BEGIN(CARETISBOL); return '>';
	{SCNAME}	RETURNNAME;
	.		{
			format_synerr( _( "bad <start condition>: %s" ),
				yytext );
			}
}

<CARETISBOL>"^"		BEGIN(SECT2); return '^';


	/* Inside a double-quoted pattern string. */
<QUOTE>{
	[^"\n]		RETURNCHAR;
	\"		BEGIN(SECT2); return '"';

	{NL}		{
			synerr( _( "missing quote" ) );
			BEGIN(SECT2);
			++linenum;
			return '"';
			}
}


	/* First character after the opening bracket of a character class. */
<FIRSTCCL>{
	"^"/[^-\]\n]	BEGIN(CCL); return '^';
	"^"/("-"|"]")	return '^';
	.		BEGIN(CCL); RETURNCHAR;
}

<CCL>{
	-/[^\]\n]	return '-';
	[^\]\n]		RETURNCHAR;
	"]"		BEGIN(SECT2); return ']';
	.|{NL}		{
			synerr( _( "bad character class" ) );
			BEGIN(SECT2);
			return ']';
			}
}

<FIRSTCCL,CCL>{
	"[:alnum:]"	BEGIN(CCL); return CCE_ALNUM;
	"[:alpha:]"	BEGIN(CCL); return CCE_ALPHA;
	"[:blank:]"	BEGIN(CCL); return CCE_BLANK;
	"[:cntrl:]"	BEGIN(CCL); return CCE_CNTRL;
	"[:digit:]"	BEGIN(CCL); return CCE_DIGIT;
	"[:graph:]"	BEGIN(CCL); return CCE_GRAPH;
	"[:lower:]"	BEGIN(CCL); return CCE_LOWER;
	"[:print:]"	BEGIN(CCL); return CCE_PRINT;
	"[:punct:]"	BEGIN(CCL); return CCE_PUNCT;
	"[:space:]"	BEGIN(CCL); return CCE_SPACE;
	"[:upper:]"	BEGIN(CCL); return CCE_UPPER;
	"[:xdigit:]"	BEGIN(CCL); return CCE_XDIGIT;
	{CCL_EXPR}	{
			format_synerr(
				_( "bad character class expression: %s" ),
					yytext );
			BEGIN(CCL); return CCE_ALNUM;
			}
}

	/* Repeat count between braces. */
<NUM>{
	[[:digit:]]+	{
			yylval = myctoi( yytext );
			return NUMBER;
			}

	","		return ',';
	"}"		BEGIN(SECT2); return '}';

	.		{
			synerr( _( "bad character inside {}'s" ) );
			BEGIN(SECT2);
			return '}';
			}

	{NL}		{
			synerr( _( "missing }" ) );
			BEGIN(SECT2);
			++linenum;
			return '}';
			}
}


<PERCENT_BRACE_ACTION>{
	{OPTWS}"%}".*		bracelevel = 0;

	<ACTION>"/*"		ACTION_ECHO; yy_push_state( COMMENT );

	<CODEBLOCK,ACTION>{
		"reject"	{
			ACTION_ECHO;
			CHECK_REJECT(yytext);
			}
		"yymore"	{
			ACTION_ECHO;
			CHECK_YYMORE(yytext);
			}
	}

	{NAME}|{NOT_NAME}|.	ACTION_ECHO;
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 ||
			     (doing_codeblock && indented_code) )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = doing_codeblock = false;
				BEGIN(SECT2);
				}
			}
}


	/* Reject and YYmore() are checked for above, in PERCENT_BRACE_ACTION */
<ACTION>{
	"{"		ACTION_ECHO; ++bracelevel;
	"}"		ACTION_ECHO; --bracelevel;
	[^[:alpha:]_{}"'/\n]+	ACTION_ECHO;
	{NAME}		ACTION_ECHO;
	"'"([^'\\\n]|\\.)*"'"	ACTION_ECHO; /* character constant */
	\"		ACTION_ECHO; BEGIN(ACTION_STRING);
	{NL}		{
			++linenum;
			ACTION_ECHO;
			if ( bracelevel == 0 )
				{
				if ( doing_rule_action )
					add_action( "\tYY_BREAK\n" );

				doing_rule_action = false;
				BEGIN(SECT2);
				}
			}
	.		ACTION_ECHO;
}

	/* String literal inside an action; echoed up to the closing quote. */
<ACTION_STRING>{
	[^"\\\n]+	ACTION_ECHO;
	\\.		ACTION_ECHO;
	{NL}		++linenum; ACTION_ECHO;
	\"		ACTION_ECHO; BEGIN(ACTION);
	.		ACTION_ECHO;
}

<COMMENT,ACTION,ACTION_STRING><<EOF>>	{
			synerr( _( "EOF encountered inside an action" ) );
			yyterminate();
			}


<SECT2,QUOTE,FIRSTCCL,CCL>{ESCSEQ}	{
			yylval = myesc( (Char *) yytext );

			if ( YY_START == FIRSTCCL )
				BEGIN(CCL);

			return CHAR;
			}


<SECT3>{
	.*(\n?)		ECHO;
	<<EOF>>		sectnum = 0; yyterminate();
}

<*>.|\n			format_synerr( _( "bad character: %s" ), yytext );

%%


/* yywrap - switch to the next input file, if one is left
 *
 * Returns 0 after opening the next entry of input_files, or 1 once
 * num_input_files has been used up.
 */

int yywrap( void )
	{
	if ( --num_input_files > 0 )
		{
		set_input_file( *++input_files );
		return 0;
		}

	else
		return 1;
	}


/* set_input_file - open the given file (if NULL, stdin) for scanning
 *
 * A NULL name or "-" selects stdin, recorded as "<stdin>".  Any other
 * name is opened for reading and a failure is reported through lerrsf.
 * linenum is reset to 1 in both cases.
 *
 * NOTE(review): the previous value of infilename is overwritten without
 * being released here - confirm whether callers rely on that.
 */

void set_input_file( char *file )
	{
	if ( file && strcmp( file, "-" ) )
		{
		infilename = copy_string( file );
		yyin = fopen( infilename, "r" );

		if ( yyin == NULL )
			lerrsf( _( "can't open %s" ), file );
		}

	else
		{
		yyin = stdin;
		infilename = copy_string( "<stdin>" );
		}

	linenum = 1;
	}


/* Wrapper routines for accessing the scanner's malloc routines. */

/* flex_alloc - thin wrapper around malloc; result may be NULL */

void *flex_alloc( size_t size )
	{
	return malloc( size );
	}

/* flex_realloc - thin wrapper around realloc; result may be NULL */

void *flex_realloc( void *ptr, size_t size )
	{
	return realloc( ptr, size );
	}

/* flex_free - thin wrapper around free; a NULL ptr is a no-op */

void flex_free( void *ptr )
	{
	free( ptr );
	}