hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

awkgram.y (13980B)


      1 /*	$OpenBSD: awkgram.y,v 1.9 2011/09/28 19:27:18 millert Exp $	*/
      2 /****************************************************************
      3 Copyright (C) Lucent Technologies 1997
      4 All Rights Reserved
      5 
      6 Permission to use, copy, modify, and distribute this software and
      7 its documentation for any purpose and without fee is hereby
      8 granted, provided that the above copyright notice appear in all
      9 copies and that both that the copyright notice and this
     10 permission notice and warranty disclaimer appear in supporting
     11 documentation, and that the name Lucent Technologies or any of
     12 its entities not be used in advertising or publicity pertaining
     13 to distribution of the software without specific, written prior
     14 permission.
     15 
     16 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
     17 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
     18 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
     19 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     20 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
     21 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
     22 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
     23 THIS SOFTWARE.
     24 ****************************************************************/
     25 
     26 %{
        /* C prologue: declarations and globals used by the rule actions below. */
     27 #include <stdio.h>
     28 #include <string.h>
     29 #include "awk.h"
     30 
        /* checkdup is defined after the rules section and called from the varlist rule. */
     31 void checkdup(Node *list, Cell *item);
        /* yywrap always returns 1. */
     32 int yywrap(void) { return(1); }
     33 
        /* beginloc/endloc collect the bodies of BEGIN and END blocks (see pa_stat). */
     34 Node	*beginloc = 0;
     35 Node	*endloc = 0;
        /* infunc and inloop are incremented and decremented by mid-rule actions, */
        /* so they behave as nesting counts and are tested only for zero/non-zero. */
     36 int	infunc	= 0;	/* = 1 if in arglist or body of func */
     37 int	inloop	= 0;	/* = 1 if in while, for, do */
     38 char	*curfname = 0;	/* current function name */
     39 Node	*arglist = 0;	/* list of args for current function */
     40 %}
     41 
        /* Semantic value of a grammar symbol; the <p>, <cp>, <i>, <s> tags in the */
        /* %token and %type lines below select one of these members. */
     42 %union {
     43 	Node	*p;
     44 	Cell	*cp;
     45 	int	i;
     46 	char	*s;
     47 }
     48 
        /* Token declarations, grouped by the %union member that carries their value. */
        /* Besides lexer tokens, the list includes names that the actions below use */
        /* only as node-type codes (e.g. PROGRAM, PASTAT, INTEST, CONDEXPR, PREINCR). */
     49 %token	<i>	FIRSTTOKEN	/* must be first */
     50 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
     51 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
     52 %token	<i>	ARRAY
     53 %token	<i>	MATCH NOTMATCH MATCHOP
     54 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE
     55 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
     56 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
     57 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
     58 %token	<i>	ADD MINUS MULT DIVIDE MOD
     59 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
     60 %token	<i>	PRINT PRINTF SPRINTF
     61 %token	<p>	ELSE INTEST CONDEXPR
     62 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR
     63 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
     64 %token	<s>	REGEXPR
     65 
        /* Value types of the nonterminals: <p> rules yield Node trees, <s> a string, */
        /* and <i> an int (mostly punctuation/keyword wrappers that absorb newlines). */
     66 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re
     67 %type	<p>	pa_pat pa_stat pa_stats
     68 %type	<s>	reg_expr
     69 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
     70 %type	<p>	var varname funcname varlist
     71 %type	<p>	for if else while
     72 %type	<i>	do st
     73 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
     74 %type	<i>	subop print
     75 
        /* Precedence and associativity table; in yacc, later lines bind tighter. */
        /* GETLINE, RETURN, SPLIT, SUBSTR, WHILE, NOT, POWER, DECR, INCR, INDIRECT and */
        /* the literals '?' ':' '+' '-' '*' '%' are declared only here, without a <tag>. */
        /* CAT and UMINUS never appear as rule symbols; they are used through %prec. */
     76 %right	ASGNOP
     77 %right	'?'
     78 %right	':'
     79 %left	BOR
     80 %left	AND
     81 %left	GETLINE
     82 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
     83 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
     84 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
     85 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
     86 %left	REGEXPR VAR VARNF IVAR WHILE '('
     87 %left	CAT
     88 %left	'+' '-'
     89 %left	'*' '/' '%'
     90 %left	NOT UMINUS
     91 %right	POWER
     92 %right	DECR INCR
     93 %left	INDIRECT
     94 %token	LASTTOKEN	/* must be last */
     95 
     96 %%
     97 
        /* Start symbol. If errorflag is still 0, store the whole program in winner as a */
        /* PROGRAM node holding the BEGIN list, the pattern-action list and the END list. */
        /* On a parse error: discard the lookahead, call bracecheck() and report "bailing out". */
     98 program:
     99 	  pas	{ if (errorflag==0)
    100 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
    101 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }
    102 	;
    103 
        /* AND token followed by any number of newlines. */
    104 and:
    105 	  AND | and NL
    106 	;
    107 
        /* BOR token followed by any number of newlines. */
    108 bor:
    109 	  BOR | bor NL
    110 	;
    111 
        /* A ',' followed by any number of newlines. */
    112 comma:
    113 	  ',' | comma NL
    114 	;
    115 
        /* DO keyword followed by any number of newlines. */
    116 do:
    117 	  DO | do NL
    118 	;
    119 
        /* ELSE keyword followed by any number of newlines. */
    120 else:
    121 	  ELSE | else NL
    122 	;
    123 
        /* The three for-statement forms: */
        /*   for (init; cond; incr) stmt  - cond is wrapped by notnull() */
        /*   for (init; ; incr) stmt      - no condition, NIL is stored instead */
        /*   for (var in array) stmt      - built as an IN node over makearr($5) */
        /* The {inloop++;} mid-rule action occupies a $n slot of its own, which is why */
        /* the loop body is $12, $10 and $8 respectively. */
    124 for:
    125 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
    126 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
    127 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
    128 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
    129 	| FOR '(' varname IN varname rparen {inloop++;} stmt
    130 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
    131 	;
    132 
        /* Name in a function definition: a VAR or CALL token, passed to setfname(). */
        /* Neither action assigns $$; pa_stat casts the funcname value back to Cell *, */
        /* so this relies on yacc's default $$ = $1. */
    133 funcname:
    134 	  VAR	{ setfname($1); }
    135 	| CALL	{ setfname($1); }
    136 	;
    137 
        /* "if (cond)" header; yields the condition wrapped by notnull(). */
    138 if:
    139 	  IF '(' pattern rparen		{ $$ = notnull($3); }
    140 	;
    141 
        /* A '{' followed by any number of newlines. */
    142 lbrace:
    143 	  '{' | lbrace NL
    144 	;
    145 
        /* One or more newlines. */
    146 nl:
    147 	  NL | nl NL
    148 	;
    149 
        /* Zero or more newlines; the empty case yields 0. */
    150 opt_nl:
    151 	  /* empty */	{ $$ = 0; }
    152 	| nl
    153 	;
    154 
        /* Optional run of newlines and semicolons (see pst); the empty case yields 0. */
    155 opt_pst:
    156 	  /* empty */	{ $$ = 0; }
    157 	| pst
    158 	;
    159 
    160 
        /* Optional simple statement, used for the init and increment slots of a for loop; */
        /* the empty case yields 0. */
    161 opt_simple_stmt:
    162 	  /* empty */			{ $$ = 0; }
    163 	| simple_stmt
    164 	;
    165 
        /* The whole pattern-action list, with optional separators on either side. */
        /* A program containing only separators (or nothing) yields 0. */
    166 pas:
    167 	  opt_pst			{ $$ = 0; }
    168 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
    169 	;
    170 
        /* Pattern part of a pattern-action statement, wrapped by notnull(). */
    171 pa_pat:
    172 	  pattern	{ $$ = notnull($1); }
    173 	;
    174 
        /* One top-level item of the program: */
        /*   pattern                    - action defaults to PRINT of rectonode() */
        /*   pattern { ... }            - PASTAT node */
        /*   pat1, pat2 [ { ... } ]     - range pattern built by pa2stat(), same default action */
        /*   { ... }                    - PASTAT with a NIL pattern */
        /*   BEGIN / END { ... }        - body appended to beginloc / endloc, item yields 0 */
        /*   function definition        - registered through defn(), item yields 0 */
        /* In the FUNC alternative the {infunc++;} mid-rule action is $6, so the body is $8. */
    175 pa_stat:
    176 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
    177 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
    178 	| pa_pat ',' opt_nl pa_pat		{ $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
    179 	| pa_pat ',' opt_nl pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $4, $6); }
    180 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }
    181 	| XBEGIN lbrace stmtlist '}'
    182 		{ beginloc = linkum(beginloc, $3); $$ = 0; }
    183 	| XEND lbrace stmtlist '}'
    184 		{ endloc = linkum(endloc, $3); $$ = 0; }
    185 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
    186 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
    187 	;
    188 
        /* One or more pa_stat items, optionally separated by newlines/semicolons, */
        /* chained together with linkum(). */
    189 pa_stats:
    190 	  pa_stat
    191 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
    192 	;
    193 
        /* Comma-separated list of one or more patterns, chained with linkum(). */
        /* Used for call arguments, array subscripts and delete subscripts. */
    194 patlist:
    195 	  pattern
    196 	| patlist comma pattern		{ $$ = linkum($1, $3); }
    197 	;
    198 
        /* Restricted form of pattern, reached only through pplist/prarg (print arguments). */
        /* Compared with pattern it has no relational-operator alternatives (EQ GE GT LE LT NE) */
        /* and no "| GETLINE" alternatives - presumably so that '>' and '|' after print are */
        /* taken as output redirection (see simple_stmt). */
        /* In the MATCHOP alternatives a constant right operand is compiled with makedfa() */
        /* while parsing and the first operand is NIL; otherwise the first operand is (Node *)1. */
    199 ppattern:
    200 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
    201 	| ppattern '?' ppattern ':' ppattern %prec '?'
    202 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    203 	| ppattern bor ppattern %prec BOR
    204 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
    205 	| ppattern and ppattern %prec AND
    206 		{ $$ = op2(AND, notnull($1), notnull($3)); }
    207 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    208 	| ppattern MATCHOP ppattern
    209 		{ if (constnode($3))
    210 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
    211 		  else
    212 			$$ = op3($2, (Node *)1, $1, $3); }
    213 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
    214 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
    215 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
    216 	| re
    217 	| term
    218 	;
    219 
        /* General expression: assignment, ?:, logical or/and, comparisons, regex match, */
        /* array membership, "cmd | getline", concatenation (two adjacent operands, given */
        /* the precedence of CAT), a bare regular expression, or a term. */
        /* For binary operators the node type is the operator token's own value ($2). */
        /* In the MATCHOP alternatives a constant right operand is compiled with makedfa() */
        /* while parsing and the first operand is NIL; otherwise the first operand is (Node *)1. */
        /* When the global safe is set, the "| GETLINE" forms report a syntax error and */
        /* do not assign $$. */
    220 pattern:
    221 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
    222 	| pattern '?' pattern ':' pattern %prec '?'
    223 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    224 	| pattern bor pattern %prec BOR
    225 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
    226 	| pattern and pattern %prec AND
    227 		{ $$ = op2(AND, notnull($1), notnull($3)); }
    228 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }
    229 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
    230 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
    231 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
    232 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
    233 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
    234 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    235 	| pattern MATCHOP pattern
    236 		{ if (constnode($3))
    237 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
    238 		  else
    239 			$$ = op3($2, (Node *)1, $1, $3); }
    240 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
    241 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
    242 	| pattern '|' GETLINE var	{ 
    243 			if (safe) SYNTAX("cmd | getline is unsafe");
    244 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
    245 	| pattern '|' GETLINE		{ 
    246 			if (safe) SYNTAX("cmd | getline is unsafe");
    247 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
    248 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }
    249 	| re
    250 	| term
    251 	;
    252 
        /* Comma-separated list of at least two patterns, chained with linkum(). */
        /* Used inside parentheses: "(a, b) in arr" and parenthesized print arguments. */
    253 plist:
    254 	  pattern comma pattern		{ $$ = linkum($1, $3); }
    255 	| plist comma pattern		{ $$ = linkum($1, $3); }
    256 	;
    257 
        /* Comma-separated list of one or more ppattern items, chained with linkum(). */
    258 pplist:
    259 	  ppattern
    260 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
    261 	;
    262 
        /* Argument list of print/printf: nothing (rectonode() is used instead), */
        /* an unparenthesized pplist, or a parenthesized plist. */
    263 prarg:
    264 	  /* empty */			{ $$ = rectonode(); }
    265 	| pplist
    266 	| '(' plist ')'			{ $$ = $2; }
    267 	;
    268 
        /* Either output keyword; its token value becomes the statement type in simple_stmt. */
    269 print:
    270 	  PRINT | PRINTF
    271 	;
    272 
        /* One or more newlines and/or semicolons in any mix. */
    273 pst:
    274 	  NL | ';' | pst NL | pst ';'
    275 	;
    276 
        /* A '}' followed by any number of newlines. */
    277 rbrace:
    278 	  '}' | rbrace NL
    279 	;
    280 
        /* A bare /regex/ used as an expression: a MATCH of rectonode() against the dfa */
        /* compiled from the regex text, optionally preceded by NOT. */
    281 re:
    282 	   reg_expr
    283 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
    284 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
    285 	;
    286 
        /* Regular-expression literal. startreg() runs as a mid-rule action right after */
        /* the opening '/' - presumably to switch the lexer into regex scanning; confirm */
        /* against the lexer. The action takes slot $2, so the REGEXPR text is $3. */
    287 reg_expr:
    288 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
    289 	;
    290 
        /* A ')' followed by any number of newlines. */
    291 rparen:
    292 	  ')' | rparen NL
    293 	;
    294 
        /* Statement without its terminator: print/printf (optionally redirected with */
        /* '|', APPEND or GT to a term), delete of an array element or a whole array, */
        /* or an expression turned into a statement by exptostat(). */
        /* When the global safe is set, the redirected print forms report a syntax error */
        /* and do not assign $$. */
        /* The error alternative discards the lookahead and reports "illegal statement". */
    295 simple_stmt:
    296 	  print prarg '|' term		{ 
    297 			if (safe) SYNTAX("print | is unsafe");
    298 			else $$ = stat3($1, $2, itonp($3), $4); }
    299 	| print prarg APPEND term	{
    300 			if (safe) SYNTAX("print >> is unsafe");
    301 			else $$ = stat3($1, $2, itonp($3), $4); }
    302 	| print prarg GT term		{
    303 			if (safe) SYNTAX("print > is unsafe");
    304 			else $$ = stat3($1, $2, itonp($3), $4); }
    305 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }
    306 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
    307 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }
    308 	| pattern			{ $$ = exptostat($1); }
    309 	| error				{ yyclearin; SYNTAX("illegal statement"); }
    310 	;
    311 
        /* Statement terminator: one or more newlines, or ';' with optional newlines. */
    312 st:
    313 	  nl
    314 	| ';' opt_nl
    315 	;
    316 
        /* A complete statement. */
        /* break/continue outside a loop (inloop == 0) and next/nextfile inside a function */
        /* (infunc != 0) report a syntax error, but the node is still built. */
        /* In the do-while form the two mid-rule actions take slots $2 and $4, so the body */
        /* is $3 and the condition is $7. In the while form the body is $3 for the same reason. */
        /* "simple_stmt st" has no action, so it relies on yacc's default $$ = $1. */
        /* A lone ';' is an empty statement and yields 0. */
    317 stmt:
    318 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
    319 				  $$ = stat1(BREAK, NIL); }
    320 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
    321 				  $$ = stat1(CONTINUE, NIL); }
    322 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
    323 		{ $$ = stat2(DO, $3, notnull($7)); }
    324 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
    325 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
    326 	| for
    327 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
    328 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }
    329 	| lbrace stmtlist rbrace { $$ = $2; }
    330 	| NEXT st	{ if (infunc)
    331 				SYNTAX("next is illegal inside a function");
    332 			  $$ = stat1(NEXT, NIL); }
    333 	| NEXTFILE st	{ if (infunc)
    334 				SYNTAX("nextfile is illegal inside a function");
    335 			  $$ = stat1(NEXTFILE, NIL); }
    336 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
    337 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
    338 	| simple_stmt st
    339 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }
    340 	| ';' opt_nl		{ $$ = 0; }
    341 	;
    342 
        /* One or more statements, chained with linkum(). */
    343 stmtlist:
    344 	  stmt
    345 	| stmtlist stmt		{ $$ = linkum($1, $2); }
    346 	;
    347 
        /* Either substitution builtin; its token value becomes the node type in term. */
    348 subop:
    349 	  SUB | GSUB
    350 	;
    351 
        /* Operand-level expressions: arithmetic, unary operators, builtin and user */
        /* function calls, getline forms, increments/decrements, constants, variables. */
        /* Points that are easy to miss: */
        /*  - "term '/' ASGNOP term" is built as DIVEQ - presumably for a "/=" that the */
        /*    lexer delivers as '/' followed by ASGNOP; confirm against the lexer. */
        /*  - BLTIN with "()" or with no parentheses at all gets rectonode() as its argument. */
        /*  - INDEX with a regex second argument reports a syntax error but still builds a */
        /*    node, casting the regex text ($5, a char *) to Node *. */
        /*  - SPLIT stores the separator kind as a token code cast to Node * in the last */
        /*    slot: STRING for an expression or default separator, REGEXPR for a /regex/. */
        /*  - MATCHFCN and subop: a /regex/ or constant pattern is compiled by makedfa(..., 1) */
        /*    while parsing and the first operand is NIL; otherwise the first operand is */
        /*    (Node *)1 and the pattern expression itself is stored. */
        /*  - subop without a third argument operates on rectonode(). */
    353  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }
    354  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
    355 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
    356 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
    357 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
    358 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
    359 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
    360 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
    361 	| '+' term %prec UMINUS		{ $$ = $2; }
    362 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
    363 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }
    364 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
    365 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }
    366 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
    367 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
    368 	| CLOSE term			{ $$ = op1(CLOSE, $2); }
    369 	| DECR var			{ $$ = op1(PREDECR, $2); }
    370 	| INCR var			{ $$ = op1(PREINCR, $2); }
    371 	| var DECR			{ $$ = op1(POSTDECR, $1); }
    372 	| var INCR			{ $$ = op1(POSTINCR, $1); }
    373 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
    374 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
    375 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
    376 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
    377 	| INDEX '(' pattern comma pattern ')'
    378 		{ $$ = op2(INDEX, $3, $5); }
    379 	| INDEX '(' pattern comma reg_expr ')'
    380 		{ SYNTAX("index() doesn't permit regular expressions");
    381 		  $$ = op2(INDEX, $3, (Node*)$5); }
    382 	| '(' pattern ')'		{ $$ = $2; }
    383 	| MATCHFCN '(' pattern comma reg_expr ')'
    384 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
    385 	| MATCHFCN '(' pattern comma pattern ')'
    386 		{ if (constnode($5))
    387 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
    388 		  else
    389 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
    390 	| NUMBER			{ $$ = celltonode($1, CCON); }
    391 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
    392 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
    393 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
    394 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
    395 	| SPLIT '(' pattern comma varname ')'
    396 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
    397 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
    398 	| STRING	 		{ $$ = celltonode($1, CCON); }
    399 	| subop '(' reg_expr comma pattern ')'
    400 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
    401 	| subop '(' pattern comma pattern ')'
    402 		{ if (constnode($3))
    403 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
    404 		  else
    405 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
    406 	| subop '(' reg_expr comma pattern comma var ')'
    407 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
    408 	| subop '(' pattern comma pattern comma var ')'
    409 		{ if (constnode($3))
    410 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
    411 		  else
    412 			$$ = op4($1, (Node *)1, $3, $5, $7); }
    413 	| SUBSTR '(' pattern comma pattern comma pattern ')'
    414 		{ $$ = op3(SUBSTR, $3, $5, $7); }
    415 	| SUBSTR '(' pattern comma pattern ')'
    416 		{ $$ = op3(SUBSTR, $3, $5, NIL); }
    417 	| var
    418 	;
    419 
        /* Assignable reference: a plain name, a subscripted array element (ARRAY node */
        /* over makearr of the name), or an INDIRECT node built from an IVAR token or */
        /* from INDIRECT applied to a term. */
    420 var:
    421 	  varname
    422 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
    423 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
    424 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
    425 	;	
    426 
        /* Parameter list of a function definition: empty, or VAR names separated by commas. */
        /* Every alternative also stores the list built so far in the global arglist. */
        /* checkdup() reports a parameter name that already appears earlier in the list. */
    427 varlist:
    428 	  /* nothing */		{ arglist = $$ = 0; }
    429 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
    430 	| varlist comma VAR	{
    431 			checkdup($1, $3);
    432 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
    433 	;
    434 
        /* A name: VAR becomes a CVAR value node, ARG an ARG node carrying the token's */
        /* int value, and VARNF a VARNF node whose operand is the Cell cast to Node *. */
    435 varname:
    436 	  VAR			{ $$ = celltonode($1, CVAR); }
    437 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
    438 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
    439 	;
    440 
    441 
        /* "while (cond)" header; yields the condition wrapped by notnull(). */
    442 while:
    443 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
    444 	;
    445 
    446 %%
    447 
    448 void setfname(Cell *p)
    449 {
    450 	if (isarr(p))
    451 		SYNTAX("%s is an array, not a function", p->nval);
    452 	else if (isfcn(p))
    453 		SYNTAX("you can't define function %s more than once", p->nval);
    454 	curfname = p->nval;
    455 }
    456 
    457 int constnode(Node *p)
    458 {
    459 	return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
    460 }
    461 
    462 char *strnode(Node *p)
    463 {
    464 	return ((Cell *)(p->narg[0]))->sval;
    465 }
    466 
    467 Node *notnull(Node *n)
    468 {
    469 	switch (n->nobj) {
    470 	case LE: case LT: case EQ: case NE: case GT: case GE:
    471 	case BOR: case AND: case NOT:
    472 		return n;
    473 	default:
    474 		return op2(NE, n, nullnode);
    475 	}
    476 }
    477 
    478 void checkdup(Node *vl, Cell *cp)	/* check if name already in list */
    479 {
    480 	char *s = cp->nval;
    481 	for ( ; vl; vl = vl->nnext) {
    482 		if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
    483 			SYNTAX("duplicate argument %s", s);
    484 			break;
    485 		}
    486 	}
    487 }