hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

regparse.c (21226B)


      1 /*
      2  * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
      3  *
      4  * Sccsid @(#)regparse.c	1.12 (gritter) 9/22/03
      5  */
      6 /*  UNIX(R) Regular Expresssion Library
      7  *
      8  *  Note: Code is released under the GNU LGPL
      9  *
     10  *  Copyright (C) 2001 Caldera International, Inc.
     11  *
     12  *  This library is free software; you can redistribute it and/or
     13  *  modify it under the terms of the GNU Lesser General Public
     14  *  License as published by the Free Software Foundation; either
     15  *  version 2 of the License, or (at your option) any later version.
     16  *
     17  *  This library is distributed in the hope that it will be useful,
     18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20  *  Lesser General Public License for more details.
     21  *
     22  *  You should have received a copy of the GNU Lesser General Public
     23  *  License along with this library; if not, write to:
     24  *        Free Software Foundation, Inc.
     25  *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     26  */
     27 
     28 /*	#include "synonyms.h"	*/
     29 #include <stdlib.h>
     30 #include <ctype.h>
     31 #include "re.h"
     32 
     33 LIBUXRE_STATIC void
     34 libuxre_regdeltree(Tree *tp, int all)
     35 {
     36 	if (tp == 0)
     37 		return;
     38 	if (tp->op < 0)
     39 	{
     40 		switch (KIND_ROP(tp->op))
     41 		{
     42 		case BINARY_ROP:
     43 			libuxre_regdeltree(tp->right.ptr, all);
     44 			/*FALLTHROUGH*/
     45 		case UNARY_ROP:
     46 			libuxre_regdeltree(tp->left.ptr, all);
     47 			break;
     48 		default:
     49 			if (tp->op == ROP_BKT && all)
     50 			{
     51 				libuxre_bktfree(tp->right.info.bkt);
     52 				free(tp->right.info.bkt);
     53 			}
     54 			break;
     55 		}
     56 	}
     57 	free(tp);
     58 }
     59 
     60 LIBUXRE_STATIC Tree *
     61 libuxre_reg1tree(w_type op, Tree *lp)
     62 {
     63 	Tree *tp;
     64 
     65 	if ((tp = malloc(sizeof(Tree))) == 0)
     66 	{
     67 		if (lp != 0)
     68 			libuxre_regdeltree(lp, 1);
     69 		return 0;
     70 	}
     71 	tp->op = op;
     72 	tp->left.ptr = lp;
     73 	if (lp != 0)
     74 		lp->parent = tp;
     75 	return tp;
     76 }
     77 
     78 LIBUXRE_STATIC Tree *
     79 libuxre_reg2tree(w_type op, Tree *lp, Tree *rp)
     80 {
     81 	Tree *tp;
     82 
     83 	if ((tp = malloc(sizeof(Tree))) == 0)
     84 	{
     85 		libuxre_regdeltree(lp, 1);
     86 		libuxre_regdeltree(rp, 1);
     87 		return 0;
     88 	}
     89 	tp->op = op;
     90 	tp->left.ptr = lp;
     91 	lp->parent = tp;
     92 	tp->right.ptr = rp;
     93 	rp->parent = tp;
     94 	return tp;
     95 }
     96 
     97 static int
     98 lex(Lex *lxp)
     99 {
    100 	size_t num;
    101 	w_type wc;
    102 	int n, mb_cur_max;
    103 
    104 	mb_cur_max = lxp->mb_cur_max;
    105 nextc:	switch (wc = *lxp->pat++) /* interesting ones are single bytes */
    106 	{
    107 	case '\0':
    108 		lxp->pat--;	/* continue to report ROP_END */
    109 		wc = ROP_END;
    110 		break;
    111 	case '(':
    112 		if (lxp->flags & REG_PARENS)
    113 		{
    114 		leftparen:;
    115 			/*
    116 			* Must keep track of the closed and
    117 			* yet-to-be closed groups as a list.
    118 			* Consider (()a(()b(()c(()d... in which
    119 			* at each letter another even-numbered
    120 			* group is made available, but no
    121 			* odd-numbered ones are.
    122 			*/
    123 			if ((lxp->flags & REG_NOBACKREF) == 0)
    124 			{
    125 				if (lxp->nleft >= lxp->nclist) /* grow it */
    126 				{
    127 					unsigned char *p;
    128 
    129 					lxp->nclist += 8; /* arbitrary */
    130 					if ((p = realloc(lxp->clist,
    131 						lxp->nclist)) == 0)
    132 					{
    133 						lxp->err = REG_ESPACE;
    134 						return -1;
    135 					}
    136 					lxp->clist = p;
    137 				}
    138 				lxp->clist[lxp->nleft] = 0; /* unavailable */
    139 			}
    140 			lxp->nleft++;
    141 			wc = ROP_LP;
    142 		}
    143 		break;
    144 	case ')':
    145 		/*
    146 		* For REG_PARENS, only take a right paren as a close
    147 		* if there is a matching left paren.
    148 		*/
    149 		if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft)
    150 		{
    151 			lxp->nright++;
    152 		rightparen:;
    153 			/*
    154 			* The group that is being closed is the highest
    155 			* numbered as-yet-unclosed group.
    156 			*/
    157 			if ((lxp->flags & REG_NOBACKREF) == 0)
    158 			{
    159 				num = lxp->nleft;
    160 				while (lxp->clist[--num] != 0)
    161 					;
    162 				lxp->clist[num] = 1;
    163 			}
    164 			wc = ROP_RP;
    165 		}
    166 		break;
    167 	case '.':
    168 		wc = ROP_ANYCH;
    169 		if (lxp->flags & REG_NEWLINE)
    170 			wc = ROP_NOTNL;
    171 		break;
    172 	case '*':
    173 		if (lxp->flags & REG_ADDITIVE)
    174 		{
    175 	nxtstar:	switch (*lxp->pat)
    176 			{
    177 			case '+':
    178 				if ((lxp->flags & REG_PLUS) == 0)
    179 					break;
    180 				lxp->pat++;
    181 				goto nxtstar;
    182 			case '?':
    183 				if ((lxp->flags & REG_QUEST) == 0)
    184 					break;
    185 				/*FALLTHRU*/
    186 			case '*':
    187 				lxp->pat++;
    188 				goto nxtstar;
    189 			}
    190 		}
    191 		wc = ROP_STAR;
    192 		break;
    193 	case '^':
    194 		/*
    195 		* Look "behind" to see if this is an anchor.
    196 		* Take it as an anchor if it follows an alternation
    197 		* operator.  (lxp->tok is initially set to ROP_OR.)
    198 		*/
    199 		if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) {
    200 			if (lxp->flags & REG_ADDITIVE)
    201 			{
    202 				int optional = 0;
    203 
    204 			nxtcar:	switch (*lxp->pat)
    205 				{
    206 				case '+':
    207 					if ((lxp->flags & REG_PLUS) == 0)
    208 						break;
    209 					lxp->pat++;
    210 					goto nxtcar;
    211 				case '?':
    212 					if ((lxp->flags & REG_QUEST) == 0)
    213 						break;
    214 					/*FALLTHRU*/
    215 				case '*':
    216 					optional = 1;
    217 					lxp->pat++;
    218 					goto nxtcar;
    219 				}
    220 				if (optional)
    221 					goto nextc;
    222 			}
    223 			wc = ROP_BOL;
    224 		}
    225 		break;
    226 	case '$':
    227 		/*
    228 		* Look ahead to see if this is an anchor,
    229 		* unless any '$' is an anchor.
    230 		* Take it as an anchor if it occurs just before
    231 		* the pattern end or an alternation operator.
    232 		*/
    233 		if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0'
    234 			|| (lxp->flags & REG_OR && *lxp->pat == '|')
    235 			|| (lxp->flags & REG_NLALT && *lxp->pat == '\n'))
    236 		{
    237 			if (lxp->flags & REG_ADDITIVE)
    238 			{
    239 				int optional = 0;
    240 
    241 			nxtdol:	switch (*lxp->pat)
    242 				{
    243 				case '+':
    244 					if ((lxp->flags & REG_PLUS) == 0)
    245 						break;
    246 					lxp->pat++;
    247 					goto nxtdol;
    248 				case '?':
    249 					if ((lxp->flags & REG_QUEST) == 0)
    250 						break;
    251 					/*FALLTHRU*/
    252 				case '*':
    253 					optional = 1;
    254 					lxp->pat++;
    255 					goto nxtdol;
    256 				}
    257 				if (optional)
    258 					goto nextc;
    259 			}
    260 			wc = ROP_EOL;
    261 		}
    262 		break;
    263 	case '+':
    264 		if (lxp->flags & REG_PLUS)
    265 		{
    266 			wc = ROP_PLUS;
    267 			if (lxp->flags & REG_ADDITIVE)
    268 			{
    269 		nxtplus:	switch (*lxp->pat)
    270 				{
    271 				case '?':
    272 					if ((lxp->flags & REG_QUEST) == 0)
    273 						break;
    274 				case '*':
    275 					wc = ROP_STAR;
    276 					/*FALLTHRU*/
    277 				case '+':
    278 					lxp->pat++;
    279 					goto nxtplus;
    280 				}
    281 			}
    282 		}
    283 		break;
    284 	case '?':
    285 		if (lxp->flags & REG_QUEST)
    286 		{
    287 			wc = ROP_QUEST;
    288 			if (lxp->flags & REG_ADDITIVE)
    289 			{
    290 		nxtquest:	switch (*lxp->pat)
    291 				{
    292 				case '+':
    293 					if ((lxp->flags & REG_PLUS) == 0)
    294 						break;
    295 				case '*':
    296 					wc = ROP_STAR;
    297 					/*FALLTHRU*/
    298 				case '?':
    299 					lxp->pat++;
    300 					goto nxtquest;
    301 				}
    302 			}
    303 		}
    304 		break;
    305 	case '\n':
    306 		if (lxp->flags & REG_NLALT)
    307 		{
    308 			/*
    309 			* Even when newline is an alternative separator,
    310 			* it doesn't permit parenthesized subexpressions
    311 			* to include it.
    312 			*/
    313 			if (lxp->nleft != lxp->nright)
    314 			{
    315 				lxp->err = REG_EPAREN;
    316 				return -1;
    317 			}
    318 			wc = ROP_OR;
    319 		}
    320 		else if (lxp->flags & REG_NEWLINE)
    321 			lxp->flags |= REG_NFA;
    322 		break;
    323 	case '|':
    324 		if (lxp->flags & REG_OR)
    325 			wc = ROP_OR;
    326 		break;
    327 	case '[':
    328 		if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0)
    329 		{
    330 			lxp->err = REG_ESPACE;
    331 			return -1;
    332 		}
    333 		if ((lxp->flags & REG_GOTBKT) == 0)	/* first time */
    334 		{
    335 			struct lc_collate *col;
    336 
    337 			lxp->flags |= REG_GOTBKT;
    338 			lxp->bktflags = 0;
    339 			if (lxp->flags & REG_ICASE)
    340 				lxp->bktflags |= BKT_ONECASE;
    341 			if (lxp->flags & REG_NEWLINE)
    342 				lxp->bktflags |= BKT_NOTNL;
    343 			if (lxp->flags & REG_BADRANGE)
    344 				lxp->bktflags |= BKT_BADRANGE;
    345 			if (lxp->flags & REG_ODDRANGE)
    346 				lxp->bktflags |= BKT_ODDRANGE;
    347 			if (lxp->flags & REG_SEPRANGE)
    348 				lxp->bktflags |= BKT_SEPRANGE;
    349 			if (lxp->flags & REG_BKTQUOTE)
    350 				lxp->bktflags |= BKT_QUOTE;
    351 			if (lxp->flags & REG_BKTEMPTY)
    352 				lxp->bktflags |= BKT_EMPTY;
    353 			if (lxp->flags & REG_ESCNL)
    354 				lxp->bktflags |= BKT_ESCNL;
    355 			if (lxp->flags & REG_NLALT)
    356 				lxp->bktflags |= BKT_NLBAD;
    357 			if (lxp->flags & REG_ESCSEQ)
    358 				lxp->bktflags |= BKT_ESCSEQ;
    359 			if (lxp->flags & REG_BKTESCAPE)
    360 				lxp->bktflags |= BKT_ESCAPE;
    361 			if (lxp->flags & REG_NOI18N)
    362 				lxp->bktflags |= BKT_NOI18N;
    363 			if (lxp->flags & REG_OLDESC)
    364 				lxp->bktflags |= BKT_OLDESC;
    365 			if ((col = libuxre_lc_collate(0)) != 0)
    366 			{
    367 				if (col->maintbl == 0
    368 					|| col->flags & CHF_ENCODED)
    369 				{
    370 					(void)libuxre_lc_collate(col);
    371 					col = 0;
    372 				}
    373 				else if (col->flags & CHF_MULTICH)
    374 					lxp->flags |= REG_NFA;
    375 			}
    376 			lxp->col = col;
    377 		}
    378 		n = lxp->bktflags;
    379 		if (*lxp->pat == '^')
    380 		{
    381 			n |= BKT_NEGATED;
    382 			lxp->pat++;
    383 		}
    384 		lxp->info.bkt->col = lxp->col;
    385 		if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat,
    386 						n, mb_cur_max)) < 0)
    387 		{
    388 			free(lxp->info.bkt);
    389 			lxp->err = -n;	/* convert to REG_* errors */
    390 			return -1;
    391 		}
    392 		/*
    393 		* NFA forced if newline can be a match and REG_NEWLINE is set.
    394 		*/
    395 		if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE
    396 			&& lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */
    397 			&& libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0)
    398 		{
    399 			lxp->flags |= REG_NFA;
    400 		}
    401 		lxp->pat += n;
    402 		wc = ROP_BKT;
    403 		break;
    404 	case '{':
    405 		if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0)
    406 			break;
    407 	interval:;
    408 		if (!isdigit(num = *lxp->pat))
    409 		{
    410 		badbr:;
    411 			lxp->err = REG_BADBR;
    412 			if (*lxp->pat == '\0')
    413 				lxp->err = REG_EBRACE; /* more accurate */
    414 			return -1;
    415 		}
    416 		num -= '0';
    417 		while (isdigit(wc = *++lxp->pat))
    418 		{
    419 			num *= 10;
    420 			if ((num += wc - '0') > BRACE_MAX)
    421 				goto badbr;
    422 		}
    423 		lxp->info.num[0] = num;
    424 		lxp->info.num[1] = num;
    425 		if (wc == ',')
    426 		{
    427 			lxp->info.num[1] = BRACE_INF;
    428 			if (isdigit(wc = *++lxp->pat))
    429 			{
    430 				num = wc - '0';
    431 				while (isdigit(wc = *++lxp->pat))
    432 				{
    433 					num *= 10;
    434 					if ((num += wc - '0') > BRACE_MAX)
    435 						goto badbr;
    436 				}
    437 				if (num < lxp->info.num[0])
    438 					goto badbr;
    439 				lxp->info.num[1] = num;
    440 			}
    441 		}
    442 		if ((lxp->flags & REG_BRACES) == 0)
    443 		{
    444 			if (wc != '\\')
    445 				goto badbr;
    446 			wc = *++lxp->pat;
    447 		}
    448 		if (wc != '}')
    449 			goto badbr;
    450 		lxp->pat++;
    451 		wc = ROP_BRACE;
    452 		/*
    453 		* Replace interval with simpler equivalents where possible,
    454 		* even when the operators are not otherwise available.
    455 		*/
    456 		if (lxp->info.num[1] <= 1)
    457 		{
    458 			if (lxp->info.num[0] == 1)
    459 				wc = ROP_NOP;	/* {1,1} is noise */
    460 			else if (lxp->info.num[1] == 0)
    461 				wc = ROP_EMPTY;	/* {0,0} is empty string */
    462 			else
    463 				wc = ROP_QUEST;	/* {0,1} is ? */
    464 		}
    465 		else if (lxp->info.num[1] == BRACE_INF)
    466 		{
    467 			if (lxp->info.num[0] == 0)
    468 				wc = ROP_STAR;
    469 			else if (lxp->info.num[0] == 1)
    470 				wc = ROP_PLUS;
    471 			else if (lxp->info.num[0] > BRACE_DFAMAX)
    472 				lxp->flags |= REG_NFA;
    473 		}
    474 		else if (lxp->info.num[1] > BRACE_DFAMAX)
    475 		{
    476 			lxp->flags |= REG_NFA;
    477 		}
    478 		break;
    479 	case '\\':
    480 		switch (wc = *lxp->pat++)
    481 		{
    482 		case '\0':
    483 			lxp->err = REG_EESCAPE;
    484 			return -1;
    485 		case '<':
    486 			if (lxp->flags & REG_ANGLES)
    487 			{
    488 				lxp->flags |= REG_NFA;
    489 				wc = ROP_LT;
    490 			}
    491 			goto out;
    492 		case '>':
    493 			if (lxp->flags & REG_ANGLES)
    494 			{
    495 				lxp->flags |= REG_NFA;
    496 				wc = ROP_GT;
    497 			}
    498 			goto out;
    499 		case '(':
    500 			if ((lxp->flags & REG_PARENS) == 0)
    501 				goto leftparen;
    502 			goto out;
    503 		case ')':
    504 			if ((lxp->flags & REG_PARENS) == 0)
    505 			{
    506 				if (++lxp->nright > lxp->nleft)
    507 				{
    508 					lxp->err = REG_EPAREN;
    509 					return -1;
    510 				}
    511 				goto rightparen;
    512 			}
    513 			goto out;
    514 		case '{':
    515 			if (lxp->flags & (REG_BRACES|REG_NOBRACES))
    516 				goto out;
    517 			goto interval;
    518 		case '1':
    519 		case '2':
    520 		case '3':
    521 		case '4':
    522 		case '5':
    523 		case '6':
    524 		case '7':
    525 		case '8':
    526 		case '9':
    527 			num = wc - '0';
    528 			if ((lxp->flags & REG_NOBACKREF) == 0)
    529 			{
    530 			backref:;
    531 				if (num > lxp->nleft
    532 					|| lxp->clist[num - 1] == 0)
    533 				{
    534 					lxp->err = REG_ESUBREG;
    535 					return -1;
    536 				}
    537 				lxp->info.sub = num;
    538 				if (lxp->maxref < num)
    539 					lxp->maxref = num;
    540 				lxp->flags |= REG_NFA;
    541 				wc = ROP_REF;
    542 				goto out;
    543 			}
    544 			/*
    545 			* For compatibility (w/awk), permit "octal" 8 and 9.
    546 			* Already have the value of the first digit in num.
    547 			*
    548 			* If REG_OLDESC, exactly three digits must be present.
    549 			*/
    550 		tryoctal:;
    551 			if ((lxp->flags & REG_ESCSEQ) == 0)
    552 				goto out;
    553 			if ((wc = *lxp->pat) >= '0' && wc <= '9')
    554 			{
    555 				num <<= 3;
    556 				num += wc - '0';
    557 				if ((wc = *++lxp->pat) >= '0' && wc <= '9')
    558 				{
    559 					num <<= 3;
    560 					num += wc - '0';
    561 					lxp->pat++;
    562 				}
    563 				else if (lxp->flags & REG_OLDESC)
    564 				{
    565 					lxp->pat--;
    566 					wc = lxp->pat[-1];
    567 					goto out;
    568 				}
    569 			}
    570 			else if (lxp->flags & REG_OLDESC)
    571 			{
    572 				wc = lxp->pat[-1];
    573 				goto out;
    574 			}
    575 			if ((wc = num) <= 0)
    576 			{
    577 				lxp->err = REG_BADESC;
    578 				return -1;
    579 			}
    580 			goto out;
    581 		case '0':
    582 			if ((lxp->flags & REG_NOBACKREF) == 0
    583 				&& (num = *lxp->pat) >= '0' && num <= '9')
    584 			{
    585 				num -= '0';
    586 				/*
    587 				* This loop ignores wraparounds.
    588 				* Keep track of number of digits in n.
    589 				*/
    590 				n = 1;
    591 				while ((wc = *++lxp->pat) >= '0' && wc <= '9')
    592 				{
    593 					num *= 10;
    594 					num += wc - '0';
    595 					n++;
    596 				}
    597 				if (num != 0)
    598 					goto backref;
    599 				lxp->pat -= n;
    600 			}
    601 			num = 0;
    602 			goto tryoctal;
    603 		case 'a':
    604 			if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
    605 				wc = '\a';
    606 			goto out;
    607 		case 'b':
    608 			if (lxp->flags & REG_ESCSEQ)
    609 				wc = '\b';
    610 			goto out;
    611 		case 'f':
    612 			if (lxp->flags & REG_ESCSEQ)
    613 				wc = '\f';
    614 			goto out;
    615 		case 'n':
    616 			if (lxp->flags & (REG_ESCSEQ | REG_ESCNL))
    617 			{
    618 				wc = '\n';
    619 				if (lxp->flags & REG_NEWLINE)
    620 					lxp->flags |= REG_NFA;
    621 			}
    622 			goto out;
    623 		case 'r':
    624 			if (lxp->flags & REG_ESCSEQ)
    625 				wc = '\r';
    626 			goto out;
    627 		case 't':
    628 			if (lxp->flags & REG_ESCSEQ)
    629 				wc = '\t';
    630 			goto out;
    631 		case 'v':
    632 			if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ)
    633 				wc = '\v';
    634 			goto out;
    635 		case 'x':
    636 			if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ
    637 				&& isxdigit(num = *lxp->pat))
    638 			{
    639 				wc = num;
    640 				num = 0;
    641 				/*
    642 				* Take as many hex digits as possible,
    643 				* ignoring overflows.
    644 				* If the result (squeezed into a w_type)
    645 				* is positive, it's okay.
    646 				*/
    647 				do
    648 				{
    649 					if (isdigit(wc))
    650 						wc -= '0';
    651 					else if (isupper(wc))
    652 						wc -= 'A' + 10;
    653 					else
    654 						wc -= 'a' + 10;
    655 					num <<= 4;
    656 					num |= wc;
    657 				} while (isxdigit(wc = *++lxp->pat));
    658 				if ((wc = num) <= 0)
    659 				{
    660 					lxp->err = REG_BADESC;
    661 					return -1;
    662 				}
    663 			}
    664 			goto out;
    665 		}
    666 		/*FALLTHROUGH*/
    667 	default:
    668 		if (!ISONEBYTE(wc))
    669 		{
    670 			if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0)
    671 				lxp->pat += n;
    672 			else if (n < 0)
    673 			{
    674 				lxp->err = REG_ILLSEQ;
    675 				return -1;
    676 			}
    677 		}
    678 		if (lxp->flags & REG_ICASE)
    679 			wc = to_lower(wc);
    680 		break;
    681 	}
    682 out:;
    683 	lxp->tok = wc;
    684 	return 0;
    685 }
    686 
    687 static Tree	*alt(Lex *);
    688 
    689 static Tree *
    690 leaf(Lex *lxp)
    691 {
    692 	Tree *tp;
    693 
    694 	if ((tp = malloc(sizeof(Tree))) == 0)
    695 	{
    696 		lxp->err = REG_ESPACE;
    697 		return 0;
    698 	}
    699 	switch (tp->op = lxp->tok) /* covers most cases */
    700 	{
    701 	default:
    702 		if (tp->op < 0)
    703 		{
    704 			lxp->err = REG_BADPAT;
    705 			tp->right.ptr = 0;
    706 			goto badunary;
    707 		}
    708 		break;
    709 	case ROP_STAR:
    710 	case ROP_PLUS:
    711 	case ROP_QUEST:
    712 		if ((lxp->flags & REG_NOAUTOQUOTE) == 0
    713 			&& lxp->pat[-1] != '}')
    714 		{
    715 			tp->op = lxp->pat[-1];
    716 			break;
    717 		}
    718 		/*FALLTHROUGH*/
    719 	case ROP_BRACE:
    720 	case ROP_EMPTY:	/* was {0,0} ROP_BRACE */
    721 	case ROP_NOP:	/* was {1,1} ROP_BRACE */
    722 		lxp->err = REG_BADRPT;
    723 	badunary:;
    724 		tp->left.ptr = 0;
    725 		goto err;
    726 	case ROP_ANYCH:
    727 	case ROP_NOTNL:
    728 		break;
    729 	case ROP_BOL:
    730 	case ROP_EOL:
    731 	case ROP_LT:
    732 	case ROP_GT:
    733 		/*
    734 		* Look ahead for what would have been taken to be
    735 		* postfix operators.
    736 		*/
    737 		if (lex(lxp) != 0)
    738 			goto err;
    739 		switch (lxp->tok)
    740 		{
    741 		case ROP_STAR:
    742 		case ROP_PLUS:
    743 		case ROP_QUEST:
    744 			if ((lxp->flags & REG_NOAUTOQUOTE) == 0
    745 				&& lxp->pat[-1] != '}')
    746 			{
    747 				lxp->tok = lxp->pat[-1];
    748 				break;
    749 			}
    750 			/*FALLTHROUGH*/
    751 		case ROP_BRACE:
    752 		case ROP_EMPTY:	/* was {0,0} ROP_BRACE */
    753 		case ROP_NOP:	/* was {1,1} ROP_BRACE */
    754 			lxp->err = REG_BADRPT;
    755 			goto err;
    756 		}
    757 		return tp;
    758 	case ROP_BKT:
    759 		tp->right.info.bkt = lxp->info.bkt;
    760 		break;
    761 	case ROP_REF:
    762 		tp->right.info.sub = lxp->info.sub;
    763 		break;
    764 	case ROP_LP:
    765 		tp->right.info.sub = lxp->nleft;
    766 		if (lex(lxp) != 0)
    767 			goto badunary;
    768 		if (lxp->tok == ROP_RP)	/* empty parens; choice of meaning */
    769 		{
    770 			if (lxp->flags & REG_MTPARENBAD)
    771 			{
    772 				lxp->err = REG_EMPTYPAREN;
    773 				goto badunary;
    774 			}
    775 			lxp->tok = ROP_EMPTY;
    776 			if (lxp->flags & REG_MTPARENFAIL)
    777 				lxp->tok = ROP_NONE;
    778 			if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0)
    779 				goto badunary;
    780 		}
    781 		else if ((tp->left.ptr = alt(lxp)) == 0)
    782 		{
    783 			if (lxp->err == REG_BADPAT)
    784 				goto parenerr;
    785 			goto badunary;
    786 		}
    787 		else if (lxp->tok != ROP_RP)
    788 		{
    789 			lxp->err = REG_BADPAT;
    790 		parenerr:;
    791 			if (lxp->nleft != lxp->nright)
    792 				lxp->err = REG_EPAREN;	/* better choice */
    793 			goto badunary;
    794 		}
    795 		tp->left.ptr->parent = tp;
    796 		break;
    797 	}
    798 	if (lex(lxp) != 0)
    799 	{
    800 	err:;
    801 		libuxre_regdeltree(tp, 1);
    802 		tp = 0;
    803 	}
    804 	return tp;
    805 }
    806 
    807 static Tree *
    808 post(Lex *lxp)
    809 {
    810 	Tree *lp;
    811 
    812 	if ((lp = leaf(lxp)) == 0)
    813 		return 0;
    814 	switch (lxp->tok)
    815 	{
    816 	case ROP_EMPTY:	/* this was {0,0} ROP_BRACE */
    817 		libuxre_regdeltree(lp, 1);
    818 		lp = 0;
    819 		/*FALLTHROUGH*/
    820 	case ROP_BRACE:
    821 	case ROP_STAR:
    822 	case ROP_PLUS:
    823 	case ROP_QUEST:
    824 		if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0)
    825 		{
    826 			lxp->err = REG_ESPACE;
    827 			return 0;
    828 		}
    829 		if (lxp->tok == ROP_BRACE)
    830 			lp->right.info = lxp->info;
    831 		/*FALLTHROUGH*/
    832 	case ROP_NOP:	/* this was {1,1} ROP_BRACE */
    833 		if (lex(lxp) != 0)
    834 		{
    835 			libuxre_regdeltree(lp, 1);
    836 			return 0;
    837 		}
    838 		break;
    839 	}
    840 	return lp;
    841 }
    842 
    843 static Tree *
    844 cat(Lex *lxp)
    845 {
    846 	Tree *lp, *rp;
    847 
    848 	if ((lp = post(lxp)) == 0)
    849 		return 0;
    850 	for (;;)
    851 	{
    852 		if (lxp->tok == ROP_OR || lxp->tok == ROP_RP
    853 			|| lxp->tok == ROP_END)
    854 		{
    855 			return lp;
    856 		}
    857 		if ((rp = post(lxp)) == 0)
    858 			break;
    859 		if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
    860 		{
    861 			lxp->err = REG_ESPACE;
    862 			return 0;
    863 		}
    864 	}
    865 	libuxre_regdeltree(lp, 1);
    866 	return 0;
    867 }
    868 
    869 static Tree *
    870 alt(Lex *lxp)
    871 {
    872 	Tree *lp, *rp;
    873 
    874 	if ((lp = cat(lxp)) == 0)
    875 		return 0;
    876 	for (;;)
    877 	{
    878 		if (lxp->tok != ROP_OR)
    879 			return lp;
    880 		if (lex(lxp) != 0)
    881 			break;
    882 		if (lxp->tok == ROP_END)
    883 			return lp;	/* ignore trailing '|' */
    884 		if ((rp = cat(lxp)) == 0)
    885 			break;
    886 		if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0)
    887 		{
    888 			lxp->err = REG_ESPACE;
    889 			return 0;
    890 		}
    891 	}
    892 	libuxre_regdeltree(lp, 1);
    893 	return 0;
    894 }
    895 
    896 LIBUXRE_STATIC Tree *
    897 libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags)
    898 {
    899 	Tree *lp, *rp;
    900 
    901 	lp = 0;			/* in case of error */
    902 	lxp->clist = 0;
    903 	lxp->col = 0;
    904 	lxp->err = 0;
    905 	lxp->maxref = 0;
    906 	lxp->nleft = 0;
    907 	lxp->nright = 0;
    908 	lxp->nclist = 0;
    909 	lxp->mb_cur_max = MB_CUR_MAX;
    910 	if (flags & REG_OR && *pat == '|')
    911 		pat++;	/* skip initial OR like egrep did */
    912 	lxp->pat = pat;
    913 	lxp->flags = flags;
    914 	lxp->tok = ROP_OR;	/* enables ^ as anchor */
    915 	/*
    916 	* Get initial token.
    917 	*/
    918 	if (lex(lxp) != 0)
    919 	{
    920 	err:;
    921 		if (lp != 0)
    922 		{
    923 			libuxre_regdeltree(lp, 1);
    924 			lp = 0;
    925 		}
    926 		if (lxp->err == 0)
    927 			lxp->err = REG_ESPACE;
    928 		goto ret;
    929 	}
    930 	if (lxp->tok == ROP_END)
    931 	{
    932 		lxp->err = REG_NOPAT;
    933 		goto err;
    934 	}
    935 	if ((lp = alt(lxp)) == 0)	/* parse entire RE */
    936 		goto err;
    937 	if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0)
    938 	{
    939 		if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0)
    940 			goto err;
    941 		lp->right.info.sub = 0;
    942 	}
    943 	if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0)
    944 		goto err;
    945 	if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0)
    946 		goto err;
    947 	lp->parent = 0;
    948 ret:;
    949 	if (lxp->clist != 0)
    950 		free(lxp->clist);
    951 	return lp;
    952 }
    953 
    954 #ifdef REGDEBUG
    955 
    956 LIBUXRE_STATIC void
    957 libuxre_regtree(Tree *tp, int n)
    958 {
    959 	const char *opstr;
    960 	char buf[32];
    961 	int kind, next;
    962 
    963 	if (n < 0)
    964 		next = -n + 2;
    965 	else
    966 		next = n + 2;
    967 	switch (tp->op)
    968 	{
    969 	case ROP_OR:
    970 		opstr = "|";
    971 		kind = BINARY_ROP;
    972 		break;
    973 	case ROP_CAT:
    974 		opstr = "&";
    975 		kind = BINARY_ROP;
    976 		break;
    977 	case ROP_STAR:
    978 		opstr = "*";
    979 		kind = UNARY_ROP;
    980 		break;
    981 	case ROP_PLUS:
    982 		opstr = "+";
    983 		kind = UNARY_ROP;
    984 		break;
    985 	case ROP_QUEST:
    986 		opstr = "?";
    987 		kind = UNARY_ROP;
    988 		break;
    989 	case ROP_BRACE:
    990 		opstr = buf;
    991 		if (tp->right.info.num[1] == BRACE_INF)
    992 		{
    993 			sprintf(buf, "{%u,inf}",
    994 				(unsigned)tp->right.info.num[0]);
    995 		}
    996 		else
    997 		{
    998 			sprintf(buf, "{%u,%u}",
    999 				(unsigned)tp->right.info.num[0],
   1000 				(unsigned)tp->right.info.num[1]);
   1001 		}
   1002 		kind = UNARY_ROP;
   1003 		break;
   1004 	case ROP_LP:
   1005 		opstr = buf;
   1006 		sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub);
   1007 		kind = UNARY_ROP;
   1008 		break;
   1009 	case ROP_RP:
   1010 		opstr = buf;
   1011 		sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub);
   1012 		kind = UNARY_ROP;
   1013 		break;
   1014 	case ROP_NOP:
   1015 		opstr = "<NOP>";
   1016 		kind = LEAF_ROP;
   1017 		break;
   1018 	case ROP_BOL:
   1019 		opstr = "<BOL>";
   1020 		kind = LEAF_ROP;
   1021 		break;
   1022 	case ROP_EOL:
   1023 		opstr = "<EOL>";
   1024 		kind = LEAF_ROP;
   1025 		break;
   1026 	case ROP_ALL:
   1027 		opstr = "<ALL>";
   1028 		kind = LEAF_ROP;
   1029 		break;
   1030 	case ROP_ANYCH:
   1031 		opstr = "<ANYCH>";
   1032 		kind = LEAF_ROP;
   1033 		break;
   1034 	case ROP_NOTNL:
   1035 		opstr = "<NOTNL>";
   1036 		kind = LEAF_ROP;
   1037 		break;
   1038 	case ROP_EMPTY:
   1039 		opstr = "<MT>";
   1040 		kind = LEAF_ROP;
   1041 		break;
   1042 	case ROP_NONE:
   1043 		opstr = "<NONE>";
   1044 		kind = LEAF_ROP;
   1045 		break;
   1046 	case ROP_BKT:
   1047 		opstr = buf;
   1048 		sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt);
   1049 		kind = LEAF_ROP;
   1050 		break;
   1051 	case ROP_BKTCOPY:
   1052 		opstr = buf;
   1053 		sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt);
   1054 		kind = LEAF_ROP;
   1055 		break;
   1056 	case ROP_LT:
   1057 		opstr = "\\<";
   1058 		kind = LEAF_ROP;
   1059 		break;
   1060 	case ROP_GT:
   1061 		opstr = "\\>";
   1062 		kind = LEAF_ROP;
   1063 		break;
   1064 	case ROP_REF:
   1065 		opstr = buf;
   1066 		sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub);
   1067 		kind = LEAF_ROP;
   1068 		break;
   1069 	case ROP_END:
   1070 		opstr = "<END>";
   1071 		kind = LEAF_ROP;
   1072 		break;
   1073 	default:
   1074 		opstr = buf;
   1075 		if (tp->op > UCHAR_MAX)
   1076 			sprintf(buf, "W%#x", tp->op);
   1077 		else if (tp->op <= 0)
   1078 			sprintf(buf, "UNK=%u", tp->op);
   1079 		else
   1080 			sprintf(buf, "%c", tp->op);
   1081 		kind = LEAF_ROP;
   1082 		break;
   1083 	}
   1084 	if (kind == BINARY_ROP)
   1085 		libuxre_regtree(tp->right.ptr, -next);
   1086 	printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr);
   1087 	if (kind != LEAF_ROP)
   1088 		libuxre_regtree(tp->left.ptr, next);
   1089 }
   1090 
   1091 #endif /*REGDEBUG*/