hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

regnfa.c (22968B)


      1 /*
      2  * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
      3  *
      4  * Sccsid @(#)regnfa.c	1.8 (gritter) 2/6/05
      5  */
      6 /*  UNIX(R) Regular Expresssion Library
      7  *
      8  *  Note: Code is released under the GNU LGPL
      9  *
     10  *  Copyright (C) 2001 Caldera International, Inc.
     11  *
     12  *  This library is free software; you can redistribute it and/or
     13  *  modify it under the terms of the GNU Lesser General Public
     14  *  License as published by the Free Software Foundation; either
     15  *  version 2 of the License, or (at your option) any later version.
     16  *
     17  *  This library is distributed in the hope that it will be useful,
     18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20  *  Lesser General Public License for more details.
     21  *
     22  *  You should have received a copy of the GNU Lesser General Public
     23  *  License along with this library; if not, write to:
     24  *        Free Software Foundation, Inc.
     25  *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     26  */
     27 
     28 /*	#include "synonyms.h"	*/
     29 #include <string.h>
     30 #include <stdlib.h>
     31 #include "re.h"
     32 #include <stddef.h>
     33 #include <ctype.h>
     34 
     35 typedef unsigned char	Uchar;
     36 typedef unsigned short	Ushort;
     37 
     38 /*
     39 * Nondeterministic Finite Automata.
     40 */
     41 typedef struct t_graph	Graph;
     42 struct t_graph
     43 {
     44 	union
     45 	{
     46 		Graph	*ptr;
     47 		Info	info;
     48 	} alt;
     49 	Graph		*next;
     50 	w_type		op;
     51 };
     52 
     53 typedef struct t_stack	Stack;
     54 struct t_stack
     55 {
     56 	Stack		*link;	/* simplifies cleanup */
     57 	Stack		*prev;	/* covered states */
     58 	Graph		*wasgp;	/* node associated with this state */
     59 	const Uchar	*str;	/* saved position in the string */
     60 	Ushort		cnt;	/* ROP_BRACE: traversal count */
     61 };
     62 
     63 	/*
     64 	* A Context holds all the information needed for each
     65 	* potential path through the NFA graph.
     66 	*/
     67 typedef struct t_ctxt	Context;
     68 struct t_ctxt
     69 {
     70 	Context		*link;	/* simplifies cleanup */
     71 	Context		*next;	/* singly linked */
     72 	Stack		*sp;	/* nested counts */
     73 	Graph		*gp;	/* starting node */
     74 	Graph		*wasgp;	/* node associated with this state */
     75 	const Uchar	*str;	/* saved position in the string */
     76 	Ushort		cnt;	/* ROP_BRACE: traversal count */
     77 	size_t		nset;	/* length of rm[] that is currently set */
     78 	regmatch_t	rm[1];	/* enough to cover re_nsub+1 (np->rmlen) */
     79 };
     80 
     81 struct re_nfa_ /*Nfa*/
     82 {
     83 	Graph	*gp;	/* entire NFA */
     84 	Stack	*sp;	/* unused Stacks */
     85 	Stack	*allsp;	/* linked Stacks (for cleanup) */
     86 	Context	*allcp;	/* linked Contexts (for cleanup) */
     87 	Context	*cur;	/* Contexts to be continued now */
     88 	Context	*step;	/* Contexts waiting for a step of the NFA */
     89 	Context	*avail;	/* unused Contexts */
     90 	Context	**ecur;	/* ends cur list of Contexts */
     91 	Context	**estp;	/* ends step list of Contexts */
     92 	size_t	rmlen;	/* length of rm[] in each Context */
     93 	size_t	rmmin;	/* minimum length needed */
     94 	size_t	used;	/* length used for this libuxre_regnfaexec() */
     95 	w_type	beg;	/* nonzero for fixed char initial node NFAs */
     96 };
     97 
     98 #define ROP_MTOR	ROP_CAT	/* ROP_OR, except might be empty loop */
     99 
    100 	/*
    101 	* Depth first traversal.
    102 	* Make a singly linked list (in alt.ptr) of the graph's nodes.
    103 	* Must toss any ROP_BKTs, too, since "alt" is overwritten.
    104 	*/
    105 static void
    106 deltolist(Graph *gp, Graph **list)
    107 {
    108 	Graph *ptr;
    109 
    110 	if ((ptr = gp->next) != 0) /* first time */
    111 	{
    112 		gp->next = 0;
    113 		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
    114 			deltolist(gp->alt.ptr, list);
    115 		deltolist(ptr, list);
    116 		if (gp->op == ROP_BKT)
    117 		{
    118 			libuxre_bktfree(gp->alt.info.bkt);
    119 			free(gp->alt.info.bkt);
    120 		}
    121 	}
    122 	else if (gp->op == ROP_END)
    123 		gp->op = ROP_NOP;
    124 	else
    125 		return;
    126 	gp->alt.ptr = *list;
    127 	*list = gp;
    128 }
    129 
    130 	/*
    131 	* After the list is turned into a linked list,
    132 	* walk that list freeing the nodes.
    133 	*/
    134 static void
    135 delgraph(Graph *gp)
    136 {
    137 	Graph *gp2, end;
    138 
    139 	gp2 = &end;
    140 	deltolist(gp, &gp2);
    141 	while ((gp = gp2) != &end)
    142 	{
    143 		gp2 = gp->alt.ptr;
    144 		free(gp);
    145 	}
    146 }
    147 
    148 	/*
    149 	* Depth first traversal.
    150 	* Look for ROP_NOPs and prune them from the graph.
    151 	* Chain them all together on *nop's list.
    152 	*/
    153 static Graph *
    154 nopskip(Graph *gp, Graph **nop)
    155 {
    156 	Graph *ptr;
    157 
    158 	if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */
    159 	{
    160 		if (gp->op == ROP_NOP)
    161 		{
    162 			if (gp->alt.ptr != 0) /* touched */
    163 				return gp->next; /* already did it */
    164 			gp->alt.ptr = *nop;
    165 			*nop = gp;
    166 		}
    167 		gp->next = 0; /* this subgraph's pending */
    168 		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
    169 			gp->alt.ptr = nopskip(gp->alt.ptr, nop);
    170 		gp->next = nopskip(ptr, nop);
    171 		if (gp->op == ROP_NOP)
    172 			return gp->next;
    173 	}
    174 	return gp;
    175 }
    176 
    177 	/*
    178 	* Postorder traversal of the parse tree.
    179 	* Build a graph using "Thompson's" algorithm.
    180 	* The only significant modification is the
    181 	* ROP_BRACE->ROP_MTOR construction.
    182 	* Returns 1 => graph might match empty
    183 	*	  0 => graph cannot match empty
    184 	*	 -1 => error (in allocation)
    185 	*/
    186 static int
    187 mkgraph(Tree *tp, Graph **first, Graph **last)
    188 {
    189 	Graph *new = 0, *nop, *lf, *ll, *rf, *rl;
    190 	int lmt, rmt = 0;
    191 
    192 	if (tp->op != ROP_CAT)
    193 	{
    194 		if ((new = malloc(sizeof(Graph))) == 0)
    195 			return 0;
    196 		new->op = tp->op;	/* usually */
    197 	}
    198 	switch (tp->op)
    199 	{
    200 	case ROP_REF:
    201 		new->alt.info.sub = tp->right.info.sub;
    202 		*first = new;
    203 		*last = new;
    204 		return 1; /* safe--can't really tell */
    205 	case ROP_BKT:
    206 		tp->op = ROP_BKTCOPY;	/* now graph owns clean up */
    207 		/*FALLTHROUGH*/
    208 	case ROP_BKTCOPY:
    209 		new->alt.info.bkt = tp->right.info.bkt;
    210 		/*FALLTHROUGH*/
    211 	default:
    212 		*first = new;
    213 		*last = new;
    214 		return 0;
    215 	case ROP_EMPTY:
    216 		new->op = ROP_NOP;
    217 		new->alt.ptr = 0;	/* untouched */
    218 		*first = new;
    219 		*last = new;
    220 		return 1;
    221 	case ROP_OR:
    222 	case ROP_CAT:
    223 		lf = 0;	/* in case of error */
    224 		if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0)
    225 			goto err;
    226 		/*FALLTHROUGH*/
    227 	case ROP_STAR:
    228 	case ROP_PLUS:
    229 	case ROP_QUEST:
    230 	case ROP_BRACE:
    231 	case ROP_LP:
    232 		if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0)
    233 			goto err;
    234 		break;
    235 	}
    236 	/*
    237 	* Note that ROP_NOP only serves as the node that reconnects
    238 	* the two choices of an incoming ROP_OR or ROP_QUEST.  To
    239 	* prevent rewalking portions of the graph in nopskip(),
    240 	* this code marks all ROP_NOP nodes as currently untouched.
    241 	*/
    242 	switch (tp->op)
    243 	{
    244 	case ROP_OR:
    245 		if ((nop = malloc(sizeof(Graph))) == 0)
    246 			goto err;
    247 		nop->op = ROP_NOP;
    248 		nop->alt.ptr = 0;	/* untouched */
    249 		ll->next = nop;
    250 		rl->next = nop;
    251 		new->next = lf;
    252 		new->alt.ptr = rf;
    253 		*first = new;
    254 		*last = nop;
    255 		return lmt | rmt;
    256 	case ROP_CAT:	/* no "new" */
    257 		ll->next = rf;
    258 		*first = lf;
    259 		*last = rl;
    260 		return lmt & rmt;
    261 	case ROP_QUEST:
    262 		if ((nop = malloc(sizeof(Graph))) == 0)
    263 			goto err;
    264 		nop->op = ROP_NOP;
    265 		nop->alt.ptr = 0;	/* untouched */
    266 		new->op = ROP_OR;
    267 		new->next = lf;
    268 		new->alt.ptr = nop;
    269 		ll->next = nop;
    270 		*first = new;
    271 		*last = nop;
    272 		return 1;
    273 	case ROP_STAR:
    274 		*first = new;
    275 		rmt = 1;
    276 	star:;
    277 		new->op = lmt ? ROP_MTOR : ROP_OR;
    278 		new->alt.ptr = lf;
    279 		ll->next = new;
    280 		*last = new;
    281 		return rmt;
    282 	case ROP_PLUS:
    283 		*first = lf;
    284 		rmt = lmt;
    285 		goto star;
    286 	case ROP_BRACE:
    287 		if ((nop = malloc(sizeof(Graph))) == 0)
    288 			goto err;
    289 		nop->op = ROP_MTOR; /* going to save state anyway... */
    290 		nop->alt.ptr = lf;
    291 		ll->next = new;
    292 		new->next = nop;
    293 		new->alt.info.num[1] = tp->right.info.num[1];
    294 		if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0)
    295 		{
    296 			lmt = 1;
    297 			*first = new;
    298 		}
    299 		else
    300 		{
    301 			new->alt.info.num[0]--;	/* already done 1 */
    302 			if (new->alt.info.num[1] != BRACE_INF)
    303 				new->alt.info.num[1]--;	/* likewise */
    304 			*first = lf;
    305 		}
    306 		*last = nop;
    307 		return lmt;
    308 	case ROP_LP:
    309 		if ((nop = malloc(sizeof(Graph))) == 0)
    310 			goto err;
    311 		nop->op = ROP_RP;
    312 		nop->alt.info.sub = tp->right.info.sub;
    313 		new->alt.info.sub = tp->right.info.sub;
    314 		new->next = lf;
    315 		ll->next = nop;
    316 		*first = new;
    317 		*last = nop;
    318 		return lmt;
    319 	}
    320 err:;
    321 	if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0)
    322 		delgraph(rf);
    323 	if (lf != 0)
    324 		delgraph(lf);
    325 	if (tp->op != ROP_CAT)
    326 		free(new);
    327 	return -1;
    328 }
    329 
    330 	/*
    331 	* Semi-preorder traversal.
    332 	* Return zero if there's no simple first character
    333 	* (including the operation ROP_BOL) that must always
    334 	* be at the start of a matching string.
    335 	* This code doesn't attempt to get an answer if the
    336 	* first of the tree many be empty.
    337 	*/
    338 static w_type
    339 firstop(Tree *tp)
    340 {
    341 	w_type op;
    342 
    343 	switch (tp->op)
    344 	{
    345 	case ROP_OR:
    346 		if ((op = firstop(tp->left.ptr)) == 0
    347 			|| op != firstop(tp->right.ptr))
    348 		{
    349 			return 0;
    350 		}
    351 		return op;
    352 	case ROP_BRACE:
    353 		if (tp->right.info.num[0] == 0)
    354 			return 0;
    355 		/*FALLTHROUGH*/
    356 	case ROP_CAT:
    357 	case ROP_PLUS:
    358 	case ROP_LP:
    359 		return firstop(tp->left.ptr);
    360 	default:
    361 		if (tp->op < 0)
    362 			return 0;
    363 		/*FALLTHROUGH*/
    364 	case ROP_BOL:
    365 		return tp->op;
    366 	}
    367 }
    368 
    369 void
    370 libuxre_regdelnfa(Nfa *np)
    371 {
    372 	Context *cp, *cpn;
    373 	Stack *sp, *spn;
    374 
    375 	if (np->gp != 0)
    376 		delgraph(np->gp);
    377 	for (cp = np->allcp; cp != 0; cp = cpn)
    378 	{
    379 		cpn = cp->link;
    380 		free(cp);
    381 	}
    382 	for (sp = np->allsp; sp != 0; sp = spn)
    383 	{
    384 		spn = sp->link;
    385 		free(sp);
    386 	}
    387 	free(np);
    388 }
    389 
    390 LIBUXRE_STATIC int
    391 libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp)
    392 {
    393 	Graph *gp, end;
    394 	Nfa *np;
    395 
    396 	if ((np = malloc(sizeof(Nfa))) == 0)
    397 		goto err;
    398 	np->gp = 0; /* in case of error */
    399 	if (mkgraph(tp, &np->gp, &gp) < 0)
    400 		goto err;
    401 	gp->next = 0;	/* nothing follows ROP_END */
    402 	np->rmlen = 0;
    403 	if ((ep->re_flags & REG_NOSUB) == 0)
    404 		np->rmlen = ep->re_nsub + 1;
    405 	np->rmmin = 0;
    406 	if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen)
    407 		np->rmlen = np->rmmin;
    408 	/*
    409 	* Delete all ROP_NOPs from the graph.
    410 	* nopskip() disconnects them from the graph and
    411 	* links them together through their alt.ptr's.
    412 	*/
    413 	gp = &end;
    414 	np->gp = nopskip(np->gp, &gp);
    415 	while (gp != &end)
    416 	{
    417 		Graph *gp2 = gp;
    418 
    419 		gp = gp->alt.ptr;
    420 		free(gp2);
    421 	}
    422 	np->sp = 0;
    423 	np->allsp = 0;
    424 	np->avail = 0;
    425 	np->allcp = 0;
    426 	ep->re_nfa = np;
    427 	np->beg = firstop(tp); 
    428 	return 0;
    429 err:;
    430 	if (np != 0)
    431 	{
    432 		if (np->gp != 0)
    433 			delgraph(np->gp);
    434 		free(np);
    435 	}
    436 	return REG_ESPACE;
    437 }
    438 
    439 static Stack *
    440 newstck(Nfa *np)
    441 {
    442 	Stack *sp, **spp;
    443 	int i;
    444 
    445 	if ((sp = np->sp) == 0)	/* get more */
    446 	{
    447 		spp = &np->sp;
    448 		i = 4;
    449 		while ((sp = malloc(sizeof(Stack))) != 0)
    450 		{
    451 			sp->link = np->allsp;
    452 			np->allsp = sp;
    453 			*spp = sp;
    454 			spp = &sp->prev;
    455 			if (--i == 0)
    456 				break;
    457 		}
    458 		*spp = 0;
    459 		if ((sp = np->sp) == 0)	/* first malloc failed */
    460 			return 0;
    461 	}
    462 	np->sp = sp->prev;
    463 	return sp;
    464 }
    465 
    466 static int
    467 mkstck(Nfa *np, Context *cp, Graph *gp)
    468 {
    469 	Stack *new, *sp;
    470 
    471 	if (gp == 0)	/* copy existing stack tail */
    472 	{
    473 		/*
    474 		* Hoist up top of stack.
    475 		*/
    476 		new = cp->sp;
    477 		cp->wasgp = new->wasgp;
    478 		cp->str = new->str;
    479 		cp->cnt = new->cnt;
    480 		cp->sp = new->prev;
    481 		if ((sp = new->prev) == 0) /* only one below */
    482 		{
    483 			new->prev = np->sp;
    484 			np->sp = new;
    485 			cp->sp = 0;
    486 			return 0;
    487 		}
    488 		for (;;) /* copy the rest; reusing the old top */
    489 		{
    490 			new->wasgp = sp->wasgp;
    491 			new->str = sp->str;
    492 			new->cnt = sp->cnt;
    493 			if ((new->prev = sp->prev) == 0)
    494 				break;
    495 			if ((new->prev = newstck(np)) == 0)
    496 				return REG_ESPACE;
    497 			new = new->prev;
    498 			sp = sp->prev;
    499 		}
    500 		return 0;
    501 	}
    502 	if (cp->wasgp != 0)	/* push current down */
    503 	{
    504 		if ((new = newstck(np)) == 0)
    505 			return REG_ESPACE;
    506 		new->prev = cp->sp;
    507 		cp->sp = new;
    508 		new->wasgp = cp->wasgp;
    509 		new->str = cp->str;
    510 		new->cnt = cp->cnt;
    511 	}
    512 	cp->wasgp = gp;
    513 	cp->str = 0;
    514 	cp->cnt = 0;
    515 	return 0;
    516 }
    517 
    518 	/*
    519 	* Allocate a new Context (from np->avail)
    520 	* and add it to the end of the current list.
    521 	*/
    522 static int
    523 newctxt(Nfa *np, Context *cp, Graph *gp)
    524 {
    525 	Context *new;
    526 	size_t n;
    527 
    528 	if ((new = np->avail) == 0)	/* need more */
    529 	{
    530 		Context *ncp, **cpp;
    531 		int i;
    532 
    533 		/*
    534 		* Can't easily allocate Contexts in one call because
    535 		* the alignments (given the varying length of rm[])
    536 		* are potentially nontrivial.
    537 		*/
    538 		n = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t);
    539 		i = 4;
    540 		cpp = &np->avail;
    541 		while ((ncp = malloc(n)) != 0)
    542 		{
    543 			ncp->link = np->allcp;
    544 			np->allcp = ncp;
    545 			*cpp = ncp;
    546 			cpp = &ncp->next;
    547 			if (--i == 0)
    548 				break;
    549 		}
    550 		*cpp = 0;
    551 		if ((new = np->avail) == 0)	/* first malloc failed */
    552 			return REG_ESPACE;
    553 	}
    554 	np->avail = new->next;
    555 	new->next = 0;
    556 	new->gp = gp;
    557 	new->sp = 0;
    558 	new->wasgp = 0;
    559 	new->nset = 0;
    560 	if (cp != 0) /* copy existing context information */
    561 	{
    562 		if (cp->sp != 0) /* copy tail of stack */
    563 		{
    564 			new->sp = cp->sp;
    565 			if (mkstck(np, new, 0) != 0)
    566 				return REG_ESPACE;
    567 		}
    568 		new->wasgp = cp->wasgp;
    569 		new->str = cp->str;
    570 		new->cnt = cp->cnt;
    571 		/*
    572 		* Copy any valid subexpression match information
    573 		* from the existing context.
    574 		*/
    575 		if (np->used != 0 && (n = cp->nset) != 0)
    576 		{
    577 			regmatch_t *rmn = new->rm, *rmo = cp->rm;
    578 
    579 			new->nset = n;
    580 			for (;; ++rmn, ++rmo)
    581 			{
    582 				rmn->rm_so = rmo->rm_so;
    583 				rmn->rm_eo = rmo->rm_eo;
    584 				if (--n == 0)
    585 					break;
    586 			}
    587 		}
    588 	}
    589 	/*
    590 	* Append it to the end of the current Context list.
    591 	*/
    592 	*np->ecur = new;
    593 	np->ecur = &new->next;
    594 	return 0;
    595 }
    596 
    597 	/*
    598 	* Compare two byte string sequences for equality.
    599 	* If REG_ICASE, walk through the strings doing
    600 	* caseless comparisons of the wide characters.
    601 	*/
    602 static int
    603 casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max)
    604 {
    605 	const Uchar *p = &xp->str[i];
    606 	const Uchar *end;
    607 	w_type wc1, wc2;
    608 	int k;
    609 
    610 	if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */
    611 		return 1;
    612 	if ((xp->flags & REG_ICASE) == 0)
    613 		return 0;
    614 	/*
    615 	* Walk through each testing for a match, ignoring case,
    616 	* of the resulting wide characters.
    617 	* Note that only "s" can run out of characters.
    618 	*/
    619 	end = &p[n];
    620 	do
    621 	{
    622 		if ((wc1 = *s++) == '\0')
    623 			return 0;
    624 		if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0)
    625 			s += k;
    626 		if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0)
    627 			p += k;
    628 		if (wc1 != wc2)
    629 		{
    630 			wc1 = to_lower(wc1);
    631 			wc2 = to_lower(wc2);
    632 			if (wc1 != wc2)
    633 				return 0;
    634 		}
    635 	} while (p < end);
    636 	return 1;
    637 }
    638 
    639 LIBUXRE_STATIC int
    640 libuxre_regnfaexec(Nfa *np, Exec *xp)
    641 {
    642 	const Uchar *s, *s1, *s2;
    643 	Context *cp, *cpn;
    644 	Graph *gp, *brace;
    645 	Stack *sp, *spn;
    646 	ssize_t rmso, len;
    647 	int i, ret, mb_cur_max;
    648 	w_type wc;
    649 	size_t n;
    650 
    651 	ret = 0;	/* assume it matches */
    652 	rmso = -1;	/* but no match yet */
    653 	np->cur = 0;
    654 	np->step = 0;
    655 	np->ecur = &np->cur;
    656 	np->estp = &np->step;
    657 	if ((np->used = xp->nmatch) < np->rmmin)
    658 		np->used = np->rmmin;
    659 	s1 = 0;		/* one char back */
    660 	s = xp->str;	/* current high water in string */
    661 	mb_cur_max = xp->mb_cur_max;
    662 	for (;;)
    663 	{
    664 		/*
    665 		* Get next character from string.
    666 		* If the engine proper hasn't started and the engine
    667 		* requires a particular character to start and this
    668 		* character isn't it, try the next one.
    669 		*/
    670 		for (;;)
    671 		{
    672 			s2 = s1;
    673 			s1 = s;
    674 			if (!ISONEBYTE(wc = *s++) &&
    675 					(i = libuxre_mb2wc(&wc, s)) > 0)
    676 				s += i;
    677 			if (np->cur != 0 || np->beg == wc || np->beg == 0)
    678 				break;
    679 			if (np->beg == ROP_BOL)
    680 			{
    681 				if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0)
    682 					break;
    683 				if ((xp->flags & REG_NEWLINE) == 0)
    684 					goto nomatch;
    685 				if (s2 != 0 && *s2 == '\n')
    686 					break;
    687 			}
    688 			if (wc == '\0')
    689 				goto nomatch;
    690 		}
    691 		/*
    692 		* Start the engine by inserting a fresh initial context
    693 		* if there's no known match as yet.  (Once some match
    694 		* has been found, the end is near.)
    695 		*/
    696 		if (rmso < 0 && newctxt(np, 0, np->gp) != 0)
    697 			goto err;
    698 		/*
    699 		* Walk the current Contexts list, trying each.
    700 		* "loop" is when a new Context is to be tried,
    701 		* "again" is when the same Context continues,
    702 		* but wc was not yet matched.
    703 		*/
    704 		cp = np->cur;
    705 	loop:;
    706 		gp = cp->gp;
    707 	again:;
    708 		switch (gp->op)
    709 		{
    710 		case ROP_BRACE: /* gp->next->op == ROP_MTOR */
    711 			brace = gp;
    712 			gp = gp->next;
    713 			goto mtor;
    714 		case ROP_MTOR:
    715 			brace = 0;
    716 		mtor:;
    717 			if (cp->wasgp != gp) /* first time */
    718 			{
    719 				if (mkstck(np, cp, gp) != 0)
    720 					goto err;
    721 			}
    722 			else if (cp->str == s) /* spinning */
    723 				goto poptonext;
    724 			cp->str = s;
    725 			if (brace != 0)
    726 			{
    727 				if (cp->cnt >= brace->alt.info.num[1])
    728 					goto poptonext;
    729 				if (++cp->cnt <= brace->alt.info.num[0])
    730 				{
    731 					gp = gp->alt.ptr;
    732 					goto again;
    733 				}
    734 				if (cp->cnt > BRACE_MAX)
    735 					cp->cnt = BRACE_MAX;
    736 			}
    737 			if (newctxt(np, cp, gp->alt.ptr) != 0)
    738 				goto err;
    739 		poptonext:;
    740 			cp->wasgp = 0;
    741 			if ((sp = cp->sp) != 0) /* pop stack */
    742 			{
    743 				cp->sp = sp->prev;
    744 				cp->wasgp = sp->wasgp;
    745 				cp->str = sp->str;
    746 				cp->cnt = sp->cnt;
    747 				sp->prev = np->sp;
    748 				np->sp = sp;
    749 			}
    750 			/*FALLTHROUGH*/
    751 		case ROP_EMPTY:
    752 		tonext:;
    753 			gp = gp->next;
    754 			goto again;
    755 		case ROP_OR:
    756 			if (newctxt(np, cp, gp->alt.ptr) != 0)
    757 				goto err;
    758 			goto tonext;
    759 		case ROP_LP:
    760 			if ((n = gp->alt.info.sub) < np->used)
    761 			{
    762 				size_t k;
    763 
    764 				cp->rm[n].rm_so = s1 - xp->str;
    765 				cp->rm[n].rm_eo = -1;
    766 				/*
    767 				* Mark any skipped subexpressions as
    768 				* failing to participate in the match.
    769 				*/
    770 				if ((k = cp->nset) < n)
    771 				{
    772 					regmatch_t *rmp = &cp->rm[k];
    773 
    774 					for (;; rmp++)
    775 					{
    776 						rmp->rm_so = -1;
    777 						rmp->rm_eo = -1;
    778 						if (++k >= n)
    779 							break;
    780 					}
    781 				}
    782 				cp->nset = n + 1;
    783 			}
    784 			goto tonext;
    785 		case ROP_RP:
    786 			if ((n = gp->alt.info.sub) < np->used)
    787 				cp->rm[n].rm_eo = s1 - xp->str;
    788 			goto tonext;
    789 		case ROP_BOL:
    790 			if (s2 == 0)
    791 			{
    792 				if (xp->flags & REG_NOTBOL)
    793 					goto failed;
    794 			}
    795 			else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n')
    796 				goto failed;
    797 			goto tonext;
    798 		case ROP_EOL:
    799 			if (wc == '\0')
    800 			{
    801 				if (xp->flags & REG_NOTEOL)
    802 					goto failed;
    803 			}
    804 			else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n')
    805 				goto failed;
    806 			goto tonext;
    807 		default:	/* character match */
    808 			if (gp->op != wc)
    809 			{
    810 				if ((xp->flags & REG_ICASE) == 0
    811 					|| gp->op != to_lower(wc))
    812 				{
    813 					goto failed;
    814 				}
    815 			}
    816 		nextwc:;
    817 			cp->gp = gp->next;
    818 		tostep:;
    819 			cpn = cp->next;
    820 			cp->next = 0;
    821 			*np->estp = cp;
    822 			np->estp = &cp->next;
    823 			if ((cp = cpn) == 0)
    824 				break;
    825 			goto loop;
    826 		case ROP_NOTNL:
    827 			if (wc == '\n')
    828 				goto failed;
    829 			/*FALLTHROUGH*/
    830 		case ROP_ANYCH:
    831 			if (wc > '\0')
    832 				goto nextwc;
    833 			/*FALLTHROUGH*/
    834 		case ROP_NONE:
    835 		failed:;
    836 			cpn = cp->next;
    837 			cp->next = np->avail;
    838 			np->avail = cp;
    839 			if ((cp = cpn) == 0)
    840 				break;
    841 			goto loop;
    842 		case ROP_LT:
    843 			if (s2 == 0)
    844 			{
    845 				if (xp->flags & REG_NOTBOL)
    846 					goto failed;
    847 			}
    848 			else
    849 			{
    850 				w_type pwc;
    851 
    852 				if (wc != '_' &&
    853 				    !iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
    854 					goto failed;
    855 				if (!ISONEBYTE(pwc = *s2))
    856 					libuxre_mb2wc(&pwc, &s2[1]);
    857 				if (pwc == '_' ||
    858 				    iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc))
    859 					goto failed;
    860 			}
    861 			goto tonext;
    862 		case ROP_GT:
    863 			if (wc == '_' ||
    864 			    iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
    865 				goto failed;
    866 			goto tonext;
    867 		case ROP_BKT:
    868 		case ROP_BKTCOPY:
    869 			if (cp->wasgp == gp) /* rest of MCCE */
    870 			{
    871 			checkspin:;
    872 				if (s1 >= cp->str) /* got it all */
    873 					goto poptonext;
    874 				goto tostep;
    875 			}
    876 			if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s,
    877 							mb_cur_max)) < 0)
    878 				goto failed;
    879 			if ((n = i) == 0)	/* only matched wc */
    880 				goto nextwc;
    881 		spin:;
    882 			if (mkstck(np, cp, gp) != 0)
    883 				goto err;
    884 			cp->gp = gp;	/* stay here until reach past s+n */
    885 			cp->str = s + n;
    886 			goto tostep;
    887 		case ROP_REF:
    888 			if (cp->wasgp == gp)	/* rest of matched string */
    889 				goto checkspin;
    890 			if ((n = gp->alt.info.sub) >= cp->nset)
    891 				goto failed;
    892 			if ((len = cp->rm[n].rm_eo) < 0)
    893 				goto failed;
    894 			if ((len -= n = cp->rm[n].rm_so) == 0)
    895 				goto tonext;
    896 			if (casecmp(s1, xp, n, len, mb_cur_max) == 0)
    897 				goto failed;
    898 			if ((n = s - s1) >= len)
    899 				goto nextwc;
    900 			n = len - n;
    901 			goto spin;
    902 		case ROP_END:	/* success! */
    903 			if (xp->flags & REG_NONEMPTY)
    904 			{
    905 				if (s2 == 0)
    906 					goto failed;
    907 			}
    908 			if (xp->nmatch == 0)
    909 				goto match;
    910 			/*
    911 			* Mark any skipped subexpressions as failing to match.
    912 			*/
    913 			if ((n = cp->nset) < xp->nmatch)
    914 			{
    915 				do
    916 				{
    917 					cp->rm[n].rm_so = -1;
    918 					cp->rm[n].rm_eo = -1;
    919 				} while (++n < xp->nmatch);
    920 			}
    921 			/*
    922 			* Note the left-most match that's longest.
    923 			*/
    924 			n = cp->rm[0].rm_so;
    925 			if (rmso < 0 || n < rmso)
    926 			{
    927 				rmso = n;
    928 			record:;
    929 				memcpy(xp->match, cp->rm,
    930 					xp->nmatch * sizeof(regmatch_t));
    931 				goto failed;
    932 			}
    933 			if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo)
    934 				goto failed;
    935 			if (xp->match[0].rm_eo < cp->rm[0].rm_eo)
    936 				goto record;
    937 #if 0 /* maximize the lengths of earlier LP...RPs */
    938 			/*
    939 			* If both are of the same length and start
    940 			* at the same point, choose the one with
    941 			* a "longest submatch from left to right"
    942 			* where an empty string wins over a nonmatch.
    943 			*/
    944 			for (n = 1; n < xp->nmatch; n++)
    945 			{
    946 				ssize_t nlen;
    947 
    948 				/*
    949 				* First, go with the choice that has any
    950 				* match for subexpr n.
    951 				*/
    952 				len = xp->match[n].rm_eo;
    953 				nlen = cp->rm[n].rm_eo;
    954 				if (nlen < 0)
    955 				{
    956 					if (len >= 0)
    957 						break;
    958 				}
    959 				else if (len < 0)
    960 					goto record;
    961 				/*
    962 				* Both have a match; go with the longer.
    963 				*/
    964 				len -= xp->match[n].rm_so;
    965 				nlen -= cp->rm[n].rm_so;
    966 				if (nlen < len)
    967 					break;
    968 				if (nlen > len)
    969 					goto record;
    970 			}
    971 #else /* take LP and RP as "fence posts" and maximize earlier gaps */
    972 			/*
    973 			* If both are of the same length and start
    974 			* at the same point, choose the one with
    975 			* the larger earlier subpatterns, in which
    976 			* each rm_so and rm_eo serves as a separator.
    977 			*/
    978 			for (n = 1; n < xp->nmatch; n++)
    979 			{
    980 				ssize_t nlen;
    981 				int	use;
    982 
    983 				if (xp->flags & REG_AVOIDNULL) {
    984 					/*
    985 					* This is to to satisfy POSIX.1-2001
    986 					* XBD pp. 172-173 ll. 6127-6129, whose
    987 					* translation is "do not match null
    988 					* expressions if there is a choice".
    989 					* See also POSIX.2 interpretation #43
    990 					* in which the question was raised.
    991 					*
    992 					* The first subexpression of "\(x*\)*"
    993 					* must thus match the string "xxx".
    994 					*/
    995 					use = cp->rm[n].rm_eo -
    996 							cp->rm[n].rm_so >=
    997 						xp->match[n].rm_eo -
    998 							xp->match[n].rm_so ||
    999 						xp->match[n].rm_so < 0;
   1000 				} else
   1001 					use = 1;
   1002 				/*
   1003 				* Choose the rightmost ROP_LP as that
   1004 				* maximizes the gap from before.
   1005 				*/
   1006 				len = xp->match[n].rm_so;
   1007 				nlen = cp->rm[n].rm_so;
   1008 				if (len < nlen && use)
   1009 					goto record;
   1010 				if (len > nlen)
   1011 					break;
   1012 				/*
   1013 				* The ROP_LPs are at the same point:
   1014 				* Choose the rightmost ROP_RP.
   1015 				*/
   1016 				len = xp->match[n].rm_eo;
   1017 				nlen = cp->rm[n].rm_eo;
   1018 				if (len < nlen && use)
   1019 					goto record;
   1020 				if (len > nlen)
   1021 					break;
   1022 			}
   1023 #endif
   1024 			goto failed;
   1025 		}
   1026 		/*
   1027 		* Finished the current Context list.  If the input string
   1028 		* has been entirely scanned, we're done.  Otherwise, make
   1029 		* the next step list current for the next character.
   1030 		* If the next step list was empty and there's an existing
   1031 		* match, that's the left-most longest.
   1032 		*/
   1033 		if (wc == '\0')
   1034 		{
   1035 			if (rmso >= 0)
   1036 				goto match;
   1037 			goto nomatch;
   1038 		}
   1039 		np->ecur = np->estp;
   1040 		if ((np->cur = np->step) == 0)
   1041 		{
   1042 			if (rmso >= 0)
   1043 				goto match;
   1044 			np->ecur = &np->cur; /* was pointing at step */
   1045 		}
   1046 		np->step = 0;
   1047 		np->estp = &np->step;
   1048 	}
   1049 nomatch:;
   1050 	ret = REG_NOMATCH;
   1051 match:;
   1052 	np->avail = 0;
   1053 	for (cp = np->allcp; cp != 0; cp = cpn)
   1054 	{
   1055 		cpn = cp->link;
   1056 		cp->next = np->avail;
   1057 		np->avail = cp;
   1058 	}
   1059 	np->sp = 0;
   1060 	for (sp = np->allsp; sp != 0; sp = spn)
   1061 	{
   1062 		spn = sp->link;
   1063 		sp->prev = np->sp;
   1064 		np->sp = sp;
   1065 	}
   1066 	return ret;
   1067 err:;
   1068 	ret = REG_ESPACE;
   1069 	goto match;
   1070 }