hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

regexp.h (25528B)


      1 /*
      2  * Simple Regular Expression functions. Derived from Unix 7th Edition,
      3  * /usr/src/cmd/expr.y
      4  *
      5  * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
      6  *
      7  * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  *   Redistributions of source code and documentation must retain the
     13  *    above copyright notice, this list of conditions and the following
     14  *    disclaimer.
     15  *   Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *   All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *      This product includes software developed or owned by Caldera
     21  *      International, Inc.
     22  *   Neither the name of Caldera International, Inc. nor the names of
     23  *    other contributors may be used to endorse or promote products
     24  *    derived from this software without specific prior written permission.
     25  *
     26  * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
     27  * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
     28  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     29  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
     31  * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
     32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
     34  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     35  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
     36  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
     37  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     38  */
     39 
     40 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
     41 #define	REGEXP_H_USED	__attribute__ ((used))
     42 #elif defined __GNUC__
     43 #define	REGEXP_H_USED	__attribute__ ((unused))
     44 #else
     45 #define	REGEXP_H_USED
     46 #endif
     47 static const char regexp_h_sccsid[] REGEXP_H_USED =
     48 	"@(#)regexp.sl	1.56 (gritter) 5/29/05";
     49 
     50 #if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
     51 #define	REGEXP_H_WCHARS
     52 #endif
     53 
     54 #define	CBRA	2
     55 #define	CCHR	4
     56 #define	CDOT	8
     57 #define	CCL	12
     58 /*	CLNUM	14	used in sed */
     59 /*	CEND	16	used in sed */
     60 #define	CDOL	20
     61 #define	CCEOF	22
     62 #define	CKET	24
     63 #define	CBACK	36
     64 #define	CNCL	40
     65 #define	CBRC	44
     66 #define	CLET	48
     67 #define	CCH1	52
     68 #define	CCH2	56
     69 #define	CCH3	60
     70 
     71 #define	STAR	01
     72 #define RNGE	03
     73 #define	REGEXP_H_LEAST	0100
     74 
     75 #ifdef	REGEXP_H_WCHARS
     76 #define	CMB	0200
     77 #else	/* !REGEXP_H_WCHARS */
     78 #define	CMB	0
     79 #endif	/* !REGEXP_H_WCHARS */
     80 
     81 #define	NBRA	9
     82 
     83 #define PLACE(c)	ep[c >> 3] |= bittab[c & 07]
     84 #define ISTHERE(c)	(ep[c >> 3] & bittab[c & 07])
     85 
     86 #ifdef	REGEXP_H_WCHARS
     87 #define	REGEXP_H_IS_THERE(ep, c)	((ep)[c >> 3] & bittab[c & 07])
     88 #endif
     89 
     90 #include	<ctype.h>
     91 #include	<string.h>
     92 #include	<limits.h>
     93 #ifdef	REGEXP_H_WCHARS
     94 #include	<stdlib.h>
     95 #include	<wchar.h>
     96 #include	<wctype.h>
     97 #endif	/* REGEXP_H_WCHARS */
     98 
     99 #define	regexp_h_uletter(c)	(isalpha(c) || (c) == '_')
    100 #ifdef	REGEXP_H_WCHARS
    101 #define	regexp_h_wuletter(c)	(iswalpha(c) || (c) == L'_')
    102 
    103 /*
    104  * Used to allocate memory for the multibyte star algorithm.
    105  */
    106 #ifndef	regexp_h_malloc
    107 #define	regexp_h_malloc(n)	malloc(n)
    108 #endif
    109 #ifndef	regexp_h_free
    110 #define	regexp_h_free(p)	free(p)
    111 #endif
    112 
    113 /*
    114  * Can be predefined to 'inline' to inline some multibyte functions;
    115  * may improve performance for files that contain many multibyte
    116  * sequences.
    117  */
    118 #ifndef	regexp_h_inline
    119 #define	regexp_h_inline
    120 #endif
    121 
    122 /*
    123  * Mask to determine whether the first byte of a sequence possibly
    124  * starts a multibyte character. Set to 0377 to force mbtowc() for
    125  * any byte sequence (except 0).
    126  */
    127 #ifndef	REGEXP_H_MASK
    128 #define	REGEXP_H_MASK	0200
    129 #endif
    130 #endif	/* REGEXP_H_WCHARS */
    131 
    132 /*
    133  * For regexpr.h.
    134  */
    135 #ifndef	regexp_h_static
    136 #define	regexp_h_static
    137 #endif
    138 #ifndef	REGEXP_H_STEP_INIT
    139 #define	REGEXP_H_STEP_INIT
    140 #endif
    141 #ifndef	REGEXP_H_ADVANCE_INIT
    142 #define	REGEXP_H_ADVANCE_INIT
    143 #endif
    144 
    145 char	*braslist[NBRA];
    146 char	*braelist[NBRA];
    147 int	nbra;
    148 char	*loc1, *loc2, *locs;
    149 int	sed;
    150 int	nodelim;
    151 
    152 regexp_h_static int	circf;
    153 regexp_h_static int	low;
    154 regexp_h_static int	size;
    155 
    156 regexp_h_static unsigned char	bittab[] = {
    157 	1,
    158 	2,
    159 	4,
    160 	8,
    161 	16,
    162 	32,
    163 	64,
    164 	128
    165 };
    166 static int	regexp_h_advance(register const char *lp,
    167 			register const char *ep);
    168 static void	regexp_h_getrnge(register const char *str, int least);
    169 
    170 static const char	*regexp_h_bol;	/* beginning of input line (for \<) */
    171 
    172 #ifdef	REGEXP_H_WCHARS
    173 static int	regexp_h_wchars;
    174 static int	regexp_h_mbcurmax;
    175 
    176 static const char	*regexp_h_firstwc;	/* location of first
    177 						   multibyte character
    178 						   on input line */
    179 
    180 #define	regexp_h_getwc(c)	{ \
    181 	if (regexp_h_wchars) { \
    182 		char mbbuf[MB_LEN_MAX + 1], *mbptr; \
    183 		wchar_t wcbuf; \
    184 		int mb, len; \
    185 		mbptr = mbbuf; \
    186 		do { \
    187 			mb = GETC(); \
    188 			*mbptr++ = mb; \
    189 			*mbptr = '\0'; \
    190 		} while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
    191 			&& mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
    192 		if (len == -1) \
    193 			ERROR(67); \
    194 		c = wcbuf; \
    195 	} else { \
    196 		c = GETC(); \
    197 	} \
    198 }
    199 
    200 #define	regexp_h_store(wc, mb, me)	{ \
    201 	int len; \
    202 	if (wc == WEOF) \
    203 		ERROR(67); \
    204 	if ((len = me - mb) <= regexp_h_mbcurmax) { \
    205 		char mt[MB_LEN_MAX]; \
    206 		if (wctomb(mt, wc) >= len) \
    207 			ERROR(50); \
    208 	} \
    209 	switch (len = wctomb(mb, wc)) { \
    210 	case -1: \
    211 		 ERROR(67); \
    212 	case 0: \
    213 		mb++; \
    214 		break; \
    215 	default: \
    216 		mb += len; \
    217 	} \
    218 }
    219 
    220 static regexp_h_inline wint_t
    221 regexp_h_fetchwc(const char **mb, int islp)
    222 {
    223 	wchar_t wc;
    224 	int len;
    225 
    226 	if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
    227 		(*mb)++;
    228 		return WEOF;
    229 	}
    230 	if (islp && regexp_h_firstwc == NULL)
    231 		regexp_h_firstwc = *mb;
    232 	/*if (len == 0) {
    233 		(*mb)++;
    234 		return L'\0';
    235 	} handled in singlebyte code */
    236 	*mb += len;
    237 	return wc;
    238 }
    239 
    240 #define	regexp_h_fetch(mb, islp)	((*(mb) & REGEXP_H_MASK) == 0 ? \
    241 						(*(mb)++&0377): \
    242 						regexp_h_fetchwc(&(mb), islp))
    243 
    244 static regexp_h_inline wint_t
    245 regexp_h_showwc(const char *mb)
    246 {
    247 	wchar_t wc;
    248 
    249 	if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
    250 		return WEOF;
    251 	return wc;
    252 }
    253 
    254 #define	regexp_h_show(mb)	((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
    255 					regexp_h_showwc(mb))
    256 
    257 /*
    258  * Return the character immediately preceding mb. Since no byte is
    259  * required to be the first byte of a character, the longest multibyte
    260  * character ending at &[mb-1] is searched.
    261  */
    262 static regexp_h_inline wint_t
    263 regexp_h_previous(const char *mb)
    264 {
    265 	const char *p = mb;
    266 	wchar_t wc, lastwc = WEOF;
    267 	int len, max = 0;
    268 
    269 	if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
    270 		return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
    271 	while (p-- > regexp_h_bol) {
    272 		mbtowc(NULL, NULL, 0);
    273 		if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
    274 			if (len < max || len < mb - p)
    275 				break;
    276 			max = len;
    277 			lastwc = wc;
    278 		} else if (len < 0 && max > 0)
    279 			break;
    280 	}
    281 	return lastwc;
    282 }
    283 
    284 #define	regexp_h_cclass(set, c, af)	\
    285 	((c) == 0 || (c) == WEOF ? 0 : ( \
    286 		((c) > 0177) ? \
    287 			regexp_h_cclass_wc(set, c, af) : ( \
    288 				REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
    289 			) \
    290 		) \
    291 	)
    292 
    293 static regexp_h_inline int
    294 regexp_h_cclass_wc(const char *set, register wint_t c, int af)
    295 {
    296 	register wint_t wc, wl = WEOF;
    297 	const char *end;
    298 
    299 	end = &set[18] + set[0] - 1;
    300 	set += 17;
    301 	while (set < end) {
    302 		wc = regexp_h_fetch(set, 0);
    303 #ifdef	REGEXP_H_VI_BACKSLASH
    304 		if (wc == '\\' && set < end &&
    305 				(*set == ']' || *set == '-' ||
    306 				 *set == '^' || *set == '\\')) {
    307 			wc = regexp_h_fetch(set, 0);
    308 		} else
    309 #endif	/* REGEXP_H_VI_BACKSLASH */
    310 		if (wc == '-' && wl != WEOF && set < end) {
    311 			wc = regexp_h_fetch(set, 0);
    312 #ifdef	REGEXP_H_VI_BACKSLASH
    313 			if (wc == '\\' && set < end &&
    314 					(*set == ']' || *set == '-' ||
    315 					 *set == '^' || *set == '\\')) {
    316 				wc = regexp_h_fetch(set, 0);
    317 			}
    318 #endif	/* REGEXP_H_VI_BACKSLASH */
    319 			if (c > wl && c < wc)
    320 				return af;
    321 		}
    322 		if (c == wc)
    323 			return af;
    324 		wl = wc;
    325 	}
    326 	return !af;
    327 }
    328 #else	/* !REGEXP_H_WCHARS */
    329 #define	regexp_h_wchars		0
    330 #define	regexp_h_getwc(c)	{ c = GETC(); }
    331 #endif	/* !REGEXP_H_WCHARS */
    332 
    333 regexp_h_static char *
    334 compile(char *instring, char *ep, const char *endbuf, int seof)
    335 {
    336 	INIT	/* Dependent declarations and initializations */
    337 	register int c;
    338 	register int eof = seof;
    339 	char *lastep = instring;
    340 	int cclcnt;
    341 	char bracket[NBRA], *bracketp;
    342 	int closed;
    343 	char neg;
    344 	int lc;
    345 	int i, cflg;
    346 
    347 #ifdef	REGEXP_H_WCHARS
    348 	char *eq;
    349 	regexp_h_mbcurmax = MB_CUR_MAX;
    350 	regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
    351 #endif
    352 	lastep = 0;
    353 	bracketp = bracket;
    354 	if((c = GETC()) == eof || c == '\n') {
    355 		if (c == '\n') {
    356 			UNGETC(c);
    357 			nodelim = 1;
    358 		}
    359 		if(*ep == 0 && !sed)
    360 			ERROR(41);
    361 		if (bracketp > bracket)
    362 			ERROR(42);
    363 		RETURN(ep);
    364 	}
    365 	circf = closed = nbra = 0;
    366 	if (c == '^')
    367 		circf++;
    368 	else
    369 		UNGETC(c);
    370 	for (;;) {
    371 		if (ep >= endbuf)
    372 			ERROR(50);
    373 		regexp_h_getwc(c);
    374 		if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
    375 			lastep = ep;
    376 		if (c == eof) {
    377 			*ep++ = CCEOF;
    378 			if (bracketp > bracket)
    379 				ERROR(42);
    380 			RETURN(ep);
    381 		}
    382 		switch (c) {
    383 
    384 		case '.':
    385 			*ep++ = CDOT|regexp_h_wchars;
    386 			continue;
    387 
    388 		case '\n':
    389 			if (sed == 0) {
    390 				UNGETC(c);
    391 				*ep++ = CCEOF;
    392 				nodelim = 1;
    393 				RETURN(ep);
    394 			}
    395 			ERROR(36);
    396 		case '*':
    397 			if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
    398 					*lastep==(CBRC|regexp_h_wchars) ||
    399 					*lastep==(CLET|regexp_h_wchars))
    400 				goto defchar;
    401 			*lastep |= STAR;
    402 			continue;
    403 
    404 		case '$':
    405 			if(PEEKC() != eof)
    406 				goto defchar;
    407 			*ep++ = CDOL;
    408 			continue;
    409 
    410 		case '[':
    411 #ifdef	REGEXP_H_WCHARS
    412 			if (regexp_h_wchars == 0) {
    413 #endif
    414 				if(&ep[33] >= endbuf)
    415 					ERROR(50);
    416 
    417 				*ep++ = CCL;
    418 				lc = 0;
    419 				for(i = 0; i < 32; i++)
    420 					ep[i] = 0;
    421 
    422 				neg = 0;
    423 				if((c = GETC()) == '^') {
    424 					neg = 1;
    425 					c = GETC();
    426 				}
    427 
    428 				do {
    429 					c &= 0377;
    430 					if(c == '\0' || c == '\n')
    431 						ERROR(49);
    432 #ifdef	REGEXP_H_VI_BACKSLASH
    433 					if(c == '\\' && ((c = PEEKC()) == ']' ||
    434 							c == '-' || c == '^' ||
    435 							c == '\\')) {
    436 						c = GETC();
    437 						c &= 0377;
    438 					} else
    439 #endif	/* REGEXP_H_VI_BACKSLASH */
    440 					if(c == '-' && lc != 0) {
    441 						if ((c = GETC()) == ']') {
    442 							PLACE('-');
    443 							break;
    444 						}
    445 #ifdef	REGEXP_H_VI_BACKSLASH
    446 						if(c == '\\' &&
    447 							((c = PEEKC()) == ']' ||
    448 								c == '-' ||
    449 								c == '^' ||
    450 								c == '\\'))
    451 							c = GETC();
    452 #endif	/* REGEXP_H_VI_BACKSLASH */
    453 						c &= 0377;
    454 						while(lc < c) {
    455 							PLACE(lc);
    456 							lc++;
    457 						}
    458 					}
    459 					lc = c;
    460 					PLACE(c);
    461 				} while((c = GETC()) != ']');
    462 				if(neg) {
    463 					for(cclcnt = 0; cclcnt < 32; cclcnt++)
    464 						ep[cclcnt] ^= 0377;
    465 					ep[0] &= 0376;
    466 				}
    467 
    468 				ep += 32;
    469 #ifdef	REGEXP_H_WCHARS
    470 			} else {
    471 				if (&ep[18] >= endbuf)
    472 					ERROR(50);
    473 				*ep++ = CCL|CMB;
    474 				*ep++ = 0;
    475 				lc = 0;
    476 				for (i = 0; i < 16; i++)
    477 					ep[i] = 0;
    478 				eq = &ep[16];
    479 				regexp_h_getwc(c);
    480 				if (c == L'^') {
    481 					regexp_h_getwc(c);
    482 					ep[-2] = CNCL|CMB;
    483 				}
    484 				do {
    485 					if (c == '\0' || c == '\n')
    486 						ERROR(49);
    487 #ifdef	REGEXP_H_VI_BACKSLASH
    488 					if(c == '\\' && ((c = PEEKC()) == ']' ||
    489 							c == '-' || c == '^' ||
    490 							c == '\\')) {
    491 						regexp_h_store(c, eq, endbuf);
    492 						regexp_h_getwc(c);
    493 					} else
    494 #endif	/* REGEXP_H_VI_BACKSLASH */
    495 					if (c == '-' && lc != 0 && lc <= 0177) {
    496 						regexp_h_store(c, eq, endbuf);
    497 						regexp_h_getwc(c);
    498 						if (c == ']') {
    499 							PLACE('-');
    500 							break;
    501 						}
    502 #ifdef	REGEXP_H_VI_BACKSLASH
    503 						if(c == '\\' &&
    504 							((c = PEEKC()) == ']' ||
    505 								c == '-' ||
    506 								c == '^' ||
    507 								c == '\\')) {
    508 							regexp_h_store(c, eq,
    509 								endbuf);
    510 							regexp_h_getwc(c);
    511 						}
    512 #endif	/* REGEXP_H_VI_BACKSLASH */
    513 						while (lc < (c & 0177)) {
    514 							PLACE(lc);
    515 							lc++;
    516 						}
    517 					}
    518 					lc = c;
    519 					if (c <= 0177)
    520 						PLACE(c);
    521 					regexp_h_store(c, eq, endbuf);
    522 					regexp_h_getwc(c);
    523 				} while (c != L']');
    524 				if ((i = eq - &ep[16]) > 255)
    525 					ERROR(50);
    526 				lastep[1] = i;
    527 				ep = eq;
    528 			}
    529 #endif	/* REGEXP_H_WCHARS */
    530 
    531 			continue;
    532 
    533 		case '\\':
    534 			regexp_h_getwc(c);
    535 			switch(c) {
    536 
    537 			case '(':
    538 				if(nbra >= NBRA)
    539 					ERROR(43);
    540 				*bracketp++ = nbra;
    541 				*ep++ = CBRA;
    542 				*ep++ = nbra++;
    543 				continue;
    544 
    545 			case ')':
    546 				if(bracketp <= bracket)
    547 					ERROR(42);
    548 				*ep++ = CKET;
    549 				*ep++ = *--bracketp;
    550 				closed++;
    551 				continue;
    552 
    553 			case '<':
    554 				*ep++ = CBRC|regexp_h_wchars;
    555 				continue;
    556 
    557 			case '>':
    558 				*ep++ = CLET|regexp_h_wchars;
    559 				continue;
    560 
    561 			case '{':
    562 				if(lastep == (char *) (0))
    563 					goto defchar;
    564 				*lastep |= RNGE;
    565 				cflg = 0;
    566 			nlim:
    567 				c = GETC();
    568 				i = 0;
    569 				do {
    570 					if ('0' <= c && c <= '9')
    571 						i = 10 * i + c - '0';
    572 					else
    573 						ERROR(16);
    574 				} while(((c = GETC()) != '\\') && (c != ','));
    575 				if (i > 255)
    576 					ERROR(11);
    577 				*ep++ = i;
    578 				if (c == ',') {
    579 					if(cflg++)
    580 						ERROR(44);
    581 					if((c = GETC()) == '\\') {
    582 						*ep++ = (char)255;
    583 						*lastep |= REGEXP_H_LEAST;
    584 					} else {
    585 						UNGETC(c);
    586 						goto nlim; /* get 2'nd number */
    587 					}
    588 				}
    589 				if(GETC() != '}')
    590 					ERROR(45);
    591 				if(!cflg)	/* one number */
    592 					*ep++ = i;
    593 				else if((ep[-1] & 0377) < (ep[-2] & 0377))
    594 					ERROR(46);
    595 				continue;
    596 
    597 			case '\n':
    598 				ERROR(36);
    599 
    600 			case 'n':
    601 				c = '\n';
    602 				goto defchar;
    603 
    604 			default:
    605 				if(c >= '1' && c <= '9') {
    606 					if((c -= '1') >= closed)
    607 						ERROR(25);
    608 					*ep++ = CBACK;
    609 					*ep++ = c;
    610 					continue;
    611 				}
    612 			}
    613 			/* Drop through to default to use \ to turn off special chars */
    614 
    615 		defchar:
    616 		default:
    617 			lastep = ep;
    618 #ifdef	REGEXP_H_WCHARS
    619 			if (regexp_h_wchars == 0) {
    620 #endif
    621 				*ep++ = CCHR;
    622 				*ep++ = c;
    623 #ifdef	REGEXP_H_WCHARS
    624 			} else {
    625 				char	mbbuf[MB_LEN_MAX];
    626 
    627 				switch (wctomb(mbbuf, c)) {
    628 				case 1: *ep++ = CCH1;
    629 					break;
    630 				case 2:	*ep++ = CCH2;
    631 					break;
    632 				case 3:	*ep++ = CCH3;
    633 					break;
    634 				default:
    635 					*ep++ = CCHR|CMB;
    636 				}
    637 				regexp_h_store(c, ep, endbuf);
    638 			}
    639 #endif	/* REGEXP_H_WCHARS */
    640 		}
    641 	}
    642 }
    643 
    644 int
    645 step(const char *p1, const char *p2)
    646 {
    647 	register int c;
    648 #ifdef	REGEXP_H_WCHARS
    649 	register int d;
    650 #endif	/* REGEXP_H_WCHARS */
    651 
    652 	REGEXP_H_STEP_INIT	/* get circf */
    653 	regexp_h_bol = p1;
    654 #ifdef	REGEXP_H_WCHARS
    655 	regexp_h_firstwc = NULL;
    656 #endif	/* REGEXP_H_WCHARS */
    657 	if (circf) {
    658 		loc1 = (char *)p1;
    659 		return(regexp_h_advance(p1, p2));
    660 	}
    661 	/* fast check for first character */
    662 	if (*p2==CCHR) {
    663 		c = p2[1] & 0377;
    664 		do {
    665 			if ((*p1 & 0377) != c)
    666 				continue;
    667 			if (regexp_h_advance(p1, p2)) {
    668 				loc1 = (char *)p1;
    669 				return(1);
    670 			}
    671 		} while (*p1++);
    672 		return(0);
    673 	}
    674 #ifdef	REGEXP_H_WCHARS
    675 	else if (*p2==CCH1) {
    676 		do {
    677 			if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
    678 				loc1 = (char *)p1;
    679 				return(1);
    680 			}
    681 			c = regexp_h_fetch(p1, 1);
    682 		} while (c);
    683 		return(0);
    684 	} else if (*p2==CCH2) {
    685 		do {
    686 			if (p1[0] == p2[1] && p1[1] == p2[2] &&
    687 					regexp_h_advance(p1, p2)) {
    688 				loc1 = (char *)p1;
    689 				return(1);
    690 			}
    691 			c = regexp_h_fetch(p1, 1);
    692 		} while (c);
    693 		return(0);
    694 	} else if (*p2==CCH3) {
    695 		do {
    696 			if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
    697 					regexp_h_advance(p1, p2)) {
    698 				loc1 = (char *)p1;
    699 				return(1);
    700 			}
    701 			c = regexp_h_fetch(p1, 1);
    702 		} while (c);
    703 		return(0);
    704 	} else if ((*p2&0377)==(CCHR|CMB)) {
    705 		d = regexp_h_fetch(p2, 0);
    706 		do {
    707 			c = regexp_h_fetch(p1, 1);
    708 			if (c == d && regexp_h_advance(p1, p2)) {
    709 				loc1 = (char *)p1;
    710 				return(1);
    711 			}
    712 		} while(c);
    713 		return(0);
    714 	}
    715 		/* regular algorithm */
    716 	if (regexp_h_wchars)
    717 		do {
    718 			if (regexp_h_advance(p1, p2)) {
    719 				loc1 = (char *)p1;
    720 				return(1);
    721 			}
    722 			c = regexp_h_fetch(p1, 1);
    723 		} while (c);
    724 	else
    725 #endif	/* REGEXP_H_WCHARS */
    726 		do {
    727 			if (regexp_h_advance(p1, p2)) {
    728 				loc1 = (char *)p1;
    729 				return(1);
    730 			}
    731 		} while (*p1++);
    732 	return(0);
    733 }
    734 
    735 #ifdef	REGEXP_H_WCHARS
    736 /*
    737  * It is painfully slow to read character-wise backwards in a
    738  * multibyte string (see regexp_h_previous() above). For the star
    739  * algorithm, we therefore keep track of every character as it is
    740  * read in forward direction.
    741  *
    742  * Don't use alloca() for stack blocks since there is no measurable
    743  * speedup and huge amounts of memory are used up for long input
    744  * lines.
    745  */
    746 #ifndef	REGEXP_H_STAKBLOK
    747 #define	REGEXP_H_STAKBLOK	1000
    748 #endif
    749 
    750 struct	regexp_h_stack {
    751 	struct regexp_h_stack	*s_nxt;
    752 	struct regexp_h_stack	*s_prv;
    753 	const char	*s_ptr[REGEXP_H_STAKBLOK];
    754 };
    755 
    756 #define	regexp_h_push(sb, sp, sc, lp)	(regexp_h_wchars ? \
    757 			regexp_h_pushwc(sb, sp, sc, lp) : (void)0)
    758 
    759 static regexp_h_inline void
    760 regexp_h_pushwc(struct regexp_h_stack **sb,
    761 		struct regexp_h_stack **sp,
    762 		const char ***sc, const char *lp)
    763 {
    764 	if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
    765 		return;
    766 	if (*sb == NULL) {
    767 		if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
    768 			return;
    769 		(*sb)->s_nxt = (*sb)->s_prv = NULL;
    770 		*sp = *sb;
    771 		*sc = &(*sb)->s_ptr[0];
    772 	} else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
    773 		if ((*sp)->s_nxt == NULL) {
    774 			struct regexp_h_stack	*bq;
    775 
    776 			if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
    777 				return;
    778 			bq->s_nxt = NULL;
    779 			bq->s_prv = *sp;
    780 			(*sp)->s_nxt = bq;
    781 			*sp = bq;
    782 		} else
    783 			*sp = (*sp)->s_nxt;
    784 		*sc = &(*sp)->s_ptr[0];
    785 	}
    786 	*(*sc)++ = lp;
    787 }
    788 
    789 static regexp_h_inline const char *
    790 regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp,
    791 		const char ***sc, const char *lp)
    792 {
    793 	if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
    794 		return &lp[-1];
    795 	if (*sp == NULL)
    796 		return regexp_h_firstwc;
    797 	if (*sc == &(*sp)->s_ptr[0]) {
    798 		if ((*sp)->s_prv == NULL) {
    799 			regexp_h_free(*sp);
    800 			*sp = NULL;
    801 			*sb = NULL;
    802 			return regexp_h_firstwc;
    803 		}
    804 		*sp = (*sp)->s_prv;
    805 		regexp_h_free((*sp)->s_nxt);
    806 		(*sp)->s_nxt = NULL ;
    807 		*sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
    808 	}
    809 	return *(--(*sc));
    810 }
    811 
    812 static void
    813 regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
    814 {
    815 	for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
    816 		if ((*sp)->s_prv)
    817 			regexp_h_free((*sp)->s_prv);
    818 	if (*sp) {
    819 		if ((*sp)->s_prv)
    820 			regexp_h_free((*sp)->s_prv);
    821 		regexp_h_free(*sp);
    822 	}
    823 	*sp = *sb = NULL;
    824 }
    825 #else	/* !REGEXP_H_WCHARS */
    826 #define	regexp_h_push(sb, sp, sc, lp)
    827 #endif	/* !REGEXP_H_WCHARS */
    828 
    829 static int
    830 regexp_h_advance(const char *lp, const char *ep)
    831 {
    832 	register const char *curlp;
    833 	int c, least;
    834 #ifdef	REGEXP_H_WCHARS
    835 	int d;
    836 	struct regexp_h_stack	*sb = NULL, *sp = NULL;
    837 	const char	**sc;
    838 #endif	/* REGEXP_H_WCHARS */
    839 	char *bbeg;
    840 	int ct;
    841 
    842 	for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {
    843 
    844 	case CCHR:
    845 #ifdef	REGEXP_H_WCHARS
    846 	case CCH1:
    847 #endif
    848 		if (*ep++ == *lp++)
    849 			continue;
    850 		return(0);
    851 
    852 #ifdef	REGEXP_H_WCHARS
    853 	case CCHR|CMB:
    854 		if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
    855 			continue;
    856 		return(0);
    857 
    858 	case CCH2:
    859 		if (ep[0] == lp[0] && ep[1] == lp[1]) {
    860 			ep += 2, lp += 2;
    861 			continue;
    862 		}
    863 		return(0);
    864 
    865 	case CCH3:
    866 		if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
    867 			ep += 3, lp += 3;
    868 			continue;
    869 		}
    870 		return(0);
    871 #endif	/* REGEXP_H_WCHARS */
    872 
    873 	case CDOT:
    874 		if (*lp++)
    875 			continue;
    876 		return(0);
    877 #ifdef	REGEXP_H_WCHARS
    878 	case CDOT|CMB:
    879 		if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
    880 			continue;
    881 		return(0);
    882 #endif	/* REGEXP_H_WCHARS */
    883 
    884 	case CDOL:
    885 		if (*lp==0)
    886 			continue;
    887 		return(0);
    888 
    889 	case CCEOF:
    890 		loc2 = (char *)lp;
    891 		return(1);
    892 
    893 	case CCL:
    894 		c = *lp++ & 0377;
    895 		if(ISTHERE(c)) {
    896 			ep += 32;
    897 			continue;
    898 		}
    899 		return(0);
    900 
    901 #ifdef	REGEXP_H_WCHARS
    902 	case CCL|CMB:
    903 	case CNCL|CMB:
    904 		c = regexp_h_fetch(lp, 1);
    905 		if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
    906 			ep += (*ep & 0377) + 17;
    907 			continue;
    908 		}
    909 		return 0;
    910 #endif	/* REGEXP_H_WCHARS */
    911 
    912 	case CBRA:
    913 		braslist[*ep++ & 0377] = (char *)lp;
    914 		continue;
    915 
    916 	case CKET:
    917 		braelist[*ep++ & 0377] = (char *)lp;
    918 		continue;
    919 
    920 	case CBRC:
    921 		if (lp == regexp_h_bol && locs == NULL)
    922 			continue;
    923 		if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
    924 				&& !regexp_h_uletter(lp[-1] & 0377)
    925 				&& !isdigit(lp[-1] & 0377))
    926 			continue;
    927 		return(0);
    928 
    929 #ifdef	REGEXP_H_WCHARS
    930 	case CBRC|CMB:
    931 		c = regexp_h_show(lp);
    932 		d = regexp_h_previous(lp);
    933 		if ((iswdigit(c) || regexp_h_wuletter(c))
    934 				&& !regexp_h_wuletter(d)
    935 				&& !iswdigit(d))
    936 			continue;
    937 		return(0);
    938 #endif	/* REGEXP_H_WCHARS */
    939 
    940 	case CLET:
    941 		if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
    942 			continue;
    943 		return(0);
    944 
    945 #ifdef	REGEXP_H_WCHARS
    946 	case CLET|CMB:
    947 		c = regexp_h_show(lp);
    948 		if (!regexp_h_wuletter(c) && !iswdigit(c))
    949 			continue;
    950 		return(0);
    951 #endif	/* REGEXP_H_WCHARS */
    952 
    953 	case CCHR|RNGE:
    954 		c = *ep++;
    955 		regexp_h_getrnge(ep, least);
    956 		while(low--)
    957 			if(*lp++ != c)
    958 				return(0);
    959 		curlp = lp;
    960 		while(size--) {
    961 			regexp_h_push(&sb, &sp, &sc, lp);
    962 			if(*lp++ != c)
    963 				break;
    964 		}
    965 		if(size < 0) {
    966 			regexp_h_push(&sb, &sp, &sc, lp);
    967 			lp++;
    968 		}
    969 		ep += 2;
    970 		goto star;
    971 
    972 #ifdef	REGEXP_H_WCHARS
    973 	case CCHR|RNGE|CMB:
    974 	case CCH1|RNGE:
    975 	case CCH2|RNGE:
    976 	case CCH3|RNGE:
    977 		c = regexp_h_fetch(ep, 0);
    978 		regexp_h_getrnge(ep, least);
    979 		while (low--)
    980 			if (regexp_h_fetch(lp, 1) != c)
    981 				return 0;
    982 		curlp = lp;
    983 		while (size--) {
    984 			regexp_h_push(&sb, &sp, &sc, lp);
    985 			if (regexp_h_fetch(lp, 1) != c)
    986 				break;
    987 		}
    988 		if(size < 0) {
    989 			regexp_h_push(&sb, &sp, &sc, lp);
    990 			regexp_h_fetch(lp, 1);
    991 		}
    992 		ep += 2;
    993 		goto star;
    994 #endif	/* REGEXP_H_WCHARS */
    995 
    996 	case CDOT|RNGE:
    997 		regexp_h_getrnge(ep, least);
    998 		while(low--)
    999 			if(*lp++ == '\0')
   1000 				return(0);
   1001 		curlp = lp;
   1002 		while(size--) {
   1003 			regexp_h_push(&sb, &sp, &sc, lp);
   1004 			if(*lp++ == '\0')
   1005 				break;
   1006 		}
   1007 		if(size < 0) {
   1008 			regexp_h_push(&sb, &sp, &sc, lp);
   1009 			lp++;
   1010 		}
   1011 		ep += 2;
   1012 		goto star;
   1013 
   1014 #ifdef	REGEXP_H_WCHARS
   1015 	case CDOT|RNGE|CMB:
   1016 		regexp_h_getrnge(ep, least);
   1017 		while (low--)
   1018 			if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
   1019 				return 0;
   1020 		curlp = lp;
   1021 		while (size--) {
   1022 			regexp_h_push(&sb, &sp, &sc, lp);
   1023 			if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
   1024 				break;
   1025 		}
   1026 		if (size < 0) {
   1027 			regexp_h_push(&sb, &sp, &sc, lp);
   1028 			regexp_h_fetch(lp, 1);
   1029 		}
   1030 		ep += 2;
   1031 		goto star;
   1032 #endif	/* REGEXP_H_WCHARS */
   1033 
   1034 	case CCL|RNGE:
   1035 		regexp_h_getrnge(ep + 32, least);
   1036 		while(low--) {
   1037 			c = *lp++ & 0377;
   1038 			if(!ISTHERE(c))
   1039 				return(0);
   1040 		}
   1041 		curlp = lp;
   1042 		while(size--) {
   1043 			regexp_h_push(&sb, &sp, &sc, lp);
   1044 			c = *lp++ & 0377;
   1045 			if(!ISTHERE(c))
   1046 				break;
   1047 		}
   1048 		if(size < 0) {
   1049 			regexp_h_push(&sb, &sp, &sc, lp);
   1050 			lp++;
   1051 		}
   1052 		ep += 34;		/* 32 + 2 */
   1053 		goto star;
   1054 
   1055 #ifdef	REGEXP_H_WCHARS
   1056 	case CCL|RNGE|CMB:
   1057 	case CNCL|RNGE|CMB:
   1058 		regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
   1059 		while (low--) {
   1060 			c = regexp_h_fetch(lp, 1);
   1061 			if (!regexp_h_cclass(ep, c,
   1062 					(ep[-1] & 0377 & ~REGEXP_H_LEAST)
   1063 					== (CCL|RNGE|CMB)))
   1064 				return 0;
   1065 		}
   1066 		curlp = lp;
   1067 		while (size--) {
   1068 			regexp_h_push(&sb, &sp, &sc, lp);
   1069 			c = regexp_h_fetch(lp, 1);
   1070 			if (!regexp_h_cclass(ep, c,
   1071 					(ep[-1] & 0377 & ~REGEXP_H_LEAST)
   1072 					== (CCL|RNGE|CMB)))
   1073 				break;
   1074 		}
   1075 		if (size < 0) {
   1076 			regexp_h_push(&sb, &sp, &sc, lp);
   1077 			regexp_h_fetch(lp, 1);
   1078 		}
   1079 		ep += (*ep & 0377) + 19;
   1080 		goto star;
   1081 #endif	/* REGEXP_H_WCHARS */
   1082 
   1083 	case CBACK:
   1084 		bbeg = braslist[*ep & 0377];
   1085 		ct = braelist[*ep++ & 0377] - bbeg;
   1086 
   1087 		if(strncmp(bbeg, lp, ct) == 0) {
   1088 			lp += ct;
   1089 			continue;
   1090 		}
   1091 		return(0);
   1092 
   1093 	case CBACK|STAR:
   1094 		bbeg = braslist[*ep & 0377];
   1095 		ct = braelist[*ep++ & 0377] - bbeg;
   1096 		curlp = lp;
   1097 		while(strncmp(bbeg, lp, ct) == 0)
   1098 			lp += ct;
   1099 
   1100 		while(lp >= curlp) {
   1101 			if(regexp_h_advance(lp, ep))	return(1);
   1102 			lp -= ct;
   1103 		}
   1104 		return(0);
   1105 
   1106 
   1107 	case CDOT|STAR:
   1108 		curlp = lp;
   1109 		do
   1110 			regexp_h_push(&sb, &sp, &sc, lp);
   1111 		while (*lp++);
   1112 		goto star;
   1113 
   1114 #ifdef	REGEXP_H_WCHARS
   1115 	case CDOT|STAR|CMB:
   1116 		curlp = lp;
   1117 		do
   1118 			regexp_h_push(&sb, &sp, &sc, lp);
   1119 		while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
   1120 		goto star;
   1121 #endif	/* REGEXP_H_WCHARS */
   1122 
   1123 	case CCHR|STAR:
   1124 		curlp = lp;
   1125 		do
   1126 			regexp_h_push(&sb, &sp, &sc, lp);
   1127 		while (*lp++ == *ep);
   1128 		ep++;
   1129 		goto star;
   1130 
   1131 #ifdef	REGEXP_H_WCHARS
   1132 	case CCHR|STAR|CMB:
   1133 	case CCH1|STAR:
   1134 	case CCH2|STAR:
   1135 	case CCH3|STAR:
   1136 		curlp = lp;
   1137 		d = regexp_h_fetch(ep, 0);
   1138 		do
   1139 			regexp_h_push(&sb, &sp, &sc, lp);
   1140 		while (regexp_h_fetch(lp, 1) == d);
   1141 		goto star;
   1142 #endif	/* REGEXP_H_WCHARS */
   1143 
   1144 	case CCL|STAR:
   1145 		curlp = lp;
   1146 		do {
   1147 			regexp_h_push(&sb, &sp, &sc, lp);
   1148 			c = *lp++ & 0377;
   1149 		} while(ISTHERE(c));
   1150 		ep += 32;
   1151 		goto star;
   1152 
   1153 #ifdef	REGEXP_H_WCHARS
   1154 	case CCL|STAR|CMB:
   1155 	case CNCL|STAR|CMB:
   1156 		curlp = lp;
   1157 		do {
   1158 			regexp_h_push(&sb, &sp, &sc, lp);
   1159 			c = regexp_h_fetch(lp, 1);
   1160 		} while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
   1161 					== (CCL|STAR|CMB)));
   1162 		ep += (*ep & 0377) + 17;
   1163 		goto star;
   1164 #endif	/* REGEXP_H_WCHARS */
   1165 
   1166 	star:
   1167 #ifdef	REGEXP_H_WCHARS
   1168 		if (regexp_h_wchars == 0) {
   1169 #endif
   1170 			do {
   1171 				if(--lp == locs)
   1172 					break;
   1173 				if (regexp_h_advance(lp, ep))
   1174 					return(1);
   1175 			} while (lp > curlp);
   1176 #ifdef	REGEXP_H_WCHARS
   1177 		} else {
   1178 			do {
   1179 				lp = regexp_h_pop(&sb, &sp, &sc, lp);
   1180 				if (lp <= locs)
   1181 					break;
   1182 				if (regexp_h_advance(lp, ep)) {
   1183 					regexp_h_zerostak(&sb, &sp);
   1184 					return(1);
   1185 				}
   1186 			} while (lp > curlp);
   1187 			regexp_h_zerostak(&sb, &sp);
   1188 		}
   1189 #endif	/* REGEXP_H_WCHARS */
   1190 		return(0);
   1191 
   1192 	}
   1193 }
   1194 
   1195 static void
   1196 regexp_h_getrnge(register const char *str, int least)
   1197 {
   1198 	low = *str++ & 0377;
   1199 	size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
   1200 }
   1201 
   1202 int
   1203 advance(const char *lp, const char *ep)
   1204 {
   1205 	REGEXP_H_ADVANCE_INIT	/* skip past circf */
   1206 	regexp_h_bol = lp;
   1207 #ifdef	REGEXP_H_WCHARS
   1208 	regexp_h_firstwc = NULL;
   1209 #endif	/* REGEXP_H_WCHARS */
   1210 	return regexp_h_advance(lp, ep);
   1211 }