/*
 * Simple Regular Expression functions. Derived from Unix 7th Edition,
 * /usr/src/cmd/expr.y
 *
 * Modified by Gunnar Ritter, Freiburg i. Br., Germany, February 2002.
 *
 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   Redistributions of source code and documentation must retain the
 *    above copyright notice, this list of conditions and the following
 *    disclaimer.
 *   Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *   All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed or owned by Caldera
 *      International, Inc.
 *   Neither the name of Caldera International, Inc. nor the names of
 *    other contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Usage note: compile() is written in terms of macros that this file
 * does not define and that the including source must provide before
 * the #include:
 *
 *	INIT		declarations/initialisation placed first in compile()
 *	GETC()		yield the next pattern character and consume it
 *	PEEKC()		yield the next pattern character without consuming it
 *	UNGETC(c)	push the last character back
 *	RETURN(ptr)	leave compile() successfully; ptr is one past the
 *			last byte written to the expression buffer
 *	ERROR(val)	leave compile() with error number val
 *
 * The code below relies on ERROR() not falling through.
 */

#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
#define	REGEXP_H_USED	__attribute__ ((used))
#elif defined __GNUC__
#define	REGEXP_H_USED	__attribute__ ((unused))
#else
#define	REGEXP_H_USED
#endif
static const char regexp_h_sccsid[] REGEXP_H_USED =
	"@(#)regexp.sl 1.56 (gritter) 5/29/05";

#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__)
#define	REGEXP_H_WCHARS
#endif

/*
 * Opcodes of the compiled expression. The low two bits of an opcode
 * carry the STAR/RNGE modifier, REGEXP_H_LEAST marks an open-ended
 * \{m,\} range and CMB marks the multibyte variant of an opcode.
 */
#define	CBRA	2
#define	CCHR	4
#define	CDOT	8
#define	CCL	12
/*	CLNUM	14	used in sed */
/*	CEND	16	used in sed */
#define	CDOL	20
#define	CCEOF	22
#define	CKET	24
#define	CBACK	36
#define	CNCL	40
#define	CBRC	44
#define	CLET	48
#define	CCH1	52
#define	CCH2	56
#define	CCH3	60

#define	STAR	01
#define	RNGE	03
#define	REGEXP_H_LEAST	0100

#ifdef	REGEXP_H_WCHARS
#define	CMB	0200
#else	/* !REGEXP_H_WCHARS */
#define	CMB	0
#endif	/* !REGEXP_H_WCHARS */

#define	NBRA	9

/* Set / test the bit for character c in the class bitmap at ep. */
#define	PLACE(c)	ep[(c) >> 3] |= bittab[(c) & 07]
#define	ISTHERE(c)	(ep[(c) >> 3] & bittab[(c) & 07])

#ifdef	REGEXP_H_WCHARS
#define	REGEXP_H_IS_THERE(ep, c)	((ep)[(c) >> 3] & bittab[(c) & 07])
#endif

#include <ctype.h>
#include <string.h>
#include <limits.h>
#ifdef	REGEXP_H_WCHARS
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#endif	/* REGEXP_H_WCHARS */

#define	regexp_h_uletter(c)	(isalpha(c) || (c) == '_')
#ifdef	REGEXP_H_WCHARS
#define	regexp_h_wuletter(c)	(iswalpha(c) || (c) == L'_')

/*
 * Used to allocate memory for the multibyte star algorithm.
 */
#ifndef	regexp_h_malloc
#define	regexp_h_malloc(n)	malloc(n)
#endif
#ifndef	regexp_h_free
#define	regexp_h_free(p)	free(p)
#endif

/*
 * Can be predefined to 'inline' to inline some multibyte functions;
 * may improve performance for files that contain many multibyte
 * sequences.
 */
#ifndef	regexp_h_inline
#define	regexp_h_inline
#endif

/*
 * Mask to determine whether the first byte of a sequence possibly
 * starts a multibyte character. Set to 0377 to force mbtowc() for
 * any byte sequence (except 0).
 */
#ifndef	REGEXP_H_MASK
#define	REGEXP_H_MASK	0200
#endif
#endif	/* REGEXP_H_WCHARS */

/*
 * For regexpr.h.
 */
#ifndef	regexp_h_static
#define	regexp_h_static
#endif
#ifndef	REGEXP_H_STEP_INIT
#define	REGEXP_H_STEP_INIT
#endif
#ifndef	REGEXP_H_ADVANCE_INIT
#define	REGEXP_H_ADVANCE_INIT
#endif

char	*braslist[NBRA];	/* start of each \( \) group, set by advance */
char	*braelist[NBRA];	/* end of each \( \) group, set by advance */
int	nbra;			/* number of \( seen by compile() */
char	*loc1, *loc2, *locs;	/* match start, match end, no-match barrier */
int	sed;			/* nonzero selects the sed flavour of compile() */
int	nodelim;		/* set when compile() stopped at a newline */

regexp_h_static int	circf;	/* pattern was anchored with a leading ^ */
regexp_h_static int	low;	/* minimum count of the current \{m,n\} */
regexp_h_static int	size;	/* additional optional repetitions (n - m) */

regexp_h_static unsigned char	bittab[] = {
	1,
	2,
	4,
	8,
	16,
	32,
	64,
	128
};
static int	regexp_h_advance(register const char *lp,
			register const char *ep);
static void	regexp_h_getrnge(register const char *str, int least);

static const char	*regexp_h_bol;	/* beginning of input line (for \<) */

#ifdef	REGEXP_H_WCHARS
static int	regexp_h_wchars;	/* CMB in a multibyte locale, else 0 */
static int	regexp_h_mbcurmax;	/* MB_CUR_MAX sampled by compile() */

static const char	*regexp_h_firstwc;	/* location of first
						   multibyte character
						   on input line */

/*
 * Read the next pattern character into c. In multibyte mode bytes are
 * collected with GETC() until mbtowc() accepts them, the delimiter
 * `eof' is read, or MB_LEN_MAX bytes were consumed; an undecodable
 * sequence is reported as ERROR(67).
 */
#define	regexp_h_getwc(c)	{ \
	if (regexp_h_wchars) { \
		char mbbuf[MB_LEN_MAX + 1], *mbptr; \
		wchar_t wcbuf; \
		int mb, len; \
		mbptr = mbbuf; \
		do { \
			mb = GETC(); \
			*mbptr++ = mb; \
			*mbptr = '\0'; \
		} while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \
			&& mb != eof && mbptr < mbbuf + MB_LEN_MAX); \
		if (len == -1) \
			ERROR(67); \
		c = wcbuf; \
	} else { \
		c = GETC(); \
	} \
}

/*
 * Append wide character wc in multibyte form at mb and advance mb.
 * me is the end of the buffer; ERROR(50) is raised unless the encoded
 * character leaves at least one byte free before me.
 */
#define	regexp_h_store(wc, mb, me)	{ \
	int len; \
	if (wc == WEOF) \
		ERROR(67); \
	if ((len = me - mb) <= regexp_h_mbcurmax) { \
		char mt[MB_LEN_MAX]; \
		if (wctomb(mt, wc) >= len) \
			ERROR(50); \
	} \
	switch (len = wctomb(mb, wc)) { \
	case -1: \
		ERROR(67); \
	case 0: \
		mb++; \
		break; \
	default: \
		mb += len; \
	} \
}

/*
 * Decode the multibyte character at *mb and step *mb past it. An
 * undecodable byte is skipped and WEOF returned. If islp is set, the
 * position of the first character decoded here is remembered in
 * regexp_h_firstwc.
 */
static regexp_h_inline wint_t
regexp_h_fetchwc(const char **mb, int islp)
{
	wchar_t wc;
	int len;

	if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) {
		(*mb)++;
		return WEOF;
	}
	if (islp && regexp_h_firstwc == NULL)
		regexp_h_firstwc = *mb;
	/*
	 * len == 0 (a NUL byte) needs no handling here: regexp_h_fetch()
	 * takes bytes without REGEXP_H_MASK bits through its single-byte
	 * branch.
	 */
	*mb += len;
	return wc;
}

/* Fetch the character at mb and advance mb (mb must be an lvalue). */
#define	regexp_h_fetch(mb, islp)	((*(mb) & REGEXP_H_MASK) == 0 ? \
				(*(mb)++&0377): \
				regexp_h_fetchwc(&(mb), islp))

/* Decode the multibyte character at mb without advancing. */
static regexp_h_inline wint_t
regexp_h_showwc(const char *mb)
{
	wchar_t wc;

	if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0)
		return WEOF;
	return wc;
}

#define	regexp_h_show(mb)	((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \
				regexp_h_showwc(mb))

/*
 * Return the character immediately preceding mb. Since no byte is
 * required to be the first byte of a character, the longest multibyte
 * character ending at &[mb-1] is searched.
 */
static regexp_h_inline wint_t
regexp_h_previous(const char *mb)
{
	const char *p = mb;
	wchar_t wc;
	wint_t lastwc = WEOF;	/* wint_t: must be able to hold WEOF */
	int len, max = 0;

	if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc)
		return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF);
	while (p-- > regexp_h_bol) {
		mbtowc(NULL, NULL, 0);	/* reset the conversion state */
		if ((len = mbtowc(&wc, p, mb - p)) >= 0) {
			if (len < max || len < mb - p)
				break;
			max = len;
			lastwc = wc;
		} else if (len < 0 && max > 0)
			break;
	}
	return lastwc;
}

/*
 * Test character c against the multibyte class at set (set points at
 * the length byte). af is the result for "c is a member". Characters
 * up to 0177 are looked up in the bitmap, others in the stored list.
 */
#define	regexp_h_cclass(set, c, af)	\
	((c) == 0 || (c) == WEOF ? 0 : ( \
		((c) > 0177) ? \
			regexp_h_cclass_wc(set, c, af) : ( \
				REGEXP_H_IS_THERE((set)+1, (c)) ? (af) : !(af) \
			) \
		) \
	)

/*
 * Scan the multibyte character list that follows the 16-byte bitmap
 * of a class for c, honouring `x-y' ranges.
 */
static regexp_h_inline int
regexp_h_cclass_wc(const char *set, register wint_t c, int af)
{
	register wint_t wc, wl = WEOF;
	const char *end;

	end = &set[18] + set[0] - 1;
	set += 17;
	while (set < end) {
		wc = regexp_h_fetch(set, 0);
#ifdef	REGEXP_H_VI_BACKSLASH
		if (wc == '\\' && set < end &&
				(*set == ']' || *set == '-' ||
				 *set == '^' || *set == '\\')) {
			wc = regexp_h_fetch(set, 0);
		} else
#endif	/* REGEXP_H_VI_BACKSLASH */
		if (wc == '-' && wl != WEOF && set < end) {
			wc = regexp_h_fetch(set, 0);
#ifdef	REGEXP_H_VI_BACKSLASH
			if (wc == '\\' && set < end &&
					(*set == ']' || *set == '-' ||
					 *set == '^' || *set == '\\')) {
				wc = regexp_h_fetch(set, 0);
			}
#endif	/* REGEXP_H_VI_BACKSLASH */
			if (c > wl && c < wc)
				return af;
		}
		if (c == wc)
			return af;
		wl = wc;
	}
	return !af;
}
#else	/* !REGEXP_H_WCHARS */
#define	regexp_h_wchars	0
#define	regexp_h_getwc(c)	{ c = GETC(); }
#endif	/* !REGEXP_H_WCHARS */

/*
 * Translate the pattern delivered by GETC()/PEEKC() into opcodes
 * stored at ep.
 *
 * instring	passed through for use by INIT; not interpreted here
 * ep		start of the expression buffer
 * endbuf	end of the expression buffer; nothing is written at or
 *		beyond it
 * seof		character that terminates the pattern
 *
 * Leaves through RETURN(ptr) with ptr one past the final CCEOF, or
 * through ERROR(n). Error numbers raised here: 11 range count above
 * 255, 16 non-digit in \{ \}, 25 back reference to a group not yet
 * closed, 36 newline in pattern, 41 empty pattern with no previous
 * expression in ep, 42 \( \) imbalance, 43 too many \(, 44 more than
 * one comma in \{ \}, 45 missing } after \, 46 first count exceeds
 * second, 49 NUL or newline inside [ ], 50 expression buffer too
 * small, 67 undecodable multibyte sequence.
 */
regexp_h_static char *
compile(char *instring, char *ep, const char *endbuf, int seof)
{
	INIT	/* Dependent declarations and initializations */
	register int c;
	register int eof = seof;
	char *lastep = instring;
	int cclcnt;
	char bracket[NBRA], *bracketp;
	int closed;
	char neg;
	int lc;
	int i, cflg;

#ifdef	REGEXP_H_WCHARS
	char *eq;
	regexp_h_mbcurmax = MB_CUR_MAX;
	regexp_h_wchars = regexp_h_mbcurmax > 1 ? CMB : 0;
#endif
	lastep = 0;
	bracketp = bracket;
	if((c = GETC()) == eof || c == '\n') {
		/* empty pattern: keep whatever ep already holds */
		if (c == '\n') {
			UNGETC(c);
			nodelim = 1;
		}
		if(*ep == 0 && !sed)
			ERROR(41);
		if (bracketp > bracket)
			ERROR(42);
		RETURN(ep);
	}
	circf = closed = nbra = 0;
	if (c == '^')
		circf++;
	else
		UNGETC(c);
	for (;;) {
		if (ep >= endbuf)
			ERROR(50);
		regexp_h_getwc(c);
		/* lastep stays on the previous item for `*' and `\{' */
		if(c != '*' && ((c != '\\') || (PEEKC() != '{')))
			lastep = ep;
		if (c == eof) {
			*ep++ = CCEOF;
			if (bracketp > bracket)
				ERROR(42);
			RETURN(ep);
		}
		switch (c) {

		case '.':
			*ep++ = CDOT|regexp_h_wchars;
			continue;

		case '\n':
			if (sed == 0) {
				UNGETC(c);
				*ep++ = CCEOF;
				nodelim = 1;
				RETURN(ep);
			}
			ERROR(36);
		case '*':
			if (lastep==0 || *lastep==CBRA || *lastep==CKET ||
					*lastep==(CBRC|regexp_h_wchars) ||
					*lastep==(CLET|regexp_h_wchars))
				goto defchar;
			*lastep |= STAR;
			continue;

		case '$':
			if(PEEKC() != eof)
				goto defchar;
			*ep++ = CDOL;
			continue;

		case '[':
#ifdef	REGEXP_H_WCHARS
			if (regexp_h_wchars == 0) {
#endif
				/* opcode + 32-byte bitmap + room for CCEOF */
				if(endbuf - ep <= 33)
					ERROR(50);

				*ep++ = CCL;
				lc = 0;
				for(i = 0; i < 32; i++)
					ep[i] = 0;

				neg = 0;
				if((c = GETC()) == '^') {
					neg = 1;
					c = GETC();
				}

				do {
					c &= 0377;
					if(c == '\0' || c == '\n')
						ERROR(49);
#ifdef	REGEXP_H_VI_BACKSLASH
					if(c == '\\' && ((c = PEEKC()) == ']' ||
							c == '-' || c == '^' ||
							c == '\\')) {
						c = GETC();
						c &= 0377;
					} else
#endif	/* REGEXP_H_VI_BACKSLASH */
					if(c == '-' && lc != 0) {
						if ((c = GETC()) == ']') {
							PLACE('-');
							break;
						}
#ifdef	REGEXP_H_VI_BACKSLASH
						if(c == '\\' &&
							((c = PEEKC()) == ']' ||
							 c == '-' ||
							 c == '^' ||
							 c == '\\'))
							c = GETC();
#endif	/* REGEXP_H_VI_BACKSLASH */
						c &= 0377;
						while(lc < c) {
							PLACE(lc);
							lc++;
						}
					}
					lc = c;
					PLACE(c);
				} while((c = GETC()) != ']');
				if(neg) {
					for(cclcnt = 0; cclcnt < 32; cclcnt++)
						ep[cclcnt] ^= 0377;
					ep[0] &= 0376;	/* never match NUL */
				}

				ep += 32;
#ifdef	REGEXP_H_WCHARS
			} else {
				/*
				 * Multibyte class: opcode, length byte,
				 * 16-byte bitmap for characters up to 0177,
				 * then the class text in multibyte form.
				 */
				if (endbuf - ep <= 18)
					ERROR(50);
				*ep++ = CCL|CMB;
				*ep++ = 0;
				lc = 0;
				for (i = 0; i < 16; i++)
					ep[i] = 0;
				eq = &ep[16];
				regexp_h_getwc(c);
				if (c == L'^') {
					regexp_h_getwc(c);
					ep[-2] = CNCL|CMB;
				}
				do {
					if (c == '\0' || c == '\n')
						ERROR(49);
#ifdef	REGEXP_H_VI_BACKSLASH
					if(c == '\\' && ((c = PEEKC()) == ']' ||
							c == '-' || c == '^' ||
							c == '\\')) {
						regexp_h_store(c, eq, endbuf);
						regexp_h_getwc(c);
					} else
#endif	/* REGEXP_H_VI_BACKSLASH */
					if (c == '-' && lc != 0 && lc <= 0177) {
						regexp_h_store(c, eq, endbuf);
						regexp_h_getwc(c);
						if (c == ']') {
							PLACE('-');
							break;
						}
#ifdef	REGEXP_H_VI_BACKSLASH
						if(c == '\\' &&
							((c = PEEKC()) == ']' ||
							 c == '-' ||
							 c == '^' ||
							 c == '\\')) {
							regexp_h_store(c, eq,
								endbuf);
							regexp_h_getwc(c);
						}
#endif	/* REGEXP_H_VI_BACKSLASH */
						/*
						 * The bitmap covers 0..0177
						 * only. When the range ends
						 * above that, every bitmap
						 * character from lc upwards
						 * is a member.
						 */
						while (lc < (c > 0177 ? 0200 : c)) {
							PLACE(lc);
							lc++;
						}
					}
					lc = c;
					if (c <= 0177)
						PLACE(c);
					regexp_h_store(c, eq, endbuf);
					regexp_h_getwc(c);
				} while (c != L']');
				if ((i = eq - &ep[16]) > 255)
					ERROR(50);
				lastep[1] = i;
				ep = eq;
			}
#endif	/* REGEXP_H_WCHARS */

			continue;

		case '\\':
			regexp_h_getwc(c);
			switch(c) {

			case '(':
				if(nbra >= NBRA)
					ERROR(43);
				if (endbuf - ep < 2)
					ERROR(50);
				*bracketp++ = nbra;
				*ep++ = CBRA;
				*ep++ = nbra++;
				continue;

			case ')':
				if(bracketp <= bracket)
					ERROR(42);
				if (endbuf - ep < 2)
					ERROR(50);
				*ep++ = CKET;
				*ep++ = *--bracketp;
				closed++;
				continue;

			case '<':
				*ep++ = CBRC|regexp_h_wchars;
				continue;

			case '>':
				*ep++ = CLET|regexp_h_wchars;
				continue;

			case '{':
				if(lastep == (char *) (0))
					goto defchar;
				/* two count bytes are always emitted */
				if (endbuf - ep < 2)
					ERROR(50);
				*lastep |= RNGE;
				cflg = 0;
			nlim:
				c = GETC();
				i = 0;
				do {
					if ('0' <= c && c <= '9') {
						i = 10 * i + c - '0';
						/* bound i so it cannot overflow */
						if (i > 255)
							ERROR(11);
					} else
						ERROR(16);
				} while(((c = GETC()) != '\\') && (c != ','));
				if (i > 255)
					ERROR(11);
				*ep++ = i;
				if (c == ',') {
					if(cflg++)
						ERROR(44);
					if((c = GETC()) == '\\') {
						*ep++ = (char)255;
						*lastep |= REGEXP_H_LEAST;
					} else {
						UNGETC(c);
						goto nlim; /* get 2'nd number */
					}
				}
				if(GETC() != '}')
					ERROR(45);
				if(!cflg)	/* one number */
					*ep++ = i;
				else if((ep[-1] & 0377) < (ep[-2] & 0377))
					ERROR(46);
				continue;

			case '\n':
				ERROR(36);

			case 'n':
				c = '\n';
				goto defchar;

			default:
				if(c >= '1' && c <= '9') {
					if((c -= '1') >= closed)
						ERROR(25);
					if (endbuf - ep < 2)
						ERROR(50);
					*ep++ = CBACK;
					*ep++ = c;
					continue;
				}
			}
	/* Drop through to default to use \ to turn off special chars */

		defchar:
		default:
			lastep = ep;
#ifdef	REGEXP_H_WCHARS
			if (regexp_h_wchars == 0) {
#endif
				if (endbuf - ep < 2)
					ERROR(50);
				*ep++ = CCHR;
				*ep++ = c;
#ifdef	REGEXP_H_WCHARS
			} else {
				char	mbbuf[MB_LEN_MAX];

				/* pick the opcode by encoded length */
				switch (wctomb(mbbuf, c)) {
				case 1:	*ep++ = CCH1;
					break;
				case 2:	*ep++ = CCH2;
					break;
				case 3:	*ep++ = CCH3;
					break;
				default:
					*ep++ = CCHR|CMB;
				}
				regexp_h_store(c, ep, endbuf);
			}
#endif	/* REGEXP_H_WCHARS */
		}
	}
}

/*
 * Search the string p1 for the compiled expression p2. Returns 1 and
 * sets loc1 (start of match; loc2 is set by regexp_h_advance()) on
 * success, 0 otherwise. With circf set only the start of p1 is tried.
 */
int
step(const char *p1, const char *p2)
{
	register int c;
#ifdef	REGEXP_H_WCHARS
	register int d;
#endif	/* REGEXP_H_WCHARS */

	REGEXP_H_STEP_INIT	/* get circf */
	regexp_h_bol = p1;
#ifdef	REGEXP_H_WCHARS
	regexp_h_firstwc = NULL;
#endif	/* REGEXP_H_WCHARS */
	if (circf) {
		loc1 = (char *)p1;
		return(regexp_h_advance(p1, p2));
	}
	/* fast check for first character */
	if (*p2==CCHR) {
		c = p2[1] & 0377;
		do {
			if ((*p1 & 0377) != c)
				continue;
			if (regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
		} while (*p1++);
		return(0);
	}
#ifdef	REGEXP_H_WCHARS
	else if (*p2==CCH1) {
		do {
			if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
			c = regexp_h_fetch(p1, 1);
		} while (c);
		return(0);
	} else if (*p2==CCH2) {
		do {
			if (p1[0] == p2[1] && p1[1] == p2[2] &&
					regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
			c = regexp_h_fetch(p1, 1);
		} while (c);
		return(0);
	} else if (*p2==CCH3) {
		do {
			if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&&
					regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
			c = regexp_h_fetch(p1, 1);
		} while (c);
		return(0);
	} else if ((*p2&0377)==(CCHR|CMB)) {
		/*
		 * The literal follows the opcode byte. Decode it from
		 * there, and try each subject position before stepping
		 * over it, so that p2 still points at the opcode and p1
		 * at the candidate when regexp_h_advance() is called.
		 */
		const char *lit = &p2[1];

		d = regexp_h_fetch(lit, 0);
		do {
			if (regexp_h_show(p1) == d &&
					regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
			c = regexp_h_fetch(p1, 1);
		} while(c);
		return(0);
	}
	/* regular algorithm */
	if (regexp_h_wchars)
		do {
			if (regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
			c = regexp_h_fetch(p1, 1);
		} while (c);
	else
#endif	/* REGEXP_H_WCHARS */
		do {
			if (regexp_h_advance(p1, p2)) {
				loc1 = (char *)p1;
				return(1);
			}
		} while (*p1++);
	return(0);
}

#ifdef	REGEXP_H_WCHARS
/*
 * It is painfully slow to read character-wise backwards in a
 * multibyte string (see regexp_h_previous() above). For the star
 * algorithm, we therefore keep track of every character as it is
 * read in forward direction.
 *
 * Don't use alloca() for stack blocks since there is no measurable
 * speedup and huge amounts of memory are used up for long input
 * lines.
 */
#ifndef	REGEXP_H_STAKBLOK
#define	REGEXP_H_STAKBLOK	1000
#endif

/* One block of the doubly linked stack of character positions. */
struct	regexp_h_stack {
	struct regexp_h_stack	*s_nxt;
	struct regexp_h_stack	*s_prv;
	const char	*s_ptr[REGEXP_H_STAKBLOK];
};

#define	regexp_h_push(sb, sp, sc, lp)	(regexp_h_wchars ? \
			regexp_h_pushwc(sb, sp, sc, lp) : (void)0)

/*
 * Record position lp on the stack. *sb is the first block, *sp the
 * current block and *sc the next free slot. Positions before the
 * first multibyte character are not recorded; an allocation failure
 * silently drops the position.
 */
static regexp_h_inline void
regexp_h_pushwc(struct regexp_h_stack **sb,
		struct regexp_h_stack **sp,
		const char ***sc, const char *lp)
{
	if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc)
		return;
	if (*sb == NULL) {
		if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL)
			return;
		(*sb)->s_nxt = (*sb)->s_prv = NULL;
		*sp = *sb;
		*sc = &(*sb)->s_ptr[0];
	} else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) {
		if ((*sp)->s_nxt == NULL) {
			struct regexp_h_stack	*bq;

			if ((bq = regexp_h_malloc(sizeof *bq)) == NULL)
				return;
			bq->s_nxt = NULL;
			bq->s_prv = *sp;
			(*sp)->s_nxt = bq;
			*sp = bq;
		} else
			*sp = (*sp)->s_nxt;
		*sc = &(*sp)->s_ptr[0];
	}
	*(*sc)++ = lp;
}

/*
 * Return the position preceding lp: the byte before it while still in
 * the single-byte prefix of the line, else the most recently pushed
 * position. Blocks that become empty are freed on the way.
 */
static regexp_h_inline const char *
regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp,
		const char ***sc, const char *lp)
{
	if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc)
		return &lp[-1];
	if (*sp == NULL)
		return regexp_h_firstwc;
	if (*sc == &(*sp)->s_ptr[0]) {
		if ((*sp)->s_prv == NULL) {
			regexp_h_free(*sp);
			*sp = NULL;
			*sb = NULL;
			return regexp_h_firstwc;
		}
		*sp = (*sp)->s_prv;
		regexp_h_free((*sp)->s_nxt);
		(*sp)->s_nxt = NULL ;
		*sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK];
	}
	return *(--(*sc));
}

/* Free every block of the stack and reset *sb and *sp to NULL. */
static void
regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp)
{
	for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt)
		if ((*sp)->s_prv)
			regexp_h_free((*sp)->s_prv);
	if (*sp) {
		if ((*sp)->s_prv)
			regexp_h_free((*sp)->s_prv);
		regexp_h_free(*sp);
	}
	*sp = *sb = NULL;
}
#else	/* !REGEXP_H_WCHARS */
#define	regexp_h_push(sb, sp, sc, lp)
#endif	/* !REGEXP_H_WCHARS */

/*
 * Match the compiled expression ep against the text at lp, anchored
 * at lp. Returns 1 with loc2 set to the end of the match, 0 if there
 * is none. Starred and ranged items first consume as much as they can
 * and then back off one character at a time at the `star' label.
 */
static int
regexp_h_advance(const char *lp, const char *ep)
{
	register const char *curlp;
	int c, least;
#ifdef	REGEXP_H_WCHARS
	int d;
	struct regexp_h_stack	*sb = NULL, *sp = NULL;
	const char	**sc = NULL;
#endif	/* REGEXP_H_WCHARS */
	char *bbeg;
	int ct;

	for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) {

	case CCHR:
#ifdef	REGEXP_H_WCHARS
	case CCH1:
#endif
		if (*ep++ == *lp++)
			continue;
		return(0);

#ifdef	REGEXP_H_WCHARS
	case CCHR|CMB:
		if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1))
			continue;
		return(0);

	case CCH2:
		if (ep[0] == lp[0] && ep[1] == lp[1]) {
			ep += 2, lp += 2;
			continue;
		}
		return(0);

	case CCH3:
		if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) {
			ep += 3, lp += 3;
			continue;
		}
		return(0);
#endif	/* REGEXP_H_WCHARS */

	case CDOT:
		if (*lp++)
			continue;
		return(0);
#ifdef	REGEXP_H_WCHARS
	case CDOT|CMB:
		if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF)
			continue;
		return(0);
#endif	/* REGEXP_H_WCHARS */

	case CDOL:
		if (*lp==0)
			continue;
		return(0);

	case CCEOF:
		loc2 = (char *)lp;
		return(1);

	case CCL:
		c = *lp++ & 0377;
		if(ISTHERE(c)) {
			ep += 32;
			continue;
		}
		return(0);

#ifdef	REGEXP_H_WCHARS
	case CCL|CMB:
	case CNCL|CMB:
		c = regexp_h_fetch(lp, 1);
		if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) {
			ep += (*ep & 0377) + 17;
			continue;
		}
		return 0;
#endif	/* REGEXP_H_WCHARS */

	case CBRA:
		braslist[*ep++ & 0377] = (char *)lp;
		continue;

	case CKET:
		braelist[*ep++ & 0377] = (char *)lp;
		continue;

	case CBRC:
		if (lp == regexp_h_bol && locs == NULL)
			continue;
		if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377))
				&& !regexp_h_uletter(lp[-1] & 0377)
				&& !isdigit(lp[-1] & 0377))
			continue;
		return(0);

#ifdef	REGEXP_H_WCHARS
	case CBRC|CMB:
		c = regexp_h_show(lp);
		d = regexp_h_previous(lp);
		if ((iswdigit(c) || regexp_h_wuletter(c))
				&& !regexp_h_wuletter(d)
				&& !iswdigit(d))
			continue;
		return(0);
#endif	/* REGEXP_H_WCHARS */

	case CLET:
		if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377))
			continue;
		return(0);

#ifdef	REGEXP_H_WCHARS
	case CLET|CMB:
		c = regexp_h_show(lp);
		if (!regexp_h_wuletter(c) && !iswdigit(c))
			continue;
		return(0);
#endif	/* REGEXP_H_WCHARS */

	case CCHR|RNGE:
		c = *ep++;
		regexp_h_getrnge(ep, least);
		while(low--)
			if(*lp++ != c)
				return(0);
		curlp = lp;
		while(size--) {
			regexp_h_push(&sb, &sp, &sc, lp);
			if(*lp++ != c)
				break;
		}
		if(size < 0) {
			regexp_h_push(&sb, &sp, &sc, lp);
			lp++;
		}
		ep += 2;
		goto star;

#ifdef	REGEXP_H_WCHARS
	case CCHR|RNGE|CMB:
	case CCH1|RNGE:
	case CCH2|RNGE:
	case CCH3|RNGE:
		c = regexp_h_fetch(ep, 0);
		regexp_h_getrnge(ep, least);
		while (low--)
			if (regexp_h_fetch(lp, 1) != c)
				return 0;
		curlp = lp;
		while (size--) {
			regexp_h_push(&sb, &sp, &sc, lp);
			if (regexp_h_fetch(lp, 1) != c)
				break;
		}
		if(size < 0) {
			regexp_h_push(&sb, &sp, &sc, lp);
			regexp_h_fetch(lp, 1);
		}
		ep += 2;
		goto star;
#endif	/* REGEXP_H_WCHARS */

	case CDOT|RNGE:
		regexp_h_getrnge(ep, least);
		while(low--)
			if(*lp++ == '\0')
				return(0);
		curlp = lp;
		while(size--) {
			regexp_h_push(&sb, &sp, &sc, lp);
			if(*lp++ == '\0')
				break;
		}
		if(size < 0) {
			regexp_h_push(&sb, &sp, &sc, lp);
			lp++;
		}
		ep += 2;
		goto star;

#ifdef	REGEXP_H_WCHARS
	case CDOT|RNGE|CMB:
		regexp_h_getrnge(ep, least);
		while (low--)
			if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
				return 0;
		curlp = lp;
		while (size--) {
			regexp_h_push(&sb, &sp, &sc, lp);
			if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF)
				break;
		}
		if (size < 0) {
			regexp_h_push(&sb, &sp, &sc, lp);
			regexp_h_fetch(lp, 1);
		}
		ep += 2;
		goto star;
#endif	/* REGEXP_H_WCHARS */

	case CCL|RNGE:
		regexp_h_getrnge(ep + 32, least);
		while(low--) {
			c = *lp++ & 0377;
			if(!ISTHERE(c))
				return(0);
		}
		curlp = lp;
		while(size--) {
			regexp_h_push(&sb, &sp, &sc, lp);
			c = *lp++ & 0377;
			if(!ISTHERE(c))
				break;
		}
		if(size < 0) {
			regexp_h_push(&sb, &sp, &sc, lp);
			lp++;
		}
		ep += 34;		/* 32 + 2 */
		goto star;

#ifdef	REGEXP_H_WCHARS
	case CCL|RNGE|CMB:
	case CNCL|RNGE|CMB:
		regexp_h_getrnge(ep + (*ep & 0377) + 17, least);
		while (low--) {
			c = regexp_h_fetch(lp, 1);
			if (!regexp_h_cclass(ep, c,
					(ep[-1] & 0377 & ~REGEXP_H_LEAST)
					== (CCL|RNGE|CMB)))
				return 0;
		}
		curlp = lp;
		while (size--) {
			regexp_h_push(&sb, &sp, &sc, lp);
			c = regexp_h_fetch(lp, 1);
			if (!regexp_h_cclass(ep, c,
					(ep[-1] & 0377 & ~REGEXP_H_LEAST)
					== (CCL|RNGE|CMB)))
				break;
		}
		if (size < 0) {
			regexp_h_push(&sb, &sp, &sc, lp);
			regexp_h_fetch(lp, 1);
		}
		ep += (*ep & 0377) + 19;
		goto star;
#endif	/* REGEXP_H_WCHARS */

	case CBACK:
		bbeg = braslist[*ep & 0377];
		ct = braelist[*ep++ & 0377] - bbeg;

		if(strncmp(bbeg, lp, ct) == 0) {
			lp += ct;
			continue;
		}
		return(0);

	case CBACK|STAR:
		bbeg = braslist[*ep & 0377];
		ct = braelist[*ep++ & 0377] - bbeg;
		/*
		 * An empty capture repeated any number of times is still
		 * empty. Both loops below step by ct and would never
		 * terminate, so just go on with the rest of the pattern.
		 */
		if (ct <= 0)
			continue;
		curlp = lp;
		while(strncmp(bbeg, lp, ct) == 0)
			lp += ct;

		while(lp >= curlp) {
			if(regexp_h_advance(lp, ep))	return(1);
			lp -= ct;
		}
		return(0);


	case CDOT|STAR:
		curlp = lp;
		do
			regexp_h_push(&sb, &sp, &sc, lp);
		while (*lp++);
		goto star;

#ifdef	REGEXP_H_WCHARS
	case CDOT|STAR|CMB:
		curlp = lp;
		do
			regexp_h_push(&sb, &sp, &sc, lp);
		while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF);
		goto star;
#endif	/* REGEXP_H_WCHARS */

	case CCHR|STAR:
		curlp = lp;
		do
			regexp_h_push(&sb, &sp, &sc, lp);
		while (*lp++ == *ep);
		ep++;
		goto star;

#ifdef	REGEXP_H_WCHARS
	case CCHR|STAR|CMB:
	case CCH1|STAR:
	case CCH2|STAR:
	case CCH3|STAR:
		curlp = lp;
		d = regexp_h_fetch(ep, 0);
		do
			regexp_h_push(&sb, &sp, &sc, lp);
		while (regexp_h_fetch(lp, 1) == d);
		goto star;
#endif	/* REGEXP_H_WCHARS */

	case CCL|STAR:
		curlp = lp;
		do {
			regexp_h_push(&sb, &sp, &sc, lp);
			c = *lp++ & 0377;
		} while(ISTHERE(c));
		ep += 32;
		goto star;

#ifdef	REGEXP_H_WCHARS
	case CCL|STAR|CMB:
	case CNCL|STAR|CMB:
		curlp = lp;
		do {
			regexp_h_push(&sb, &sp, &sc, lp);
			c = regexp_h_fetch(lp, 1);
		} while (regexp_h_cclass(ep, c, (ep[-1] & 0377)
					== (CCL|STAR|CMB)));
		ep += (*ep & 0377) + 17;
		goto star;
#endif	/* REGEXP_H_WCHARS */

	star:
		/* back off one character at a time, longest match first */
#ifdef	REGEXP_H_WCHARS
		if (regexp_h_wchars == 0) {
#endif
			do {
				if(--lp == locs)
					break;
				if (regexp_h_advance(lp, ep))
					return(1);
			} while (lp > curlp);
#ifdef	REGEXP_H_WCHARS
		} else {
			do {
				lp = regexp_h_pop(&sb, &sp, &sc, lp);
				if (lp <= locs)
					break;
				if (regexp_h_advance(lp, ep)) {
					regexp_h_zerostak(&sb, &sp);
					return(1);
				}
			} while (lp > curlp);
			regexp_h_zerostak(&sb, &sp);
		}
#endif	/* REGEXP_H_WCHARS */
		return(0);

	}
}

/*
 * Load the repetition bounds stored at str into `low' (minimum) and
 * `size' (number of further optional repetitions). With the
 * REGEXP_H_LEAST flag in least the range is open ended.
 */
static void
regexp_h_getrnge(register const char *str, int least)
{
	low = *str++ & 0377;
	size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low;
}

/*
 * Match the compiled expression ep at the start of lp only. Returns 1
 * with loc2 set to the end of the match, 0 otherwise.
 */
int
advance(const char *lp, const char *ep)
{
	REGEXP_H_ADVANCE_INIT	/* skip past circf */
	regexp_h_bol = lp;
#ifdef	REGEXP_H_WCHARS
	regexp_h_firstwc = NULL;
#endif	/* REGEXP_H_WCHARS */
	return regexp_h_advance(lp, ep);
}