regnfa.c (22968B)
1 /* 2 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 3 * 4 * Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05 5 */ 6 /* UNIX(R) Regular Expresssion Library 7 * 8 * Note: Code is released under the GNU LGPL 9 * 10 * Copyright (C) 2001 Caldera International, Inc. 11 * 12 * This library is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2 of the License, or (at your option) any later version. 16 * 17 * This library is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with this library; if not, write to: 24 * Free Software Foundation, Inc. 25 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 */ 27 28 /* #include "synonyms.h" */ 29 #include <string.h> 30 #include <stdlib.h> 31 #include "re.h" 32 #include <stddef.h> 33 #include <ctype.h> 34 35 typedef unsigned char Uchar; 36 typedef unsigned short Ushort; 37 38 /* 39 * Nondeterministic Finite Automata. 40 */ 41 typedef struct t_graph Graph; 42 struct t_graph 43 { 44 union 45 { 46 Graph *ptr; 47 Info info; 48 } alt; 49 Graph *next; 50 w_type op; 51 }; 52 53 typedef struct t_stack Stack; 54 struct t_stack 55 { 56 Stack *link; /* simplifies cleanup */ 57 Stack *prev; /* covered states */ 58 Graph *wasgp; /* node associated with this state */ 59 const Uchar *str; /* saved position in the string */ 60 Ushort cnt; /* ROP_BRACE: traversal count */ 61 }; 62 63 /* 64 * A Context holds all the information needed for each 65 * potential path through the NFA graph. 66 */ 67 typedef struct t_ctxt Context; 68 struct t_ctxt 69 { 70 Context *link; /* simplifies cleanup */ 71 Context *next; /* singly linked */ 72 Stack *sp; /* nested counts */ 73 Graph *gp; /* starting node */ 74 Graph *wasgp; /* node associated with this state */ 75 const Uchar *str; /* saved position in the string */ 76 Ushort cnt; /* ROP_BRACE: traversal count */ 77 size_t nset; /* length of rm[] that is currently set */ 78 regmatch_t rm[1]; /* enough to cover re_nsub+1 (np->rmlen) */ 79 }; 80 81 struct re_nfa_ /*Nfa*/ 82 { 83 Graph *gp; /* entire NFA */ 84 Stack *sp; /* unused Stacks */ 85 Stack *allsp; /* linked Stacks (for cleanup) */ 86 Context *allcp; /* linked Contexts (for cleanup) */ 87 Context *cur; /* Contexts to be continued now */ 88 Context *step; /* Contexts waiting for a step of the NFA */ 89 Context *avail; /* unused Contexts */ 90 Context **ecur; /* ends cur list of Contexts */ 91 Context **estp; /* ends step list of Contexts */ 92 size_t rmlen; /* length of rm[] in each Context */ 93 size_t rmmin; /* minimum length needed */ 94 size_t used; /* length used for this libuxre_regnfaexec() */ 95 w_type beg; /* nonzero for fixed char initial node NFAs */ 96 }; 97 98 #define ROP_MTOR ROP_CAT /* ROP_OR, except might be empty loop */ 99 100 /* 101 * Depth first traversal. 102 * Make a singly linked list (in alt.ptr) of the graph's nodes. 103 * Must toss any ROP_BKTs, too, since "alt" is overwritten. 104 */ 105 static void 106 deltolist(Graph *gp, Graph **list) 107 { 108 Graph *ptr; 109 110 if ((ptr = gp->next) != 0) /* first time */ 111 { 112 gp->next = 0; 113 if (gp->op == ROP_OR || gp->op == ROP_MTOR) 114 deltolist(gp->alt.ptr, list); 115 deltolist(ptr, list); 116 if (gp->op == ROP_BKT) 117 { 118 libuxre_bktfree(gp->alt.info.bkt); 119 free(gp->alt.info.bkt); 120 } 121 } 122 else if (gp->op == ROP_END) 123 gp->op = ROP_NOP; 124 else 125 return; 126 gp->alt.ptr = *list; 127 *list = gp; 128 } 129 130 /* 131 * After the list is turned into a linked list, 132 * walk that list freeing the nodes. 133 */ 134 static void 135 delgraph(Graph *gp) 136 { 137 Graph *gp2, end; 138 139 gp2 = &end; 140 deltolist(gp, &gp2); 141 while ((gp = gp2) != &end) 142 { 143 gp2 = gp->alt.ptr; 144 free(gp); 145 } 146 } 147 148 /* 149 * Depth first traversal. 150 * Look for ROP_NOPs and prune them from the graph. 151 * Chain them all together on *nop's list. 152 */ 153 static Graph * 154 nopskip(Graph *gp, Graph **nop) 155 { 156 Graph *ptr; 157 158 if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */ 159 { 160 if (gp->op == ROP_NOP) 161 { 162 if (gp->alt.ptr != 0) /* touched */ 163 return gp->next; /* already did it */ 164 gp->alt.ptr = *nop; 165 *nop = gp; 166 } 167 gp->next = 0; /* this subgraph's pending */ 168 if (gp->op == ROP_OR || gp->op == ROP_MTOR) 169 gp->alt.ptr = nopskip(gp->alt.ptr, nop); 170 gp->next = nopskip(ptr, nop); 171 if (gp->op == ROP_NOP) 172 return gp->next; 173 } 174 return gp; 175 } 176 177 /* 178 * Postorder traversal of the parse tree. 179 * Build a graph using "Thompson's" algorithm. 180 * The only significant modification is the 181 * ROP_BRACE->ROP_MTOR construction. 182 * Returns 1 => graph might match empty 183 * 0 => graph cannot match empty 184 * -1 => error (in allocation) 185 */ 186 static int 187 mkgraph(Tree *tp, Graph **first, Graph **last) 188 { 189 Graph *new = 0, *nop, *lf, *ll, *rf, *rl; 190 int lmt, rmt = 0; 191 192 if (tp->op != ROP_CAT) 193 { 194 if ((new = malloc(sizeof(Graph))) == 0) 195 return 0; 196 new->op = tp->op; /* usually */ 197 } 198 switch (tp->op) 199 { 200 case ROP_REF: 201 new->alt.info.sub = tp->right.info.sub; 202 *first = new; 203 *last = new; 204 return 1; /* safe--can't really tell */ 205 case ROP_BKT: 206 tp->op = ROP_BKTCOPY; /* now graph owns clean up */ 207 /*FALLTHROUGH*/ 208 case ROP_BKTCOPY: 209 new->alt.info.bkt = tp->right.info.bkt; 210 /*FALLTHROUGH*/ 211 default: 212 *first = new; 213 *last = new; 214 return 0; 215 case ROP_EMPTY: 216 new->op = ROP_NOP; 217 new->alt.ptr = 0; /* untouched */ 218 *first = new; 219 *last = new; 220 return 1; 221 case ROP_OR: 222 case ROP_CAT: 223 lf = 0; /* in case of error */ 224 if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0) 225 goto err; 226 /*FALLTHROUGH*/ 227 case ROP_STAR: 228 case ROP_PLUS: 229 case ROP_QUEST: 230 case ROP_BRACE: 231 case ROP_LP: 232 if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0) 233 goto err; 234 break; 235 } 236 /* 237 * Note that ROP_NOP only serves as the node that reconnects 238 * the two choices of an incoming ROP_OR or ROP_QUEST. To 239 * prevent rewalking portions of the graph in nopskip(), 240 * this code marks all ROP_NOP nodes as currently untouched. 241 */ 242 switch (tp->op) 243 { 244 case ROP_OR: 245 if ((nop = malloc(sizeof(Graph))) == 0) 246 goto err; 247 nop->op = ROP_NOP; 248 nop->alt.ptr = 0; /* untouched */ 249 ll->next = nop; 250 rl->next = nop; 251 new->next = lf; 252 new->alt.ptr = rf; 253 *first = new; 254 *last = nop; 255 return lmt | rmt; 256 case ROP_CAT: /* no "new" */ 257 ll->next = rf; 258 *first = lf; 259 *last = rl; 260 return lmt & rmt; 261 case ROP_QUEST: 262 if ((nop = malloc(sizeof(Graph))) == 0) 263 goto err; 264 nop->op = ROP_NOP; 265 nop->alt.ptr = 0; /* untouched */ 266 new->op = ROP_OR; 267 new->next = lf; 268 new->alt.ptr = nop; 269 ll->next = nop; 270 *first = new; 271 *last = nop; 272 return 1; 273 case ROP_STAR: 274 *first = new; 275 rmt = 1; 276 star:; 277 new->op = lmt ? ROP_MTOR : ROP_OR; 278 new->alt.ptr = lf; 279 ll->next = new; 280 *last = new; 281 return rmt; 282 case ROP_PLUS: 283 *first = lf; 284 rmt = lmt; 285 goto star; 286 case ROP_BRACE: 287 if ((nop = malloc(sizeof(Graph))) == 0) 288 goto err; 289 nop->op = ROP_MTOR; /* going to save state anyway... */ 290 nop->alt.ptr = lf; 291 ll->next = new; 292 new->next = nop; 293 new->alt.info.num[1] = tp->right.info.num[1]; 294 if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0) 295 { 296 lmt = 1; 297 *first = new; 298 } 299 else 300 { 301 new->alt.info.num[0]--; /* already done 1 */ 302 if (new->alt.info.num[1] != BRACE_INF) 303 new->alt.info.num[1]--; /* likewise */ 304 *first = lf; 305 } 306 *last = nop; 307 return lmt; 308 case ROP_LP: 309 if ((nop = malloc(sizeof(Graph))) == 0) 310 goto err; 311 nop->op = ROP_RP; 312 nop->alt.info.sub = tp->right.info.sub; 313 new->alt.info.sub = tp->right.info.sub; 314 new->next = lf; 315 ll->next = nop; 316 *first = new; 317 *last = nop; 318 return lmt; 319 } 320 err:; 321 if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0) 322 delgraph(rf); 323 if (lf != 0) 324 delgraph(lf); 325 if (tp->op != ROP_CAT) 326 free(new); 327 return -1; 328 } 329 330 /* 331 * Semi-preorder traversal. 332 * Return zero if there's no simple first character 333 * (including the operation ROP_BOL) that must always 334 * be at the start of a matching string. 335 * This code doesn't attempt to get an answer if the 336 * first of the tree many be empty. 337 */ 338 static w_type 339 firstop(Tree *tp) 340 { 341 w_type op; 342 343 switch (tp->op) 344 { 345 case ROP_OR: 346 if ((op = firstop(tp->left.ptr)) == 0 347 || op != firstop(tp->right.ptr)) 348 { 349 return 0; 350 } 351 return op; 352 case ROP_BRACE: 353 if (tp->right.info.num[0] == 0) 354 return 0; 355 /*FALLTHROUGH*/ 356 case ROP_CAT: 357 case ROP_PLUS: 358 case ROP_LP: 359 return firstop(tp->left.ptr); 360 default: 361 if (tp->op < 0) 362 return 0; 363 /*FALLTHROUGH*/ 364 case ROP_BOL: 365 return tp->op; 366 } 367 } 368 369 void 370 libuxre_regdelnfa(Nfa *np) 371 { 372 Context *cp, *cpn; 373 Stack *sp, *spn; 374 375 if (np->gp != 0) 376 delgraph(np->gp); 377 for (cp = np->allcp; cp != 0; cp = cpn) 378 { 379 cpn = cp->link; 380 free(cp); 381 } 382 for (sp = np->allsp; sp != 0; sp = spn) 383 { 384 spn = sp->link; 385 free(sp); 386 } 387 free(np); 388 } 389 390 LIBUXRE_STATIC int 391 libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp) 392 { 393 Graph *gp, end; 394 Nfa *np; 395 396 if ((np = malloc(sizeof(Nfa))) == 0) 397 goto err; 398 np->gp = 0; /* in case of error */ 399 if (mkgraph(tp, &np->gp, &gp) < 0) 400 goto err; 401 gp->next = 0; /* nothing follows ROP_END */ 402 np->rmlen = 0; 403 if ((ep->re_flags & REG_NOSUB) == 0) 404 np->rmlen = ep->re_nsub + 1; 405 np->rmmin = 0; 406 if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen) 407 np->rmlen = np->rmmin; 408 /* 409 * Delete all ROP_NOPs from the graph. 410 * nopskip() disconnects them from the graph and 411 * links them together through their alt.ptr's. 412 */ 413 gp = &end; 414 np->gp = nopskip(np->gp, &gp); 415 while (gp != &end) 416 { 417 Graph *gp2 = gp; 418 419 gp = gp->alt.ptr; 420 free(gp2); 421 } 422 np->sp = 0; 423 np->allsp = 0; 424 np->avail = 0; 425 np->allcp = 0; 426 ep->re_nfa = np; 427 np->beg = firstop(tp); 428 return 0; 429 err:; 430 if (np != 0) 431 { 432 if (np->gp != 0) 433 delgraph(np->gp); 434 free(np); 435 } 436 return REG_ESPACE; 437 } 438 439 static Stack * 440 newstck(Nfa *np) 441 { 442 Stack *sp, **spp; 443 int i; 444 445 if ((sp = np->sp) == 0) /* get more */ 446 { 447 spp = &np->sp; 448 i = 4; 449 while ((sp = malloc(sizeof(Stack))) != 0) 450 { 451 sp->link = np->allsp; 452 np->allsp = sp; 453 *spp = sp; 454 spp = &sp->prev; 455 if (--i == 0) 456 break; 457 } 458 *spp = 0; 459 if ((sp = np->sp) == 0) /* first malloc failed */ 460 return 0; 461 } 462 np->sp = sp->prev; 463 return sp; 464 } 465 466 static int 467 mkstck(Nfa *np, Context *cp, Graph *gp) 468 { 469 Stack *new, *sp; 470 471 if (gp == 0) /* copy existing stack tail */ 472 { 473 /* 474 * Hoist up top of stack. 475 */ 476 new = cp->sp; 477 cp->wasgp = new->wasgp; 478 cp->str = new->str; 479 cp->cnt = new->cnt; 480 cp->sp = new->prev; 481 if ((sp = new->prev) == 0) /* only one below */ 482 { 483 new->prev = np->sp; 484 np->sp = new; 485 cp->sp = 0; 486 return 0; 487 } 488 for (;;) /* copy the rest; reusing the old top */ 489 { 490 new->wasgp = sp->wasgp; 491 new->str = sp->str; 492 new->cnt = sp->cnt; 493 if ((new->prev = sp->prev) == 0) 494 break; 495 if ((new->prev = newstck(np)) == 0) 496 return REG_ESPACE; 497 new = new->prev; 498 sp = sp->prev; 499 } 500 return 0; 501 } 502 if (cp->wasgp != 0) /* push current down */ 503 { 504 if ((new = newstck(np)) == 0) 505 return REG_ESPACE; 506 new->prev = cp->sp; 507 cp->sp = new; 508 new->wasgp = cp->wasgp; 509 new->str = cp->str; 510 new->cnt = cp->cnt; 511 } 512 cp->wasgp = gp; 513 cp->str = 0; 514 cp->cnt = 0; 515 return 0; 516 } 517 518 /* 519 * Allocate a new Context (from np->avail) 520 * and add it to the end of the current list. 521 */ 522 static int 523 newctxt(Nfa *np, Context *cp, Graph *gp) 524 { 525 Context *new; 526 size_t n; 527 528 if ((new = np->avail) == 0) /* need more */ 529 { 530 Context *ncp, **cpp; 531 int i; 532 533 /* 534 * Can't easily allocate Contexts in one call because 535 * the alignments (given the varying length of rm[]) 536 * are potentially nontrivial. 537 */ 538 n = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t); 539 i = 4; 540 cpp = &np->avail; 541 while ((ncp = malloc(n)) != 0) 542 { 543 ncp->link = np->allcp; 544 np->allcp = ncp; 545 *cpp = ncp; 546 cpp = &ncp->next; 547 if (--i == 0) 548 break; 549 } 550 *cpp = 0; 551 if ((new = np->avail) == 0) /* first malloc failed */ 552 return REG_ESPACE; 553 } 554 np->avail = new->next; 555 new->next = 0; 556 new->gp = gp; 557 new->sp = 0; 558 new->wasgp = 0; 559 new->nset = 0; 560 if (cp != 0) /* copy existing context information */ 561 { 562 if (cp->sp != 0) /* copy tail of stack */ 563 { 564 new->sp = cp->sp; 565 if (mkstck(np, new, 0) != 0) 566 return REG_ESPACE; 567 } 568 new->wasgp = cp->wasgp; 569 new->str = cp->str; 570 new->cnt = cp->cnt; 571 /* 572 * Copy any valid subexpression match information 573 * from the existing context. 574 */ 575 if (np->used != 0 && (n = cp->nset) != 0) 576 { 577 regmatch_t *rmn = new->rm, *rmo = cp->rm; 578 579 new->nset = n; 580 for (;; ++rmn, ++rmo) 581 { 582 rmn->rm_so = rmo->rm_so; 583 rmn->rm_eo = rmo->rm_eo; 584 if (--n == 0) 585 break; 586 } 587 } 588 } 589 /* 590 * Append it to the end of the current Context list. 591 */ 592 *np->ecur = new; 593 np->ecur = &new->next; 594 return 0; 595 } 596 597 /* 598 * Compare two byte string sequences for equality. 599 * If REG_ICASE, walk through the strings doing 600 * caseless comparisons of the wide characters. 601 */ 602 static int 603 casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max) 604 { 605 const Uchar *p = &xp->str[i]; 606 const Uchar *end; 607 w_type wc1, wc2; 608 int k; 609 610 if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */ 611 return 1; 612 if ((xp->flags & REG_ICASE) == 0) 613 return 0; 614 /* 615 * Walk through each testing for a match, ignoring case, 616 * of the resulting wide characters. 617 * Note that only "s" can run out of characters. 618 */ 619 end = &p[n]; 620 do 621 { 622 if ((wc1 = *s++) == '\0') 623 return 0; 624 if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0) 625 s += k; 626 if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0) 627 p += k; 628 if (wc1 != wc2) 629 { 630 wc1 = to_lower(wc1); 631 wc2 = to_lower(wc2); 632 if (wc1 != wc2) 633 return 0; 634 } 635 } while (p < end); 636 return 1; 637 } 638 639 LIBUXRE_STATIC int 640 libuxre_regnfaexec(Nfa *np, Exec *xp) 641 { 642 const Uchar *s, *s1, *s2; 643 Context *cp, *cpn; 644 Graph *gp, *brace; 645 Stack *sp, *spn; 646 ssize_t rmso, len; 647 int i, ret, mb_cur_max; 648 w_type wc; 649 size_t n; 650 651 ret = 0; /* assume it matches */ 652 rmso = -1; /* but no match yet */ 653 np->cur = 0; 654 np->step = 0; 655 np->ecur = &np->cur; 656 np->estp = &np->step; 657 if ((np->used = xp->nmatch) < np->rmmin) 658 np->used = np->rmmin; 659 s1 = 0; /* one char back */ 660 s = xp->str; /* current high water in string */ 661 mb_cur_max = xp->mb_cur_max; 662 for (;;) 663 { 664 /* 665 * Get next character from string. 666 * If the engine proper hasn't started and the engine 667 * requires a particular character to start and this 668 * character isn't it, try the next one. 669 */ 670 for (;;) 671 { 672 s2 = s1; 673 s1 = s; 674 if (!ISONEBYTE(wc = *s++) && 675 (i = libuxre_mb2wc(&wc, s)) > 0) 676 s += i; 677 if (np->cur != 0 || np->beg == wc || np->beg == 0) 678 break; 679 if (np->beg == ROP_BOL) 680 { 681 if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0) 682 break; 683 if ((xp->flags & REG_NEWLINE) == 0) 684 goto nomatch; 685 if (s2 != 0 && *s2 == '\n') 686 break; 687 } 688 if (wc == '\0') 689 goto nomatch; 690 } 691 /* 692 * Start the engine by inserting a fresh initial context 693 * if there's no known match as yet. (Once some match 694 * has been found, the end is near.) 695 */ 696 if (rmso < 0 && newctxt(np, 0, np->gp) != 0) 697 goto err; 698 /* 699 * Walk the current Contexts list, trying each. 700 * "loop" is when a new Context is to be tried, 701 * "again" is when the same Context continues, 702 * but wc was not yet matched. 703 */ 704 cp = np->cur; 705 loop:; 706 gp = cp->gp; 707 again:; 708 switch (gp->op) 709 { 710 case ROP_BRACE: /* gp->next->op == ROP_MTOR */ 711 brace = gp; 712 gp = gp->next; 713 goto mtor; 714 case ROP_MTOR: 715 brace = 0; 716 mtor:; 717 if (cp->wasgp != gp) /* first time */ 718 { 719 if (mkstck(np, cp, gp) != 0) 720 goto err; 721 } 722 else if (cp->str == s) /* spinning */ 723 goto poptonext; 724 cp->str = s; 725 if (brace != 0) 726 { 727 if (cp->cnt >= brace->alt.info.num[1]) 728 goto poptonext; 729 if (++cp->cnt <= brace->alt.info.num[0]) 730 { 731 gp = gp->alt.ptr; 732 goto again; 733 } 734 if (cp->cnt > BRACE_MAX) 735 cp->cnt = BRACE_MAX; 736 } 737 if (newctxt(np, cp, gp->alt.ptr) != 0) 738 goto err; 739 poptonext:; 740 cp->wasgp = 0; 741 if ((sp = cp->sp) != 0) /* pop stack */ 742 { 743 cp->sp = sp->prev; 744 cp->wasgp = sp->wasgp; 745 cp->str = sp->str; 746 cp->cnt = sp->cnt; 747 sp->prev = np->sp; 748 np->sp = sp; 749 } 750 /*FALLTHROUGH*/ 751 case ROP_EMPTY: 752 tonext:; 753 gp = gp->next; 754 goto again; 755 case ROP_OR: 756 if (newctxt(np, cp, gp->alt.ptr) != 0) 757 goto err; 758 goto tonext; 759 case ROP_LP: 760 if ((n = gp->alt.info.sub) < np->used) 761 { 762 size_t k; 763 764 cp->rm[n].rm_so = s1 - xp->str; 765 cp->rm[n].rm_eo = -1; 766 /* 767 * Mark any skipped subexpressions as 768 * failing to participate in the match. 769 */ 770 if ((k = cp->nset) < n) 771 { 772 regmatch_t *rmp = &cp->rm[k]; 773 774 for (;; rmp++) 775 { 776 rmp->rm_so = -1; 777 rmp->rm_eo = -1; 778 if (++k >= n) 779 break; 780 } 781 } 782 cp->nset = n + 1; 783 } 784 goto tonext; 785 case ROP_RP: 786 if ((n = gp->alt.info.sub) < np->used) 787 cp->rm[n].rm_eo = s1 - xp->str; 788 goto tonext; 789 case ROP_BOL: 790 if (s2 == 0) 791 { 792 if (xp->flags & REG_NOTBOL) 793 goto failed; 794 } 795 else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n') 796 goto failed; 797 goto tonext; 798 case ROP_EOL: 799 if (wc == '\0') 800 { 801 if (xp->flags & REG_NOTEOL) 802 goto failed; 803 } 804 else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n') 805 goto failed; 806 goto tonext; 807 default: /* character match */ 808 if (gp->op != wc) 809 { 810 if ((xp->flags & REG_ICASE) == 0 811 || gp->op != to_lower(wc)) 812 { 813 goto failed; 814 } 815 } 816 nextwc:; 817 cp->gp = gp->next; 818 tostep:; 819 cpn = cp->next; 820 cp->next = 0; 821 *np->estp = cp; 822 np->estp = &cp->next; 823 if ((cp = cpn) == 0) 824 break; 825 goto loop; 826 case ROP_NOTNL: 827 if (wc == '\n') 828 goto failed; 829 /*FALLTHROUGH*/ 830 case ROP_ANYCH: 831 if (wc > '\0') 832 goto nextwc; 833 /*FALLTHROUGH*/ 834 case ROP_NONE: 835 failed:; 836 cpn = cp->next; 837 cp->next = np->avail; 838 np->avail = cp; 839 if ((cp = cpn) == 0) 840 break; 841 goto loop; 842 case ROP_LT: 843 if (s2 == 0) 844 { 845 if (xp->flags & REG_NOTBOL) 846 goto failed; 847 } 848 else 849 { 850 w_type pwc; 851 852 if (wc != '_' && 853 !iswalnum(mb_cur_max == 1 ? btowc(wc) : wc)) 854 goto failed; 855 if (!ISONEBYTE(pwc = *s2)) 856 libuxre_mb2wc(&pwc, &s2[1]); 857 if (pwc == '_' || 858 iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc)) 859 goto failed; 860 } 861 goto tonext; 862 case ROP_GT: 863 if (wc == '_' || 864 iswalnum(mb_cur_max == 1 ? btowc(wc) : wc)) 865 goto failed; 866 goto tonext; 867 case ROP_BKT: 868 case ROP_BKTCOPY: 869 if (cp->wasgp == gp) /* rest of MCCE */ 870 { 871 checkspin:; 872 if (s1 >= cp->str) /* got it all */ 873 goto poptonext; 874 goto tostep; 875 } 876 if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s, 877 mb_cur_max)) < 0) 878 goto failed; 879 if ((n = i) == 0) /* only matched wc */ 880 goto nextwc; 881 spin:; 882 if (mkstck(np, cp, gp) != 0) 883 goto err; 884 cp->gp = gp; /* stay here until reach past s+n */ 885 cp->str = s + n; 886 goto tostep; 887 case ROP_REF: 888 if (cp->wasgp == gp) /* rest of matched string */ 889 goto checkspin; 890 if ((n = gp->alt.info.sub) >= cp->nset) 891 goto failed; 892 if ((len = cp->rm[n].rm_eo) < 0) 893 goto failed; 894 if ((len -= n = cp->rm[n].rm_so) == 0) 895 goto tonext; 896 if (casecmp(s1, xp, n, len, mb_cur_max) == 0) 897 goto failed; 898 if ((n = s - s1) >= len) 899 goto nextwc; 900 n = len - n; 901 goto spin; 902 case ROP_END: /* success! */ 903 if (xp->flags & REG_NONEMPTY) 904 { 905 if (s2 == 0) 906 goto failed; 907 } 908 if (xp->nmatch == 0) 909 goto match; 910 /* 911 * Mark any skipped subexpressions as failing to match. 912 */ 913 if ((n = cp->nset) < xp->nmatch) 914 { 915 do 916 { 917 cp->rm[n].rm_so = -1; 918 cp->rm[n].rm_eo = -1; 919 } while (++n < xp->nmatch); 920 } 921 /* 922 * Note the left-most match that's longest. 923 */ 924 n = cp->rm[0].rm_so; 925 if (rmso < 0 || n < rmso) 926 { 927 rmso = n; 928 record:; 929 memcpy(xp->match, cp->rm, 930 xp->nmatch * sizeof(regmatch_t)); 931 goto failed; 932 } 933 if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo) 934 goto failed; 935 if (xp->match[0].rm_eo < cp->rm[0].rm_eo) 936 goto record; 937 #if 0 /* maximize the lengths of earlier LP...RPs */ 938 /* 939 * If both are of the same length and start 940 * at the same point, choose the one with 941 * a "longest submatch from left to right" 942 * where an empty string wins over a nonmatch. 943 */ 944 for (n = 1; n < xp->nmatch; n++) 945 { 946 ssize_t nlen; 947 948 /* 949 * First, go with the choice that has any 950 * match for subexpr n. 951 */ 952 len = xp->match[n].rm_eo; 953 nlen = cp->rm[n].rm_eo; 954 if (nlen < 0) 955 { 956 if (len >= 0) 957 break; 958 } 959 else if (len < 0) 960 goto record; 961 /* 962 * Both have a match; go with the longer. 963 */ 964 len -= xp->match[n].rm_so; 965 nlen -= cp->rm[n].rm_so; 966 if (nlen < len) 967 break; 968 if (nlen > len) 969 goto record; 970 } 971 #else /* take LP and RP as "fence posts" and maximize earlier gaps */ 972 /* 973 * If both are of the same length and start 974 * at the same point, choose the one with 975 * the larger earlier subpatterns, in which 976 * each rm_so and rm_eo serves as a separator. 977 */ 978 for (n = 1; n < xp->nmatch; n++) 979 { 980 ssize_t nlen; 981 int use; 982 983 if (xp->flags & REG_AVOIDNULL) { 984 /* 985 * This is to to satisfy POSIX.1-2001 986 * XBD pp. 172-173 ll. 6127-6129, whose 987 * translation is "do not match null 988 * expressions if there is a choice". 989 * See also POSIX.2 interpretation #43 990 * in which the question was raised. 991 * 992 * The first subexpression of "\(x*\)*" 993 * must thus match the string "xxx". 994 */ 995 use = cp->rm[n].rm_eo - 996 cp->rm[n].rm_so >= 997 xp->match[n].rm_eo - 998 xp->match[n].rm_so || 999 xp->match[n].rm_so < 0; 1000 } else 1001 use = 1; 1002 /* 1003 * Choose the rightmost ROP_LP as that 1004 * maximizes the gap from before. 1005 */ 1006 len = xp->match[n].rm_so; 1007 nlen = cp->rm[n].rm_so; 1008 if (len < nlen && use) 1009 goto record; 1010 if (len > nlen) 1011 break; 1012 /* 1013 * The ROP_LPs are at the same point: 1014 * Choose the rightmost ROP_RP. 1015 */ 1016 len = xp->match[n].rm_eo; 1017 nlen = cp->rm[n].rm_eo; 1018 if (len < nlen && use) 1019 goto record; 1020 if (len > nlen) 1021 break; 1022 } 1023 #endif 1024 goto failed; 1025 } 1026 /* 1027 * Finished the current Context list. If the input string 1028 * has been entirely scanned, we're done. Otherwise, make 1029 * the next step list current for the next character. 1030 * If the next step list was empty and there's an existing 1031 * match, that's the left-most longest. 1032 */ 1033 if (wc == '\0') 1034 { 1035 if (rmso >= 0) 1036 goto match; 1037 goto nomatch; 1038 } 1039 np->ecur = np->estp; 1040 if ((np->cur = np->step) == 0) 1041 { 1042 if (rmso >= 0) 1043 goto match; 1044 np->ecur = &np->cur; /* was pointing at step */ 1045 } 1046 np->step = 0; 1047 np->estp = &np->step; 1048 } 1049 nomatch:; 1050 ret = REG_NOMATCH; 1051 match:; 1052 np->avail = 0; 1053 for (cp = np->allcp; cp != 0; cp = cpn) 1054 { 1055 cpn = cp->link; 1056 cp->next = np->avail; 1057 np->avail = cp; 1058 } 1059 np->sp = 0; 1060 for (sp = np->allsp; sp != 0; sp = spn) 1061 { 1062 spn = sp->link; 1063 sp->prev = np->sp; 1064 np->sp = sp; 1065 } 1066 return ret; 1067 err:; 1068 ret = REG_ESPACE; 1069 goto match; 1070 }