regparse.c (21226B)
1 /* 2 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 3 * 4 * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03 5 */ 6 /* UNIX(R) Regular Expresssion Library 7 * 8 * Note: Code is released under the GNU LGPL 9 * 10 * Copyright (C) 2001 Caldera International, Inc. 11 * 12 * This library is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2 of the License, or (at your option) any later version. 16 * 17 * This library is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with this library; if not, write to: 24 * Free Software Foundation, Inc. 25 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 */ 27 28 /* #include "synonyms.h" */ 29 #include <stdlib.h> 30 #include <ctype.h> 31 #include "re.h" 32 33 LIBUXRE_STATIC void 34 libuxre_regdeltree(Tree *tp, int all) 35 { 36 if (tp == 0) 37 return; 38 if (tp->op < 0) 39 { 40 switch (KIND_ROP(tp->op)) 41 { 42 case BINARY_ROP: 43 libuxre_regdeltree(tp->right.ptr, all); 44 /*FALLTHROUGH*/ 45 case UNARY_ROP: 46 libuxre_regdeltree(tp->left.ptr, all); 47 break; 48 default: 49 if (tp->op == ROP_BKT && all) 50 { 51 libuxre_bktfree(tp->right.info.bkt); 52 free(tp->right.info.bkt); 53 } 54 break; 55 } 56 } 57 free(tp); 58 } 59 60 LIBUXRE_STATIC Tree * 61 libuxre_reg1tree(w_type op, Tree *lp) 62 { 63 Tree *tp; 64 65 if ((tp = malloc(sizeof(Tree))) == 0) 66 { 67 if (lp != 0) 68 libuxre_regdeltree(lp, 1); 69 return 0; 70 } 71 tp->op = op; 72 tp->left.ptr = lp; 73 if (lp != 0) 74 lp->parent = tp; 75 return tp; 76 } 77 78 LIBUXRE_STATIC Tree * 79 libuxre_reg2tree(w_type op, Tree *lp, Tree *rp) 80 { 81 Tree *tp; 82 83 if ((tp = malloc(sizeof(Tree))) == 0) 84 { 85 libuxre_regdeltree(lp, 1); 86 libuxre_regdeltree(rp, 1); 87 return 0; 88 } 89 tp->op = op; 90 tp->left.ptr = lp; 91 lp->parent = tp; 92 tp->right.ptr = rp; 93 rp->parent = tp; 94 return tp; 95 } 96 97 static int 98 lex(Lex *lxp) 99 { 100 size_t num; 101 w_type wc; 102 int n, mb_cur_max; 103 104 mb_cur_max = lxp->mb_cur_max; 105 nextc: switch (wc = *lxp->pat++) /* interesting ones are single bytes */ 106 { 107 case '\0': 108 lxp->pat--; /* continue to report ROP_END */ 109 wc = ROP_END; 110 break; 111 case '(': 112 if (lxp->flags & REG_PARENS) 113 { 114 leftparen:; 115 /* 116 * Must keep track of the closed and 117 * yet-to-be closed groups as a list. 118 * Consider (()a(()b(()c(()d... in which 119 * at each letter another even-numbered 120 * group is made available, but no 121 * odd-numbered ones are. 122 */ 123 if ((lxp->flags & REG_NOBACKREF) == 0) 124 { 125 if (lxp->nleft >= lxp->nclist) /* grow it */ 126 { 127 unsigned char *p; 128 129 lxp->nclist += 8; /* arbitrary */ 130 if ((p = realloc(lxp->clist, 131 lxp->nclist)) == 0) 132 { 133 lxp->err = REG_ESPACE; 134 return -1; 135 } 136 lxp->clist = p; 137 } 138 lxp->clist[lxp->nleft] = 0; /* unavailable */ 139 } 140 lxp->nleft++; 141 wc = ROP_LP; 142 } 143 break; 144 case ')': 145 /* 146 * For REG_PARENS, only take a right paren as a close 147 * if there is a matching left paren. 148 */ 149 if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft) 150 { 151 lxp->nright++; 152 rightparen:; 153 /* 154 * The group that is being closed is the highest 155 * numbered as-yet-unclosed group. 156 */ 157 if ((lxp->flags & REG_NOBACKREF) == 0) 158 { 159 num = lxp->nleft; 160 while (lxp->clist[--num] != 0) 161 ; 162 lxp->clist[num] = 1; 163 } 164 wc = ROP_RP; 165 } 166 break; 167 case '.': 168 wc = ROP_ANYCH; 169 if (lxp->flags & REG_NEWLINE) 170 wc = ROP_NOTNL; 171 break; 172 case '*': 173 if (lxp->flags & REG_ADDITIVE) 174 { 175 nxtstar: switch (*lxp->pat) 176 { 177 case '+': 178 if ((lxp->flags & REG_PLUS) == 0) 179 break; 180 lxp->pat++; 181 goto nxtstar; 182 case '?': 183 if ((lxp->flags & REG_QUEST) == 0) 184 break; 185 /*FALLTHRU*/ 186 case '*': 187 lxp->pat++; 188 goto nxtstar; 189 } 190 } 191 wc = ROP_STAR; 192 break; 193 case '^': 194 /* 195 * Look "behind" to see if this is an anchor. 196 * Take it as an anchor if it follows an alternation 197 * operator. (lxp->tok is initially set to ROP_OR.) 198 */ 199 if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) { 200 if (lxp->flags & REG_ADDITIVE) 201 { 202 int optional = 0; 203 204 nxtcar: switch (*lxp->pat) 205 { 206 case '+': 207 if ((lxp->flags & REG_PLUS) == 0) 208 break; 209 lxp->pat++; 210 goto nxtcar; 211 case '?': 212 if ((lxp->flags & REG_QUEST) == 0) 213 break; 214 /*FALLTHRU*/ 215 case '*': 216 optional = 1; 217 lxp->pat++; 218 goto nxtcar; 219 } 220 if (optional) 221 goto nextc; 222 } 223 wc = ROP_BOL; 224 } 225 break; 226 case '$': 227 /* 228 * Look ahead to see if this is an anchor, 229 * unless any '$' is an anchor. 230 * Take it as an anchor if it occurs just before 231 * the pattern end or an alternation operator. 232 */ 233 if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0' 234 || (lxp->flags & REG_OR && *lxp->pat == '|') 235 || (lxp->flags & REG_NLALT && *lxp->pat == '\n')) 236 { 237 if (lxp->flags & REG_ADDITIVE) 238 { 239 int optional = 0; 240 241 nxtdol: switch (*lxp->pat) 242 { 243 case '+': 244 if ((lxp->flags & REG_PLUS) == 0) 245 break; 246 lxp->pat++; 247 goto nxtdol; 248 case '?': 249 if ((lxp->flags & REG_QUEST) == 0) 250 break; 251 /*FALLTHRU*/ 252 case '*': 253 optional = 1; 254 lxp->pat++; 255 goto nxtdol; 256 } 257 if (optional) 258 goto nextc; 259 } 260 wc = ROP_EOL; 261 } 262 break; 263 case '+': 264 if (lxp->flags & REG_PLUS) 265 { 266 wc = ROP_PLUS; 267 if (lxp->flags & REG_ADDITIVE) 268 { 269 nxtplus: switch (*lxp->pat) 270 { 271 case '?': 272 if ((lxp->flags & REG_QUEST) == 0) 273 break; 274 case '*': 275 wc = ROP_STAR; 276 /*FALLTHRU*/ 277 case '+': 278 lxp->pat++; 279 goto nxtplus; 280 } 281 } 282 } 283 break; 284 case '?': 285 if (lxp->flags & REG_QUEST) 286 { 287 wc = ROP_QUEST; 288 if (lxp->flags & REG_ADDITIVE) 289 { 290 nxtquest: switch (*lxp->pat) 291 { 292 case '+': 293 if ((lxp->flags & REG_PLUS) == 0) 294 break; 295 case '*': 296 wc = ROP_STAR; 297 /*FALLTHRU*/ 298 case '?': 299 lxp->pat++; 300 goto nxtquest; 301 } 302 } 303 } 304 break; 305 case '\n': 306 if (lxp->flags & REG_NLALT) 307 { 308 /* 309 * Even when newline is an alternative separator, 310 * it doesn't permit parenthesized subexpressions 311 * to include it. 312 */ 313 if (lxp->nleft != lxp->nright) 314 { 315 lxp->err = REG_EPAREN; 316 return -1; 317 } 318 wc = ROP_OR; 319 } 320 else if (lxp->flags & REG_NEWLINE) 321 lxp->flags |= REG_NFA; 322 break; 323 case '|': 324 if (lxp->flags & REG_OR) 325 wc = ROP_OR; 326 break; 327 case '[': 328 if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0) 329 { 330 lxp->err = REG_ESPACE; 331 return -1; 332 } 333 if ((lxp->flags & REG_GOTBKT) == 0) /* first time */ 334 { 335 struct lc_collate *col; 336 337 lxp->flags |= REG_GOTBKT; 338 lxp->bktflags = 0; 339 if (lxp->flags & REG_ICASE) 340 lxp->bktflags |= BKT_ONECASE; 341 if (lxp->flags & REG_NEWLINE) 342 lxp->bktflags |= BKT_NOTNL; 343 if (lxp->flags & REG_BADRANGE) 344 lxp->bktflags |= BKT_BADRANGE; 345 if (lxp->flags & REG_ODDRANGE) 346 lxp->bktflags |= BKT_ODDRANGE; 347 if (lxp->flags & REG_SEPRANGE) 348 lxp->bktflags |= BKT_SEPRANGE; 349 if (lxp->flags & REG_BKTQUOTE) 350 lxp->bktflags |= BKT_QUOTE; 351 if (lxp->flags & REG_BKTEMPTY) 352 lxp->bktflags |= BKT_EMPTY; 353 if (lxp->flags & REG_ESCNL) 354 lxp->bktflags |= BKT_ESCNL; 355 if (lxp->flags & REG_NLALT) 356 lxp->bktflags |= BKT_NLBAD; 357 if (lxp->flags & REG_ESCSEQ) 358 lxp->bktflags |= BKT_ESCSEQ; 359 if (lxp->flags & REG_BKTESCAPE) 360 lxp->bktflags |= BKT_ESCAPE; 361 if (lxp->flags & REG_NOI18N) 362 lxp->bktflags |= BKT_NOI18N; 363 if (lxp->flags & REG_OLDESC) 364 lxp->bktflags |= BKT_OLDESC; 365 if ((col = libuxre_lc_collate(0)) != 0) 366 { 367 if (col->maintbl == 0 368 || col->flags & CHF_ENCODED) 369 { 370 (void)libuxre_lc_collate(col); 371 col = 0; 372 } 373 else if (col->flags & CHF_MULTICH) 374 lxp->flags |= REG_NFA; 375 } 376 lxp->col = col; 377 } 378 n = lxp->bktflags; 379 if (*lxp->pat == '^') 380 { 381 n |= BKT_NEGATED; 382 lxp->pat++; 383 } 384 lxp->info.bkt->col = lxp->col; 385 if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat, 386 n, mb_cur_max)) < 0) 387 { 388 free(lxp->info.bkt); 389 lxp->err = -n; /* convert to REG_* errors */ 390 return -1; 391 } 392 /* 393 * NFA forced if newline can be a match and REG_NEWLINE is set. 394 */ 395 if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE 396 && lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */ 397 && libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0) 398 { 399 lxp->flags |= REG_NFA; 400 } 401 lxp->pat += n; 402 wc = ROP_BKT; 403 break; 404 case '{': 405 if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0) 406 break; 407 interval:; 408 if (!isdigit(num = *lxp->pat)) 409 { 410 badbr:; 411 lxp->err = REG_BADBR; 412 if (*lxp->pat == '\0') 413 lxp->err = REG_EBRACE; /* more accurate */ 414 return -1; 415 } 416 num -= '0'; 417 while (isdigit(wc = *++lxp->pat)) 418 { 419 num *= 10; 420 if ((num += wc - '0') > BRACE_MAX) 421 goto badbr; 422 } 423 lxp->info.num[0] = num; 424 lxp->info.num[1] = num; 425 if (wc == ',') 426 { 427 lxp->info.num[1] = BRACE_INF; 428 if (isdigit(wc = *++lxp->pat)) 429 { 430 num = wc - '0'; 431 while (isdigit(wc = *++lxp->pat)) 432 { 433 num *= 10; 434 if ((num += wc - '0') > BRACE_MAX) 435 goto badbr; 436 } 437 if (num < lxp->info.num[0]) 438 goto badbr; 439 lxp->info.num[1] = num; 440 } 441 } 442 if ((lxp->flags & REG_BRACES) == 0) 443 { 444 if (wc != '\\') 445 goto badbr; 446 wc = *++lxp->pat; 447 } 448 if (wc != '}') 449 goto badbr; 450 lxp->pat++; 451 wc = ROP_BRACE; 452 /* 453 * Replace interval with simpler equivalents where possible, 454 * even when the operators are not otherwise available. 455 */ 456 if (lxp->info.num[1] <= 1) 457 { 458 if (lxp->info.num[0] == 1) 459 wc = ROP_NOP; /* {1,1} is noise */ 460 else if (lxp->info.num[1] == 0) 461 wc = ROP_EMPTY; /* {0,0} is empty string */ 462 else 463 wc = ROP_QUEST; /* {0,1} is ? */ 464 } 465 else if (lxp->info.num[1] == BRACE_INF) 466 { 467 if (lxp->info.num[0] == 0) 468 wc = ROP_STAR; 469 else if (lxp->info.num[0] == 1) 470 wc = ROP_PLUS; 471 else if (lxp->info.num[0] > BRACE_DFAMAX) 472 lxp->flags |= REG_NFA; 473 } 474 else if (lxp->info.num[1] > BRACE_DFAMAX) 475 { 476 lxp->flags |= REG_NFA; 477 } 478 break; 479 case '\\': 480 switch (wc = *lxp->pat++) 481 { 482 case '\0': 483 lxp->err = REG_EESCAPE; 484 return -1; 485 case '<': 486 if (lxp->flags & REG_ANGLES) 487 { 488 lxp->flags |= REG_NFA; 489 wc = ROP_LT; 490 } 491 goto out; 492 case '>': 493 if (lxp->flags & REG_ANGLES) 494 { 495 lxp->flags |= REG_NFA; 496 wc = ROP_GT; 497 } 498 goto out; 499 case '(': 500 if ((lxp->flags & REG_PARENS) == 0) 501 goto leftparen; 502 goto out; 503 case ')': 504 if ((lxp->flags & REG_PARENS) == 0) 505 { 506 if (++lxp->nright > lxp->nleft) 507 { 508 lxp->err = REG_EPAREN; 509 return -1; 510 } 511 goto rightparen; 512 } 513 goto out; 514 case '{': 515 if (lxp->flags & (REG_BRACES|REG_NOBRACES)) 516 goto out; 517 goto interval; 518 case '1': 519 case '2': 520 case '3': 521 case '4': 522 case '5': 523 case '6': 524 case '7': 525 case '8': 526 case '9': 527 num = wc - '0'; 528 if ((lxp->flags & REG_NOBACKREF) == 0) 529 { 530 backref:; 531 if (num > lxp->nleft 532 || lxp->clist[num - 1] == 0) 533 { 534 lxp->err = REG_ESUBREG; 535 return -1; 536 } 537 lxp->info.sub = num; 538 if (lxp->maxref < num) 539 lxp->maxref = num; 540 lxp->flags |= REG_NFA; 541 wc = ROP_REF; 542 goto out; 543 } 544 /* 545 * For compatibility (w/awk), permit "octal" 8 and 9. 546 * Already have the value of the first digit in num. 547 * 548 * If REG_OLDESC, exactly three digits must be present. 549 */ 550 tryoctal:; 551 if ((lxp->flags & REG_ESCSEQ) == 0) 552 goto out; 553 if ((wc = *lxp->pat) >= '0' && wc <= '9') 554 { 555 num <<= 3; 556 num += wc - '0'; 557 if ((wc = *++lxp->pat) >= '0' && wc <= '9') 558 { 559 num <<= 3; 560 num += wc - '0'; 561 lxp->pat++; 562 } 563 else if (lxp->flags & REG_OLDESC) 564 { 565 lxp->pat--; 566 wc = lxp->pat[-1]; 567 goto out; 568 } 569 } 570 else if (lxp->flags & REG_OLDESC) 571 { 572 wc = lxp->pat[-1]; 573 goto out; 574 } 575 if ((wc = num) <= 0) 576 { 577 lxp->err = REG_BADESC; 578 return -1; 579 } 580 goto out; 581 case '0': 582 if ((lxp->flags & REG_NOBACKREF) == 0 583 && (num = *lxp->pat) >= '0' && num <= '9') 584 { 585 num -= '0'; 586 /* 587 * This loop ignores wraparounds. 588 * Keep track of number of digits in n. 589 */ 590 n = 1; 591 while ((wc = *++lxp->pat) >= '0' && wc <= '9') 592 { 593 num *= 10; 594 num += wc - '0'; 595 n++; 596 } 597 if (num != 0) 598 goto backref; 599 lxp->pat -= n; 600 } 601 num = 0; 602 goto tryoctal; 603 case 'a': 604 if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) 605 wc = '\a'; 606 goto out; 607 case 'b': 608 if (lxp->flags & REG_ESCSEQ) 609 wc = '\b'; 610 goto out; 611 case 'f': 612 if (lxp->flags & REG_ESCSEQ) 613 wc = '\f'; 614 goto out; 615 case 'n': 616 if (lxp->flags & (REG_ESCSEQ | REG_ESCNL)) 617 { 618 wc = '\n'; 619 if (lxp->flags & REG_NEWLINE) 620 lxp->flags |= REG_NFA; 621 } 622 goto out; 623 case 'r': 624 if (lxp->flags & REG_ESCSEQ) 625 wc = '\r'; 626 goto out; 627 case 't': 628 if (lxp->flags & REG_ESCSEQ) 629 wc = '\t'; 630 goto out; 631 case 'v': 632 if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) 633 wc = '\v'; 634 goto out; 635 case 'x': 636 if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ 637 && isxdigit(num = *lxp->pat)) 638 { 639 wc = num; 640 num = 0; 641 /* 642 * Take as many hex digits as possible, 643 * ignoring overflows. 644 * If the result (squeezed into a w_type) 645 * is positive, it's okay. 646 */ 647 do 648 { 649 if (isdigit(wc)) 650 wc -= '0'; 651 else if (isupper(wc)) 652 wc -= 'A' + 10; 653 else 654 wc -= 'a' + 10; 655 num <<= 4; 656 num |= wc; 657 } while (isxdigit(wc = *++lxp->pat)); 658 if ((wc = num) <= 0) 659 { 660 lxp->err = REG_BADESC; 661 return -1; 662 } 663 } 664 goto out; 665 } 666 /*FALLTHROUGH*/ 667 default: 668 if (!ISONEBYTE(wc)) 669 { 670 if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0) 671 lxp->pat += n; 672 else if (n < 0) 673 { 674 lxp->err = REG_ILLSEQ; 675 return -1; 676 } 677 } 678 if (lxp->flags & REG_ICASE) 679 wc = to_lower(wc); 680 break; 681 } 682 out:; 683 lxp->tok = wc; 684 return 0; 685 } 686 687 static Tree *alt(Lex *); 688 689 static Tree * 690 leaf(Lex *lxp) 691 { 692 Tree *tp; 693 694 if ((tp = malloc(sizeof(Tree))) == 0) 695 { 696 lxp->err = REG_ESPACE; 697 return 0; 698 } 699 switch (tp->op = lxp->tok) /* covers most cases */ 700 { 701 default: 702 if (tp->op < 0) 703 { 704 lxp->err = REG_BADPAT; 705 tp->right.ptr = 0; 706 goto badunary; 707 } 708 break; 709 case ROP_STAR: 710 case ROP_PLUS: 711 case ROP_QUEST: 712 if ((lxp->flags & REG_NOAUTOQUOTE) == 0 713 && lxp->pat[-1] != '}') 714 { 715 tp->op = lxp->pat[-1]; 716 break; 717 } 718 /*FALLTHROUGH*/ 719 case ROP_BRACE: 720 case ROP_EMPTY: /* was {0,0} ROP_BRACE */ 721 case ROP_NOP: /* was {1,1} ROP_BRACE */ 722 lxp->err = REG_BADRPT; 723 badunary:; 724 tp->left.ptr = 0; 725 goto err; 726 case ROP_ANYCH: 727 case ROP_NOTNL: 728 break; 729 case ROP_BOL: 730 case ROP_EOL: 731 case ROP_LT: 732 case ROP_GT: 733 /* 734 * Look ahead for what would have been taken to be 735 * postfix operators. 736 */ 737 if (lex(lxp) != 0) 738 goto err; 739 switch (lxp->tok) 740 { 741 case ROP_STAR: 742 case ROP_PLUS: 743 case ROP_QUEST: 744 if ((lxp->flags & REG_NOAUTOQUOTE) == 0 745 && lxp->pat[-1] != '}') 746 { 747 lxp->tok = lxp->pat[-1]; 748 break; 749 } 750 /*FALLTHROUGH*/ 751 case ROP_BRACE: 752 case ROP_EMPTY: /* was {0,0} ROP_BRACE */ 753 case ROP_NOP: /* was {1,1} ROP_BRACE */ 754 lxp->err = REG_BADRPT; 755 goto err; 756 } 757 return tp; 758 case ROP_BKT: 759 tp->right.info.bkt = lxp->info.bkt; 760 break; 761 case ROP_REF: 762 tp->right.info.sub = lxp->info.sub; 763 break; 764 case ROP_LP: 765 tp->right.info.sub = lxp->nleft; 766 if (lex(lxp) != 0) 767 goto badunary; 768 if (lxp->tok == ROP_RP) /* empty parens; choice of meaning */ 769 { 770 if (lxp->flags & REG_MTPARENBAD) 771 { 772 lxp->err = REG_EMPTYPAREN; 773 goto badunary; 774 } 775 lxp->tok = ROP_EMPTY; 776 if (lxp->flags & REG_MTPARENFAIL) 777 lxp->tok = ROP_NONE; 778 if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0) 779 goto badunary; 780 } 781 else if ((tp->left.ptr = alt(lxp)) == 0) 782 { 783 if (lxp->err == REG_BADPAT) 784 goto parenerr; 785 goto badunary; 786 } 787 else if (lxp->tok != ROP_RP) 788 { 789 lxp->err = REG_BADPAT; 790 parenerr:; 791 if (lxp->nleft != lxp->nright) 792 lxp->err = REG_EPAREN; /* better choice */ 793 goto badunary; 794 } 795 tp->left.ptr->parent = tp; 796 break; 797 } 798 if (lex(lxp) != 0) 799 { 800 err:; 801 libuxre_regdeltree(tp, 1); 802 tp = 0; 803 } 804 return tp; 805 } 806 807 static Tree * 808 post(Lex *lxp) 809 { 810 Tree *lp; 811 812 if ((lp = leaf(lxp)) == 0) 813 return 0; 814 switch (lxp->tok) 815 { 816 case ROP_EMPTY: /* this was {0,0} ROP_BRACE */ 817 libuxre_regdeltree(lp, 1); 818 lp = 0; 819 /*FALLTHROUGH*/ 820 case ROP_BRACE: 821 case ROP_STAR: 822 case ROP_PLUS: 823 case ROP_QUEST: 824 if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0) 825 { 826 lxp->err = REG_ESPACE; 827 return 0; 828 } 829 if (lxp->tok == ROP_BRACE) 830 lp->right.info = lxp->info; 831 /*FALLTHROUGH*/ 832 case ROP_NOP: /* this was {1,1} ROP_BRACE */ 833 if (lex(lxp) != 0) 834 { 835 libuxre_regdeltree(lp, 1); 836 return 0; 837 } 838 break; 839 } 840 return lp; 841 } 842 843 static Tree * 844 cat(Lex *lxp) 845 { 846 Tree *lp, *rp; 847 848 if ((lp = post(lxp)) == 0) 849 return 0; 850 for (;;) 851 { 852 if (lxp->tok == ROP_OR || lxp->tok == ROP_RP 853 || lxp->tok == ROP_END) 854 { 855 return lp; 856 } 857 if ((rp = post(lxp)) == 0) 858 break; 859 if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0) 860 { 861 lxp->err = REG_ESPACE; 862 return 0; 863 } 864 } 865 libuxre_regdeltree(lp, 1); 866 return 0; 867 } 868 869 static Tree * 870 alt(Lex *lxp) 871 { 872 Tree *lp, *rp; 873 874 if ((lp = cat(lxp)) == 0) 875 return 0; 876 for (;;) 877 { 878 if (lxp->tok != ROP_OR) 879 return lp; 880 if (lex(lxp) != 0) 881 break; 882 if (lxp->tok == ROP_END) 883 return lp; /* ignore trailing '|' */ 884 if ((rp = cat(lxp)) == 0) 885 break; 886 if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0) 887 { 888 lxp->err = REG_ESPACE; 889 return 0; 890 } 891 } 892 libuxre_regdeltree(lp, 1); 893 return 0; 894 } 895 896 LIBUXRE_STATIC Tree * 897 libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags) 898 { 899 Tree *lp, *rp; 900 901 lp = 0; /* in case of error */ 902 lxp->clist = 0; 903 lxp->col = 0; 904 lxp->err = 0; 905 lxp->maxref = 0; 906 lxp->nleft = 0; 907 lxp->nright = 0; 908 lxp->nclist = 0; 909 lxp->mb_cur_max = MB_CUR_MAX; 910 if (flags & REG_OR && *pat == '|') 911 pat++; /* skip initial OR like egrep did */ 912 lxp->pat = pat; 913 lxp->flags = flags; 914 lxp->tok = ROP_OR; /* enables ^ as anchor */ 915 /* 916 * Get initial token. 917 */ 918 if (lex(lxp) != 0) 919 { 920 err:; 921 if (lp != 0) 922 { 923 libuxre_regdeltree(lp, 1); 924 lp = 0; 925 } 926 if (lxp->err == 0) 927 lxp->err = REG_ESPACE; 928 goto ret; 929 } 930 if (lxp->tok == ROP_END) 931 { 932 lxp->err = REG_NOPAT; 933 goto err; 934 } 935 if ((lp = alt(lxp)) == 0) /* parse entire RE */ 936 goto err; 937 if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0) 938 { 939 if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0) 940 goto err; 941 lp->right.info.sub = 0; 942 } 943 if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0) 944 goto err; 945 if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0) 946 goto err; 947 lp->parent = 0; 948 ret:; 949 if (lxp->clist != 0) 950 free(lxp->clist); 951 return lp; 952 } 953 954 #ifdef REGDEBUG 955 956 LIBUXRE_STATIC void 957 libuxre_regtree(Tree *tp, int n) 958 { 959 const char *opstr; 960 char buf[32]; 961 int kind, next; 962 963 if (n < 0) 964 next = -n + 2; 965 else 966 next = n + 2; 967 switch (tp->op) 968 { 969 case ROP_OR: 970 opstr = "|"; 971 kind = BINARY_ROP; 972 break; 973 case ROP_CAT: 974 opstr = "&"; 975 kind = BINARY_ROP; 976 break; 977 case ROP_STAR: 978 opstr = "*"; 979 kind = UNARY_ROP; 980 break; 981 case ROP_PLUS: 982 opstr = "+"; 983 kind = UNARY_ROP; 984 break; 985 case ROP_QUEST: 986 opstr = "?"; 987 kind = UNARY_ROP; 988 break; 989 case ROP_BRACE: 990 opstr = buf; 991 if (tp->right.info.num[1] == BRACE_INF) 992 { 993 sprintf(buf, "{%u,inf}", 994 (unsigned)tp->right.info.num[0]); 995 } 996 else 997 { 998 sprintf(buf, "{%u,%u}", 999 (unsigned)tp->right.info.num[0], 1000 (unsigned)tp->right.info.num[1]); 1001 } 1002 kind = UNARY_ROP; 1003 break; 1004 case ROP_LP: 1005 opstr = buf; 1006 sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub); 1007 kind = UNARY_ROP; 1008 break; 1009 case ROP_RP: 1010 opstr = buf; 1011 sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub); 1012 kind = UNARY_ROP; 1013 break; 1014 case ROP_NOP: 1015 opstr = "<NOP>"; 1016 kind = LEAF_ROP; 1017 break; 1018 case ROP_BOL: 1019 opstr = "<BOL>"; 1020 kind = LEAF_ROP; 1021 break; 1022 case ROP_EOL: 1023 opstr = "<EOL>"; 1024 kind = LEAF_ROP; 1025 break; 1026 case ROP_ALL: 1027 opstr = "<ALL>"; 1028 kind = LEAF_ROP; 1029 break; 1030 case ROP_ANYCH: 1031 opstr = "<ANYCH>"; 1032 kind = LEAF_ROP; 1033 break; 1034 case ROP_NOTNL: 1035 opstr = "<NOTNL>"; 1036 kind = LEAF_ROP; 1037 break; 1038 case ROP_EMPTY: 1039 opstr = "<MT>"; 1040 kind = LEAF_ROP; 1041 break; 1042 case ROP_NONE: 1043 opstr = "<NONE>"; 1044 kind = LEAF_ROP; 1045 break; 1046 case ROP_BKT: 1047 opstr = buf; 1048 sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt); 1049 kind = LEAF_ROP; 1050 break; 1051 case ROP_BKTCOPY: 1052 opstr = buf; 1053 sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt); 1054 kind = LEAF_ROP; 1055 break; 1056 case ROP_LT: 1057 opstr = "\\<"; 1058 kind = LEAF_ROP; 1059 break; 1060 case ROP_GT: 1061 opstr = "\\>"; 1062 kind = LEAF_ROP; 1063 break; 1064 case ROP_REF: 1065 opstr = buf; 1066 sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub); 1067 kind = LEAF_ROP; 1068 break; 1069 case ROP_END: 1070 opstr = "<END>"; 1071 kind = LEAF_ROP; 1072 break; 1073 default: 1074 opstr = buf; 1075 if (tp->op > UCHAR_MAX) 1076 sprintf(buf, "W%#x", tp->op); 1077 else if (tp->op <= 0) 1078 sprintf(buf, "UNK=%u", tp->op); 1079 else 1080 sprintf(buf, "%c", tp->op); 1081 kind = LEAF_ROP; 1082 break; 1083 } 1084 if (kind == BINARY_ROP) 1085 libuxre_regtree(tp->right.ptr, -next); 1086 printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr); 1087 if (kind != LEAF_ROP) 1088 libuxre_regtree(tp->left.ptr, next); 1089 } 1090 1091 #endif /*REGDEBUG*/