bracket.c (18517B)
1 /* 2 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 3 * 4 * Sccsid @(#)bracket.c 1.14 (gritter) 10/18/03 5 */ 6 /* UNIX(R) Regular Expresssion Library 7 * 8 * Note: Code is released under the GNU LGPL 9 * 10 * Copyright (C) 2001 Caldera International, Inc. 11 * 12 * This library is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2 of the License, or (at your option) any later version. 16 * 17 * This library is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with this library; if not, write to: 24 * Free Software Foundation, Inc. 25 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 */ 27 28 /* #include "synonyms.h" */ 29 #include <ctype.h> 30 #include <stdlib.h> 31 #include <string.h> 32 #include "re.h" 33 34 /* 35 * Build and match the [...] part of REs. 36 * 37 * In general, each compiled bracket construct holds a set of mapped 38 * wide character values and a set of character classifications. 39 * The mapping applied (when the current LC_COLLATE is not CHF_ENCODED) 40 * is the "basic" weight (cep->weight[0]); otherwise the actual wide 41 * character is used. 42 * 43 * To support simplified range handling, this code assumes that a w_type, 44 * a signed integer type, can hold all valid basic weight values (as well 45 * as all wide character values for CHF_ENCODED locales) and that these 46 * are all positive. Negative values indicate error conditions (BKT_*); 47 * zero (which must be the same as WGHT_IGNORE) indicates success, but 48 * that the item installed is not a range endpoint. 49 */ 50 51 static int 52 addwide(Bracket *bp, wchar_t ord) 53 { 54 unsigned int nw; 55 56 if ((nw = bp->nwide) < NWIDE) 57 bp->wide[nw] = ord; 58 else 59 { 60 if (nw % NWIDE == 0 && (bp->exwide = 61 realloc(bp->exwide, nw * sizeof(wchar_t))) == 0) 62 { 63 return BKT_ESPACE; 64 } 65 nw -= NWIDE; 66 bp->exwide[nw] = ord; 67 } 68 bp->nwide++; 69 return 0; 70 } 71 72 #if USHRT_MAX == 65535 /* have 16 bits */ 73 #define PLIND(n) ((n) >> 4) 74 #define PLBIT(n) (1 << ((n) & 0xf)) 75 #else 76 #define PLIND(n) ((n) / CHAR_BIT) 77 #define PLBIT(n) (1 << ((n) % CHAR_BIT)) 78 #endif 79 80 #define RANGE ((wchar_t)'-') /* separates wide chars in ranges */ 81 82 static int 83 addrange(Bracket *bp, wchar_t ord, w_type prev) 84 { 85 int ret; 86 87 if (prev > 0 && prev != ord) /* try for range */ 88 { 89 if (prev > ord) 90 { 91 if (bp->flags & BKT_ODDRANGE) /* prev only - done */ 92 return 0; 93 else if ((bp->flags & BKT_BADRANGE) == 0) 94 return BKT_ERANGE; 95 } 96 else 97 { 98 if (++prev <= UCHAR_MAX) /* "prev" already there */ 99 { 100 do 101 { 102 bp->byte[PLIND(prev)] |= PLBIT(prev); 103 if (prev == ord) 104 return 0; 105 } while (++prev <= UCHAR_MAX); 106 } 107 if ((ret = addwide(bp, prev)) != 0) 108 return ret; 109 if (++prev > ord) 110 return 0; 111 if (prev < ord && (ret = addwide(bp, RANGE)) != 0) 112 return ret; 113 return addwide(bp, ord); 114 } 115 } 116 if (ord <= UCHAR_MAX) 117 { 118 bp->byte[PLIND(ord)] |= PLBIT(ord); 119 return 0; 120 } 121 if (prev == ord) /* don't bother */ 122 return 0; 123 return addwide(bp, ord); 124 } 125 126 static w_type 127 place(Bracket *bp, wchar_t wc, w_type prev, int mb_cur_max) 128 { 129 const CollElem *cep; 130 CollElem spare; 131 int ret; 132 133 if ((cep = libuxre_collelem(bp->col, &spare, wc)) != ELEM_ENCODED) 134 { 135 if (cep == ELEM_BADCHAR) 136 return BKT_BADCHAR; 137 wc = cep->weight[0]; 138 } 139 if ((ret = addrange(bp, wc, prev)) != 0) 140 return ret; 141 return wc; 142 } 143 144 #ifndef CHARCLASS_NAME_MAX 145 # define CHARCLASS_NAME_MAX 127 146 #endif 147 148 static w_type 149 chcls(Bracket *bp, const unsigned char *s, int n) 150 { 151 char clsstr[CHARCLASS_NAME_MAX + 1]; 152 unsigned int nt; 153 wctype_t wct; 154 155 if (n > CHARCLASS_NAME_MAX) 156 return BKT_ECTYPE; 157 (void)memcpy(clsstr, s, n); 158 clsstr[n] = '\0'; 159 if ((wct = wctype(clsstr)) == 0) 160 return BKT_ECTYPE; 161 if ((nt = bp->ntype) < NTYPE) 162 bp->type[nt] = wct; 163 else 164 { 165 if (nt % NTYPE == 0 && (bp->extype = 166 realloc(bp->extype, nt * sizeof(wctype_t))) == 0) 167 { 168 return BKT_ESPACE; 169 } 170 nt -= NTYPE; 171 bp->extype[nt] = wct; 172 } 173 bp->ntype++; 174 return 0; /* cannot be end point of a range */ 175 } 176 177 /* 178 * The purpose of mcce() and its Mcce structure is to locate 179 * the next full collation element from "wc" and "s". It is 180 * called both at compile and execute time. These two differ 181 * primarily in that at compile time there is an exact number 182 * of bytes to be consumed, while at execute time the longest 183 * valid collation element is to be found. 184 * 185 * When BKT_ONECASE is set, MCCEs become particularly messy. 186 * There is no guarantee that all possible combinations of 187 * upper/lower case are defined as MCCEs. Thus, this code 188 * tries both lower- and uppercase (in that order) for each 189 * character than might be part of an MCCE. 190 */ 191 192 typedef struct 193 { 194 const unsigned char *max; /* restriction by caller */ 195 const unsigned char *aft; /* longest successful */ 196 Bracket *bp; /* readonly */ 197 struct lc_collate *col; /* readonly */ 198 const CollElem *cep; /* entry matching longest */ 199 wchar_t ch; /* initial character (if any) */ 200 w_type wc; /* character matching "aft" */ 201 } Mcce; 202 203 static int 204 mcce(Mcce *mcp, const CollElem *cep, const unsigned char *s, int mb_cur_max, 205 int compile_time) 206 { 207 const CollElem *nxt; 208 CollElem spare; 209 w_type ch, wc; 210 int i; 211 212 /* 213 * Get next character. 214 */ 215 if ((wc = mcp->ch) != '\0') 216 { 217 mcp->ch = '\0'; 218 } 219 else if (ISONEBYTE(wc = *s++)) 220 { 221 if (wc == '\0') 222 return 0; 223 } 224 else if ((i = libuxre_mb2wc(&wc, s)) > 0) 225 { 226 s += i; 227 if (mcp->max != 0 && s > mcp->max) 228 return 0; 229 } 230 else if (i < 0) 231 return BKT_ILLSEQ; 232 /* 233 * Try out the this character as part of an MCCE. 234 * If BKT_ONECASE is set, this code tries both the lower- and 235 * uppercase version, continuing if it matches so far. 236 */ 237 ch = wc; 238 if (mcp->bp->flags & BKT_ONECASE) 239 { 240 if ((wc = to_lower(wc)) == ch) 241 ch = to_upper(wc); 242 } 243 for (;;) /* at most twice */ 244 { 245 if (cep == ELEM_BADCHAR) /* first character */ 246 { 247 if ((nxt = libuxre_collelem(mcp->col, &spare, wc)) 248 == ELEM_ENCODED 249 || (mcp->col->flags & CHF_MULTICH) == 0 250 || s == mcp->max) 251 { 252 mcp->aft = s; 253 mcp->cep = nxt; 254 mcp->wc = wc; 255 break; 256 } 257 } 258 else 259 { 260 nxt = libuxre_collmult(mcp->col, cep, wc); 261 } 262 if (nxt != ELEM_BADCHAR) 263 { 264 /* 265 * Okay so far. Record this collating element 266 * if it's really one (not WGHT_IGNORE) and 267 * we've reached a new high point or it's the 268 * first match. 269 * 270 * If there's a possibility for more, call mcce() 271 * recursively for the subsequent characters. 272 */ 273 if (nxt->weight[0] != WGHT_IGNORE 274 && (mcp->aft < s || mcp->cep == ELEM_BADCHAR)) 275 { 276 mcp->aft = s; 277 mcp->cep = nxt; 278 mcp->wc = wc; 279 } 280 if (nxt->multbeg != 0 281 && (mcp->max == 0 || s < mcp->max)) 282 { 283 if ((i = mcce(mcp, nxt, s, mb_cur_max, 284 compile_time)) != 0) 285 return i; 286 } 287 } 288 if (wc == ch) 289 break; 290 wc = ch; 291 } 292 return 0; 293 } 294 295 static w_type 296 eqcls(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max) 297 { 298 w_type last; 299 Mcce mcbuf; 300 int err; 301 302 mcbuf.max = &s[n]; 303 mcbuf.aft = &s[0]; 304 mcbuf.bp = bp; 305 mcbuf.col = bp->col; 306 mcbuf.cep = ELEM_BADCHAR; 307 mcbuf.ch = '\0'; 308 if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0) 309 return err; 310 if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max) 311 return BKT_EEQUIV; 312 last = mcbuf.wc; 313 if (mcbuf.cep != ELEM_ENCODED && mcbuf.col->nweight > 1) 314 { 315 const CollElem *cep; 316 317 /* 318 * The first and last weight[0] values for equivalence 319 * classes are stuffed into the terminator for the 320 * multiple character lists. If these values are 321 * scattered (elements that are not part of this 322 * equivalence class have weight[0] values between the 323 * two end points), then SUBN_SPECIAL is placed in 324 * this terminator. Note that weight[1] of the 325 * terminator must be other than WGHT_IGNORE, too. 326 */ 327 last = mcbuf.cep->weight[0]; 328 if ((cep = libuxre_collmult(bp->col, mcbuf.cep, 0)) 329 != ELEM_BADCHAR 330 && cep->weight[1] != WGHT_IGNORE) 331 { 332 last = cep->weight[1]; 333 if (cep->subnbeg == SUBN_SPECIAL) 334 { 335 unsigned int nq; 336 337 /* 338 * Permit ranges up to the first and 339 * after the last. 340 */ 341 if (prev > 0 && prev != cep->weight[0] 342 && (prev = addrange(bp, 343 cep->weight[0], prev)) != 0) 344 { 345 return prev; 346 } 347 /* 348 * Record the equivalence class by storing 349 * the primary weight. 350 */ 351 if ((nq = bp->nquiv) < NQUIV) 352 bp->quiv[nq] = mcbuf.cep->weight[1]; 353 else 354 { 355 if (nq % NQUIV == 0 && (bp->exquiv = 356 realloc(bp->exquiv, 357 nq * sizeof(wuchar_type))) 358 == 0) 359 { 360 return REG_ESPACE; 361 } 362 nq -= NQUIV; 363 bp->exquiv[nq] = mcbuf.cep->weight[1]; 364 } 365 bp->nquiv++; 366 return last; 367 } 368 mcbuf.cep = cep; 369 } 370 mcbuf.wc = mcbuf.cep->weight[0]; 371 } 372 /* 373 * Determine range, if any, to install. 374 * 375 * If there's a pending low (prev > 0), then try to use it. 376 * 377 * Otherwise, try to use mcbuf.wc as the low end of the range. 378 * Since addrange() assumes that the low point has already been 379 * placed, we try to fool it by using a prev of one less than 380 * mcbuf.wc. But, if that value would not look like a valid 381 * low point of a range, we have to explicitly place mcbuf.wc. 382 */ 383 if (prev <= 0 && (prev = mcbuf.wc - 1) <= 0) 384 { 385 if ((prev = addrange(bp, mcbuf.wc, 0)) != 0) 386 return prev; 387 } 388 if ((mcbuf.wc = addrange(bp, last, prev)) != 0) 389 return mcbuf.wc; 390 return last; 391 } 392 393 static w_type 394 clsym(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max) 395 { 396 Mcce mcbuf; 397 int err; 398 399 mcbuf.max = &s[n]; 400 mcbuf.aft = &s[0]; 401 mcbuf.bp = bp; 402 mcbuf.col = bp->col; 403 mcbuf.cep = ELEM_BADCHAR; 404 mcbuf.ch = '\0'; 405 if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0) 406 return err; 407 if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max) 408 return BKT_ECOLLATE; 409 if (mcbuf.cep != ELEM_ENCODED) 410 mcbuf.wc = mcbuf.cep->weight[0]; 411 if ((err = addrange(bp, mcbuf.wc, prev)) != 0) 412 return err; 413 return mcbuf.wc; 414 } 415 416 /* 417 * Scans the rest of a bracket construction within a regular 418 * expression and fills in a description for it. 419 * The leading [ and the optional set complement indicator 420 * were handled already by the caller. 421 * Returns: 422 * <0 error (a BKT_* value) 423 * >0 success; equals how many bytes were scanned. 424 */ 425 LIBUXRE_STATIC int 426 libuxre_bktmbcomp(Bracket *bp, const unsigned char *pat0, 427 int flags, int mb_cur_max) 428 { 429 static const Bracket zero = {0}; 430 const unsigned char *pat = pat0; 431 struct lc_collate *savecol; 432 w_type n, wc, prev = 0; 433 434 /* 435 * Set represented set to empty. Easiest to copy an empty 436 * version over the caller's, (re)setting col and flags. 437 */ 438 savecol = bp->col; 439 *bp = zero; 440 bp->col = savecol; 441 bp->flags = flags 442 & (BKT_NEGATED | BKT_ONECASE | BKT_NOTNL | BKT_BADRANGE | 443 BKT_ODDRANGE); 444 /* 445 * Handle optional "empty" brackets; typically only used 446 * in combination with BKT_QUOTE or BKT_ESCAPE. 447 */ 448 if ((wc = *pat) == ']' && (flags & BKT_EMPTY) != 0) 449 return 1; 450 /* 451 * Populate *bp. 452 */ 453 for (;; prev = n) 454 { 455 switch (wc) 456 { 457 case '\0': 458 ebrack:; 459 n = BKT_EBRACK; 460 goto err; 461 case '\n': 462 if (flags & BKT_NLBAD) 463 goto ebrack; 464 goto regular; 465 case '/': 466 if (flags & BKT_SLASHBAD) 467 goto ebrack; 468 goto regular; 469 case '\\': 470 if ((flags & (BKT_ESCAPE | BKT_QUOTE 471 | BKT_ESCNL | BKT_ESCSEQ)) == 0) 472 { 473 goto regular; 474 } 475 switch (wc = *++pat) 476 { 477 default: 478 noesc:; 479 if ((flags & BKT_ESCAPE) == 0) 480 { 481 wc = '\\'; 482 pat--; 483 } 484 break; 485 case '\\': 486 case ']': 487 case '-': 488 case '^': 489 if ((flags & BKT_QUOTE) == 0) 490 goto noesc; 491 break; 492 case 'a': 493 if ((flags & BKT_ESCSEQ) == 0 || 494 (flags & BKT_OLDESC)) 495 goto noesc; 496 wc = '\a'; 497 break; 498 case 'b': 499 if ((flags & BKT_ESCSEQ) == 0) 500 goto noesc; 501 wc = '\b'; 502 break; 503 case 'f': 504 if ((flags & BKT_ESCSEQ) == 0) 505 goto noesc; 506 wc = '\f'; 507 break; 508 case 'n': 509 if ((flags & (BKT_ESCSEQ | BKT_ESCNL)) == 0) 510 goto noesc; 511 wc = '\n'; 512 break; 513 case 'r': 514 if ((flags & BKT_ESCSEQ) == 0) 515 goto noesc; 516 wc = '\r'; 517 break; 518 case 't': 519 if ((flags & BKT_ESCSEQ) == 0) 520 goto noesc; 521 wc = '\t'; 522 break; 523 case 'v': 524 if ((flags & BKT_ESCSEQ) == 0 || 525 (flags & BKT_OLDESC)) 526 goto noesc; 527 wc = '\v'; 528 break; 529 case 'x': 530 if ((flags & BKT_ESCSEQ) == 0 || 531 (flags & BKT_OLDESC)) 532 goto noesc; 533 if (!isxdigit(wc = *++pat)) 534 { 535 pat--; 536 goto noesc; 537 } 538 /* 539 * Take as many hex digits as possible, 540 * ignoring overflows. 541 * Any positive result is okay. 542 */ 543 n = 0; 544 do 545 { 546 if (isdigit(wc)) 547 wc -= '0'; 548 else if (isupper(wc)) 549 wc -= 'A' + 10; 550 else 551 wc -= 'a' + 10; 552 n <<= 4; 553 n |= wc; 554 } while (isxdigit(wc = *++pat)); 555 pat--; 556 if ((wc = n) <= 0) 557 { 558 n = BKT_BADESC; 559 goto err; 560 } 561 break; 562 case '0': 563 case '1': 564 case '2': 565 case '3': 566 case '4': 567 case '5': 568 case '6': 569 case '7': 570 case '8': 571 case '9': 572 if ((flags & BKT_ESCSEQ) == 0 || 573 (flags & BKT_OLDESC)) 574 goto noesc; 575 /* 576 * For compatibility (w/awk), 577 * permit "octal" 8 and 9. 578 */ 579 n = wc - '0'; 580 if ((wc = *++pat) >= '0' && wc <= '9') 581 { 582 n <<= 3; 583 n += wc - '0'; 584 if ((wc = *++pat) >= '0' && wc <= '9') 585 { 586 n <<= 3; 587 n += wc - '0'; 588 } 589 } 590 pat--; 591 if ((wc = n) <= 0) 592 { 593 n = BKT_BADESC; 594 goto err; 595 } 596 break; 597 } 598 goto regular; 599 case '[': 600 if (((wc = *++pat) == ':' || wc == '=' || wc == '.') && 601 (flags & BKT_NOI18N) == 0) 602 { 603 n = 0; 604 while (*++pat != wc || pat[1] != ']') 605 { 606 if (*pat == '\0') 607 { 608 badpat:; 609 n = BKT_BADPAT; 610 goto err; 611 } 612 else if (*pat == '/') 613 { 614 if (flags & BKT_SLASHBAD) 615 goto badpat; 616 } 617 else if (*pat == '\n') 618 { 619 if (flags & BKT_NLBAD) 620 goto badpat; 621 } 622 n++; 623 } 624 if (n == 0) 625 { 626 n = BKT_EMPTYSUBBKT; 627 goto err; 628 } 629 if (wc == ':') 630 n = chcls(bp, &pat[-n], n); 631 else if (wc == '=') 632 n = eqcls(bp, &pat[-n], n, prev, 633 mb_cur_max); 634 else /* wc == '.' */ 635 n = clsym(bp, &pat[-n], n, prev, 636 mb_cur_max); 637 pat++; 638 break; 639 } 640 wc = '['; 641 pat--; 642 goto regular; 643 default: 644 if (!ISONEBYTE(wc) && 645 (n = libuxre_mb2wc(&wc, pat + 1)) > 0) 646 pat += n; 647 regular:; 648 n = place(bp, wc, prev, mb_cur_max); 649 break; 650 } 651 if (n < 0) { 652 n = BKT_ILLSEQ; 653 goto err; 654 } 655 if ((wc = *++pat) == ']') 656 break; 657 if (wc == '-' && n != 0) 658 { 659 if (prev == 0 || (flags & BKT_SEPRANGE) == 0) 660 { 661 if ((wc = *++pat) != ']') 662 continue; /* valid range */ 663 wc = '-'; 664 pat--; 665 } 666 } 667 n = 0; /* no range this time */ 668 } 669 return pat - pat0 + 1; 670 err:; 671 libuxre_bktfree(bp); 672 return n; 673 } 674 675 LIBUXRE_STATIC void 676 libuxre_bktfree(Bracket *bp) 677 { 678 if (bp->extype != 0) 679 free(bp->extype); 680 if (bp->exquiv != 0) 681 free(bp->exquiv); 682 if (bp->exwide != 0) 683 free(bp->exwide); 684 } 685 686 LIBUXRE_STATIC int 687 libuxre_bktmbexec(Bracket *bp, wchar_t wc, 688 const unsigned char *str, int mb_cur_max) 689 { 690 unsigned int i; 691 wchar_t lc, uc; 692 Mcce mcbuf; 693 694 mcbuf.aft = str; /* in case of match in character classes */ 695 mcbuf.ch = wc; 696 /* 697 * First: check the single wc against any character classes. 698 * Since multiple character collating elements are not part 699 * of this world, they don't apply here. 700 */ 701 if ((i = bp->ntype) != 0) 702 { 703 wctype_t *wctp = &bp->type[0]; 704 705 if (bp->flags & BKT_ONECASE) 706 { 707 if ((wc = to_lower(wc)) == mcbuf.ch) 708 mcbuf.ch = to_upper(wc); 709 } 710 for (;;) 711 { 712 if (iswctype(mb_cur_max==1?btowc(wc):wc, *wctp)) 713 goto match; 714 if (wc != mcbuf.ch && 715 iswctype(mb_cur_max==1?btowc(mcbuf.ch):mcbuf.ch, 716 *wctp)) 717 goto match; 718 if (--i == 0) 719 break; 720 if (++wctp == &bp->type[NTYPE]) 721 wctp = &bp->extype[0]; 722 } 723 } 724 /* 725 * The main match is determined by the weight[0] value 726 * of the character (or characters, if the input can be 727 * taken as a multiple character collating element). 728 */ 729 mcbuf.max = 0; 730 mcbuf.bp = bp; 731 mcbuf.col = bp->col; 732 mcbuf.cep = ELEM_BADCHAR; 733 mcce(&mcbuf, ELEM_BADCHAR, str, mb_cur_max, 0); 734 if (mcbuf.cep == ELEM_BADCHAR) 735 return -1; /* never matches */ 736 if (mcbuf.cep != ELEM_ENCODED) 737 mcbuf.wc = mcbuf.cep->weight[0]; 738 /* 739 * POSIX.2 demands that both a character and its case counterpart 740 * can match if REG_ICASE is set. This means that [B-z] matches 741 * 'A', 'a', and '['. 742 */ 743 if (bp->flags & BKT_ONECASE) 744 { 745 lc = to_lower(mcbuf.wc); 746 uc = to_upper(mcbuf.wc); 747 } 748 else 749 lc = uc = mcbuf.wc; 750 /* 751 * See if it's in the set. Note that the list of true wide 752 * character values has explicit ranges. 753 */ 754 if (mcbuf.wc <= UCHAR_MAX) 755 { 756 if (bp->byte[PLIND(lc)] & PLBIT(lc)) 757 goto match; 758 if (lc != uc && (bp->byte[PLIND(uc)] & PLBIT(uc))) 759 goto match; 760 } 761 else if ((i = bp->nwide) != 0) 762 { 763 wchar_t *wcp = &bp->wide[0]; 764 long lcmp, ucmp; 765 766 for (;;) 767 { 768 if ((lcmp = lc - *wcp) == 0) 769 goto match; 770 ucmp = uc - *wcp; 771 if (lc != uc && ucmp == 0) 772 goto match; 773 if (--i == 0) 774 break; 775 if (++wcp == &bp->wide[NWIDE]) 776 wcp = &bp->exwide[0]; 777 if (*wcp == RANGE) 778 { 779 if (++wcp == &bp->wide[NWIDE]) 780 wcp = &bp->exwide[0]; 781 if (lcmp > 0 && lc <= *wcp) 782 goto match; 783 if (lc != uc && ucmp > 0 && uc < *wcp) 784 goto match; 785 if ((i -= 2) == 0) 786 break; 787 if (++wcp == &bp->wide[NWIDE]) 788 wcp = &bp->exwide[0]; 789 } 790 } 791 } 792 /* 793 * The last chance for a match is if an equivalence class 794 * was specified for which the primary weights are scattered 795 * through the weight[0]s. 796 */ 797 if ((i = bp->nquiv) != 0 && mcbuf.cep != ELEM_ENCODED) 798 { 799 wuchar_type *wucp = &bp->quiv[0]; 800 801 mcbuf.wc = mcbuf.cep->weight[1]; 802 for (;;) 803 { 804 if (mcbuf.wc == *wucp) 805 goto match; 806 if (--i == 0) 807 break; 808 if (++wucp == &bp->quiv[NQUIV]) 809 wucp = &bp->exquiv[0]; 810 } 811 } 812 /* 813 * Only here when no match against the set was found. 814 * One final special case w/r/t newline. 815 */ 816 if (bp->flags & BKT_NEGATED) 817 { 818 if (wc != '\n' || (bp->flags & BKT_NOTNL) == 0) 819 return mcbuf.aft - str; 820 } 821 return -1; 822 match:; 823 /* 824 * Only here when a match against the described set is found. 825 */ 826 if (bp->flags & BKT_NEGATED) 827 return -1; 828 return mcbuf.aft - str; 829 }