_collelem.c (3355B)
1 /* 2 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 3 * 4 * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 5 */ 6 /* UNIX(R) Regular Expresssion Library 7 * 8 * Note: Code is released under the GNU LGPL 9 * 10 * Copyright (C) 2001 Caldera International, Inc. 11 * 12 * This library is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2 of the License, or (at your option) any later version. 16 * 17 * This library is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with this library; if not, write to: 24 * Free Software Foundation, Inc. 25 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 */ 27 28 /* #include "synonyms.h" */ 29 #include "colldata.h" 30 #include <stddef.h> 31 32 #define CCE(p) ((const CollElem *)(p)) 33 #define CCM(p) ((const CollMult *)(p)) 34 35 LIBUXRE_STATIC const CollElem * 36 libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc) 37 { 38 const char *tbl; 39 size_t hi, lo, cur; 40 const CollMult *cmp; 41 const CollElem *cep; 42 long diff; 43 int sz; 44 45 /* 46 * ELEM_ENCODED is returned when the collation is entirely 47 * based on the encoded value of the character. 48 */ 49 if (col == 0 || col->flags & CHF_ENCODED 50 || (tbl = (const char *)col->maintbl) == 0) 51 { 52 return ELEM_ENCODED; 53 } 54 if ((wuchar_type)wc <= UCHAR_MAX) 55 { 56 indexed:; 57 cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]); 58 if (cep->weight[0] == WGHT_SPECIAL) 59 return ELEM_BADCHAR; 60 return cep; 61 } 62 if (col->flags & CHF_INDEXED) 63 { 64 if ((wuchar_type)wc >= col->nmain) 65 return ELEM_BADCHAR; 66 goto indexed; 67 } 68 /* 69 * Binary search for a match. Could speed up the search if 70 * some interpolation was used, but keep it simple for now. 71 * Note that this is actually a table of CollMult's. 72 * 73 * To save space in the file, sequences of similar elements 74 * are sometimes compressed into a single CollMult that 75 * describes many entries. This is denoted by a subnbeg 76 * with the SUBN_SPECIAL bit set. The rest of the bits give 77 * the range covered by this entry. 78 */ 79 sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem)); 80 tbl += (1 + UCHAR_MAX) * col->elemsize; 81 lo = 0; 82 hi = col->nmain - UCHAR_MAX; 83 while (lo < hi) 84 { 85 if ((cur = (hi + lo) >> 1) < lo) /* hi+lo overflowed */ 86 cur |= ~(~(size_t)0 >> 1); /* lost high order bit */ 87 cmp = CCM(&tbl[cur * sz]); 88 if ((diff = wc - cmp->ch) < 0) 89 hi = cur; 90 else if (cmp->elem.subnbeg & SUBN_SPECIAL) 91 { 92 if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL)) 93 lo = cur + 1; 94 else /* create an entry from the sequence in spare */ 95 { 96 spare->multbeg = cmp->elem.multbeg; 97 spare->subnbeg = 0; 98 spare->weight[0] = cmp->elem.weight[0] + diff; 99 for (lo = 1; lo < col->nweight; lo++) 100 { 101 wuchar_type w; 102 103 if ((w = cmp->elem.weight[lo]) 104 == WGHT_SPECIAL) 105 { 106 w = spare->weight[0]; 107 } 108 spare->weight[lo] = w; 109 } 110 return spare; 111 } 112 } 113 else if (diff == 0) 114 return &cmp->elem; 115 else 116 lo = cur + 1; 117 } 118 return ELEM_BADCHAR; 119 }