hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

_collelem.c (3355B)


      1 /*
      2  * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
      3  *
      4  * Sccsid @(#)_collelem.c	1.4 (gritter) 10/18/03
      5  */
      6 /*  UNIX(R) Regular Expresssion Library
      7  *
      8  *  Note: Code is released under the GNU LGPL
      9  *
     10  *  Copyright (C) 2001 Caldera International, Inc.
     11  *
     12  *  This library is free software; you can redistribute it and/or
     13  *  modify it under the terms of the GNU Lesser General Public
     14  *  License as published by the Free Software Foundation; either
     15  *  version 2 of the License, or (at your option) any later version.
     16  *
     17  *  This library is distributed in the hope that it will be useful,
     18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20  *  Lesser General Public License for more details.
     21  *
     22  *  You should have received a copy of the GNU Lesser General Public
     23  *  License along with this library; if not, write to:
     24  *        Free Software Foundation, Inc.
     25  *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     26  */
     27 
     28 /*	#include "synonyms.h"	*/
     29 #include "colldata.h"
     30 #include <stddef.h>
     31 
     32 #define CCE(p)	((const CollElem *)(p))
     33 #define CCM(p)	((const CollMult *)(p))
     34 
     35 LIBUXRE_STATIC const CollElem *
     36 libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc)
     37 {
     38 	const char *tbl;
     39 	size_t hi, lo, cur;
     40 	const CollMult *cmp;
     41 	const CollElem *cep;
     42 	long diff;
     43 	int sz;
     44 
     45 	/*
     46 	* ELEM_ENCODED is returned when the collation is entirely
     47 	* based on the encoded value of the character.
     48 	*/
     49 	if (col == 0 || col->flags & CHF_ENCODED
     50 		|| (tbl = (const char *)col->maintbl) == 0)
     51 	{
     52 		return ELEM_ENCODED;
     53 	}
     54 	if ((wuchar_type)wc <= UCHAR_MAX)
     55 	{
     56 	indexed:;
     57 		cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]);
     58 		if (cep->weight[0] == WGHT_SPECIAL)
     59 			return ELEM_BADCHAR;
     60 		return cep;
     61 	}
     62 	if (col->flags & CHF_INDEXED)
     63 	{
     64 		if ((wuchar_type)wc >= col->nmain)
     65 			return ELEM_BADCHAR;
     66 		goto indexed;
     67 	}
     68 	/*
     69 	* Binary search for a match.  Could speed up the search if
     70 	* some interpolation was used, but keep it simple for now.
     71 	* Note that this is actually a table of CollMult's.
     72 	*
     73 	* To save space in the file, sequences of similar elements
     74 	* are sometimes compressed into a single CollMult that
     75 	* describes many entries.  This is denoted by a subnbeg
     76 	* with the SUBN_SPECIAL bit set.  The rest of the bits give
     77 	* the range covered by this entry.
     78 	*/
     79 	sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem));
     80 	tbl += (1 + UCHAR_MAX) * col->elemsize;
     81 	lo = 0;
     82 	hi = col->nmain - UCHAR_MAX;
     83 	while (lo < hi)
     84 	{
     85 		if ((cur = (hi + lo) >> 1) < lo)   /* hi+lo overflowed */
     86 			cur |= ~(~(size_t)0 >> 1); /* lost high order bit */
     87 		cmp = CCM(&tbl[cur * sz]);
     88 		if ((diff = wc - cmp->ch) < 0)
     89 			hi = cur;
     90 		else if (cmp->elem.subnbeg & SUBN_SPECIAL)
     91 		{
     92 			if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL))
     93 				lo = cur + 1;
     94 			else /* create an entry from the sequence in spare */
     95 			{
     96 				spare->multbeg = cmp->elem.multbeg;
     97 				spare->subnbeg = 0;
     98 				spare->weight[0] = cmp->elem.weight[0] + diff;
     99 				for (lo = 1; lo < col->nweight; lo++)
    100 				{
    101 					wuchar_type w;
    102 
    103 					if ((w = cmp->elem.weight[lo])
    104 						== WGHT_SPECIAL)
    105 					{
    106 						w = spare->weight[0];
    107 					}
    108 					spare->weight[lo] = w;
    109 				}
    110 				return spare;
    111 			}
    112 		}
    113 		else if (diff == 0)
    114 			return &cmp->elem;
    115 		else
    116 			lo = cur + 1;
    117 	}
    118 	return ELEM_BADCHAR;
    119 }