hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

colldata.h (8274B)


      1 /*
      2  * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
      3  *
      4  * Sccsid @(#)colldata.h	1.5 (gritter) 5/1/04
      5  */
      6 /*  UNIX(R) Regular Expression Library
      7  *
      8  *  Note: Code is released under the GNU LGPL
      9  *
     10  *  Copyright (C) 2001 Caldera International, Inc.
     11  *
     12  *  This library is free software; you can redistribute it and/or
     13  *  modify it under the terms of the GNU Lesser General Public
     14  *  License as published by the Free Software Foundation; either
     15  *  version 2 of the License, or (at your option) any later version.
     16  *
     17  *  This library is distributed in the hope that it will be useful,
     18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20  *  Lesser General Public License for more details.
     21  *
     22  *  You should have received a copy of the GNU Lesser General Public
     23  *  License along with this library; if not, write to:
     24  *        Free Software Foundation, Inc.
     25  *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     26  */
     27 
     28 #ifndef	LIBUXRE_COLLDATA_H
     29 #define	LIBUXRE_COLLDATA_H
     30 
     /*
      * Header record locating the xnd table and the subnd table, together
      * with the strings used by the subnd entries.
      * NOTE(review): presumably the header of the older collation format
      * (cf. the disabled libuxre_old_collate() prototype in this file) --
      * confirm against the code that reads it.
      */
     31 typedef struct
     32 {
     33 	long	coll_offst;	/* offset to xnd table */
     34 	long	sub_cnt;	/* length of subnd table */
     35 	long	sub_offst;	/* offset to subnd table */
     36 	long	str_offst;	/* offset to strings for subnd table */
     37 	long	flags;		/* nonzero if reg.exp. used */
     38 } hd;
     39 
     /*
      * One entry of the xnd table (the table hd.coll_offst points at):
      * four single bytes holding a character or follower count, its
      * primary and secondary weights, and an index into the follower
      * state list.
      */
     40 typedef struct
     41 {
     42 	unsigned char	ch;	/* character or number of followers */
     43 	unsigned char	pwt;	/* primary weight */
     44 	unsigned char	swt;	/* secondary weight */
     45 	unsigned char	ns;	/* index of follower state list */
     46 } xnd;
     47 
     /*
      * One entry of the subnd table (the table hd.sub_offst points at):
      * an expression, its length, and the string that replaces it.
      * NOTE(review): hd only records a string offset (str_offst), so the
      * two pointers are presumably set up after the data is loaded --
      * confirm against the loader.
      */
     48 typedef struct
     49 {
     50 	char	*exp;	/* expression to be replaced */
     51 	long	explen; /* length of expression */
     52 	char	*repl;	/* replacement string */
     53 } subnd;
     54 
     55 /*----------------------------------*/
     56 
     57 #include <wcharm.h>
     58 #include <limits.h>
     59 /*	#include <stdlock.h>	*/
     60 
     61 /*
     62 * Structure of a collation file:
     63 *  1. CollHead (maintbl is 0 if CHF_ENCODED)
     64 *   if !CHF_ENCODED then
     65 *    2. CollElem[bytes] (256 for 8 bit bytes)
     66 *    3. if CHF_INDEXED then
     67 *	 CollElem[wides] (nmain-256 for 8 bit bytes)
     68 *	else
     69 *	 CollMult[wides]
     70 *    4. CollMult[*] (none if multtbl is 0)
     71 *    5. wuchar_type[*] (none if repltbl is 0)
     72 *    6. CollSubn[*] (none if subntbl is 0)
     73 *    7. strings (first is pathname for .so if CHF_DYNAMIC)
     74 *
     75 * The actual location of parts 2 through 7 is not important.
     76 *
     77 * The main table is in encoded value order.
     78 *
     79 * All indices/offsets must be nonzero to be effective; zero is reserved
     80 * to indicate no-such-entry.  This implies either that an unused initial
     81 * entry is placed in each of (4) through (7), or that the "start offset"
     82 * given by the header is artificially pushed back by an entry size.
     83 *
     84 * Note that if CHF_ENCODED is not set, then nweight must be positive.
     85 *
     86 * If an element can begin a multiple character element, it contains a
     87 * nonzero multbeg which is the initial index into (4) for its list;
     88 * the list is terminated by a CollMult with a ch of zero.
     89 *
     90 * If there are elements with the same primary weight (weight[1]), then
     91 * for each such element, it must have a CollMult list.  The CollMult
     92 * that terminates the list (ch==0) notes the lowest and highest basic
     93 * weights for those elements with that same primary weight value
     94 * respectively in weight[0] and weight[1].  If there are some basic
     95 * weights between these values that do not have the same primary
     96 * weight--are not in the equivalence class--then the terminator also
     97 * has a SUBN_SPECIAL mark.  Note that this list terminator should be
     98 * shared when the elements are not multiple character collating
     99 * elements because they wouldn't otherwise have a CollMult list.
    100 *
    101 * WGHT_IGNORE is used to denote ignored collating elements for a
    102 * particular collation ordering pass.  All main table entries other
    103 * than for '\0' will have a non-WGHT_IGNORE weight[0].  However, it is
    104 * possible for CollMult entries from (4) to have a WGHT_IGNORE
    105 * weight[0]:  If, for example, "xyz" is a multiple character collating
    106 * element, but "xy" is not, then the CollMult for "y" will have a
    107 * WGHT_IGNORE weight[0].  Also, WGHT_IGNORE is used to terminate each
    108 * list of replacement weights.
    109 *
    110 * Within (3), it is possible to describe a sequence of unremarkable
    111 * collating elements with a single CollMult entry.  If the SUBN_SPECIAL
    112 * bit is set, the rest of subnbeg represents the number of collating
    113 * elements covered by this entry.  The weight[0] values are determined
    114 * by adding the difference between the encoded value and the entry's ch
    115 * value to the entry's weight[0].  This value is then substituted for
    116 * any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem()
    117 * hides any match to such an entry by filling in a "spare" CollElem.
    118 *
    119 * If there are substitution strings, then for each character that begins
    120 * a string, it has a nonzero subnbeg which is similarly the initial
    121 * index into (6).  The indices in (6) refer to offsets within (7).
    122 */
    123 
    /*
     * TOPBIT(t): a value in which only the most significant bit of the
     * integer type t is set; sizeof(t) * CHAR_BIT - 1 is the index of that
     * bit.  This file applies it to wuchar_type and unsigned short only.
     * Do not use it with a signed type as wide as int or wider: shifting
     * a 1 into the sign bit is undefined behavior.
     */
    124 #define TOPBIT(t)	(((t)1) << (sizeof(t) * CHAR_BIT - 1))
    125 
    /* Bits of the flags member of CollHead, the header of a collation file. */
    126 #define CHF_ENCODED	0x1	/* collation by encoded values only */
    127 #define CHF_INDEXED	0x2	/* main table indexed by encoded values */
    128 #define CHF_MULTICH	0x4	/* a multiple char. coll. elem. exists */
    129 #define CHF_DYNAMIC	0x8	/* shared object has collation functions */
    130 
    /* Bits stored per weight in the order[] member of CollHead. */
    131 #define CWF_BACKWARD	0x1	/* reversed ordering for this weight */
    132 #define CWF_POSITION	0x2	/* weight takes position into account */
    133 
    /* NOTE(review): presumably the value expected in CollHead.version -- confirm. */
    134 #define CLVERS		1	/* most recent version */
    135 
    /*
     * Marker values used inside the collation tables.  WGHT_SPECIAL is the
     * top bit of a weight (type wuchar_type); SUBN_SPECIAL is the top bit
     * of an unsigned short, which is the type of CollElem.subnbeg.
     */
    136 #define WGHT_IGNORE	0	/* ignore this collating element */
    137 #define WGHT_SPECIAL	TOPBIT(wuchar_type)
    138 #define SUBN_SPECIAL	TOPBIT(unsigned short)
    139 
    /*
     * Fallback for environments in which no header included so far defines
     * COLL_WEIGHTS_MAX.  The value sizes the order[] and weight[] arrays
     * declared in this file, so it affects the layout of those structures.
     */
    140 #ifndef	COLL_WEIGHTS_MAX
    141 #define	COLL_WEIGHTS_MAX	1
    142 #endif
    143 
    /*
     * Part 1 of a collation file: the start positions of the other parts
     * plus the parameters that apply to the whole file.  A table start of
     * zero means the corresponding table is absent (maintbl is 0 when
     * CHF_ENCODED is set in flags).
     */
    144 typedef struct
    145 {
    146 	unsigned long	maintbl;	/* start of main table */
    147 	unsigned long	multtbl;	/* start of multi-char table */
    148 	unsigned long	repltbl;	/* start of replacement weights */
    149 	unsigned long	subntbl;	/* start of substitutions */
    150 	unsigned long	strstbl;	/* start of sub. strings */
    151 	unsigned long	nmain;		/* # entries in main table */
    152 	unsigned short	flags;		/* CHF_* bits */
    153 	unsigned short	version;	/* handle future changes */
    154 	unsigned char	elemsize;	/* # bytes/element (w/padding) */
    155 	unsigned char	nweight;	/* # weights/element */
    156 	unsigned char	order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */
    157 } CollHead;
    158 
    /*
     * One collating element of the main table.  A nonzero multbeg is the
     * initial index of the element's CollMult list.  A nonzero subnbeg is
     * the initial index of its substitutions, unless the SUBN_SPECIAL bit
     * is set, in which case the remaining bits carry a count instead.
     * weight[] has room for COLL_WEIGHTS_MAX weights.
     */
    159 typedef struct
    160 {
    161 	unsigned short	multbeg;	/* start of multi-chars */
    162 	unsigned short	subnbeg;	/* start of substitutions */
    163 	wuchar_type	weight[COLL_WEIGHTS_MAX];
    164 } CollElem;
    165 
    /*
     * Entry of a multiple character list: one character of a sequence and
     * the CollElem data that belongs to it.  An entry whose ch is zero
     * terminates a list.
     */
    166 typedef struct
    167 {
    168 	wchar_t		ch;	/* "this" character (of sequence) */
    169 	CollElem	elem;	/* its full information */
    170 } CollMult;
    171 
    /*
     * Entry of the substitution table: a match string, its length, and
     * its replacement.
     * NOTE(review): strbeg and repbeg are presumably offsets into the
     * strings part of the collation file -- confirm against the reader.
     */
    172 typedef struct
    173 {
    174 	unsigned short	strbeg;		/* start of match string */
    175 	unsigned short	length;		/* length of match string */
    176 	unsigned short	repbeg;		/* start of replacement */
    177 } CollSubn;
    178 
    /*
     * Collation data as passed to the libuxre_* functions declared in this
     * file: read-only pointers to the individual tables, followed by
     * members whose names and types match those of CollHead (nmain, flags,
     * elemsize, nweight, order).
     *
     * The function pointer members exist only when DSHLIB is defined, so
     * the layout of this structure depends on that macro; every
     * translation unit sharing a struct lc_collate must agree on it.
     *
     * NOTE(review): mapobj/mapsize presumably describe the loaded file
     * image and nuse a use count -- not visible here, confirm in the
     * implementation.
     */
    179 struct lc_collate
    180 {
    181 	const unsigned char	*strstbl;
    182 	const wuchar_type	*repltbl;
    183 	const CollElem		*maintbl;
    184 	const CollMult		*multtbl;
    185 	const CollSubn		*subntbl;
    186 #ifdef DSHLIB
    187 	void	*handle;
    188 	void	(*done)(struct lc_collate *);
    189 	int	(*strc)(struct lc_collate *, const char *, const char *);
    190 	int	(*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *);
    191 	size_t	(*strx)(struct lc_collate *, char *, const char *, size_t);
    192 	size_t	(*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t);
    193 #endif
    194 	const char		*mapobj;
    195 	size_t			mapsize;
    196 	unsigned long		nmain;
    197 	short			nuse;
    198 	unsigned short		flags;
    199 	unsigned char		elemsize;
    200 	unsigned char		nweight;
    201 	unsigned char		order[COLL_WEIGHTS_MAX];
    202 };
    203 
    /*
     * Distinguished CollElem pointer values: a null pointer and the
     * integer -1 converted to a pointer.
     * NOTE(review): presumably returned in place of a real element for an
     * invalid character and for collation by encoded value respectively
     * -- confirm against libuxre_collelem().
     */
    204 #define ELEM_BADCHAR	((CollElem *)0)
    205 #define ELEM_ENCODED	((CollElem *)-1)
    206 
    /*
     * Declarations of the collation support functions.  The prototypes
     * enclosed in comments are disabled in this version of the header.
     * LIBUXRE_STATIC is not defined in this file; it is expected to come
     * from elsewhere (an included header or the build flags).
     */
    207 /*
    208 LIBUXRE_STATIC int	libuxre_old_collate(struct lc_collate *);
    209 LIBUXRE_STATIC int	libuxre_strqcoll(struct lc_collate *, const char *,
    210 				const char *);
    211 LIBUXRE_STATIC int	libuxre_wcsqcoll(struct lc_collate *, const wchar_t *,
    212 				const wchar_t *);
    213 */
    214 extern struct lc_collate *libuxre_lc_collate(struct lc_collate *);
    /*
     * NOTE(review): the CollElem * argument of libuxre_collelem() is
     * presumably the "spare" element mentioned in the file layout comment
     * -- confirm against the implementation.
     */
    215 LIBUXRE_STATIC const CollElem	*libuxre_collelem(struct lc_collate *,
    216 					CollElem *, wchar_t);
    217 LIBUXRE_STATIC const CollElem	*libuxre_collmult(struct lc_collate *,
    218 					const CollElem *, wchar_t);
    219 /*
    220 LIBUXRE_STATIC const CollElem	*libuxre_collmbs(struct lc_collate *,
    221 					CollElem *, const unsigned char **);
    222 LIBUXRE_STATIC const CollElem	*libuxre_collwcs(struct lc_collate *,
    223 					CollElem *, const wchar_t **);
    224 */
    225 
    226 #endif	/* !LIBUXRE_COLLDATA_H */