colldata.h (8274B)
1 /* 2 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 3 * 4 * Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04 5 */ 6 /* UNIX(R) Regular Expresssion Library 7 * 8 * Note: Code is released under the GNU LGPL 9 * 10 * Copyright (C) 2001 Caldera International, Inc. 11 * 12 * This library is free software; you can redistribute it and/or 13 * modify it under the terms of the GNU Lesser General Public 14 * License as published by the Free Software Foundation; either 15 * version 2 of the License, or (at your option) any later version. 16 * 17 * This library is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 20 * Lesser General Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser General Public 23 * License along with this library; if not, write to: 24 * Free Software Foundation, Inc. 25 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 26 */ 27 28 #ifndef LIBUXRE_COLLDATA_H 29 #define LIBUXRE_COLLDATA_H 30 31 typedef struct 32 { 33 long coll_offst; /* offset to xnd table */ 34 long sub_cnt; /* length of subnd table */ 35 long sub_offst; /* offset to subnd table */ 36 long str_offst; /* offset to strings for subnd table */ 37 long flags; /* nonzero if reg.exp. used */ 38 } hd; 39 40 typedef struct 41 { 42 unsigned char ch; /* character or number of followers */ 43 unsigned char pwt; /* primary weight */ 44 unsigned char swt; /* secondary weight */ 45 unsigned char ns; /* index of follower state list */ 46 } xnd; 47 48 typedef struct 49 { 50 char *exp; /* expression to be replaced */ 51 long explen; /* length of expression */ 52 char *repl; /* replacement string */ 53 } subnd; 54 55 /*----------------------------------*/ 56 57 #include <wcharm.h> 58 #include <limits.h> 59 /* #include <stdlock.h> */ 60 61 /* 62 * Structure of a collation file: 63 * 1. CollHead (maintbl is 0 if CHF_ENCODED) 64 * if !CHF_ENCODED then 65 * 2. CollElem[bytes] (256 for 8 bit bytes) 66 * 3. if CHF_INDEXED then 67 * CollElem[wides] (nmain-256 for 8 bit bytes) 68 * else 69 * CollMult[wides] 70 * 4. CollMult[*] (none if multtbl is 0) 71 * 5. wuchar_type[*] (none if repltbl is 0) 72 * 6. CollSubn[*] (none if subntbl is 0) 73 * 7. strings (first is pathname for .so if CHF_DYNAMIC) 74 * 75 * The actual location of parts 2 through 7 is not important. 76 * 77 * The main table is in encoded value order. 78 * 79 * All indeces/offsets must be nonzero to be effective; zero is reserved 80 * to indicate no-such-entry. This implies either that an unused initial 81 * entry is placed in each of (4) through (7), or that the "start offset" 82 * given by the header is artificially pushed back by an entry size. 83 * 84 * Note that if CHF_ENCODED is not set, then nweight must be positive. 85 * 86 * If an element can begin a multiple character element, it contains a 87 * nonzero multbeg which is the initial index into (4) for its list; 88 * the list is terminated by a CollMult with a ch of zero. 89 * 90 * If there are elements with the same primary weight (weight[1]), then 91 * for each such element, it must have a CollMult list. The CollMult 92 * that terminates the list (ch==0) notes the lowest and highest basic 93 * weights for those elements with that same primary weight value 94 * respectively in weight[0] and weight[1]. If there are some basic 95 * weights between these values that do not have the same primary 96 * weight--are not in the equivalence class--then the terminator also 97 * has a SUBN_SPECIAL mark. Note that this list terminator should be 98 * shared when the elements are not multiple character collating 99 * elements because they wouldn't otherwise have a CollMult list. 100 * 101 * WGHT_IGNORE is used to denote ignored collating elements for a 102 * particular collation ordering pass. All main table entries other 103 * than for '\0' will have a non-WGHT_IGNORE weight[0]. However, it is 104 * possible for a CollMult entries from (4) to have a WGHT_IGNORE 105 * weight[0]: If, for example, "xyz" is a multiple character collating 106 * element, but "xy" is not, then the CollMult for "y" will have a 107 * WGHT_IGNORE weight[0]. Also, WGHT_IGNORE is used to terminate each 108 * list of replacement weights. 109 * 110 * Within (3), it is possible to describe a sequence of unremarkable 111 * collating elements with a single CollMult entry. If the SUBN_SPECIAL 112 * bit is set, the rest of subnbeg represents the number of collating 113 * elements covered by this entry. The weight[0] values are determined 114 * by adding the difference between the encoded value and the entry's ch 115 * value to the entry's weight[0]. This value is then substituted for 116 * any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem() 117 * hides any match to such an entry by filling in a "spare" CollElem. 118 * 119 * If there are substitution strings, then for each character that begins 120 * a string, it has a nonzero subnbeg which is similarly the initial 121 * index into (6). The indeces in (6) refer to offsets within (7). 122 */ 123 124 #define TOPBIT(t) (((t)1) << (sizeof(t) * CHAR_BIT - 1)) 125 126 #define CHF_ENCODED 0x1 /* collation by encoded values only */ 127 #define CHF_INDEXED 0x2 /* main table indexed by encoded values */ 128 #define CHF_MULTICH 0x4 /* a multiple char. coll. elem. exists */ 129 #define CHF_DYNAMIC 0x8 /* shared object has collation functions */ 130 131 #define CWF_BACKWARD 0x1 /* reversed ordering for this weight */ 132 #define CWF_POSITION 0x2 /* weight takes position into account */ 133 134 #define CLVERS 1 /* most recent version */ 135 136 #define WGHT_IGNORE 0 /* ignore this collating element */ 137 #define WGHT_SPECIAL TOPBIT(wuchar_type) 138 #define SUBN_SPECIAL TOPBIT(unsigned short) 139 140 #ifndef COLL_WEIGHTS_MAX 141 #define COLL_WEIGHTS_MAX 1 142 #endif 143 144 typedef struct 145 { 146 unsigned long maintbl; /* start of main table */ 147 unsigned long multtbl; /* start of multi-char table */ 148 unsigned long repltbl; /* start of replacement weights */ 149 unsigned long subntbl; /* start of substitutions */ 150 unsigned long strstbl; /* start of sub. strings */ 151 unsigned long nmain; /* # entries in main table */ 152 unsigned short flags; /* CHF_* bits */ 153 unsigned short version; /* handle future changes */ 154 unsigned char elemsize; /* # bytes/element (w/padding) */ 155 unsigned char nweight; /* # weights/element */ 156 unsigned char order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */ 157 } CollHead; 158 159 typedef struct 160 { 161 unsigned short multbeg; /* start of multi-chars */ 162 unsigned short subnbeg; /* start of substitutions */ 163 wuchar_type weight[COLL_WEIGHTS_MAX]; 164 } CollElem; 165 166 typedef struct 167 { 168 wchar_t ch; /* "this" character (of sequence) */ 169 CollElem elem; /* its full information */ 170 } CollMult; 171 172 typedef struct 173 { 174 unsigned short strbeg; /* start of match string */ 175 unsigned short length; /* length of match string */ 176 unsigned short repbeg; /* start of replacement */ 177 } CollSubn; 178 179 struct lc_collate 180 { 181 const unsigned char *strstbl; 182 const wuchar_type *repltbl; 183 const CollElem *maintbl; 184 const CollMult *multtbl; 185 const CollSubn *subntbl; 186 #ifdef DSHLIB 187 void *handle; 188 void (*done)(struct lc_collate *); 189 int (*strc)(struct lc_collate *, const char *, const char *); 190 int (*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *); 191 size_t (*strx)(struct lc_collate *, char *, const char *, size_t); 192 size_t (*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t); 193 #endif 194 const char *mapobj; 195 size_t mapsize; 196 unsigned long nmain; 197 short nuse; 198 unsigned short flags; 199 unsigned char elemsize; 200 unsigned char nweight; 201 unsigned char order[COLL_WEIGHTS_MAX]; 202 }; 203 204 #define ELEM_BADCHAR ((CollElem *)0) 205 #define ELEM_ENCODED ((CollElem *)-1) 206 207 /* 208 LIBUXRE_STATIC int libuxre_old_collate(struct lc_collate *); 209 LIBUXRE_STATIC int libuxre_strqcoll(struct lc_collate *, const char *, 210 const char *); 211 LIBUXRE_STATIC int libuxre_wcsqcoll(struct lc_collate *, const wchar_t *, 212 const wchar_t *); 213 */ 214 extern struct lc_collate *libuxre_lc_collate(struct lc_collate *); 215 LIBUXRE_STATIC const CollElem *libuxre_collelem(struct lc_collate *, 216 CollElem *, wchar_t); 217 LIBUXRE_STATIC const CollElem *libuxre_collmult(struct lc_collate *, 218 const CollElem *, wchar_t); 219 /* 220 LIBUXRE_STATIC const CollElem *libuxre_collmbs(struct lc_collate *, 221 CollElem *, const unsigned char **); 222 LIBUXRE_STATIC const CollElem *libuxre_collwcs(struct lc_collate *, 223 CollElem *, const wchar_t **); 224 */ 225 226 #endif /* !LIBUXRE_COLLDATA_H */