re.h (7258B)
/*
 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
 *
 * Sccsid @(#)re.h	1.15 (gritter) 2/6/05
 */
/*  UNIX(R) Regular Expression Library
 *
 *  Note: Code is released under the GNU LGPL
 *
 *  Copyright (C) 2001 Caldera International, Inc.
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to:
 *        Free Software Foundation, Inc.
 *        59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Internal header of the regular expression library: shared types,
 * operator encodings and prototypes for the bracket-expression code,
 * the parser, and the DFA and NFA engines declared below.
 */
#ifndef LIBUXRE_RE_H
#define LIBUXRE_RE_H

/*
 * Maps safe external tag to internal one.
 * This must precede the #include of <regex.h> so that the tag
 * re_coll_ is replaced by lc_collate while that header is read.
 */
#define re_coll_	lc_collate	/* <regex.h> */
/* #define __fnm_collate	lc_collate */	/* <fnmatch.h> */

#include <limits.h>
#include <regex.h>
/* #include <fnmatch.h> */
#include <colldata.h>

/*
 * Sizing constants for Bracket.
 * NBSHT is the number of bits in an unsigned short.
 * NBYTE is the number of unsigned shorts needed to supply
 * (1 << CHAR_BIT) bits, rounded up; it sizes Bracket.byte[]
 * (presumably one bit per single-byte value -- confirm in the
 * bracket code).
 * NTYPE, NWIDE and NQUIV size the fixed arrays type[], wide[]
 * and quiv[] inside Bracket.
 */
#define NBSHT	(sizeof(unsigned short) * CHAR_BIT)
#define NBYTE	(((1 << CHAR_BIT) + NBSHT - 1) / NBSHT)
#define NTYPE	4
#define NWIDE	32
#define NQUIV	4

/*
 * Data attached to a ROP_BKT ([...]) token/node through Info.bkt,
 * and the object handled by the libuxre_bkt*() functions below.
 * NOTE(review): extype/exquiv/exwide look like overflow storage used
 * once the fixed type[]/quiv[]/wide[] arrays are full, with
 * ntype/nquiv/nwide as the entry counts -- confirm in the bracket code.
 */
typedef struct
{
	struct lc_collate	*col;	/* only member set by caller */
	wctype_t		*extype;
	wuchar_type		*exquiv;
	wchar_t			*exwide;
	wctype_t		type[NTYPE];
	wuchar_type		quiv[NQUIV];
	wchar_t			wide[NWIDE];
	unsigned short		byte[NBYTE];
	unsigned short		ntype;
	unsigned short		nquiv;
	unsigned short		nwide;
	unsigned int		flags;	/* presumably BKT_* bits -- confirm */
} Bracket;

/*
 * Bracket expression flags; each value is a distinct single bit.
 */
#define BKT_NEGATED	0x001	/* complemented set */
#define BKT_ONECASE	0x002	/* uppercase same as lowercase */
#define BKT_NOTNL	0x004	/* do not match newline when BKT_NEGATED */
#define BKT_BADRANGE	0x008	/* accept [m-a] ranges as [ma] */
#define BKT_SEPRANGE	0x010	/* disallow [a-m-z] style ranges */
#define BKT_NLBAD	0x020	/* newline disallowed */
#define BKT_SLASHBAD	0x040	/* slash disallowed (for pathnames) */
#define BKT_EMPTY	0x080	/* take leading ] as end (empty set) */
#define BKT_ESCAPE	0x100	/* allow \ as quote for next anything */
#define BKT_QUOTE	0x200	/* allow \ as quote for \\, \^, \- or \] */
#define BKT_ESCNL	0x400	/* take \n as the newline character */
#define BKT_ESCSEQ	0x800	/* otherwise, take \ as in C escapes */
#define BKT_ODDRANGE	0x1000	/* oawk oddity: [m-a] means [m] */
#define BKT_NOI18N	0x2000	/* disable [::] [==] [..] */
#define BKT_OLDESC	0x4000	/* enable \b \f \n \r \t only */

/*
 * These error returns for libuxre_bktmbcomp() are directly tied to
 * the error returns for regcomp() for convenience: each BKT_* code
 * is the arithmetic negation of the corresponding REG_* code.
 */
#define BKT_BADPAT	(-REG_BADPAT)
#define BKT_ECOLLATE	(-REG_ECOLLATE)
#define BKT_ECTYPE	(-REG_ECTYPE)
#define BKT_EEQUIV	(-REG_EEQUIV)
#define BKT_BADCHAR	(-REG_EBKTCHAR)
#define BKT_EBRACK	(-REG_EBRACK)
#define BKT_EMPTYSUBBKT	(-REG_EMPTYSUBBKT)
#define BKT_ERANGE	(-REG_ERANGE)
#define BKT_ESPACE	(-REG_ESPACE)
#define BKT_BADESC	(-REG_BADESC)
#define BKT_ILLSEQ	(-REG_ILLSEQ)

/*
 * These must be distinct from the flags in <fnmatch.h>.
 */
#define FNM_COLLATE	0x2000	/* have collation information */
#define FNM_CURRENT	0x4000	/* have full-sized fnm_t structure */

/*
 * These must be distinct from the flags in <regex.h>.
 * They use bits 29, 30 and 31.
 */
#define REG_NFA		0x20000000
#define REG_DFA		0x40000000
#define REG_GOTBKT	0x80000000

/*
 * Limits for ROP_BRACE counts, which are kept in Info.num[] as
 * unsigned short values.
 * NOTE(review): BRACE_INF is presumably the "no upper bound" marker
 * for num[1] -- confirm in the parser.
 */
#define BRACE_INF	USHRT_MAX
#define BRACE_MAX	5100	/* arbitrary number < SHRT_MAX */
#define BRACE_DFAMAX	255	/* max amount for r.e. duplication */

typedef union	/* extra info always kept for some tokens/nodes */
{
	Bracket		*bkt;	/* ROP_BKT */
	size_t		sub;	/* ROP_LP (ROP_RP), ROP_REF */
	unsigned short	num[2];	/* ROP_BRACE: num[0]=low, num[1]=high */
} Info;

typedef struct	/* lexical context while parsing */
{
	Info			info;
	const unsigned char	*pat;
	unsigned char		*clist;
	struct lc_collate	*col;
	unsigned long		flags;
	w_type			tok;
	size_t			maxref;
	size_t			nleft;
	size_t			nright;
	size_t			nclist;
	int			bktflags;
	int			err;
	int			mb_cur_max;
} Lex;

typedef struct t_tree	Tree;	/* RE parse tree node */
struct t_tree
{
	union
	{
		Tree	*ptr;	/* unary & binary nodes */
		size_t	pos;	/* position for DFA leaves */
	} left;
	union
	{
		Tree	*ptr;	/* binary nodes */
		Info	info;
	} right;
	Tree		*parent;
	w_type		op;	/* positive => char. to match */
};

/*
 * The engine structures are only declared here, not defined; this
 * header handles them through pointers alone.
 */
typedef struct re_dfa_	Dfa;	/* DFA engine description */
typedef struct re_nfa_	Nfa;	/* NFA engine description */

/*
 * Argument block taken by libuxre_regdfaexec() and
 * libuxre_regnfaexec().
 */
typedef struct
{
	const unsigned char	*str;
	regmatch_t		*match;
	size_t			nmatch;
	unsigned long		flags;
	int			mb_cur_max;
} Exec;

/*
 * Regular expression operators.  Some only used internally.
 * All are negative, to distinguish them from the regular
 * "match this particular wide character" operation.
 *
 * MAKE_ROP(k, v) places the kind k (LEAF_ROP, UNARY_ROP or
 * BINARY_ROP) above the low four bits, the per-kind number v in
 * the low four bits, and negates the result.  KIND_ROP(v) undoes
 * the negation and shifts the low four bits away to recover k, so
 * every per-kind number must stay below 16.
 */
#define BINARY_ROP	0x02
#define UNARY_ROP	0x01
#define LEAF_ROP	0x00

#define MAKE_ROP(k, v)	(-((v) | ((k) << 4)))
#define KIND_ROP(v)	((-(v)) >> 4)

#define ROP_OR		MAKE_ROP(BINARY_ROP, 1)
#define ROP_CAT		MAKE_ROP(BINARY_ROP, 2)

#define ROP_STAR	MAKE_ROP(UNARY_ROP, 1)
#define ROP_PLUS	MAKE_ROP(UNARY_ROP, 2)
#define ROP_QUEST	MAKE_ROP(UNARY_ROP, 3)
#define ROP_BRACE	MAKE_ROP(UNARY_ROP, 4)
#define ROP_LP		MAKE_ROP(UNARY_ROP, 5)
#define ROP_RP		MAKE_ROP(UNARY_ROP, 6)

#define ROP_NOP		MAKE_ROP(LEAF_ROP, 1)	/* temporary */
#define ROP_BOL		MAKE_ROP(LEAF_ROP, 2)	/* ^ anchor */
#define ROP_EOL		MAKE_ROP(LEAF_ROP, 3)	/* $ anchor */
#define ROP_ALL		MAKE_ROP(LEAF_ROP, 4)	/* anything (added) */
#define ROP_ANYCH	MAKE_ROP(LEAF_ROP, 5)	/* . w/\n */
#define ROP_NOTNL	MAKE_ROP(LEAF_ROP, 6)	/* . w/out \n */
#define ROP_EMPTY	MAKE_ROP(LEAF_ROP, 7)	/* empty string */
#define ROP_NONE	MAKE_ROP(LEAF_ROP, 8)	/* match failure */
#define ROP_BKT		MAKE_ROP(LEAF_ROP, 9)	/* [...] */
#define ROP_BKTCOPY	MAKE_ROP(LEAF_ROP, 10)	/* [...] (duplicated) */
#define ROP_LT		MAKE_ROP(LEAF_ROP, 11)	/* \< word begin */
#define ROP_GT		MAKE_ROP(LEAF_ROP, 12)	/* \> word end */
#define ROP_REF		MAKE_ROP(LEAF_ROP, 13)	/* \digit */
#define ROP_END		MAKE_ROP(LEAF_ROP, 14)	/* final (added) */

/*
 * Return values:
 *  libuxre_bktmbcomp()
 *	<0 error (see BKT_* above); >0 #bytes scanned
 *  libuxre_bktmbexec()
 *	<0 doesn't match; >=0 matches, #extra bytes scanned
 *
 * NOTE(review): LIBUXRE_STATIC is not defined in this header; it has
 * to come from one of the headers included above or from the build
 * -- confirm.
 */
LIBUXRE_STATIC void	libuxre_bktfree(Bracket *);
LIBUXRE_STATIC int	libuxre_bktmbcomp(Bracket *, const unsigned char *,
				int, int);
LIBUXRE_STATIC int	libuxre_bktmbexec(Bracket *, wchar_t,
				const unsigned char *, int);

/* Parse tree construction and disposal. */
LIBUXRE_STATIC void	libuxre_regdeltree(Tree *, int);
LIBUXRE_STATIC Tree	*libuxre_reg1tree(w_type, Tree *);
LIBUXRE_STATIC Tree	*libuxre_reg2tree(w_type, Tree *, Tree *);
LIBUXRE_STATIC Tree	*libuxre_regparse(Lex *, const unsigned char *, int);

/*
 * DFA engine.  Unlike its neighbours, libuxre_regdeldfa() is declared
 * extern rather than LIBUXRE_STATIC.
 */
extern void		libuxre_regdeldfa(Dfa *);
LIBUXRE_STATIC int	libuxre_regdfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int	libuxre_regdfaexec(Dfa *, Exec *);

/*
 * NFA engine.  Unlike its neighbours, libuxre_regdelnfa() is declared
 * extern rather than LIBUXRE_STATIC.
 */
extern void		libuxre_regdelnfa(Nfa *);
LIBUXRE_STATIC int	libuxre_regnfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int	libuxre_regnfaexec(Nfa *, Exec *);
#endif /* !LIBUXRE_RE_H */