hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

re.h (7258B)


      1 /*
      2  * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
      3  *
      4  * Sccsid @(#)re.h	1.15 (gritter) 2/6/05
      5  */
      6 /*  UNIX(R) Regular Expression Library
      7  *
      8  *  Note: Code is released under the GNU LGPL
      9  *
     10  *  Copyright (C) 2001 Caldera International, Inc.
     11  *
     12  *  This library is free software; you can redistribute it and/or
     13  *  modify it under the terms of the GNU Lesser General Public
     14  *  License as published by the Free Software Foundation; either
     15  *  version 2 of the License, or (at your option) any later version.
     16  *
     17  *  This library is distributed in the hope that it will be useful,
     18  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     19  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     20  *  Lesser General Public License for more details.
     21  *
     22  *  You should have received a copy of the GNU Lesser General Public
     23  *  License along with this library; if not, write to:
     24  *        Free Software Foundation, Inc.
     25  *        59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
     26  */
     27 
     28 #ifndef	LIBUXRE_RE_H
     29 #define	LIBUXRE_RE_H
     30 
     31 	/*
     32 	* Maps safe external tag to internal one
     33 	*/
     34 #define re_coll_	lc_collate	/* <regex.h> */
     35 /*	#define __fnm_collate	lc_collate	*/	/* <fnmatch.h> */
     36 
     37 #include <limits.h>
     38 #include <regex.h>
     39 /*	#include <fnmatch.h>	*/
     40 #include <colldata.h>
     41 
     42 #define NBSHT	(sizeof(unsigned short) * CHAR_BIT)
     43 #define NBYTE	(((1 << CHAR_BIT) + NBSHT - 1) / NBSHT)
     44 #define NTYPE	4
     45 #define NWIDE	32
     46 #define NQUIV	4
     47 
     48 typedef struct
     49 {
     50 	struct lc_collate	*col;	/* only member set by caller */
     51 	wctype_t		*extype;
     52 	wuchar_type		*exquiv;
     53 	wchar_t			*exwide;
     54 	wctype_t		type[NTYPE];
     55 	wuchar_type		quiv[NQUIV];
     56 	wchar_t			wide[NWIDE];
     57 	unsigned short		byte[NBYTE];
     58 	unsigned short		ntype;
     59 	unsigned short		nquiv;
     60 	unsigned short		nwide;
     61 	unsigned int		flags;
     62 } Bracket;
     63 
     64 #define BKT_NEGATED	0x001	/* complemented set */
     65 #define BKT_ONECASE	0x002	/* uppercase same as lowercase */
     66 #define BKT_NOTNL	0x004	/* do not match newline when BKT_NEGATED */
     67 #define BKT_BADRANGE	0x008	/* accept [m-a] ranges as [ma] */
     68 #define BKT_SEPRANGE	0x010	/* disallow [a-m-z] style ranges */
     69 #define BKT_NLBAD	0x020	/* newline disallowed */
     70 #define BKT_SLASHBAD	0x040	/* slash disallowed (for pathnames) */
     71 #define BKT_EMPTY	0x080	/* take leading ] is end (empty set) */
     72 #define BKT_ESCAPE	0x100	/* allow \ as quote for next anything */
     73 #define BKT_QUOTE	0x200	/* allow \ as quote for \\, \^, \- or \] */
     74 #define BKT_ESCNL	0x400	/* take \n as the newline character */
     75 #define BKT_ESCSEQ	0x800	/* otherwise, take \ as in C escapes */
     76 #define	BKT_ODDRANGE	0x1000	/* oawk oddity: [m-a] means [m] */
     77 #define	BKT_NOI18N	0x2000	/* disable [::] [==] [..] */
     78 #define	BKT_OLDESC	0x4000	/* enable \b \f \n \r \t only */
     79 
     80 	/*
     81 	* These error returns for libuxre_bktmbcomp() are directly tied to
     82 	* the error returns for regcomp() for convenience.
     83 	*/
     84 #define BKT_BADPAT	(-REG_BADPAT)
     85 #define BKT_ECOLLATE	(-REG_ECOLLATE)
     86 #define BKT_ECTYPE	(-REG_ECTYPE)
     87 #define BKT_EEQUIV	(-REG_EEQUIV)
     88 #define BKT_BADCHAR	(-REG_EBKTCHAR)
     89 #define BKT_EBRACK	(-REG_EBRACK)
     90 #define BKT_EMPTYSUBBKT	(-REG_EMPTYSUBBKT)
     91 #define BKT_ERANGE	(-REG_ERANGE)
     92 #define BKT_ESPACE	(-REG_ESPACE)
     93 #define BKT_BADESC	(-REG_BADESC)
     94 #define	BKT_ILLSEQ	(-REG_ILLSEQ)
     95 
     96 	/*
     97 	* These must be distinct from the flags in <fnmatch.h>.
     98 	*/
     99 #define FNM_COLLATE	0x2000	/* have collation information */
    100 #define FNM_CURRENT	0x4000	/* have full-sized fnm_t structure */
    101 
    102 	/*
    103 	* These must be distinct from the flags in <regex.h>.
    104 	*/
    105 #define REG_NFA		0x20000000
    106 #define REG_DFA		0x40000000
    107 #define REG_GOTBKT	0x80000000
    108 
    109 #define BRACE_INF	USHRT_MAX
    110 #define BRACE_MAX	5100	/* arbitrary number < SHRT_MAX */
    111 #define BRACE_DFAMAX	255	/* max amount for r.e. duplication */
    112 
    113 typedef union	/* extra info always kept for some tokens/nodes */
    114 {
    115 	Bracket		*bkt;	/* ROP_BKT */
    116 	size_t		sub;	/* ROP_LP (ROP_RP), ROP_REF */
    117 	unsigned short	num[2];	/* ROP_BRACE: num[0]=low, num[1]=high */
    118 } Info;
    119 
    120 typedef struct	/* lexical context while parsing */
    121 {
    122 	Info			info;
    123 	const unsigned char	*pat;
    124 	unsigned char		*clist;
    125 	struct lc_collate	*col;
    126 	unsigned long		flags;
    127 	w_type			tok;
    128 	size_t			maxref;
    129 	size_t			nleft;
    130 	size_t			nright;
    131 	size_t			nclist;
    132 	int			bktflags;
    133 	int			err;
    134 	int			mb_cur_max;
    135 } Lex;
    136 
    137 typedef struct t_tree	Tree;	/* RE parse tree node */
    138 struct t_tree
    139 {
    140 	union
    141 	{
    142 		Tree	*ptr;	/* unary & binary nodes */
    143 		size_t	pos;	/* position for DFA leaves */
    144 	} left;
    145 	union
    146 	{
    147 		Tree	*ptr;	/* binary nodes */
    148 		Info	info;
    149 	} right;
    150 	Tree		*parent;
    151 	w_type		op;	/* positive => char. to match */
    152 };
    153 
    154 typedef struct re_dfa_	Dfa;	/* DFA engine description */
    155 typedef struct re_nfa_	Nfa;	/* NFA engine description */
    156 
    157 typedef struct
    158 {
    159 	const unsigned char	*str;
    160 	regmatch_t		*match;
    161 	size_t			nmatch;
    162 	unsigned long		flags;
    163 	int			mb_cur_max;
    164 } Exec;
    165 
    166 	/*
    167 	* Regular expression operators.  Some only used internally.
    168 	* All are negative, to distinguish them from the regular
    169 	* "match this particular wide character" operation.
    170 	*/
    171 #define BINARY_ROP	0x02
    172 #define UNARY_ROP	0x01
    173 #define LEAF_ROP	0x00
    174 
    175 #define MAKE_ROP(k, v)	(-((v) | ((k) << 4)))
    176 #define KIND_ROP(v)	((-(v)) >> 4)
    177 
    178 #define ROP_OR		MAKE_ROP(BINARY_ROP, 1)
    179 #define ROP_CAT		MAKE_ROP(BINARY_ROP, 2)
    180 
    181 #define ROP_STAR	MAKE_ROP(UNARY_ROP, 1)
    182 #define ROP_PLUS	MAKE_ROP(UNARY_ROP, 2)
    183 #define ROP_QUEST	MAKE_ROP(UNARY_ROP, 3)
    184 #define ROP_BRACE	MAKE_ROP(UNARY_ROP, 4)
    185 #define ROP_LP		MAKE_ROP(UNARY_ROP, 5)
    186 #define ROP_RP		MAKE_ROP(UNARY_ROP, 6)
    187 
    188 #define ROP_NOP		MAKE_ROP(LEAF_ROP, 1)	/* temporary */
    189 #define ROP_BOL		MAKE_ROP(LEAF_ROP, 2)	/* ^ anchor */
    190 #define ROP_EOL		MAKE_ROP(LEAF_ROP, 3)	/* $ anchor */
    191 #define ROP_ALL		MAKE_ROP(LEAF_ROP, 4)	/* anything (added) */
    192 #define ROP_ANYCH	MAKE_ROP(LEAF_ROP, 5)	/* . w/\n */
    193 #define ROP_NOTNL	MAKE_ROP(LEAF_ROP, 6)	/* . w/out \n */
    194 #define ROP_EMPTY	MAKE_ROP(LEAF_ROP, 7)	/* empty string */
    195 #define ROP_NONE	MAKE_ROP(LEAF_ROP, 8)	/* match failure */
    196 #define ROP_BKT		MAKE_ROP(LEAF_ROP, 9)	/* [...] */
    197 #define ROP_BKTCOPY	MAKE_ROP(LEAF_ROP, 10)	/* [...] (duplicated) */
    198 #define ROP_LT		MAKE_ROP(LEAF_ROP, 11)	/* \< word begin */
    199 #define ROP_GT		MAKE_ROP(LEAF_ROP, 12)	/* \> word end */
    200 #define ROP_REF		MAKE_ROP(LEAF_ROP, 13)	/* \digit */
    201 #define ROP_END		MAKE_ROP(LEAF_ROP, 14)	/* final (added) */
    202 
    203 	/*
    204 	* Return values:
    205 	*  libuxre_bktmbcomp()
    206 	*	<0 error (see BKT_* above); >0 #bytes scanned
    207 	*  libuxre_bktmbexec()
    208 	*	<0 doesn't match; >=0 matches, #extra bytes scanned
    209 	*/
    210 LIBUXRE_STATIC void	libuxre_bktfree(Bracket *);
    211 LIBUXRE_STATIC int	libuxre_bktmbcomp(Bracket *, const unsigned char *,
    212 				int, int);
    213 LIBUXRE_STATIC int	libuxre_bktmbexec(Bracket *, wchar_t,
    214 				const unsigned char *, int);
    215 
    216 LIBUXRE_STATIC void	libuxre_regdeltree(Tree *, int);
    217 LIBUXRE_STATIC Tree	*libuxre_reg1tree(w_type, Tree *);
    218 LIBUXRE_STATIC Tree	*libuxre_reg2tree(w_type, Tree *, Tree *);
    219 LIBUXRE_STATIC Tree	*libuxre_regparse(Lex *, const unsigned char *, int);
    220 
    221 extern void		libuxre_regdeldfa(Dfa *);
    222 LIBUXRE_STATIC int	libuxre_regdfacomp(regex_t *, Tree *, Lex *);
    223 LIBUXRE_STATIC int	libuxre_regdfaexec(Dfa *, Exec *);
    224 
    225 extern void		libuxre_regdelnfa(Nfa *);
    226 LIBUXRE_STATIC int	libuxre_regnfacomp(regex_t *, Tree *, Lex *);
    227 LIBUXRE_STATIC int	libuxre_regnfaexec(Nfa *, Exec *);
    228 #endif	/* !LIBUXRE_RE_H */