rcomp.c (8795B)
1 /* 2 * grep - search a file for a pattern 3 * 4 * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. 5 */ 6 /* 7 * Copyright (c) 2003 Gunnar Ritter 8 * 9 * This software is provided 'as-is', without any express or implied 10 * warranty. In no event will the authors be held liable for any damages 11 * arising from the use of this software. 12 * 13 * Permission is granted to anyone to use this software for any purpose, 14 * including commercial applications, and to alter it and redistribute 15 * it freely, subject to the following restrictions: 16 * 17 * 1. The origin of this software must not be misrepresented; you must not 18 * claim that you wrote the original software. If you use this software 19 * in a product, an acknowledgment in the product documentation would be 20 * appreciated but is not required. 21 * 22 * 2. Altered source versions must be plainly marked as such, and must not be 23 * misrepresented as being the original software. 24 * 25 * 3. This notice may not be removed or altered from any source distribution. 26 */ 27 28 /* Sccsid @(#)rcomp.c 1.27 (gritter) 2/6/05> */ 29 30 /* 31 * Code involving POSIX.2 regcomp()/regexpr() routines. 32 */ 33 34 #include "grep.h" 35 #include "alloc.h" 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <mbtowi.h> 40 41 static int emptypat; 42 43 #ifdef UXRE 44 #include <regdfa.h> 45 static int rc_range(struct iblok *, char *); 46 static int rc_rangew(struct iblok *, char *); 47 #endif 48 49 /* 50 * Check whether line matches any pattern of the pattern list. 51 */ 52 static int 53 rc_match(const char *str, size_t sz) 54 { 55 #ifndef UXRE 56 struct expr *e; 57 #endif 58 regmatch_t pmatch[1]; 59 int gotcha = 0; 60 61 if (emptypat) { 62 if (xflag) { 63 if (*str == '\0') 64 return 1; 65 } else 66 return 1; 67 } 68 #ifdef UXRE 69 if (e0->e_exp) 70 gotcha = (regexec(e0->e_exp, str, 1, pmatch, 0) == 0); 71 #else /* !UXRE */ 72 for (e = e0; e; e = e->e_nxt) { 73 if (e->e_exp) { 74 gotcha = (regexec(e->e_exp, str, 1, pmatch, 0) == 0); 75 if (gotcha) 76 break; 77 } 78 } 79 #endif /* !UXRE */ 80 if (gotcha) 81 if (!xflag || (pmatch[0].rm_so == 0 82 && pmatch[0].rm_eo == sz)) 83 return 1; 84 return 0; 85 } 86 87 /* 88 * Compile a pattern structure using regcomp(). 89 */ 90 static void 91 rc_build(void) 92 { 93 int rerror = REG_BADPAT; 94 int rflags = 0; 95 size_t sz; 96 #ifdef UXRE 97 char *pat, *cp; 98 #endif /* UXRE */ 99 struct expr *e; 100 101 if ((e0->e_flg & E_NULL) == 0) { 102 for (sz = 0, e = e0; e; e = e->e_nxt) { 103 if (e->e_len > 0) 104 sz += e->e_len + 1; 105 else 106 emptypat = 1; 107 } 108 } else 109 sz = 1; 110 if ((e0->e_flg & E_NULL || emptypat) && sus == 0) 111 rc_error(e0, rerror); 112 if (sz == 0 || (emptypat && xflag == 0)) { 113 e0->e_exp = NULL; 114 return; 115 } 116 #ifdef UXRE 117 pat = smalloc(sz); 118 for (cp = pat, e = e0; e; e = e->e_nxt) { 119 if (e->e_len > 0) { 120 memcpy(cp, e->e_pat, e->e_len); 121 cp[e->e_len] = '\n'; 122 cp = &cp[e->e_len + 1]; 123 } 124 } 125 pat[sz - 1] = '\0'; 126 if (iflag) 127 rflags |= REG_ICASE; 128 if (Eflag) 129 rflags |= (sus ? REG_EXTENDED : REG_OLDERE|REG_NOI18N) | 130 REG_MTPARENBAD; 131 else { 132 rflags |= REG_ANGLES; 133 if (sus >= 3) 134 rflags |= REG_AVOIDNULL; 135 } 136 if (xflag) 137 rflags |= REG_ONESUB; 138 else 139 rflags |= REG_NOSUB; 140 if ((e = e0)->e_nxt) 141 rflags |= REG_NLALT; 142 e->e_exp = (regex_t *)smalloc(sizeof *e->e_exp); 143 if ((rerror = regcomp(e->e_exp, pat, rflags)) != 0) 144 rc_error(e, rerror); 145 free(pat); 146 if (!xflag && e->e_exp->re_flags & REG_DFA) 147 range = mbcode ? rc_rangew : rc_range; 148 #else /* !UXRE */ 149 if (iflag) 150 rflags |= REG_ICASE; 151 if (Eflag) 152 rflags |= REG_EXTENDED; 153 if (!xflag) 154 rflags |= REG_NOSUB; 155 for (e = e0; e; e = e->e_nxt) { 156 e->e_exp = (regex_t *)smalloc(sizeof *e->e_exp); 157 if ((rerror = regcomp(e->e_exp, e->e_pat, rflags)) != 0) 158 rc_error(e, rerror); 159 } 160 #endif /* !UXRE */ 161 } 162 163 void 164 rc_select(void) 165 { 166 build = rc_build; 167 match = rc_match; 168 matchflags |= MF_NULTERM; 169 matchflags &= ~MF_LOCONV; 170 } 171 172 /* 173 * Derived from Unix 32V /usr/src/cmd/egrep.y 174 * 175 * Changes by Gunnar Ritter, Freiburg i. Br., Germany, April 2001. 176 */ 177 /* 178 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 179 * 180 * Redistribution and use in source and binary forms, with or without 181 * modification, are permitted provided that the following conditions 182 * are met: 183 * Redistributions of source code and documentation must retain the 184 * above copyright notice, this list of conditions and the following 185 * disclaimer. 186 * Redistributions in binary form must reproduce the above copyright 187 * notice, this list of conditions and the following disclaimer in the 188 * documentation and/or other materials provided with the distribution. 189 * All advertising materials mentioning features or use of this software 190 * must display the following acknowledgement: 191 * This product includes software developed or owned by Caldera 192 * International, Inc. 193 * Neither the name of Caldera International, Inc. nor the names of 194 * other contributors may be used to endorse or promote products 195 * derived from this software without specific prior written permission. 196 * 197 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA 198 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR 199 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 200 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE 202 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR 203 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 204 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 205 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 206 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 207 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 208 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 209 */ 210 #ifdef UXRE 211 /* 212 * Range search for singlebyte locales using the modified UNIX(R) Regular 213 * Expression Library DFA. 214 */ 215 static int 216 rc_range(struct iblok *ip, char *last) 217 { 218 char *p; 219 int c, cstat, nstat; 220 Dfa *dp = e0->e_exp->re_dfa; 221 222 p = ip->ib_cur; 223 lineno++; 224 cstat = dp->anybol; 225 if (dp->acc[cstat]) 226 goto found; 227 for (;;) { 228 if ((nstat = dp->trans[cstat][*p & 0377]) == 0) { 229 /* 230 * '\0' is used to indicate end-of-line. If a '\0' 231 * character appears in input, it matches '$' but 232 * the DFA remains in dead state afterwards; there 233 * is thus no need to handle this condition 234 * specially to get the same behavior as in plain 235 * regexec(). 236 */ 237 if ((c = *p & 0377) == '\n') 238 c = '\0'; 239 if ((nstat = regtrans(dp, cstat, c, 1)) == 0) 240 goto fail; 241 dp->trans[cstat]['\n'] = dp->trans[cstat]['\0']; 242 } 243 if (dp->acc[cstat = nstat - 1]) { 244 found: for (;;) { 245 if (vflag == 0) { 246 succeed: outline(ip, last, p - ip->ib_cur); 247 if (qflag || lflag) 248 return 1; 249 } else { 250 fail: ip->ib_cur = p; 251 while (*ip->ib_cur++ != '\n'); 252 } 253 if ((p = ip->ib_cur) > last) 254 return 0; 255 lineno++; 256 if (dp->acc[cstat = dp->anybol] == 0) 257 goto brk2; 258 } 259 } 260 if (*p++ == '\n') { 261 if (vflag) { 262 p--; 263 goto succeed; 264 } 265 if ((ip->ib_cur = p) > last) 266 return 0; 267 lineno++; 268 if (dp->acc[cstat = dp->anybol]) 269 goto found; 270 } 271 brk2:; 272 } 273 } 274 275 /* 276 * Range search for multibyte locales using the modified UNIX(R) Regular 277 * Expression Library DFA. 278 */ 279 static int 280 rc_rangew(struct iblok *ip, char *last) 281 { 282 char *p; 283 int n, cstat, nstat; 284 wint_t wc; 285 Dfa *dp = e0->e_exp->re_dfa; 286 287 p = ip->ib_cur; 288 lineno++; 289 cstat = dp->anybol; 290 if (dp->acc[cstat]) 291 goto found; 292 for (;;) { 293 if (*p & 0200) { 294 if ((n = mbtowi(&wc, p, last + 1 - p)) < 0) { 295 n = 1; 296 wc = WEOF; 297 } 298 } else { 299 wc = *p; 300 n = 1; 301 } 302 if ((wc & ~(wchar_t)(NCHAR-1)) != 0 || 303 (nstat = dp->trans[cstat][wc]) == 0) { 304 /* 305 * '\0' is used to indicate end-of-line. If a '\0' 306 * character appears in input, it matches '$' but 307 * the DFA remains in dead state afterwards; there 308 * is thus no need to handle this condition 309 * specially to get the same behavior as in plain 310 * regexec(). 311 */ 312 if (wc == '\n') 313 wc = '\0'; 314 if ((nstat = regtrans(dp, cstat, wc, mb_cur_max)) == 0) 315 goto fail; 316 dp->trans[cstat]['\n'] = dp->trans[cstat]['\0']; 317 } 318 if (dp->acc[cstat = nstat - 1]) { 319 found: for (;;) { 320 if (vflag == 0) { 321 succeed: outline(ip, last, p - ip->ib_cur); 322 if (qflag || lflag) 323 return 1; 324 } else { 325 fail: ip->ib_cur = p; 326 while (*ip->ib_cur++ != '\n'); 327 } 328 if ((p = ip->ib_cur) > last) 329 return 0; 330 lineno++; 331 if (dp->acc[cstat = dp->anybol] == 0) 332 goto brk2; 333 } 334 } 335 p += n; 336 if (p[-n] == '\n') { 337 if (vflag) { 338 p--; 339 goto succeed; 340 } 341 if ((ip->ib_cur = p) > last) 342 return 0; 343 lineno++; 344 if (dp->acc[cstat = dp->anybol]) 345 goto found; 346 } 347 brk2:; 348 } 349 } 350 #endif /* UXRE */