/* grep.c (15239B) */
1 /* 2 * grep - search a file for a pattern 3 * 4 * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. 5 */ 6 /* 7 * Copyright (c) 2003 Gunnar Ritter 8 * 9 * This software is provided 'as-is', without any express or implied 10 * warranty. In no event will the authors be held liable for any damages 11 * arising from the use of this software. 12 * 13 * Permission is granted to anyone to use this software for any purpose, 14 * including commercial applications, and to alter it and redistribute 15 * it freely, subject to the following restrictions: 16 * 17 * 1. The origin of this software must not be misrepresented; you must not 18 * claim that you wrote the original software. If you use this software 19 * in a product, an acknowledgment in the product documentation would be 20 * appreciated but is not required. 21 * 22 * 2. Altered source versions must be plainly marked as such, and must not be 23 * misrepresented as being the original software. 24 * 25 * 3. This notice may not be removed or altered from any source distribution. 26 */ 27 28 /* Sccsid @(#)grep.c 1.53 (gritter) 12/27/06> */ 29 30 /* 31 * Code common to all grep flavors. 32 */ 33 34 #include <sys/types.h> 35 #include <sys/stat.h> 36 #include <sys/mman.h> 37 #include <sys/wait.h> 38 #include <fcntl.h> 39 #include <unistd.h> 40 #include <stdio.h> 41 #include <string.h> 42 #include <stdlib.h> 43 #include <libgen.h> 44 #include <locale.h> 45 #include <limits.h> 46 #include <ctype.h> 47 #include <dirent.h> 48 #include <errno.h> 49 50 #include "grep.h" 51 #include "alloc.h" 52 53 /* 54 * Generic flags and the like. 
55 */ 56 int Eflag; /* use EREs */ 57 int Fflag; /* use fixed strings */ 58 int bflag; /* print buffer count */ 59 int cflag; /* print count only */ 60 int fflag; /* had pattern file argument */ 61 int hflag; /* do not print filenames */ 62 int iflag; /* ignore case */ 63 int lflag; /* print filenames only */ 64 int nflag; /* print line numbers */ 65 int qflag; /* no output at all */ 66 int (*rflag)(const char *, struct stat *); /* operate recursively */ 67 int sflag; /* avoid error messages */ 68 int vflag; /* inverse selection */ 69 int wflag; /* search for words */ 70 int xflag; /* match entire line */ 71 int zflag; /* decompress compressed files */ 72 int mb_cur_max; /* avoid multiple calls to MB_CUR_MAX */ 73 unsigned status = 1; /* exit status */ 74 off_t lmatch; /* count of line matches */ 75 off_t lineno; /* current line number */ 76 char *progname; /* argv[0] to main() */ 77 char *filename; /* name of current file */ 78 char *options; /* for getopt() */ 79 void (*build)(void); /* compile function */ 80 int (*match)(const char *, size_t); /* comparison function */ 81 int (*range)(struct iblok *, char *); /* grep range of lines */ 82 83 /* 84 * Regexp variables. 85 */ 86 struct expr *e0; /* start of expression list */ 87 enum matchflags matchflags; /* matcher flags */ 88 89 /* 90 * To avoid link loops with -r. 91 */ 92 static struct visit { 93 ino_t v_ino; 94 dev_t v_dev; 95 } *visited; 96 static int vismax; /* number of members in visited */ 97 98 /* 99 * Lower-case a character string. 
100 */ 101 size_t 102 loconv(register char *dst, register char *src, size_t sz) 103 { 104 char *odst = dst; 105 106 if (mbcode) { 107 char mb[MB_LEN_MAX]; 108 wchar_t wc; 109 int len, i, nlen; 110 111 while (sz > 0) { 112 if ((*src & 0200) == 0) { 113 *dst++ = tolower(*src); 114 src++; 115 sz--; 116 } else if ((len = mbtowc(&wc, src, sz)) <= 0 || 117 len > sz) { 118 *dst++ = *src++; 119 sz--; 120 } else { 121 wc = towlower(wc); 122 if (len >= mb_cur_max) { 123 if ((nlen = wctomb(dst, wc)) <= len) { 124 dst += nlen; 125 src += len; 126 sz -= len; 127 } else { 128 *dst++ = *src++; 129 sz--; 130 } 131 } else { 132 if ((nlen = wctomb(mb, wc)) <= len) { 133 sz -= len; 134 src += len; 135 for (i = 0; i < nlen; i++) 136 *dst++ = mb[i]; 137 } else { 138 *dst++ = *src++; 139 sz--; 140 } 141 } 142 } 143 } 144 } else { 145 while (sz--) { 146 *dst++ = tolower(*src & 0377); 147 src++; 148 } 149 } 150 return dst - odst; 151 } 152 153 /* 154 * Determine if pat ends with an unescaped dollar sign. 155 */ 156 static int 157 termdollar(const char *pat, long len) 158 { 159 int dollar = 1; 160 161 if (len == 0 || pat[len - 1] != '$') 162 return 0; 163 pat += --len - 1; 164 while (len-- && *pat-- == '\\') 165 dollar = !dollar; 166 return dollar; 167 } 168 169 /* 170 * Surround the pattern with \< \>. 
171 */ 172 void 173 wcomp(char **pat, long *len) 174 { 175 char *wp = smalloc(*len + 5); 176 177 memcpy(&wp[2], *pat, *len); 178 if ((*pat)[0] == '^') 179 memcpy(wp, "^\\<", 3); 180 else 181 memcpy(wp, "\\<", 2); 182 if (termdollar(*pat, *len)) 183 strcpy(&wp[*len-1+2], "\\>$"); 184 else 185 strcpy(&wp[*len+2], "\\>"); 186 *len += 4; 187 *pat = wp; 188 } 189 190 static struct iblok * 191 redirect(struct iblok *ip, const char *arg0, const char *arg1) 192 { 193 struct iblok *nip = NULL; 194 int pd[2]; 195 pid_t pid; 196 197 if (pipe(pd) < 0) 198 return NULL; 199 switch (pid = fork()) { 200 case 0: 201 if (lseek(ip->ib_fd, -(ip->ib_end - ip->ib_cur), 202 SEEK_CUR) == (off_t)-1) { 203 int xpd[2]; 204 if (pipe(xpd) == 0 && fork() == 0) { 205 ssize_t rd, wo, wt; 206 close(xpd[0]); 207 for (;;) { 208 rd = ip->ib_end - ip->ib_cur; 209 wo = wt = 0; 210 do { 211 if ((wo = write(xpd[1], 212 &ip->ib_cur[wt], 213 rd - wt)) 214 <= 0) { 215 if (errno == EINTR) 216 continue; 217 _exit(0); 218 } 219 wt += wo; 220 } while (wt < rd); 221 if (ib_read(ip) == EOF) 222 break; 223 ip->ib_cur--; 224 } 225 _exit(0); 226 } else { 227 close(xpd[1]); 228 dup2(xpd[0], 0); 229 close(xpd[0]); 230 } 231 } else { 232 if (ip->ib_fd) 233 dup2(ip->ib_fd, 0); 234 } 235 if (ip->ib_fd) 236 ib_close(ip); 237 else 238 ib_free(ip); 239 dup2(pd[1], 1); 240 close(pd[0]); 241 close(pd[1]); 242 execlp(arg0, arg0, arg1, NULL); 243 fprintf(stderr, "%s: could not exec %s\n", progname, arg0); 244 _exit(0177); 245 /*NOTREACHED*/ 246 case -1: 247 fprintf(stderr, "%s: cannot fork()\n", progname); 248 status = 2; 249 return NULL; 250 default: 251 close(pd[1]); 252 nip = ib_alloc(pd[0], 0); 253 nip->ib_pid = pid; 254 return nip; 255 } 256 } 257 258 /* 259 * Report a matching line. 
260 */ 261 void 262 report(const char *line, size_t llen, off_t bcnt, int addnl) 263 { 264 if (filename && !hflag) 265 printf("%s:", filename); 266 #ifdef LONGLONG 267 if (bflag) 268 printf("%llu:", (long long)bcnt); 269 if (nflag) 270 printf("%llu:", (long long)lineno); 271 #else /* !LONGLONG */ 272 if (bflag) 273 printf("%lu:", (long)bcnt); 274 if (nflag) 275 printf("%lu:", (long)lineno); 276 #endif /* !LONGLONG */ 277 if (line && llen) 278 fwrite(line, sizeof *line, llen, stdout); 279 if (addnl) 280 putchar('\n'); 281 } 282 283 /* 284 * Check line for match. If necessary, the line gets NUL-terminated (so 285 * its address range must be writable then). When ignoring character case, 286 * a lower-case-only copy of the line is made instead. If a match is found, 287 * statistics are printed. Returns 1 if main loop shall terminate, 0 else. 288 */ 289 static int 290 matchline(char *line, size_t sz, int putnl, struct iblok *ip) 291 { 292 size_t csz = sz; 293 int terminate = 0; 294 char lbuf[512], *abuf = NULL, *cline = line; 295 296 if (iflag && (matchflags & MF_LOCONV)) { 297 if (sz >= sizeof lbuf - 1) { 298 abuf = smalloc(sz + 1); 299 cline = abuf; 300 } else 301 cline = lbuf; 302 csz = loconv(cline, line, sz); 303 cline[csz] = '\0'; 304 } else if (matchflags & MF_NULTERM) 305 cline[sz] = '\0'; 306 lineno++; 307 if (match(cline, csz) ^ vflag) { 308 lmatch++; 309 if (qflag == 0) { 310 if (status == 1) 311 status = 0; 312 if (lflag) { 313 puts(filename ? filename : stdinmsg); 314 } else if (!cflag) 315 report(line, sz, (ib_offs(ip)-1) / BSZ, putnl); 316 } else 317 exit(0); 318 if (qflag || lflag) 319 terminate = 1; 320 } 321 if (abuf) 322 free(abuf); 323 return terminate; 324 } 325 326 /* 327 * Check all lines within ip->ib_cur and last which contains the last 328 * newline. If the main loop shall terminate, 1 is returned. 
329 */ 330 static int 331 gn_range(struct iblok *ip, char *last) 332 { 333 char *nl; 334 335 while ((nl = memchr(ip->ib_cur, '\n', last + 1 - ip->ib_cur)) != NULL) { 336 if (matchline(ip->ib_cur, nl - ip->ib_cur, 1, ip)) 337 return 1; 338 if (nl == last) 339 return 0; 340 ip->ib_cur = nl + 1; 341 } 342 return 0; 343 } 344 345 /* 346 * Main grep routine. The line buffer herein is only used for overlaps 347 * between file buffer fills. 348 */ 349 static struct iblok * 350 grep(struct iblok *ip) 351 { 352 char *line = NULL; /* line buffer */ 353 register char *lastnl; /* last newline in file buffer */ 354 size_t sz = 0; /* length of line in line buffer */ 355 char *cp; 356 int hadnl; /* lastnl points to newline char */ 357 int oom = 0; /* got out of memory */ 358 359 lineno = lmatch = 0; 360 if (ib_read(ip) == EOF) 361 goto endgrep; 362 ip->ib_cur--; 363 if (zflag) { 364 struct iblok *np; 365 for (;;) { 366 sz = ip->ib_end - ip->ib_cur; 367 if (sz > 3 && memcmp(ip->ib_cur, "BZh", 3) == 0) 368 np = redirect(ip, "bzip2", "-cd"); 369 else if (sz > 2 && 370 memcmp(ip->ib_cur, "\37\235", 2) == 0) 371 np = redirect(ip, "zcat", NULL); 372 else if (sz > 2 && 373 memcmp(ip->ib_cur, "\37\213", 2) == 0) 374 np = redirect(ip, "gzip", "-cd"); 375 else 376 break; 377 if (np == NULL) 378 break; 379 if (ip->ib_fd) 380 ib_close(ip); 381 else 382 ib_free(ip); 383 ip = np; 384 if (ib_read(ip) == EOF) 385 goto endgrep; 386 ip->ib_cur--; 387 } 388 } 389 for (;;) { 390 for (lastnl = ip->ib_end - 1; 391 *lastnl != '\n' && lastnl > ip->ib_cur; 392 lastnl--); 393 if (hadnl = (ip->ib_cur < ip->ib_end && *lastnl == '\n')) 394 if (range(ip, lastnl)) 395 break; 396 if (lastnl < ip->ib_end - hadnl) { 397 /* 398 * Copy the partial line from file buffer to line 399 * buffer. Allocate enough space to zero-terminate 400 * the line later if necessary. 
401 */ 402 sz = ip->ib_end - lastnl - hadnl; 403 line = smalloc(sz + 1); 404 memcpy(line, lastnl + hadnl, sz); 405 ip->ib_cur = lastnl + hadnl; 406 } else 407 line = NULL; 408 nextbuf: 409 if (ib_read(ip) == EOF) { 410 if (line) { 411 matchline(line, sz, sus, ip); 412 free(line); 413 line = NULL; 414 sz = 0; 415 } 416 break; 417 } 418 ip->ib_cur--; 419 if (line) { 420 /* 421 * Append the partial line at the beginning of the 422 * file buffer to the line buffer. 423 */ 424 size_t oldsz = sz; 425 if ((cp = memchr(ip->ib_cur, '\n', 426 ip->ib_end - ip->ib_cur)) == NULL) { 427 char *nline; 428 /* 429 * Ugh. This is really a huge line. Store the 430 * entire file buffer in the line buffer and 431 * read the next part of the file. 432 */ 433 sz += ip->ib_end - ip->ib_cur; 434 if ((nline = realloc(line, sz + 1)) == NULL) { 435 sz = oldsz; 436 cp = &ip->ib_end[-1]; 437 oom++; 438 } else { 439 line = nline; 440 memcpy(line + oldsz, ip->ib_cur, 441 ip->ib_end - ip->ib_cur); 442 goto nextbuf; 443 } 444 } 445 if ((sz = cp - ip->ib_cur) > 0) { 446 char *nline; 447 sz += oldsz; 448 if ((nline = realloc(line, sz + 1)) == NULL) { 449 sz = oldsz; 450 oom++; 451 } else { 452 line = nline; 453 memcpy(line + oldsz, ip->ib_cur, 454 cp - ip->ib_cur); 455 } 456 } else 457 sz = oldsz; 458 if (matchline(line, sz, 1, ip)) 459 break; 460 free(line); 461 line = NULL; 462 sz = 0; 463 ip->ib_cur = cp + (oom == 0); 464 oom = 0; 465 } 466 } 467 endgrep: 468 if (!qflag && cflag) { 469 if (filename && !hflag) 470 printf("%s:", filename); 471 #ifdef LONGLONG 472 printf("%llu\n", (long long)lmatch); 473 #else 474 printf("%lu\n", (long)lmatch); 475 #endif 476 } 477 return ip; 478 } 479 480 /* 481 * Grep a named file. 482 */ 483 static void 484 fngrep(const char *fn, int level) 485 { 486 struct iblok *ip; 487 struct stat st; 488 int i; 489 490 if (rflag && fn && (level ? 
rflag : stat)(fn, &st) == 0) { 491 if (rflag != lstat) { 492 for (i = 0; i < level; i++) 493 if (st.st_dev == visited[i].v_dev && 494 st.st_ino == visited[i].v_ino) 495 return; 496 if (level >= vismax) { 497 vismax += 20; 498 visited = srealloc(visited, sizeof *visited * 499 vismax); 500 } 501 visited[level].v_dev = st.st_dev; 502 visited[level].v_ino = st.st_ino; 503 } 504 mode: switch (st.st_mode&S_IFMT) { 505 #define ignoring(t, s) fprintf(stderr, "%s: ignoring %s %s\n", progname, t, s) 506 case S_IFIFO: 507 ignoring("named pipe", fn); 508 return; 509 case S_IFBLK: 510 ignoring("block device", fn); 511 return; 512 case S_IFCHR: 513 ignoring("block device", fn); 514 return; 515 #ifdef S_IFSOCK 516 case S_IFSOCK: 517 ignoring("socket", fn); 518 return; 519 #endif /* S_IFSOCK */ 520 case S_IFLNK: 521 if (stat(fn, &st) < 0 || (st.st_mode&S_IFMT) == S_IFDIR) 522 return; 523 goto mode; 524 default: 525 break; 526 case S_IFDIR: { 527 char *path; 528 int pend, psize, i; 529 DIR *df; 530 struct dirent *dp; 531 532 if (hflag == 2) 533 hflag = 0; 534 if ((df = opendir(fn)) == NULL) { 535 if (sflag == 0) 536 fprintf(stderr, "%s: can't open " 537 "directory %s\n", 538 progname, fn); 539 if (!qflag || status == 1) 540 status = 2; 541 return; 542 } 543 pend = strlen(fn); 544 path = malloc(psize = pend + 2); 545 strcpy(path, fn); 546 path[pend++] = '/'; 547 while ((dp = readdir(df)) != NULL) { 548 if (dp->d_name[0] == '.' && 549 (dp->d_name[1] == '\0' || 550 dp->d_name[1] == '.' 
&& 551 dp->d_name[2] == '\0')) 552 continue; 553 i = 0; 554 do { 555 if (pend + i >= psize) 556 path = srealloc(path, 557 psize += 14); 558 path[pend+i] = dp->d_name[i]; 559 } while (dp->d_name[i++]); 560 filename = path; 561 fngrep(path, level+1); 562 } 563 free(path); 564 closedir(df); 565 return; 566 } 567 } 568 } 569 if (fn) { 570 if ((ip = ib_open(fn, 0)) == NULL) { 571 if (sflag == 0) 572 fprintf(stderr, "%s: can't open %s\n", 573 progname, fn); 574 if (!qflag || status == 1) 575 status = 2; 576 return; 577 } 578 } else 579 ip = ib_alloc(0, 0); 580 ip = grep(ip); 581 if (ip->ib_fd) { 582 ib_close(ip); 583 if (zflag && ip->ib_pid) { 584 int s; 585 waitpid(ip->ib_pid, &s, 0); 586 if (s) 587 status = 2; 588 } 589 } else 590 ib_free(ip); 591 } 592 593 int 594 main(int argc, char **argv) 595 { 596 int i, hadpat = 0; 597 598 #ifdef __GLIBC__ 599 putenv("POSIXLY_CORRECT=1"); 600 #endif 601 progname = basename(argv[0]); 602 setlocale(LC_COLLATE, ""); 603 setlocale(LC_CTYPE, ""); 604 mb_cur_max = MB_CUR_MAX; 605 range = gn_range; 606 init(); 607 while ((i = getopt(argc, argv, options)) != EOF) { 608 switch (i) { 609 case 'E': 610 Eflag |= 1; 611 rc_select(); 612 break; 613 case 'F': 614 if (Eflag&2) 615 Eflag = 0; 616 Fflag |= 1; 617 ac_select(); 618 break; 619 case 'b': 620 bflag = 1; 621 break; 622 case 'c': 623 cflag = 1; 624 break; 625 case 'e': 626 patstring(optarg); 627 hadpat++; 628 break; 629 case 'f': 630 fflag++; 631 patfile(optarg); 632 hadpat++; 633 break; 634 case 'h': 635 hflag = 1; 636 break; 637 case 'i': 638 case 'y': 639 iflag = 1; 640 break; 641 case 'l': 642 lflag = 1; 643 break; 644 case 'n': 645 nflag = 1; 646 break; 647 case 'q': 648 qflag = 1; 649 break; 650 case 'r': 651 rflag = stat; 652 break; 653 case 'R': 654 rflag = lstat; 655 break; 656 case 's': 657 sflag = 1; 658 break; 659 case 'v': 660 vflag = 1; 661 break; 662 case 'w': 663 wflag = 1; 664 break; 665 case 'x': 666 xflag = 1; 667 break; 668 case 'z': 669 zflag = 1; 670 break; 671 
default: 672 if (!(Fflag&2)) 673 usage(); 674 status = 2; 675 } 676 } 677 if (sus) { 678 if (Fflag == 2) { 679 if (sflag) { 680 optind = 1; 681 argv[1] = "-s"; 682 getopt(argc, argv, ""); 683 usage(); 684 } 685 if (qflag) { 686 optind = 1; 687 argv[1] = "-q"; 688 getopt(argc, argv, ""); 689 usage(); 690 } 691 } 692 if (Fflag && status == 2) 693 usage(); 694 if (Eflag == 1 && Fflag == 1 || cflag + lflag + qflag > 1) 695 usage(); 696 if (wflag && (Eflag || Fflag)) 697 usage(); 698 } 699 if (cflag) 700 lflag = 0; 701 if (hadpat == 0) { 702 if (optind >= argc) 703 misop(); 704 patstring(argv[optind++]); 705 } else if (e0 == NULL) 706 patstring(NULL); 707 build(); 708 if (optind != argc) { 709 if (optind + 1 == argc) 710 hflag = 2; 711 do { 712 if (sus && argv[optind][0] == '-' && 713 argv[optind][1] == '\0') { 714 filename = NULL; 715 fngrep(NULL, 0); 716 } else { 717 filename = argv[optind]; 718 fngrep(argv[optind], 0); 719 } 720 } while (++optind < argc); 721 } else { 722 if (lflag && !sus && (Eflag || Fflag)) 723 exit(1); 724 fngrep(NULL, 0); 725 } 726 return status; 727 }