cut.c (3174B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <string.h> 5 6 #include "text.h" 7 #include "util.h" 8 9 typedef struct Range { 10 size_t min, max; 11 struct Range *next; 12 } Range; 13 14 static Range *list = NULL; 15 static char mode = 0; 16 static char delim = '\t'; 17 static int nflag = 0; 18 static int sflag = 0; 19 20 static void 21 insert(Range *r) 22 { 23 Range *l, *p, *t; 24 25 for (p = NULL, l = list; l; p = l, l = l->next) { 26 if (r->max && r->max + 1 < l->min) { 27 r->next = l; 28 break; 29 } else if (!l->max || r->min < l->max + 2) { 30 l->min = MIN(r->min, l->min); 31 for (p = l, t = l->next; t; p = t, t = t->next) 32 if (r->max && r->max + 1 < t->min) 33 break; 34 l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; 35 l->next = t; 36 return; 37 } 38 } 39 if (p) 40 p->next = r; 41 else 42 list = r; 43 } 44 45 static void 46 parselist(char *str) 47 { 48 char *s; 49 size_t n = 1; 50 Range *r; 51 52 for (s = str; *s; s++) { 53 if (*s == ' ') 54 *s = ','; 55 if (*s == ',') 56 n++; 57 } 58 r = emalloc(n * sizeof(Range)); 59 for (s = str; n; n--, s++) { 60 r->min = (*s == '-') ? 1 : strtoul(s, &s, 10); 61 r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min; 62 r->next = NULL; 63 if (!r->min || (r->max && r->max < r->min) || (*s && *s != ',')) 64 eprintf("cut: bad list value\n"); 65 insert(r++); 66 } 67 } 68 69 static size_t 70 seek(const char *s, size_t pos, size_t *prev, size_t count) 71 { 72 const char *t; 73 size_t n = pos - *prev; 74 75 if (mode == 'b') { 76 if ((t = memchr(s, 0, n))) 77 return t - s; 78 if (nflag) 79 while (n && !UTF8_POINT(s[n])) 80 n--; 81 *prev += n; 82 return n; 83 } else if (mode == 'c') { 84 for (n++, t = s; *t; t++) 85 if (UTF8_POINT(*t) && !--n) 86 break; 87 } else { 88 for (t = (count < 2) ? s : s + 1; n && *t; t++) 89 if (*t == delim && !--n && count) 90 break; 91 } 92 *prev = pos; 93 return t - s; 94 } 95 96 static void 97 cut(FILE *fp) 98 { 99 static char *buf = NULL; 100 static size_t size = 0; 101 char *s; 102 size_t i, n, p; 103 ssize_t len; 104 Range *r; 105 106 while ((len = getline(&buf, &size, fp)) != -1) { 107 if (len && buf[len - 1] == '\n') 108 buf[len - 1] = '\0'; 109 if (mode == 'f' && !strchr(buf, delim)) { 110 if (!sflag) 111 puts(buf); 112 continue; 113 } 114 for (i = 0, p = 1, s = buf, r = list; r; r = r->next, s += n) { 115 s += seek(s, r->min, &p, i++); 116 if (!*s) 117 break; 118 if (!r->max) { 119 fputs(s, stdout); 120 break; 121 } 122 n = seek(s, r->max + 1, &p, i++); 123 if (fwrite(s, 1, n, stdout) != n) 124 eprintf("write error:"); 125 } 126 putchar('\n'); 127 } 128 } 129 130 static void 131 usage(void) 132 { 133 eprintf("usage: cut -b list [-n] [file ...]\n" 134 " cut -c list [file ...]\n" 135 " cut -f list [-d delim] [-s] [file ...]\n"); 136 } 137 138 int 139 main(int argc, char *argv[]) 140 { 141 FILE *fp; 142 143 ARGBEGIN { 144 case 'b': 145 case 'c': 146 case 'f': 147 mode = ARGC(); 148 parselist(ARGF()); 149 break; 150 case 'd': 151 delim = *ARGF(); 152 break; 153 case 'n': 154 nflag = 1; 155 break; 156 case 's': 157 sflag = 1; 158 break; 159 default: 160 usage(); 161 } ARGEND; 162 163 if (!mode) 164 usage(); 165 166 if (!argc) 167 cut(stdin); 168 else for (; argc--; argv++) { 169 if (!strcmp(*argv, "-")) 170 cut(stdin); 171 else { 172 if (!(fp = fopen(*argv, "r"))) { 173 weprintf("fopen %s:", *argv); 174 continue; 175 } 176 cut(fp); 177 fclose(fp); 178 } 179 } 180 return 0; 181 }