cut.c (4145B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <string.h> 5 6 #include "text.h" 7 #include "utf.h" 8 #include "util.h" 9 10 typedef struct Range { 11 size_t min, max; 12 struct Range *next; 13 } Range; 14 15 static Range *list = NULL; 16 static char mode = 0; 17 static char *delim = "\t"; 18 static size_t delimlen = 1; 19 static int nflag = 0; 20 static int sflag = 0; 21 22 static void 23 insert(Range *r) 24 { 25 Range *l, *p, *t; 26 27 for (p = NULL, l = list; l; p = l, l = l->next) { 28 if (r->max && r->max + 1 < l->min) { 29 r->next = l; 30 break; 31 } else if (!l->max || r->min < l->max + 2) { 32 l->min = MIN(r->min, l->min); 33 for (p = l, t = l->next; t; p = t, t = t->next) 34 if (r->max && r->max + 1 < t->min) 35 break; 36 l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0; 37 l->next = t; 38 return; 39 } 40 } 41 if (p) 42 p->next = r; 43 else 44 list = r; 45 } 46 47 static void 48 parselist(char *str) 49 { 50 char *s; 51 size_t n = 1; 52 Range *r; 53 54 if (!*str) 55 eprintf("empty list\n"); 56 for (s = str; *s; s++) { 57 if (*s == ' ') 58 *s = ','; 59 if (*s == ',') 60 n++; 61 } 62 r = ereallocarray(NULL, n, sizeof(*r)); 63 for (s = str; n; n--, s++) { 64 r->min = (*s == '-') ? 1 : strtoul(s, &s, 10); 65 r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min; 66 r->next = NULL; 67 if (!r->min || (r->max && r->max < r->min) || (*s && *s != ',')) 68 eprintf("bad list value\n"); 69 insert(r++); 70 } 71 } 72 73 static size_t 74 seek(struct line *s, size_t pos, size_t *prev, size_t count) 75 { 76 size_t n = pos - *prev, i, j; 77 78 if (mode == 'b') { 79 if (n >= s->len) 80 return s->len; 81 if (nflag) 82 while (n && !UTF8_POINT(s->data[n])) 83 n--; 84 *prev += n; 85 return n; 86 } else if (mode == 'c') { 87 for (n++, i = 0; i < s->len; i++) 88 if (UTF8_POINT(s->data[i]) && !--n) 89 break; 90 } else { 91 for (i = (count < delimlen + 1) ? 0 : delimlen; n && i < s->len; ) { 92 if ((s->len - i) >= delimlen && 93 !memcmp(s->data + i, delim, delimlen)) { 94 if (!--n && count) 95 break; 96 i += delimlen; 97 continue; 98 } 99 for (j = 1; j + i <= s->len && !fullrune(s->data + i, j); j++); 100 i += j; 101 } 102 } 103 *prev = pos; 104 105 return i; 106 } 107 108 static void 109 cut(FILE *fp, const char *fname) 110 { 111 Range *r; 112 struct line s; 113 static struct line line; 114 static size_t size; 115 size_t i, n, p; 116 ssize_t len; 117 118 while ((len = getline(&line.data, &size, fp)) > 0) { 119 line.len = len; 120 if (line.data[line.len - 1] == '\n') 121 line.data[--line.len] = '\0'; 122 if (mode == 'f' && !memmem(line.data, line.len, delim, delimlen)) { 123 if (!sflag) { 124 fwrite(line.data, 1, line.len, stdout); 125 fputc('\n', stdout); 126 } 127 continue; 128 } 129 for (i = 0, p = 1, s = line, r = list; r; r = r->next) { 130 n = seek(&s, r->min, &p, i); 131 s.data += n; 132 s.len -= n; 133 i += (mode == 'f') ? delimlen : 1; 134 if (!s.len) 135 break; 136 if (!r->max) { 137 fwrite(s.data, 1, s.len, stdout); 138 break; 139 } 140 n = seek(&s, r->max + 1, &p, i); 141 i += (mode == 'f') ? delimlen : 1; 142 if (fwrite(s.data, 1, n, stdout) != n) 143 eprintf("fwrite <stdout>:"); 144 s.data += n; 145 s.len -= n; 146 } 147 putchar('\n'); 148 } 149 if (ferror(fp)) 150 eprintf("getline %s:", fname); 151 } 152 153 static void 154 usage(void) 155 { 156 eprintf("usage: %s -b list [-n] [file ...]\n" 157 " %s -c list [file ...]\n" 158 " %s -f list [-d delim] [-s] [file ...]\n", 159 argv0, argv0, argv0); 160 } 161 162 int 163 main(int argc, char *argv[]) 164 { 165 FILE *fp; 166 int ret = 0; 167 168 ARGBEGIN { 169 case 'b': 170 case 'c': 171 case 'f': 172 mode = ARGC(); 173 parselist(EARGF(usage())); 174 break; 175 case 'd': 176 delim = EARGF(usage()); 177 if (!*delim) 178 eprintf("empty delimiter\n"); 179 delimlen = unescape(delim); 180 break; 181 case 'n': 182 nflag = 1; 183 break; 184 case 's': 185 sflag = 1; 186 break; 187 default: 188 usage(); 189 } ARGEND 190 191 if (!mode) 192 usage(); 193 194 if (!argc) 195 cut(stdin, "<stdin>"); 196 else { 197 for (; *argv; argc--, argv++) { 198 if (!strcmp(*argv, "-")) { 199 *argv = "<stdin>"; 200 fp = stdin; 201 } else if (!(fp = fopen(*argv, "r"))) { 202 weprintf("fopen %s:", *argv); 203 ret = 1; 204 continue; 205 } 206 cut(fp, *argv); 207 if (fp != stdin && fshut(fp, *argv)) 208 ret = 1; 209 } 210 } 211 212 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"); 213 214 return ret; 215 }