sbase

suckless unix tools
git clone git://git.2f30.org/sbase.git
Log | Files | Refs | README | LICENSE

cut.c (4145B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 
      6 #include "text.h"
      7 #include "utf.h"
      8 #include "util.h"
      9 
     /*
      * One selected span of 1-based positions (bytes, characters or fields,
      * depending on the mode). Spans are chained into a singly linked list.
      */
     typedef struct Range {
     	size_t min;            /* first selected position; never 0 once parsed */
     	size_t max;            /* last selected position; 0 means "to end of line" */
     	struct Range *next;    /* following span, or NULL at the tail */
     } Range;
     14 
     15 static Range *list     = NULL;
     16 static char   mode     = 0;
     17 static char  *delim    = "\t";
     18 static size_t delimlen = 1;
     19 static int    nflag    = 0;
     20 static int    sflag    = 0;
     21 
     22 static void
     23 insert(Range *r)
     24 {
     25 	Range *l, *p, *t;
     26 
     27 	for (p = NULL, l = list; l; p = l, l = l->next) {
     28 		if (r->max && r->max + 1 < l->min) {
     29 			r->next = l;
     30 			break;
     31 		} else if (!l->max || r->min < l->max + 2) {
     32 			l->min = MIN(r->min, l->min);
     33 			for (p = l, t = l->next; t; p = t, t = t->next)
     34 				if (r->max && r->max + 1 < t->min)
     35 					break;
     36 			l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0;
     37 			l->next = t;
     38 			return;
     39 		}
     40 	}
     41 	if (p)
     42 		p->next = r;
     43 	else
     44 		list = r;
     45 }
     46 
     47 static void
     48 parselist(char *str)
     49 {
     50 	char *s;
     51 	size_t n = 1;
     52 	Range *r;
     53 
     54 	if (!*str)
     55 		eprintf("empty list\n");
     56 	for (s = str; *s; s++) {
     57 		if (*s == ' ')
     58 			*s = ',';
     59 		if (*s == ',')
     60 			n++;
     61 	}
     62 	r = ereallocarray(NULL, n, sizeof(*r));
     63 	for (s = str; n; n--, s++) {
     64 		r->min = (*s == '-') ? 1 : strtoul(s, &s, 10);
     65 		r->max = (*s == '-') ? strtoul(s + 1, &s, 10) : r->min;
     66 		r->next = NULL;
     67 		if (!r->min || (r->max && r->max < r->min) || (*s && *s != ','))
     68 			eprintf("bad list value\n");
     69 		insert(r++);
     70 	}
     71 }
     72 
     73 static size_t
     74 seek(struct line *s, size_t pos, size_t *prev, size_t count)
     75 {
     76 	size_t n = pos - *prev, i, j;
     77 
     78 	if (mode == 'b') {
     79 		if (n >= s->len)
     80 			return s->len;
     81 		if (nflag)
     82 			while (n && !UTF8_POINT(s->data[n]))
     83 				n--;
     84 		*prev += n;
     85 		return n;
     86 	} else if (mode == 'c') {
     87 		for (n++, i = 0; i < s->len; i++)
     88 			if (UTF8_POINT(s->data[i]) && !--n)
     89 				break;
     90 	} else {
     91 		for (i = (count < delimlen + 1) ? 0 : delimlen; n && i < s->len; ) {
     92 			if ((s->len - i) >= delimlen &&
     93 			    !memcmp(s->data + i, delim, delimlen)) {
     94 				if (!--n && count)
     95 					break;
     96 				i += delimlen;
     97 				continue;
     98 			}
     99 			for (j = 1; j + i <= s->len && !fullrune(s->data + i, j); j++);
    100 			i += j;
    101 		}
    102 	}
    103 	*prev = pos;
    104 
    105 	return i;
    106 }
    107 
    108 static void
    109 cut(FILE *fp, const char *fname)
    110 {
    111 	Range *r;
    112 	struct line s;
    113 	static struct line line;
    114 	static size_t size;
    115 	size_t i, n, p;
    116 	ssize_t len;
    117 
    118 	while ((len = getline(&line.data, &size, fp)) > 0) {
    119 		line.len = len;
    120 		if (line.data[line.len - 1] == '\n')
    121 			line.data[--line.len] = '\0';
    122 		if (mode == 'f' && !memmem(line.data, line.len, delim, delimlen)) {
    123 			if (!sflag) {
    124 				fwrite(line.data, 1, line.len, stdout);
    125 				fputc('\n', stdout);
    126 			}
    127 			continue;
    128 		}
    129 		for (i = 0, p = 1, s = line, r = list; r; r = r->next) {
    130 			n = seek(&s, r->min, &p, i);
    131 			s.data += n;
    132 			s.len -= n;
    133 			i += (mode == 'f') ? delimlen : 1;
    134 			if (!s.len)
    135 				break;
    136 			if (!r->max) {
    137 				fwrite(s.data, 1, s.len, stdout);
    138 				break;
    139 			}
    140 			n = seek(&s, r->max + 1, &p, i);
    141 			i += (mode == 'f') ? delimlen : 1;
    142 			if (fwrite(s.data, 1, n, stdout) != n)
    143 				eprintf("fwrite <stdout>:");
    144 			s.data += n;
    145 			s.len -= n;
    146 		}
    147 		putchar('\n');
    148 	}
    149 	if (ferror(fp))
    150 		eprintf("getline %s:", fname);
    151 }
    152 
    153 static void
    154 usage(void)
    155 {
    156 	eprintf("usage: %s -b list [-n] [file ...]\n"
    157 	        "       %s -c list [file ...]\n"
    158 	        "       %s -f list [-d delim] [-s] [file ...]\n",
    159 		argv0, argv0, argv0);
    160 }
    161 
    162 int
    163 main(int argc, char *argv[])
    164 {
    165 	FILE *fp;
    166 	int ret = 0;
    167 
    168 	ARGBEGIN {
    169 	case 'b':
    170 	case 'c':
    171 	case 'f':
    172 		mode = ARGC();
    173 		parselist(EARGF(usage()));
    174 		break;
    175 	case 'd':
    176 		delim = EARGF(usage());
    177 		if (!*delim)
    178 			eprintf("empty delimiter\n");
    179 		delimlen = unescape(delim);
    180 		break;
    181 	case 'n':
    182 		nflag = 1;
    183 		break;
    184 	case 's':
    185 		sflag = 1;
    186 		break;
    187 	default:
    188 		usage();
    189 	} ARGEND
    190 
    191 	if (!mode)
    192 		usage();
    193 
    194 	if (!argc)
    195 		cut(stdin, "<stdin>");
    196 	else {
    197 		for (; *argv; argc--, argv++) {
    198 			if (!strcmp(*argv, "-")) {
    199 				*argv = "<stdin>";
    200 				fp = stdin;
    201 			} else if (!(fp = fopen(*argv, "r"))) {
    202 				weprintf("fopen %s:", *argv);
    203 				ret = 1;
    204 				continue;
    205 			}
    206 			cut(fp, *argv);
    207 			if (fp != stdin && fshut(fp, *argv))
    208 				ret = 1;
    209 		}
    210 	}
    211 
    212 	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
    213 
    214 	return ret;
    215 }