sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

tr.c (6226B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <stdlib.h>
      3 
      4 #include "utf.h"
      5 #include "util.h"
      6 
      7 static int cflag = 0;
      8 static int dflag = 0;
      9 static int sflag = 0;
     10 
     11 struct range {
     12 	Rune   start;
     13 	Rune   end;
     14 	size_t quant;
     15 };
     16 
     17 static struct {
     18 	char    *name;
     19 	int    (*check)(Rune);
     20 } classes[] = {
     21 	{ "alnum",  isalnumrune  },
     22 	{ "alpha",  isalpharune  },
     23 	{ "blank",  isblankrune  },
     24 	{ "cntrl",  iscntrlrune  },
     25 	{ "digit",  isdigitrune  },
     26 	{ "graph",  isgraphrune  },
     27 	{ "lower",  islowerrune  },
     28 	{ "print",  isprintrune  },
     29 	{ "punct",  ispunctrune  },
     30 	{ "space",  isspacerune  },
     31 	{ "upper",  isupperrune  },
     32 	{ "xdigit", isxdigitrune },
     33 };
     34 
     35 static struct range *set1        = NULL;
     36 static size_t set1ranges         = 0;
     37 static int    (*set1check)(Rune) = NULL;
     38 static struct range *set2        = NULL;
     39 static size_t set2ranges         = 0;
     40 static int    (*set2check)(Rune) = NULL;
     41 
     42 static size_t
     43 rangelen(struct range r)
     44 {
     45 	return (r.end - r.start + 1) * r.quant;
     46 }
     47 
     48 static size_t
     49 setlen(struct range *set, size_t setranges)
     50 {
     51 	size_t len = 0, i;
     52 
     53 	for (i = 0; i < setranges; i++)
     54 		len += rangelen(set[i]);
     55 
     56 	return len;
     57 }
     58 
     59 static int
     60 rstrmatch(Rune *r, char *s, size_t n)
     61 {
     62 	size_t i;
     63 
     64 	for (i = 0; i < n; i++)
     65 		if (r[i] != s[i])
     66 			return 0;
     67 	return 1;
     68 }
     69 
     70 static size_t
     71 makeset(char *str, struct range **set, int (**check)(Rune))
     72 {
     73 	Rune  *rstr;
     74 	size_t len, i, j, m, n;
     75 	size_t q, setranges = 0;
     76 	int    factor, base;
     77 
     78 	/* rstr defines at most len ranges */
     79 	unescape(str);
     80 	rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr));
     81 	len = utftorunestr(str, rstr);
     82 	*set = ereallocarray(NULL, len, sizeof(**set));
     83 
     84 	for (i = 0; i < len; i++) {
     85 		if (rstr[i] == '[') {
     86 			j = i;
     87 nextbrack:
     88 			if (j >= len)
     89 				goto literal;
     90 			for (m = j; m < len; m++)
     91 				if (rstr[m] == ']') {
     92 					j = m;
     93 					break;
     94 				}
     95 			if (j == i)
     96 				goto literal;
     97 
     98 			/* CLASSES [=EQUIV=] (skip) */
     99 			if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
    100 				if (j - i != 4)
    101 					goto literal;
    102 				(*set)[setranges].start = rstr[i + 2];
    103 				(*set)[setranges].end   = rstr[i + 2];
    104 				(*set)[setranges].quant = 1;
    105 				setranges++;
    106 				i = j;
    107 				continue;
    108 			}
    109 
    110 			/* CLASSES [:CLASS:] */
    111 			if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
    112 				for (n = 0; n < LEN(classes); n++) {
    113 					if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
    114 						*check = classes[n].check;
    115 						return 0;
    116 					}
    117 				}
    118 				eprintf("Invalid character class.\n");
    119 			}
    120 
    121 			/* REPEAT  [_*n] (only allowed in set2) */
    122 			if (j - i > 2 && rstr[i + 2] == '*') {
    123 				/* check if right side of '*' is a number */
    124 				q = 0;
    125 				factor = 1;
    126 				base = (rstr[i + 3] == '0') ? 8 : 10;
    127 				for (n = j - 1; n > i + 2; n--) {
    128 					if (rstr[n] < '0' || rstr[n] > '9') {
    129 						n = 0;
    130 						break;
    131 					}
    132 					q += (rstr[n] - '0') * factor;
    133 					factor *= base;
    134 				}
    135 				if (n == 0) {
    136 					j = m + 1;
    137 					goto nextbrack;
    138 				}
    139 				(*set)[setranges].start = rstr[i + 1];
    140 				(*set)[setranges].end   = rstr[i + 1];
    141 				(*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1));
    142 				setranges++;
    143 				i = j;
    144 				continue;
    145 			}
    146 
    147 			j = m + 1;
    148 			goto nextbrack;
    149 		}
    150 literal:
    151 		/* RANGES [_-__-_], _-__-_ */
    152 		/* LITERALS _______ */
    153 		(*set)[setranges].start = rstr[i];
    154 
    155 		if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
    156 			i += 2;
    157 		(*set)[setranges].end = rstr[i];
    158 		(*set)[setranges].quant = 1;
    159 		setranges++;
    160 	}
    161 
    162 	free(rstr);
    163 	return setranges;
    164 }
    165 
    166 static void
    167 usage(void)
    168 {
    169 	eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
    170 }
    171 
    172 int
    173 main(int argc, char *argv[])
    174 {
    175 	Rune r, lastrune = 0;
    176 	size_t off1, off2, i, m;
    177 	int ret = 0;
    178 
    179 	ARGBEGIN {
    180 	case 'c':
    181 	case 'C':
    182 		cflag = 1;
    183 		break;
    184 	case 'd':
    185 		dflag = 1;
    186 		break;
    187 	case 's':
    188 		sflag = 1;
    189 		break;
    190 	default:
    191 		usage();
    192 	} ARGEND
    193 
    194 	if (!argc || argc > 2 || (argc == 1 && dflag == sflag))
    195 		usage();
    196 	set1ranges = makeset(argv[0], &set1, &set1check);
    197 	if (argc == 2)
    198 		set2ranges = makeset(argv[1], &set2, &set2check);
    199 
    200 	if (!dflag || (argc == 2 && sflag)) {
    201 		/* sanity checks as we are translating */
    202 		if (!sflag && !set2ranges && !set2check)
    203 			eprintf("cannot map to an empty set.\n");
    204 		if (set2check && set2check != islowerrune &&
    205 		    set2check != isupperrune) {
    206 			eprintf("can only map to 'lower' and 'upper' class.\n");
    207 		}
    208 	}
    209 read:
    210 	if (!efgetrune(&r, stdin, "<stdin>")) {
    211 		ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
    212 		return ret;
    213 	}
    214 	if (argc == 1 && sflag)
    215 		goto write;
    216 	for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) {
    217 		if (set1[i].start <= r && r <= set1[i].end) {
    218 			if (dflag) {
    219 				if (cflag)
    220 					goto write;
    221 				else
    222 					goto read;
    223 			}
    224 			if (cflag)
    225 				goto write;
    226 
    227 			/* map r to set2 */
    228 			if (set2check) {
    229 				if (set2check == islowerrune)
    230 					r = tolowerrune(r);
    231 				else
    232 					r = toupperrune(r);
    233 			} else {
    234 				off1 += r - set1[i].start;
    235 				if (off1 > setlen(set2, set2ranges) - 1) {
    236 					r = set2[set2ranges - 1].end;
    237 					goto write;
    238 				}
    239 				for (m = 0, off2 = 0; m < set2ranges; m++) {
    240 					if (off2 + rangelen(set2[m]) > off1) {
    241 						m++;
    242 						break;
    243 					}
    244 					off2 += rangelen(set2[m]);
    245 				}
    246 				m--;
    247 				r = set2[m].start + (off1 - off2) / set2[m].quant;
    248 			}
    249 			goto write;
    250 		}
    251 	}
    252 	if (set1check && set1check(r)) {
    253 		if (dflag) {
    254 			if (cflag)
    255 				goto write;
    256 			else
    257 				goto read;
    258 		}
    259 		if (set2check) {
    260 			if (set2check == islowerrune)
    261 				r = tolowerrune(r);
    262 			else
    263 				r = toupperrune(r);
    264 		} else {
    265 			r = set2[set2ranges - 1].end;
    266 		}
    267 		goto write;
    268 	}
    269 	if (!dflag && cflag) {
    270 		if (set2check) {
    271 			if (set2check == islowerrune)
    272 				r = tolowerrune(r);
    273 			else
    274 				r = toupperrune(r);
    275 		} else {
    276 			r = set2[set2ranges - 1].end;
    277 		}
    278 		goto write;
    279 	}
    280 	if (dflag && cflag)
    281 		goto read;
    282 write:
    283 	if (argc == 1 && sflag && r == lastrune) {
    284 		if (set1check && set1check(r))
    285 			goto read;
    286 		for (i = 0; i < set1ranges; i++) {
    287 			if (set1[i].start <= r && r <= set1[i].end)
    288 				goto read;
    289 		}
    290 	}
    291 	if (argc == 2 && sflag && r == lastrune) {
    292 		if (set2check && set2check(r))
    293 			goto read;
    294 		for (i = 0; i < set2ranges; i++) {
    295 			if (set2[i].start <= r && r <= set2[i].end)
    296 				goto read;
    297 		}
    298 	}
    299 	efputrune(&r, stdout, "<stdout>");
    300 	lastrune = r;
    301 	goto read;
    302 }