morpheus-base

morpheus base system
git clone git://git.2f30.org/morpheus-base
Log | Files | Refs

tr.c (5934B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <wctype.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 
      6 #include "utf.h"
      7 #include "util.h"
      8 
      9 static int cflag = 0;
     10 static int dflag = 0;
     11 static int sflag = 0;
     12 
     13 struct range {
     14 	Rune   start;
     15 	Rune   end;
     16 	size_t quant;
     17 };
     18 
     19 static struct {
     20 	char    *name;
     21 	int    (*check)(wint_t);
     22 } classes[] = {
     23 	{ "alnum",  iswalnum  },
     24 	{ "alpha",  iswalpha  },
     25 	{ "blank",  iswblank  },
     26 	{ "cntrl",  iswcntrl  },
     27 	{ "digit",  iswdigit  },
     28 	{ "graph",  iswgraph  },
     29 	{ "lower",  iswlower  },
     30 	{ "print",  iswprint  },
     31 	{ "punct",  iswpunct  },
     32 	{ "space",  iswspace  },
     33 	{ "upper",  iswupper  },
     34 	{ "xdigit", iswxdigit },
     35 };
     36 
     37 static struct range *set1          = NULL;
     38 static size_t set1ranges           = 0;
     39 static int    (*set1check)(wint_t) = NULL;
     40 static struct range *set2          = NULL;
     41 static size_t set2ranges           = 0;
     42 static int    (*set2check)(wint_t) = NULL;
     43 
     44 
     45 static size_t
     46 rangelen(struct range r)
     47 {
     48 	return (r.end - r.start + 1) * r.quant;
     49 }
     50 
     51 static size_t
     52 setlen(struct range *set, size_t setranges)
     53 {
     54 	size_t len = 0, i;
     55 
     56 	for (i = 0; i < setranges; i++)
     57 		len += rangelen(set[i]);
     58 
     59 	return len;
     60 }
     61 
     62 static int
     63 rstrmatch(Rune *r, char *s, size_t n)
     64 {
     65 	size_t i;
     66 
     67 	for (i = 0; i < n; i++)
     68 		if (r[i] != s[i])
     69 			return 0;
     70 	return 1;
     71 }
     72 
     73 static size_t
     74 resolveescapes(Rune *r, size_t len)
     75 {
     76 	size_t i, off, m;
     77 
     78 	for (i = 0; i < len - 1; i++) {
     79 		if (r[i] != '\\')
     80 			continue;
     81 		off = 0;
     82 
     83 		switch (r[i + 1]) {
     84 		case '\\': r[i] = '\\'; off++; break;
     85 		case 'a':  r[i] = '\a'; off++; break;
     86 		case 'b':  r[i] = '\b'; off++; break;
     87 		case 'f':  r[i] = '\f'; off++; break;
     88 		case 'n':  r[i] = '\n'; off++; break;
     89 		case 'r':  r[i] = '\r'; off++; break;
     90 		case 't':  r[i] = '\t'; off++; break;
     91 		case 'v':  r[i] = '\v'; off++; break;
     92 		default:   continue;
     93 		}
     94 
     95 		for (m = i + 1; m <= len - off; m++)
     96 			r[m] = r[m + off];
     97 		len -= off;
     98 	}
     99 
    100 	return len;
    101 }
    102 
    103 static size_t
    104 makeset(char *str, struct range **set, int (**check)(wint_t))
    105 {
    106 	Rune  *rstr;
    107 	size_t len, i, j, m, n;
    108 	size_t q, setranges = 0;
    109 	int    factor, base;
    110 
    111 	/* rstr defines at most len ranges */
    112 	len = chartorunearr(str, &rstr);
    113 	len = resolveescapes(rstr, len);
    114 	*set = emalloc(len * sizeof(**set));
    115 
    116 	for (i = 0; i < len; i++) {
    117 		if (rstr[i] == '[') {
    118 			j = i;
    119 nextbrack:
    120 			if (j == len)
    121 				goto literal;
    122 			for (m = j; m < len; m++)
    123 				if (rstr[m] == ']') {
    124 					j = m;
    125 					break;
    126 				}
    127 			if (j == i)
    128 				goto literal;
    129 
    130 			/* CLASSES [=EQUIV=] (skip) */
    131 			if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') {
    132 				i = j;
    133 				continue;
    134 			}
    135 
    136 			/* CLASSES [:CLASS:] */
    137 			if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') {
    138 				for (n = 0; n < LEN(classes); n++) {
    139 					if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) {
    140 						*check = classes[n].check;
    141 						return 0;
    142 					}
    143 				}
    144 				eprintf("Invalid character class.\n");
    145 			}
    146 
    147 			/* REPEAT  [_*n] (only allowed in set2) */
    148 			if (j - i > 2 && rstr[i + 2] == '*' && set1ranges > 0) {
    149 				/* check if right side of '*' is a number */
    150 				q = 0;
    151 				factor = 1;
    152 				base = (rstr[i + 3] == '0') ? 8 : 10;
    153 				for (n = j - 1; n > i + 2; n--) {
    154 					if (rstr[n] < '0' && rstr[n] > '9') {
    155 						n = 0;
    156 						break;
    157 					}
    158 					q += (rstr[n] - '0') * factor;
    159 					factor *= base;
    160 				}
    161 
    162 				if (n == 0) {
    163 					j = m + 1;
    164 					goto nextbrack;
    165 				}
    166 				(*set)[setranges].start = rstr[i + 1];
    167 				(*set)[setranges].end   = rstr[i + 1];
    168 				(*set)[setranges].quant = q ? q : setlen(set1, set1ranges);
    169 				setranges++;
    170 				i = j;
    171 				continue;
    172 			}
    173 
    174 			j = m + 1;
    175 			goto nextbrack;
    176 		}
    177 literal:
    178 		/* RANGES [_-__-_], _-__-_ */
    179 		/* LITERALS _______ */
    180 		(*set)[setranges].start = rstr[i];
    181 
    182 		if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i])
    183 			i += 2;
    184 		(*set)[setranges].end = rstr[i];
    185 		(*set)[setranges].quant = 1;
    186 		setranges++;
    187 	}
    188 
    189 	free(rstr);
    190 	return setranges;
    191 }
    192 
    193 static void
    194 usage(void)
    195 {
    196 	eprintf("usage: %s [-cCds] set1 [set2]\n", argv0);
    197 }
    198 
    199 int
    200 main(int argc, char *argv[])
    201 {
    202 	Rune r = 0, lastrune = 0;
    203 	size_t off1, off2, i, m;
    204 
    205 	ARGBEGIN {
    206 	case 'c':
    207 	case 'C':
    208 		cflag = 1;
    209 		break;
    210 	case 'd':
    211 		dflag = 1;
    212 		break;
    213 	case 's':
    214 		sflag = 1;
    215 		break;
    216 	default:
    217 		usage();
    218 	} ARGEND;
    219 
    220 	if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag))
    221 		usage();
    222 	set1ranges = makeset(argv[0], &set1, &set1check);
    223 	if (argc == 2)
    224 		set2ranges = makeset(argv[1], &set2, &set2check);
    225 	if (dflag == sflag && !set2ranges && !set2check)
    226 		eprintf("set2 must be non-empty.\n");
    227 	if (set2check && set2check != iswlower && set2check != iswupper)
    228 		eprintf("set2 can only be the 'lower' or 'upper' class.\n");
    229 read:
    230 	if (!readrune("<stdin>", stdin, &r))
    231 		return 0;
    232 	off1 = off2 = 0;
    233 	for (i = 0; i < set1ranges; i++) {
    234 		if (set1[i].start <= r && r <= set1[i].end) {
    235 			if (dflag && !cflag)
    236 				goto read;
    237 			if (sflag) {
    238 				if (r == lastrune)
    239 					goto read;
    240 				else
    241 					goto write;
    242 			}
    243 			for (m = 0; m < i; m++)
    244 				off1 += rangelen(set1[m]);
    245 			off1 += r - set1[m].start;
    246 			if (off1 > setlen(set2, set2ranges) - 1) {
    247 				r = set2[set2ranges - 1].end;
    248 				goto write;
    249 			}
    250 			for (m = 0; m < set2ranges; m++) {
    251 				if (off2 + rangelen(set2[m]) > off1) {
    252 					m++;
    253 					break;
    254 				}
    255 				off2 += rangelen(set2[m]);
    256 			}
    257 			m--;
    258 			r = set2[m].start + (off1 - off2) / set2[m].quant;
    259 
    260 			goto write;
    261 		}
    262 	}
    263 	if (set1check && set1check((wint_t)r)) {
    264 		if (dflag && !cflag)
    265 			goto read;
    266 		if (sflag) {
    267 			if (r == lastrune)
    268 				goto read;
    269 			else
    270 				goto write;
    271 		}
    272 		if (set1check == iswupper && set2check == iswlower)
    273 			r = towlower((wint_t)r);
    274 		else if (set1check == iswlower && set2check == iswupper)
    275 			r = towupper((wint_t)r);
    276 		else if (set2ranges > 0)
    277 			r = set2[set2ranges - 1].end;
    278 		else
    279 			eprintf("Misaligned character classes.\n");
    280 	}
    281 	if (dflag && cflag)
    282 		goto read;
    283 	if (dflag && sflag && r == lastrune)
    284 		goto read;
    285 write:
    286 	lastrune = r;
    287 	writerune("<stdout>", stdout, &r);
    288 	goto read;
    289 }