tr.c (5934B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <wctype.h> 3 #include <stdio.h> 4 #include <stdlib.h> 5 6 #include "utf.h" 7 #include "util.h" 8 9 static int cflag = 0; 10 static int dflag = 0; 11 static int sflag = 0; 12 13 struct range { 14 Rune start; 15 Rune end; 16 size_t quant; 17 }; 18 19 static struct { 20 char *name; 21 int (*check)(wint_t); 22 } classes[] = { 23 { "alnum", iswalnum }, 24 { "alpha", iswalpha }, 25 { "blank", iswblank }, 26 { "cntrl", iswcntrl }, 27 { "digit", iswdigit }, 28 { "graph", iswgraph }, 29 { "lower", iswlower }, 30 { "print", iswprint }, 31 { "punct", iswpunct }, 32 { "space", iswspace }, 33 { "upper", iswupper }, 34 { "xdigit", iswxdigit }, 35 }; 36 37 static struct range *set1 = NULL; 38 static size_t set1ranges = 0; 39 static int (*set1check)(wint_t) = NULL; 40 static struct range *set2 = NULL; 41 static size_t set2ranges = 0; 42 static int (*set2check)(wint_t) = NULL; 43 44 45 static size_t 46 rangelen(struct range r) 47 { 48 return (r.end - r.start + 1) * r.quant; 49 } 50 51 static size_t 52 setlen(struct range *set, size_t setranges) 53 { 54 size_t len = 0, i; 55 56 for (i = 0; i < setranges; i++) 57 len += rangelen(set[i]); 58 59 return len; 60 } 61 62 static int 63 rstrmatch(Rune *r, char *s, size_t n) 64 { 65 size_t i; 66 67 for (i = 0; i < n; i++) 68 if (r[i] != s[i]) 69 return 0; 70 return 1; 71 } 72 73 static size_t 74 resolveescapes(Rune *r, size_t len) 75 { 76 size_t i, off, m; 77 78 for (i = 0; i < len - 1; i++) { 79 if (r[i] != '\\') 80 continue; 81 off = 0; 82 83 switch (r[i + 1]) { 84 case '\\': r[i] = '\\'; off++; break; 85 case 'a': r[i] = '\a'; off++; break; 86 case 'b': r[i] = '\b'; off++; break; 87 case 'f': r[i] = '\f'; off++; break; 88 case 'n': r[i] = '\n'; off++; break; 89 case 'r': r[i] = '\r'; off++; break; 90 case 't': r[i] = '\t'; off++; break; 91 case 'v': r[i] = '\v'; off++; break; 92 default: continue; 93 } 94 95 for (m = i + 1; m <= len - off; m++) 96 r[m] = r[m + off]; 97 len -= off; 98 } 99 100 return len; 101 } 102 103 static size_t 104 makeset(char *str, struct range **set, int (**check)(wint_t)) 105 { 106 Rune *rstr; 107 size_t len, i, j, m, n; 108 size_t q, setranges = 0; 109 int factor, base; 110 111 /* rstr defines at most len ranges */ 112 len = chartorunearr(str, &rstr); 113 len = resolveescapes(rstr, len); 114 *set = emalloc(len * sizeof(**set)); 115 116 for (i = 0; i < len; i++) { 117 if (rstr[i] == '[') { 118 j = i; 119 nextbrack: 120 if (j == len) 121 goto literal; 122 for (m = j; m < len; m++) 123 if (rstr[m] == ']') { 124 j = m; 125 break; 126 } 127 if (j == i) 128 goto literal; 129 130 /* CLASSES [=EQUIV=] (skip) */ 131 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') { 132 i = j; 133 continue; 134 } 135 136 /* CLASSES [:CLASS:] */ 137 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') { 138 for (n = 0; n < LEN(classes); n++) { 139 if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) { 140 *check = classes[n].check; 141 return 0; 142 } 143 } 144 eprintf("Invalid character class.\n"); 145 } 146 147 /* REPEAT [_*n] (only allowed in set2) */ 148 if (j - i > 2 && rstr[i + 2] == '*' && set1ranges > 0) { 149 /* check if right side of '*' is a number */ 150 q = 0; 151 factor = 1; 152 base = (rstr[i + 3] == '0') ? 8 : 10; 153 for (n = j - 1; n > i + 2; n--) { 154 if (rstr[n] < '0' && rstr[n] > '9') { 155 n = 0; 156 break; 157 } 158 q += (rstr[n] - '0') * factor; 159 factor *= base; 160 } 161 162 if (n == 0) { 163 j = m + 1; 164 goto nextbrack; 165 } 166 (*set)[setranges].start = rstr[i + 1]; 167 (*set)[setranges].end = rstr[i + 1]; 168 (*set)[setranges].quant = q ? q : setlen(set1, set1ranges); 169 setranges++; 170 i = j; 171 continue; 172 } 173 174 j = m + 1; 175 goto nextbrack; 176 } 177 literal: 178 /* RANGES [_-__-_], _-__-_ */ 179 /* LITERALS _______ */ 180 (*set)[setranges].start = rstr[i]; 181 182 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i]) 183 i += 2; 184 (*set)[setranges].end = rstr[i]; 185 (*set)[setranges].quant = 1; 186 setranges++; 187 } 188 189 free(rstr); 190 return setranges; 191 } 192 193 static void 194 usage(void) 195 { 196 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0); 197 } 198 199 int 200 main(int argc, char *argv[]) 201 { 202 Rune r = 0, lastrune = 0; 203 size_t off1, off2, i, m; 204 205 ARGBEGIN { 206 case 'c': 207 case 'C': 208 cflag = 1; 209 break; 210 case 'd': 211 dflag = 1; 212 break; 213 case 's': 214 sflag = 1; 215 break; 216 default: 217 usage(); 218 } ARGEND; 219 220 if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag)) 221 usage(); 222 set1ranges = makeset(argv[0], &set1, &set1check); 223 if (argc == 2) 224 set2ranges = makeset(argv[1], &set2, &set2check); 225 if (dflag == sflag && !set2ranges && !set2check) 226 eprintf("set2 must be non-empty.\n"); 227 if (set2check && set2check != iswlower && set2check != iswupper) 228 eprintf("set2 can only be the 'lower' or 'upper' class.\n"); 229 read: 230 if (!readrune("<stdin>", stdin, &r)) 231 return 0; 232 off1 = off2 = 0; 233 for (i = 0; i < set1ranges; i++) { 234 if (set1[i].start <= r && r <= set1[i].end) { 235 if (dflag && !cflag) 236 goto read; 237 if (sflag) { 238 if (r == lastrune) 239 goto read; 240 else 241 goto write; 242 } 243 for (m = 0; m < i; m++) 244 off1 += rangelen(set1[m]); 245 off1 += r - set1[m].start; 246 if (off1 > setlen(set2, set2ranges) - 1) { 247 r = set2[set2ranges - 1].end; 248 goto write; 249 } 250 for (m = 0; m < set2ranges; m++) { 251 if (off2 + rangelen(set2[m]) > off1) { 252 m++; 253 break; 254 } 255 off2 += rangelen(set2[m]); 256 } 257 m--; 258 r = set2[m].start + (off1 - off2) / set2[m].quant; 259 260 goto write; 261 } 262 } 263 if (set1check && set1check((wint_t)r)) { 264 if (dflag && !cflag) 265 goto read; 266 if (sflag) { 267 if (r == lastrune) 268 goto read; 269 else 270 goto write; 271 } 272 if (set1check == iswupper && set2check == iswlower) 273 r = towlower((wint_t)r); 274 else if (set1check == iswlower && set2check == iswupper) 275 r = towupper((wint_t)r); 276 else if (set2ranges > 0) 277 r = set2[set2ranges - 1].end; 278 else 279 eprintf("Misaligned character classes.\n"); 280 } 281 if (dflag && cflag) 282 goto read; 283 if (dflag && sflag && r == lastrune) 284 goto read; 285 write: 286 lastrune = r; 287 writerune("<stdout>", stdout, &r); 288 goto read; 289 }