tr.c (6226B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <stdlib.h> 3 4 #include "utf.h" 5 #include "util.h" 6 7 static int cflag = 0; 8 static int dflag = 0; 9 static int sflag = 0; 10 11 struct range { 12 Rune start; 13 Rune end; 14 size_t quant; 15 }; 16 17 static struct { 18 char *name; 19 int (*check)(Rune); 20 } classes[] = { 21 { "alnum", isalnumrune }, 22 { "alpha", isalpharune }, 23 { "blank", isblankrune }, 24 { "cntrl", iscntrlrune }, 25 { "digit", isdigitrune }, 26 { "graph", isgraphrune }, 27 { "lower", islowerrune }, 28 { "print", isprintrune }, 29 { "punct", ispunctrune }, 30 { "space", isspacerune }, 31 { "upper", isupperrune }, 32 { "xdigit", isxdigitrune }, 33 }; 34 35 static struct range *set1 = NULL; 36 static size_t set1ranges = 0; 37 static int (*set1check)(Rune) = NULL; 38 static struct range *set2 = NULL; 39 static size_t set2ranges = 0; 40 static int (*set2check)(Rune) = NULL; 41 42 static size_t 43 rangelen(struct range r) 44 { 45 return (r.end - r.start + 1) * r.quant; 46 } 47 48 static size_t 49 setlen(struct range *set, size_t setranges) 50 { 51 size_t len = 0, i; 52 53 for (i = 0; i < setranges; i++) 54 len += rangelen(set[i]); 55 56 return len; 57 } 58 59 static int 60 rstrmatch(Rune *r, char *s, size_t n) 61 { 62 size_t i; 63 64 for (i = 0; i < n; i++) 65 if (r[i] != s[i]) 66 return 0; 67 return 1; 68 } 69 70 static size_t 71 makeset(char *str, struct range **set, int (**check)(Rune)) 72 { 73 Rune *rstr; 74 size_t len, i, j, m, n; 75 size_t q, setranges = 0; 76 int factor, base; 77 78 /* rstr defines at most len ranges */ 79 unescape(str); 80 rstr = ereallocarray(NULL, utflen(str) + 1, sizeof(*rstr)); 81 len = utftorunestr(str, rstr); 82 *set = ereallocarray(NULL, len, sizeof(**set)); 83 84 for (i = 0; i < len; i++) { 85 if (rstr[i] == '[') { 86 j = i; 87 nextbrack: 88 if (j >= len) 89 goto literal; 90 for (m = j; m < len; m++) 91 if (rstr[m] == ']') { 92 j = m; 93 break; 94 } 95 if (j == i) 96 goto literal; 97 98 /* CLASSES [=EQUIV=] (skip) */ 99 if (j - i > 3 && rstr[i + 1] == '=' && rstr[m - 1] == '=') { 100 if (j - i != 4) 101 goto literal; 102 (*set)[setranges].start = rstr[i + 2]; 103 (*set)[setranges].end = rstr[i + 2]; 104 (*set)[setranges].quant = 1; 105 setranges++; 106 i = j; 107 continue; 108 } 109 110 /* CLASSES [:CLASS:] */ 111 if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') { 112 for (n = 0; n < LEN(classes); n++) { 113 if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) { 114 *check = classes[n].check; 115 return 0; 116 } 117 } 118 eprintf("Invalid character class.\n"); 119 } 120 121 /* REPEAT [_*n] (only allowed in set2) */ 122 if (j - i > 2 && rstr[i + 2] == '*') { 123 /* check if right side of '*' is a number */ 124 q = 0; 125 factor = 1; 126 base = (rstr[i + 3] == '0') ? 8 : 10; 127 for (n = j - 1; n > i + 2; n--) { 128 if (rstr[n] < '0' || rstr[n] > '9') { 129 n = 0; 130 break; 131 } 132 q += (rstr[n] - '0') * factor; 133 factor *= base; 134 } 135 if (n == 0) { 136 j = m + 1; 137 goto nextbrack; 138 } 139 (*set)[setranges].start = rstr[i + 1]; 140 (*set)[setranges].end = rstr[i + 1]; 141 (*set)[setranges].quant = q ? q : setlen(set1, MAX(set1ranges, 1)); 142 setranges++; 143 i = j; 144 continue; 145 } 146 147 j = m + 1; 148 goto nextbrack; 149 } 150 literal: 151 /* RANGES [_-__-_], _-__-_ */ 152 /* LITERALS _______ */ 153 (*set)[setranges].start = rstr[i]; 154 155 if (i < len - 2 && rstr[i + 1] == '-' && rstr[i + 2] >= rstr[i]) 156 i += 2; 157 (*set)[setranges].end = rstr[i]; 158 (*set)[setranges].quant = 1; 159 setranges++; 160 } 161 162 free(rstr); 163 return setranges; 164 } 165 166 static void 167 usage(void) 168 { 169 eprintf("usage: %s [-cCds] set1 [set2]\n", argv0); 170 } 171 172 int 173 main(int argc, char *argv[]) 174 { 175 Rune r, lastrune = 0; 176 size_t off1, off2, i, m; 177 int ret = 0; 178 179 ARGBEGIN { 180 case 'c': 181 case 'C': 182 cflag = 1; 183 break; 184 case 'd': 185 dflag = 1; 186 break; 187 case 's': 188 sflag = 1; 189 break; 190 default: 191 usage(); 192 } ARGEND 193 194 if (!argc || argc > 2 || (argc == 1 && dflag == sflag)) 195 usage(); 196 set1ranges = makeset(argv[0], &set1, &set1check); 197 if (argc == 2) 198 set2ranges = makeset(argv[1], &set2, &set2check); 199 200 if (!dflag || (argc == 2 && sflag)) { 201 /* sanity checks as we are translating */ 202 if (!sflag && !set2ranges && !set2check) 203 eprintf("cannot map to an empty set.\n"); 204 if (set2check && set2check != islowerrune && 205 set2check != isupperrune) { 206 eprintf("can only map to 'lower' and 'upper' class.\n"); 207 } 208 } 209 read: 210 if (!efgetrune(&r, stdin, "<stdin>")) { 211 ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>"); 212 return ret; 213 } 214 if (argc == 1 && sflag) 215 goto write; 216 for (i = 0, off1 = 0; i < set1ranges; off1 += rangelen(set1[i]), i++) { 217 if (set1[i].start <= r && r <= set1[i].end) { 218 if (dflag) { 219 if (cflag) 220 goto write; 221 else 222 goto read; 223 } 224 if (cflag) 225 goto write; 226 227 /* map r to set2 */ 228 if (set2check) { 229 if (set2check == islowerrune) 230 r = tolowerrune(r); 231 else 232 r = toupperrune(r); 233 } else { 234 off1 += r - set1[i].start; 235 if (off1 > setlen(set2, set2ranges) - 1) { 236 r = set2[set2ranges - 1].end; 237 goto write; 238 } 239 for (m = 0, off2 = 0; m < set2ranges; m++) { 240 if (off2 + rangelen(set2[m]) > off1) { 241 m++; 242 break; 243 } 244 off2 += rangelen(set2[m]); 245 } 246 m--; 247 r = set2[m].start + (off1 - off2) / set2[m].quant; 248 } 249 goto write; 250 } 251 } 252 if (set1check && set1check(r)) { 253 if (dflag) { 254 if (cflag) 255 goto write; 256 else 257 goto read; 258 } 259 if (set2check) { 260 if (set2check == islowerrune) 261 r = tolowerrune(r); 262 else 263 r = toupperrune(r); 264 } else { 265 r = set2[set2ranges - 1].end; 266 } 267 goto write; 268 } 269 if (!dflag && cflag) { 270 if (set2check) { 271 if (set2check == islowerrune) 272 r = tolowerrune(r); 273 else 274 r = toupperrune(r); 275 } else { 276 r = set2[set2ranges - 1].end; 277 } 278 goto write; 279 } 280 if (dflag && cflag) 281 goto read; 282 write: 283 if (argc == 1 && sflag && r == lastrune) { 284 if (set1check && set1check(r)) 285 goto read; 286 for (i = 0; i < set1ranges; i++) { 287 if (set1[i].start <= r && r <= set1[i].end) 288 goto read; 289 } 290 } 291 if (argc == 2 && sflag && r == lastrune) { 292 if (set2check && set2check(r)) 293 goto read; 294 for (i = 0; i < set2ranges; i++) { 295 if (set2[i].start <= r && r <= set2[i].end) 296 goto read; 297 } 298 } 299 efputrune(&r, stdout, "<stdout>"); 300 lastrune = r; 301 goto read; 302 }