sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

commit 09704afc2475794edcc35fc5bc7fae67bd1d4f78
parent 369bb01eb14ccaeee42ae30bb97285c35cd3ef1f
Author: FRIGN <dev@frign.de>
Date:   Sun, 11 Jan 2015 20:26:20 +0100

Add Unicode character class support

Thinking about it long enough, the solution seems almost trivial.

Diffstat:
Mtr.c | 77+++++++++++++++++++++++++++++++++++++++++++----------------------------------
1 file changed, 43 insertions(+), 34 deletions(-)

diff --git a/tr.c b/tr.c @@ -1,3 +1,4 @@ +#include <wctype.h> #include <stdio.h> #include <stdlib.h> @@ -14,34 +15,31 @@ struct range { size_t quant; }; -#define DIGIT "0-9" -#define UPPER "A-Z" -#define LOWER "a-z" -#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" -#define ALNUM DIGIT UPPER LOWER - static struct { - char *name; - char *str; + char *name; + int (*check)(wint_t); } classes[] = { - { "alnum", ALNUM }, - { "alpha", UPPER LOWER }, - { "blank", " \t" }, - { "cntrl", "\000-\037\177" }, - { "digit", DIGIT }, - { "graph", ALNUM PUNCT }, - { "lower", LOWER }, - { "print", ALNUM PUNCT " " }, - { "punct", PUNCT }, - { "space", "\t\n\v\f\r" }, - { "upper", UPPER }, - { "xdigit", DIGIT "A-Fa-f" }, + { "alnum", iswalnum }, + { "alpha", iswalpha }, + { "blank", iswblank }, + { "cntrl", iswcntrl }, + { "digit", iswdigit }, + { "graph", iswgraph }, + { "lower", iswlower }, + { "print", iswlower }, + { "punct", iswpunct }, + { "space", iswspace }, + { "upper", iswupper }, + { "xdigit", iswxdigit }, }; -static struct range *set1 = NULL; -static size_t set1ranges = 0; -static struct range *set2 = NULL; -static size_t set2ranges = 0; +static struct range *set1 = NULL; +static size_t set1ranges = 0; +static int (*set1check)(wint_t) = NULL; +static struct range *set2 = NULL; +static size_t set2ranges = 0; +static int (*set2check)(wint_t) = NULL; + static size_t rangelen(struct range r) @@ -72,16 +70,13 @@ rstrmatch(Rune *r, char *s, size_t n) } static size_t -makeset(char *str, struct range **set) +makeset(char *str, struct range **set, int (**check)(wint_t)) { Rune *rstr; size_t len, i, j, m, n; - size_t q, setranges; + size_t q, setranges = 0; int factor, base; -reset: - setranges = 0; - /* rstr defines at most len ranges */ len = chartorunearr(str, &rstr); *set = emalloc(len * sizeof(**set)); @@ -111,8 +106,8 @@ nextbrack: if (j - i > 3 && rstr[i + 1] == ':' && rstr[m - 1] == ':') { for (n = 0; n < LEN(classes); n++) { if (rstrmatch(rstr + i + 2, classes[n].name, j - i - 3)) { - str = classes[n].str; - goto reset; + *check = classes[n].check; + return 0; } } eprintf("Invalid character class\n"); @@ -193,10 +188,10 @@ main(int argc, char *argv[]) if (argc < 1 || argc > 2 || (argc == 1 && dflag == sflag)) usage(); - set1ranges = makeset(argv[0], &set1); + set1ranges = makeset(argv[0], &set1, &set1check); if (argc == 2) - set2ranges = makeset(argv[1], &set2); - if (!dflag && !set2ranges) + set2ranges = makeset(argv[1], &set2, &set2check); + if (dflag == sflag && !set2ranges && !set2check) eprintf("set2 must be non-empty\n"); read: if (!readrune("<stdin>", stdin, &r)) @@ -232,6 +227,20 @@ read: goto write; } } + if (set1check && set1check(r)) { + if (dflag && !cflag) + goto read; + if (sflag) { + if (r == lastrune) + goto read; + else + goto write; + } + if (set1check == iswupper && set2check == iswlower) + r = towlower(r); + if (set1check == iswlower && set2check == iswupper) + r = towupper(r); + } if (dflag && cflag) goto read; if (dflag && sflag && r == lastrune)