sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

commit 692c11bf2b095cb6a5b221811656921e2a3f330d
parent 48bf88851a20afa021c6739d0716e48c1abf5267
Author: FRIGN <dev@frign.de>
Date:   Sun, 25 Jan 2015 14:31:02 +0100

Add tablist support and a mandoc-manpage to expand(1)

and mark it as finished in the README.

This is another example showing how broken the GNU coreutils are:

$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö

This is because they are still not UTF-8-aware and actually treat
"ä" as two separate characters (its two bytes), so "äää" is counted
as 6 columns and padded with 4 spaces up to the tab stop at column 10.
The correct behaviour, however, is to pad "äää" (3 columns) with
2 spaces up to the tab stop at column 5.
One can only imagine how this silently breaks a lot of code around
the world.
WHAT WERE THEY THINKING?

Diffstat:
M README | 2 +-
M expand.1 | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++-------------------------
M expand.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
3 files changed, 143 insertions(+), 77 deletions(-)

diff --git a/README b/README @@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support, = du no -H, -L, (-x) =* echo yes none =* env yes none -# expand yes none +#* expand yes none expr yes none =* false yes none fold yes none diff --git a/expand.1 b/expand.1 @@ -1,25 +1,50 @@ -.TH EXPAND 1 sbase\-VERSION -.SH NAME -expand \- expand tabs to spaces -.SH SYNOPSIS -.B expand -.RB [ \-t -.IR n ] -.RI [ file ...] -.SH DESCRIPTION -expand processes the named files or the standard input, writing the -standard output with tabs changed into spaces. Backspace characters -are preserved into the output and decrement the column count for tab -calculations. -.SH OPTIONS -.TP -.BI \-i -Only change tabs to spaces at the start of lines. -.TP -.BI \-t " n" -Expand tabs to -.I n -spaces. We currently support only a single numerical argument. -.SH SEE ALSO -.IR unexpand (1), -.IR fold (1) +.Dd January 25, 2015 +.Dt EXPAND 1 sbase\-VERSION +.Sh NAME +.Nm expand +.Nd expand tabs to spaces +.Sh SYNOPSIS +.Nm expand +.Op Fl i +.Op Fl t Ar tablist +.Op Ar file ... +.Sh DESCRIPTION +.Nm +converts tabs to spaces in each +.Ar file +as specified in +.Ar tablist . +If no file is given, +.Nm +reads from stdin. +.Pp +Backspace characters are preserved and decrement the column count +for tab calculations. +.Sh OPTIONS +.Bl -tag -width Ds +.It Fl i +Only expand tabs at the beginning of lines, i.e. expand each +line until a character different from '\et' and ' ' is reached. +.It Fl t Ar tablist +Specify tab size or tabstops. +.Ar tablist +is a list of one (in the former case) or multiple (in the latter case) +strictly positive integers separated by ' ' or ','. +.Pp +The default +.Ar tablist +is "8". +.El +.Sh SEE ALSO +.Xr unexpand 1 , +.Xr fold 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification. 
+.Pp +The +.Op Fl i +flag is an extension to that specification diff --git a/expand.c b/expand.c @@ -1,89 +1,86 @@ /* See LICENSE file for copyright and license details. */ #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "utf.h" #include "util.h" -static int expand(const char *, FILE *, int); +static int iflag = 0; +static size_t *tablist = NULL; +static size_t tablistlen = 0; -static int iflag = 0; - -static void -usage(void) +static size_t +parselist(const char *s, size_t slen) { - eprintf("usage: %s [-i] [-t n] [file...]\n", argv0); -} - -int -main(int argc, char *argv[]) -{ - FILE *fp; - int tabstop = 8; - int ret = 0; + size_t i, m, len; + char *sep; - ARGBEGIN { - case 'i': - iflag = 1; - break; - case 't': - tabstop = estrtol(EARGF(usage()), 0); - if (!tabstop) - eprintf("tab size cannot be zero\n"); - break; - default: - usage(); - } ARGEND; + if (s[0] == ',' || s[0] == ' ') + eprintf("expand: tablist can't begin with a ',' or ' '.\n"); + if (s[slen - 1] == ',' || s[slen - 1] == ' ') + eprintf("expand: tablist can't end with a ',' or ' '.\n"); - if (argc == 0) { - expand("<stdin>", stdin, tabstop); - } else { - for (; argc > 0; argc--, argv++) { - if (!(fp = fopen(argv[0], "r"))) { - weprintf("fopen %s:", argv[0]); - ret = 1; - continue; - } - expand(argv[0], fp, tabstop); - fclose(fp); + len = 1; + for (i = 0; i < slen; i++) { + if (s[i] == ',' || s[i] == ' ') { + if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' ')) + eprintf("expand: empty field in tablist.\n"); + len++; } } - return ret; + tablist = emalloc((len + 1) * sizeof(size_t)); + + m = 0; + for (i = 0; i < slen; i += sep - (s + i) + 1) { + tablist[m++] = strtol(s + i, &sep, 0); + if (tablist[m - 1] == 0) + eprintf("expand: tab size can't be zero.\n"); + if (*sep && *sep != ',' && *sep != ' ') + eprintf("expand: invalid number in tablist.\n"); + if (m > 1 && tablist[m - 1] < tablist[m - 2]) + eprintf("expand: tablist must be ascending.\n"); + } + + /* tab length = 1 for the 
overflowing case later in the matcher */ + tablist[len] = 1; + return len; } static int -expand(const char *file, FILE *fp, int tabstop) +expand(const char *file, FILE *fp) { - int col = 0; + size_t bol = 1, col = 0, i; Rune r; - int bol = 1; - - for (;;) { - if (!readrune(file, fp, &r)) - break; + while (readrune(file, fp, &r)) { switch (r) { case '\t': + if (tablistlen == 1) + i = 0; + else for (i = 0; i < tablistlen; i++) + if (col < tablist[i]) + break; if (bol || !iflag) { do { col++; putchar(' '); - } while (col % tabstop); + } while (col % tablist[i]); } else { putchar('\t'); - col += tabstop - col % tabstop; + col = tablist[i]; } break; case '\b': + bol = 0; if (col) col--; - bol = 0; - writerune("<stdout>", stdout, &r); + putchar('\b'); break; case '\n': - col = 0; bol = 1; - writerune("<stdout>", stdout, &r); + col = 0; + putchar('\n'); break; default: col++; @@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop) return 0; } + +static void +usage(void) +{ + eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0); +} + +int +main(int argc, char *argv[]) +{ + FILE *fp; + char *tl = "8"; + int ret = 0; + + ARGBEGIN { + case 'i': + iflag = 1; + break; + case 't': + tl = EARGF(usage()); + if (!*tl) + eprintf("expand: tablist cannot be empty.\n"); + break; + default: + usage(); + } ARGEND; + + tablistlen = parselist(tl, strlen(tl)); + + if (argc == 0) + expand("<stdin>", stdin); + else { + for (; argc > 0; argc--, argv++) { + if (!(fp = fopen(argv[0], "r"))) { + weprintf("fopen %s:", argv[0]); + ret = 1; + continue; + } + expand(argv[0], fp); + fclose(fp); + } + } + return ret; +}