commit 692c11bf2b095cb6a5b221811656921e2a3f330d
parent 48bf88851a20afa021c6739d0716e48c1abf5267
Author: FRIGN <dev@frign.de>
Date: Sun, 25 Jan 2015 14:31:02 +0100
Add tablist support and a mandoc-manpage to expand(1)
and mark it as finished in the README.
This is another example showing how broken the GNU coreutils are:
$ echo -e "äää\tüüü\tööö" | gnu-expand -t "5,10,20"
äää    üüü    ööö
$ echo -e "äää\tüüü\tööö" | sbase-expand -t "5,10,20"
äää  üüü  ööö
This is due to the fact that they are still not UTF-8-aware and
actually see each "ä" as two separate characters (its two UTF-8 bytes),
so they count "äää" as 6 columns and pad it with 4 spaces up to the
tab stop at column 10.
The correct way, however, is to count "äää" as 3 columns and pad it
with 2 spaces up to the tab stop at column 5.
One can only imagine how this silently breaks a lot of code around
the world.
WHAT WERE THEY THINKING?
Diffstat:
M | README | | | 2 | +- |
M | expand.1 | | | 75 | ++++++++++++++++++++++++++++++++++++++++++++++++++------------------------- |
M | expand.c | | | 143 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
3 files changed, 143 insertions(+), 77 deletions(-)
diff --git a/README b/README
@@ -28,7 +28,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
= du no -H, -L, (-x)
=* echo yes none
=* env yes none
-# expand yes none
+#* expand yes none
expr yes none
=* false yes none
fold yes none
diff --git a/expand.1 b/expand.1
@@ -1,25 +1,50 @@
-.TH EXPAND 1 sbase\-VERSION
-.SH NAME
-expand \- expand tabs to spaces
-.SH SYNOPSIS
-.B expand
-.RB [ \-t
-.IR n ]
-.RI [ file ...]
-.SH DESCRIPTION
-expand processes the named files or the standard input, writing the
-standard output with tabs changed into spaces. Backspace characters
-are preserved into the output and decrement the column count for tab
-calculations.
-.SH OPTIONS
-.TP
-.BI \-i
-Only change tabs to spaces at the start of lines.
-.TP
-.BI \-t " n"
-Expand tabs to
-.I n
-spaces. We currently support only a single numerical argument.
-.SH SEE ALSO
-.IR unexpand (1),
-.IR fold (1)
+.Dd January 25, 2015
+.Dt EXPAND 1 sbase\-VERSION
+.Sh NAME
+.Nm expand
+.Nd expand tabs to spaces
+.Sh SYNOPSIS
+.Nm expand
+.Op Fl i
+.Op Fl t Ar tablist
+.Op Ar file ...
+.Sh DESCRIPTION
+.Nm
+converts tabs to spaces in each
+.Ar file
+as specified in
+.Ar tablist .
+If no file is given,
+.Nm
+reads from stdin.
+.Pp
+Backspace characters are preserved and decrement the column count
+for tab calculations.
+.Sh OPTIONS
+.Bl -tag -width Ds
+.It Fl i
+Only expand tabs at the beginning of lines, i.e. expand each
+line until a character different from '\et' and ' ' is reached.
+.It Fl t Ar tablist
+Specify tab size or tabstops.
+.Ar tablist
+is a list of one (in the former case) or multiple (in the latter case)
+strictly positive integers separated by ' ' or ','.
+.Pp
+The default
+.Ar tablist
+is "8".
+.El
+.Sh SEE ALSO
+.Xr unexpand 1 ,
+.Xr fold 1
+.Sh STANDARDS
+The
+.Nm
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The
+.Op Fl i
+flag is an extension to that specification.
diff --git a/expand.c b/expand.c
@@ -1,89 +1,86 @@
/* See LICENSE file for copyright and license details. */
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
#include "utf.h"
#include "util.h"
-static int expand(const char *, FILE *, int);
+static int iflag = 0; /* set to 1 by -i in main(); expand() converts a tab to spaces only if bol || !iflag */
+static size_t *tablist = NULL; /* tab size or tab stops; allocated and filled by parselist() */
+static size_t tablistlen = 0; /* value returned by parselist(): number of parsed entries, not counting the extra trailing slot */
-static int iflag = 0;
-
-static void
-usage(void)
+static size_t
+parselist(const char *s, size_t slen) /* parse the tablist string s (slen bytes) into the global tablist; returns the number of entries. Bad input is reported through eprintf(), presumably fatal -- confirm in util.h */
{
- eprintf("usage: %s [-i] [-t n] [file...]\n", argv0);
-}
-
-int
-main(int argc, char *argv[])
-{
- FILE *fp;
- int tabstop = 8;
- int ret = 0;
+ size_t i, m, len;
+ char *sep; /* set by strtol() to the first character it did not consume */
- ARGBEGIN {
- case 'i':
- iflag = 1;
- break;
- case 't':
- tabstop = estrtol(EARGF(usage()), 0);
- if (!tabstop)
- eprintf("tab size cannot be zero\n");
- break;
- default:
- usage();
- } ARGEND;
+ if (s[0] == ',' || s[0] == ' ')
+ eprintf("expand: tablist can't begin with a ',' or ' '.\n");
+ if (s[slen - 1] == ',' || s[slen - 1] == ' ') /* NOTE(review): out-of-bounds read if slen == 0; main() calls eprintf() on an empty -t argument before this runs */
+ eprintf("expand: tablist can't end with a ',' or ' '.\n");
- if (argc == 0) {
- expand("<stdin>", stdin, tabstop);
- } else {
- for (; argc > 0; argc--, argv++) {
- if (!(fp = fopen(argv[0], "r"))) {
- weprintf("fopen %s:", argv[0]);
- ret = 1;
- continue;
- }
- expand(argv[0], fp, tabstop);
- fclose(fp);
+ len = 1; /* first pass: number of entries = number of separators + 1 */
+ for (i = 0; i < slen; i++) {
+ if (s[i] == ',' || s[i] == ' ') {
+ if (i > 0 && (s[i - 1] == ',' || s[i - 1] == ' ')) /* two adjacent separators */
+ eprintf("expand: empty field in tablist.\n");
+ len++;
}
}
- return ret;
+ tablist = emalloc((len + 1) * sizeof(size_t)); /* one extra slot for the value stored at tablist[len] below */
+
+ m = 0;
+ for (i = 0; i < slen; i += sep - (s + i) + 1) { /* second pass: step i over the parsed number plus one separator */
+ tablist[m++] = strtol(s + i, &sep, 0); /* NOTE(review): base 0 also accepts octal/hex; a negative long converts to a huge size_t and is not caught by the zero check */
+ if (tablist[m - 1] == 0)
+ eprintf("expand: tab size can't be zero.\n");
+ if (*sep && *sep != ',' && *sep != ' ') /* number must end at a separator or at the end of the string */
+ eprintf("expand: invalid number in tablist.\n");
+ if (m > 1 && tablist[m - 1] < tablist[m - 2])
+ eprintf("expand: tablist must be ascending.\n");
+ }
+
+ /* extra entry of 1: expand() indexes tablist[tablistlen] when col is not below any stop, and col % 1 == 0 ends its padding loop after one space */
+ tablist[len] = 1;
+ return len;
}
static int
-expand(const char *file, FILE *fp, int tabstop)
+expand(const char *file, FILE *fp)
{
- int col = 0;
+ size_t bol = 1, col = 0, i;
Rune r;
- int bol = 1;
-
- for (;;) {
- if (!readrune(file, fp, &r))
- break;
+ while (readrune(file, fp, &r)) {
switch (r) {
case '\t':
+ if (tablistlen == 1)
+ i = 0;
+ else for (i = 0; i < tablistlen; i++)
+ if (col < tablist[i])
+ break;
if (bol || !iflag) {
do {
col++;
putchar(' ');
- } while (col % tabstop);
+ } while (col % tablist[i]);
} else {
putchar('\t');
- col += tabstop - col % tabstop;
+ col = tablist[i];
}
break;
case '\b':
+ bol = 0;
if (col)
col--;
- bol = 0;
- writerune("<stdout>", stdout, &r);
+ putchar('\b');
break;
case '\n':
- col = 0;
bol = 1;
- writerune("<stdout>", stdout, &r);
+ col = 0;
+ putchar('\n');
break;
default:
col++;
@@ -96,3 +93,47 @@ expand(const char *file, FILE *fp, int tabstop)
return 0;
}
+
+static void
+usage(void) /* pass the command synopsis to eprintf(); presumably prints and exits -- confirm in util.h */
+{
+ eprintf("usage: %s [-i] [-t tablist] [file ...]\n", argv0);
+}
+
+int
+main(int argc, char *argv[]) /* parse -i and -t, build the tablist, then run expand() on stdin or on each file operand */
+{
+ FILE *fp;
+ char *tl = "8"; /* default tablist; replaced by the -t argument */
+ int ret = 0;
+
+ ARGBEGIN {
+ case 'i':
+ iflag = 1;
+ break;
+ case 't':
+ tl = EARGF(usage()); /* NOTE(review): presumably yields the option argument and runs usage() when it is missing -- confirm in arg.h */
+ if (!*tl)
+ eprintf("expand: tablist cannot be empty.\n");
+ break;
+ default:
+ usage();
+ } ARGEND;
+
+ tablistlen = parselist(tl, strlen(tl)); /* parsed once, after all options have been read */
+
+ if (argc == 0) /* no file operands left: process stdin */
+ expand("<stdin>", stdin);
+ else {
+ for (; argc > 0; argc--, argv++) {
+ if (!(fp = fopen(argv[0], "r"))) {
+ weprintf("fopen %s:", argv[0]);
+ ret = 1; /* remember the failure but keep going with the next operand */
+ continue;
+ }
+ expand(argv[0], fp); /* NOTE(review): return value is ignored */
+ fclose(fp);
+ }
+ }
+ return ret; /* 1 if any fopen() failed, otherwise 0 */
+}