commit 8b87d0098a1c91c3e7b3ba6cc77a176d178a6812
parent 7f7e7dcbb92151e8d0793e62599ffc4c63ec28df
Author: Tuukka Kataja <stuge@xor.fi>
Date: Mon, 9 Jun 2014 16:52:20 +0100
Add unexpand(1)
Diffstat:
7 files changed, 170 insertions(+), 4 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -26,6 +26,7 @@ MIT/X Consortium License
© 2014 Silvan Jegen <s.jegen@gmail.com>
© 2014 Laslo Hunhold <dev@frign.de>
© 2014 Daniel Bainton <dpb@driftaway.org>
+© 2014 Tuukka Kataja <stuge@xor.fi>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
diff --git a/Makefile b/Makefile
@@ -89,6 +89,7 @@ SRC = \
uudecode.c \
uuencode.c \
uname.c \
+ unexpand.c \
uniq.c \
unlink.c \
seq.c \
diff --git a/README b/README
@@ -11,8 +11,8 @@ The following programs are currently implemented:
kill ln ls mc md5sum mkdir mkfifo mktemp mv nice nl nohup paste
printenv pwd readlink renice rm rmdir sleep setsid sort split
sponge strings sync tail tar tee test touch tr true tty uudecode
- uuencode uname uniq unlink seq sha1sum sha256sum sha512sum wc
- xargs yes
+ uuencode uname unexpand uniq unlink seq sha1sum sha256sum
+ sha512sum wc xargs yes
sbase is mostly following POSIX but we deviate wherever we think it is
appropriate.
diff --git a/TODO b/TODO
@@ -11,8 +11,6 @@ test [expression...]
tr:
support for character classes [:alnum:]
-unexpand [-a] [-t N] [file...]
-
od/hd
uuencode, uudecode:
diff --git a/expand.1 b/expand.1
@@ -18,4 +18,5 @@ Expand tabs to
.I n
spaces. We currently support only a single numerical argument.
.SH SEE ALSO
+.IR unexpand (1),
.IR fold (1)
diff --git a/unexpand.1 b/unexpand.1
@@ -0,0 +1,25 @@
+.TH UNEXPAND 1 sbase\-VERSION
+.SH NAME
+unexpand \- convert blanks to tabs
+.SH SYNOPSIS
+.B unexpand
+.RB [ \-a ]
+.RB [ \-t
+.IR n ]
+.RI [ file ...]
+.SH DESCRIPTION
+unexpand processes the named files or the standard input, writing to
+the standard output with consecutive blanks (spaces and tabs) converted
+into tabs. Backspace characters are preserved in the output and
+decrement the column count for tab calculations.
+.SH OPTIONS
+.TP
+.BI \-a
+convert blanks to tabs everywhere, not just at the start of lines
+.TP
+.BI \-t " n"
+set tab size to
+.I n
+spaces (default: 8). This option implies \fB\-a\fP.
+.SH SEE ALSO
+.IR expand (1)
diff --git a/unexpand.c b/unexpand.c
@@ -0,0 +1,140 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <wchar.h>
+#include "util.h"
+
+/* An input stream together with the name used when reporting errors on it. */
+typedef struct {
+ FILE *fp; /* stream in() reads wide characters from */
+ const char *name; /* text in() puts in its read-error message */
+} Fdescr;
+
+/* Forward declaration; main() calls unexpand() before its definition. */
+static void unexpand(Fdescr *dsc);
+
+/* Convert blank runs anywhere on a line, not only leading ones; set by -a and, via fallthrough, by -t. */
+static bool aflag = false;
+/* Distance between tab stops in columns; overwritten by the -t argument. */
+static int tabsize = 8;
+
+/*
+ * Pass the command-line synopsis to eprintf(), with argv0 filling in the
+ * program name. Neither eprintf() nor argv0 is defined in this file.
+ * NOTE(review): presumably eprintf() prints to stderr and exits -- confirm in util.h.
+ */
+static void
+usage(void)
+{
+ eprintf("usage: %s [-a] [-t n] [file ...]\n", argv0);
+}
+
+/*
+ * Parse the options, then run unexpand() over each named file, or over
+ * stdin when no file operands remain.
+ *
+ * -a    sets aflag.
+ * -t n  stores n in tabsize and also sets aflag; a value that is not
+ *       positive is rejected through eprintf().
+ *
+ * A file that cannot be opened is reported with weprintf() and skipped;
+ * the remaining files are still processed, but the exit status is then
+ * EXIT_FAILURE instead of EXIT_SUCCESS.
+ */
+int
+main(int argc, char *argv[])
+{
+	Fdescr dsc;
+	FILE *fp;
+	int ret = EXIT_SUCCESS;
+
+	ARGBEGIN {
+	case 't':
+		tabsize = estrtol(EARGF(usage()), 0);
+		if (tabsize <= 0)
+			eprintf("unexpand: invalid tabsize\n");
+		/* Fallthrough: -t implies -a */
+	case 'a':
+		aflag = true;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if (argc == 0) {
+		dsc.name = "<stdin>";
+		dsc.fp = stdin;
+		unexpand(&dsc);
+		return ret;
+	}
+
+	for (; argc > 0; argc--, argv++) {
+		if (!(fp = fopen(*argv, "r"))) {
+			weprintf("fopen %s:", *argv);
+			/* Keep going, but remember the failure for the exit status. */
+			ret = EXIT_FAILURE;
+			continue;
+		}
+		dsc.name = *argv;
+		dsc.fp = fp;
+		unexpand(&dsc);
+		fclose(fp);
+	}
+
+	return ret;
+}
+
+/*
+ * Fetch the next wide character from f->fp with fgetwc() and return it,
+ * WEOF included. When WEOF comes with the stream's error indicator set,
+ * eprintf() is called with f->name before returning.
+ */
+static wint_t
+in(Fdescr *f)
+{
+	wint_t wc;
+
+	wc = fgetwc(f->fp);
+	if (wc != WEOF)
+		return wc;
+
+	/* WEOF covers both end of input and failure; only failure is reported. */
+	if (ferror(f->fp))
+		eprintf("'%s' read error:", f->name);
+	return wc;
+}
+
+/*
+ * Write the wide character c to stdout with putwchar(), then call
+ * eprintf() if stdout's error indicator is set.
+ */
+static void
+out(wint_t c)
+{
+	putwchar(c);
+	if (!ferror(stdout))
+		return;
+	eprintf("write error:");
+}
+
+/*
+ * Write out a pending run of blanks.
+ *
+ * n   - width of the run in columns
+ * col - column reached after the run, so the run starts at col - n
+ *
+ * When the run is wider than one column and reaches a tab stop, n is
+ * widened by the run's start offset so that it is measured from the
+ * preceding tab stop. One tab is then written per full tabsize in n,
+ * followed by one space per remaining column.
+ */
+static void
+unexpandspan(unsigned int n, unsigned int col)
+{
+	/* tabsize is a signed int; convert once so all arithmetic is unsigned. */
+	const unsigned int width = tabsize;
+	const unsigned int lead = (col - n) % width;
+	unsigned int ntabs, nspaces;
+
+	if (n > 1 && n + lead >= width)
+		n += lead;
+
+	ntabs = n / width;
+	nspaces = n % width;
+
+	for (; ntabs > 0; ntabs--)
+		out('\t');
+	for (; nspaces > 0; nspaces--)
+		out(' ');
+}
+
+/*
+ * Copy the stream described by dsc to stdout, rewriting runs of blanks
+ * (spaces and tabs) through unexpandspan().
+ *
+ * n   - width in columns of the blank run collected but not yet written
+ * col - current column, reset to 0 after every newline
+ * bol - true until the first backspace or non-blank character of a line
+ *
+ * Blanks are collected only while bol is true, or anywhere when aflag is
+ * set; otherwise they are copied through unchanged. A backspace is
+ * written out and moves col back by one.
+ */
+static void
+unexpand(Fdescr *dsc)
+{
+	unsigned int n = 0, col = 0;
+	bool bol = true;
+	wint_t c;
+
+	/* in() hands back fgetwc()'s wint_t result, so end of input is WEOF, not EOF. */
+	while ((c = in(dsc)) != WEOF) {
+		switch (c) {
+		case ' ':
+			if (bol || aflag)
+				n++;
+			col++;
+			break;
+		case '\t':
+			/* A tab advances to the next multiple of tabsize. */
+			if (bol || aflag)
+				n += tabsize - col % tabsize;
+			col += tabsize - col % tabsize;
+			break;
+		case '\b':
+			if (bol || aflag)
+				unexpandspan(n, col);
+			/* Step back one column, but never before column 0. */
+			col -= (col > 0);
+			n = 0;
+			bol = false;
+			break;
+		case '\n':
+			if (bol || aflag)
+				unexpandspan(n, col);
+			n = col = 0;
+			bol = true;
+			break;
+		default:
+			if (bol || aflag)
+				unexpandspan(n, col);
+			n = 0;
+			col++;
+			bol = false;
+		}
+		/*
+		 * Blanks that are being collected were only counted above and
+		 * reach the output later through unexpandspan(); everything
+		 * else is copied through unchanged.
+		 */
+		if ((c != ' ' && c != '\t') || (!aflag && !bol))
+			out(c);
+	}
+	/* Flush a blank run left pending at end of input. */
+	if (n > 0 && (bol || aflag))
+		unexpandspan(n, col);
+}