commit 38f429a3d26122af61b73a8965b1e8e9043c0a0c
parent cb005c150dda582d2377dac3213fdf41d1a0913e
Author: Silvan Jegen <s.jegen@gmail.com>
Date: Fri, 15 Nov 2013 17:25:10 +0100
Add the tr program including man page
Diffstat:
M | Makefile | | | 1 | + |
A | tr.1 | | | 50 | ++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | tr.c | | | 151 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
3 files changed, 202 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -81,6 +81,7 @@ SRC = \
tee.c \
test.c \
touch.c \
+ tr.c \
true.c \
tty.c \
uname.c \
diff --git a/tr.1 b/tr.1
@@ -0,0 +1,50 @@
+.TH TR 1 sbase\-VERSION
+.SH NAME
+tr \- translate characters
+.SH SYNOPSIS
+.B tr
+.RB set1
+.RI [ set2 ]
+.SH DESCRIPTION
+.B tr
+reads input from stdin replacing every character in
+.B set1
+with the character at the same index in
+.B set2.
+If set2 is not given
+.B tr
+deletes the characters in set1 from the input.
+
+Sets are specified as strings of characters. Almost all characters represent themselves; the following escape sequences are interpreted:
+.TP
+\e\e
+backslash
+.TP
+\ea
+audible BEL
+.TP
+\ef
+form feed
+.TP
+\en
+new line
+.TP
+\er
+return
+.TP
+\et
+horizontal tab
+.TP
+\ev
+vertical tab
+.TP
+\eb
+backspace
+.PP
+If set1 is longer than set2
+.B tr
+will map all the remaining characters to the last one in set2. If set2 is longer than set1, the remaining characters from set2 are ignored.
+.SH NOTES
+.B tr
+is Unicode-aware but does not yet handle character classes (e.g. [:alnum:] or [:digit:]).
+.SH SEE ALSO
+.IR sed (1),
+.IR awk (1)
diff --git a/tr.c b/tr.c
@@ -0,0 +1,151 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <locale.h>
+#include <wchar.h>
+#include "text.h"
+#include "util.h"
+
+/* Print the command synopsis, prefixed with argv0, through eprintf(). */
+static void
+usage(void)
+{
+ eprintf("usage: %s set1 [set2]\n", argv0);
+}
+
+/*
+ * Replace the escape letter at *s with the control character it names.
+ * The caller passes a pointer to the character that follows a backslash.
+ * The letters n, t, r, f, a, b and v and the backslash itself are
+ * recognised; any other character is left untouched.
+ */
+void
+handleescapes(char *s)
+{
+	/* letters[i] names the character stored in controls[i] */
+	static const char letters[] = "nt\\rfabv";
+	static const char controls[] = "\n\t\\\r\f\a\b\v";
+	const char *hit;
+
+	/* strchr() would also match the terminator, so rule out NUL first */
+	if(*s == '\0')
+		return;
+
+	hit = strchr(letters, *s);
+	if(hit != NULL)
+		*s = controls[hit - letters];
+}
+
+/*
+ * Decode the character at the start of the non-empty string s into *rune
+ * and return the number of bytes it occupies in s (always at least 1).
+ *
+ * A backslash followed by a letter known to handleescapes() yields the
+ * control character it names. A backslash followed by anything else is
+ * dropped and the following character is decoded as usual. A backslash
+ * at the very end of the string stands for itself. The string is never
+ * modified.
+ */
+static size_t
+nextrune(const char *s, wchar_t *rune)
+{
+	char esc[2] = { '\0', '\0' };
+	size_t skip = 0;
+	int len;
+
+	if(s[0] == '\\' && s[1] != '\0') {
+		/* translate a private copy so the caller's string stays intact */
+		esc[0] = s[1];
+		handleescapes(esc);
+		if(esc[0] != s[1]) {
+			/* recognised escape: a single-byte control character */
+			*rune = (wchar_t)(unsigned char)esc[0];
+			return 2;
+		}
+		/* "\\" or an unknown escape: only the backslash is skipped */
+		skip = 1;
+	}
+
+	len = mbtowc(rune, s + skip, MB_CUR_MAX);
+	if(len <= 0) {
+		/*
+		 * Invalid or truncated sequence: reset the conversion state
+		 * and fall back to the raw byte so the caller always advances.
+		 */
+		mbtowc(NULL, NULL, 0);
+		*rune = (wchar_t)(unsigned char)s[skip];
+		len = 1;
+	}
+	return skip + (size_t)len;
+}
+
+/*
+ * Fill the lookup table mappings from the two set strings.
+ *
+ * set1      characters to translate or delete; escapes are interpreted
+ * set2      replacement characters, escapes interpreted as well, or NULL
+ *           to mark every character of set1 for deletion
+ * mappings  table indexed by character value; untouched entries keep
+ *           their previous content
+ *
+ * With set2 given, the i-th character of set1 maps to the i-th character
+ * of set2. Once set2 is exhausted its last character is reused, and
+ * surplus characters in set2 are ignored. An empty set2 stores 0, which
+ * the output routines treat as "no mapping". With set2 == NULL a
+ * non-zero marker is stored instead.
+ */
+void
+parsemapping(const char *set1, const char *set2, wchar_t *mappings)
+{
+	/* number of entries in the table main() maps; larger values are skipped */
+	const unsigned long nrunes = 0x110000;
+	const char *left = set1;
+	const char *right = set2;
+	wchar_t runeleft;
+	wchar_t runeright = 0;
+
+	while(*left) {
+		left += nextrune(left, &runeleft);
+		/* advance in set2 only while it has characters left */
+		if(right && *right)
+			right += nextrune(right, &runeright);
+		/* the cast also rejects negative values of a signed wchar_t */
+		if((unsigned long)runeleft >= nrunes)
+			continue;
+		mappings[runeleft] = set2 ? runeright : 1;
+	}
+}
+
+/*
+ * Write the NUL-terminated multibyte string in to stdout with putwchar(),
+ * leaving out every character whose entry in mappings is non-zero.
+ *
+ * A byte that does not start a valid multibyte character is handled as a
+ * character with that byte's value, so the scan always moves forward.
+ * Characters outside the table are written unchanged.
+ */
+void
+maptonull(const wchar_t *mappings, char *in)
+{
+	/* number of entries in the table main() maps */
+	const unsigned long nrunes = 0x110000;
+	const char *s;
+	wchar_t runeleft;
+	int leftbytes;
+
+	for(s = in; *s; s += leftbytes) {
+		leftbytes = mbtowc(&runeleft, s, MB_CUR_MAX);
+		if(leftbytes <= 0) {
+			/* invalid sequence: reset the state, take the raw byte */
+			mbtowc(NULL, NULL, 0);
+			runeleft = (wchar_t)(unsigned char)*s;
+			leftbytes = 1;
+		}
+		/* the cast also rejects negative values of a signed wchar_t */
+		if((unsigned long)runeleft >= nrunes || !mappings[runeleft])
+			putwchar(runeleft);
+	}
+}
+
+/*
+ * Write the NUL-terminated multibyte string in to stdout with putwchar(),
+ * replacing every character that has a non-zero entry in mappings with
+ * that entry. Characters without an entry are written unchanged.
+ *
+ * A byte that does not start a valid multibyte character is handled as a
+ * character with that byte's value, so the scan always moves forward.
+ * Characters outside the table are written unchanged.
+ */
+void
+maptoset(const wchar_t *mappings, char *in)
+{
+	/* number of entries in the table main() maps */
+	const unsigned long nrunes = 0x110000;
+	const char *s;
+	wchar_t runeleft;
+	int leftbytes;
+
+	for(s = in; *s; s += leftbytes) {
+		leftbytes = mbtowc(&runeleft, s, MB_CUR_MAX);
+		if(leftbytes <= 0) {
+			/* invalid sequence: reset the state, take the raw byte */
+			mbtowc(NULL, NULL, 0);
+			runeleft = (wchar_t)(unsigned char)*s;
+			leftbytes = 1;
+		}
+		/* the cast also rejects negative values of a signed wchar_t */
+		if((unsigned long)runeleft < nrunes && mappings[runeleft])
+			runeleft = mappings[runeleft];
+		putwchar(runeleft);
+	}
+}
+
+/*
+ * tr set1 [set2]
+ *
+ * Builds a lookup table from the set arguments, then filters stdin line
+ * by line. With two sets, characters are translated; with one set, its
+ * characters are deleted.
+ *
+ * NOTE(review): the error paths assume eprintf() does not return, as the
+ * existing usage() call sites already do; confirm in util.h.
+ */
+int
+main(int argc, char *argv[])
+{
+	wchar_t *mappings;
+	char *buf = NULL;
+	size_t size = 0;
+	void (*mapfunc)(const wchar_t*, char*);
+
+	setlocale(LC_ALL, "");
+
+	/*
+	 * One entry per character value below 0x110000. An anonymous mapping
+	 * starts out zero-filled, so every entry begins as "no mapping".
+	 */
+	mappings = mmap(NULL, 0x110000 * sizeof(wchar_t),
+	                PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+	if(mappings == MAP_FAILED)
+		eprintf("mmap:");
+
+	ARGBEGIN {
+	default:
+		usage();
+	} ARGEND;
+
+	if(argc == 0)
+		usage();
+
+	if(argc >= 2) {
+		parsemapping(argv[0], argv[1], mappings);
+		mapfunc = maptoset;
+	} else {
+		parsemapping(argv[0], NULL, mappings);
+		mapfunc = maptonull;
+	}
+
+	while(afgets(&buf, &size, stdin))
+		mapfunc(mappings, buf);
+	free(buf);
+	if(ferror(stdin))
+		eprintf("<stdin>: read error:");
+	/* output is buffered, so flush before deciding that it succeeded */
+	if(ferror(stdout) || fflush(stdout) == EOF)
+		eprintf("<stdout>: write error:");
+
+	return EXIT_SUCCESS;
+}