commit e5b54977734030067adc7d17352a2a90563d4833
parent 949dafc17118e458da712a9d203dc66e306eb282
Author: FRIGN <dev@frign.de>
Date: Tue, 17 Feb 2015 17:04:36 +0100
Add UTF-8-support to strings(1), add t-flag and refactor code
Previously, the string-length was limited to BUFSIZ, which is an
obvious deficiency.
Now the buffer only needs to hold as many characters as the minimal
string length specified by the user.
I added UTF-8-support, because that's how POSIX wants it and there
are cases where you need this. It doesn't add ELF-barf compared to
the previous implementation.
The t-flag is also pretty important for POSIX-compliance, so I added
it.
The only trouble previously was the a-flag, but given that POSIX
leaves undefined what the a-flag actually does, we make it the default
and don't bother parsing ELF-headers, which has already
turned out to be a security issue in GNU binutils[0].
[0]: http://lcamtuf.blogspot.ro/2014/10/psa-dont-run-strings-on-untrusted-files.html
Diffstat:
M | README | | | 2 | +- |
M | strings.1 | | | 48 | ++++++++++++++++++++++++++++++++++-------------- |
M | strings.c | | | 61 | +++++++++++++++++++++++++++++++++++++++++++------------------ |
3 files changed, 78 insertions(+), 33 deletions(-)
diff --git a/README b/README
@@ -67,7 +67,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
sort no -m, -o, -d, -f, -i
=* split yes none
=* sponge non-posix none
- strings no -t
+#* strings yes none
=* sync non-posix none
=* tail yes none
=* tar non-posix none
diff --git a/strings.1 b/strings.1
@@ -1,32 +1,52 @@
-.Dd November 23, 2014
+.Dd February 17, 2015
.Dt STRINGS 1
.Os sbase
.Sh NAME
.Nm strings
-.Nd print the strings of printable characters in files
+.Nd print strings of printable characters in files
.Sh SYNOPSIS
.Nm
.Op Fl a
-.Op Fl n Ar len
+.Op Fl n Ar num
+.Op Fl t Ar format
.Op Ar file ...
.Sh DESCRIPTION
.Nm
-prints the printable character sequences that are at least 4 characters
-long. If no
-.Ar files
-are given,
+writes sequences of at least 4 printable characters in each
+.Ar file
+to stdout.
+If no
+.Ar file
+is given,
.Nm
reads from stdin.
.Sh OPTIONS
.Bl -tag -width Ds
.It Fl a
-Scan files in their entirety. This is the default.
-.It Fl n Ar len
-Only print sequences that are at least
-.Ar len
-characters. The default is 4 characters.
+Scan each
+.Ar file
+entirely. This is the default.
+.It Fl n Ar num
+Print sequences of at least
+.Ar num
+characters. The default is 4.
+.It Fl t Ar format
+Prefix each string with its byte offset, with
+.Ar format
+being one of
+.Sy d , o , x
+for decimal, octal or hexadecimal numbers.
.El
.Sh STANDARDS
+The
.Nm
-mirrors the semantics of Plan9
-.Xr strings 1 .
+utility is compliant with the
+.St -p1003.1-2008
+specification.
+.Pp
+The
+.Op Fl t
+output format has been changed from "%F %s" to "%8lF: %s", with
+.Sy F
+being one of
+.Sy d , o , x .
diff --git a/strings.c b/strings.c
@@ -1,50 +1,75 @@
/* See LICENSE file for copyright and license details. */
-#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
+#include "utf.h"
#include "util.h"
+static char *format = "";
+
static void
-strings(FILE *fp, const char *fname, int len)
+strings(FILE *fp, const char *fname, size_t len)
{
- unsigned char buf[BUFSIZ];
- int c, i = 0;
- off_t offset = 0;
+ Rune r, *rbuf;
+ size_t i, bread;
+ off_t off;
+
+ rbuf = emalloc(len * sizeof(*rbuf));
- do {
- offset++;
- if (isprint(c = getc(fp)))
- buf[i++] = c;
- if ((!isprint(c) && i >= len) || i == sizeof(buf) - 1) {
- buf[i] = '\0';
- printf("%8ld: %s\n", (long)offset - i - 1, buf);
+ for (off = 0, i = 0; (bread = efgetrune(&r, fp, fname)); ) {
+ off += bread;
+ if (r == Runeerror)
+ continue;
+ else if (!isprintrune(r)) {
+ if (i > len)
+ putchar('\n');
i = 0;
+ continue;
+ }
+ if (i < len) {
+ rbuf[i++] = r;
+ continue;
+ } else if (i > len) {
+ efputrune(&r, stdout, "<stdout>");
+ continue;
}
- } while (c != EOF);
- if (ferror(fp))
- eprintf("%s: read error:", fname);
+ printf(format, (long)off - i);
+ for (i = 0; i < len; i++) {
+ efputrune(rbuf + i, stdout, "<stdout>");
+ }
+ i++;
+ }
+ free(rbuf);
}
static void
usage(void)
{
- eprintf("usage: %s [-a] [-n len] [file ...]\n", argv0);
+ eprintf("usage: %s [-a] [-n num] [-t format] [file ...]\n", argv0);
}
int
main(int argc, char *argv[])
{
FILE *fp;
+ size_t len = 4;
int ret = 0;
- int len = 4;
+ char f;
ARGBEGIN {
case 'a':
break;
case 'n':
- len = estrtonum(EARGF(usage()), 1, INT_MAX);
+ len = estrtonum(EARGF(usage()), 1, LLONG_MAX);
+ break;
+ case 't':
+ format = estrdup("%8l#: ");
+ f = *EARGF(usage());
+ if (f == 'd' || f == 'o' || f == 'x')
+ format[3] = f;
+ else
+ usage();
break;
default:
usage();