sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

commit e5b54977734030067adc7d17352a2a90563d4833
parent 949dafc17118e458da712a9d203dc66e306eb282
Author: FRIGN <dev@frign.de>
Date:   Tue, 17 Feb 2015 17:04:36 +0100

Add UTF-8-support to strings(1), add t-flag and refactor code

Previously, the string-length was limited to BUFSIZ, which is an
obvious deficiency.
Now the buffer only needs to hold as many characters as the minimal
string length specified by the user.
I added UTF-8 support, because that's what POSIX requires and there
are cases where you need it. It doesn't add ELF-barf compared to
the previous implementation.
The t-flag is also pretty important for POSIX-compliance, so I added
it.
The only trouble previously was the a-flag, but given that POSIX
leaves undefined what the a-flag actually does, we make it the default
and don't bother parsing ELF headers, which has already
turned out to be a security issue in GNU binutils[0].

[0]: http://lcamtuf.blogspot.ro/2014/10/psa-dont-run-strings-on-untrusted-files.html

Diffstat:
MREADME | 2+-
Mstrings.1 | 48++++++++++++++++++++++++++++++++++--------------
Mstrings.c | 61+++++++++++++++++++++++++++++++++++++++++++------------------
3 files changed, 78 insertions(+), 33 deletions(-)

diff --git a/README b/README @@ -67,7 +67,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support, sort no -m, -o, -d, -f, -i =* split yes none =* sponge non-posix none - strings no -t +#* strings yes none =* sync non-posix none =* tail yes none =* tar non-posix none diff --git a/strings.1 b/strings.1 @@ -1,32 +1,52 @@ -.Dd November 23, 2014 +.Dd Februrary 17, 2015 .Dt STRINGS 1 .Os sbase .Sh NAME .Nm strings -.Nd print the strings of printable characters in files +.Nd print strings of printable characters in files .Sh SYNOPSIS .Nm .Op Fl a -.Op Fl n Ar len +.Op Fl n Ar num +.Op Fl t Ar format .Op Ar file ... .Sh DESCRIPTION .Nm -prints the printable character sequences that are at least 4 characters -long. If no -.Ar files -are given, +writes sequences of at least 4 printable characters in each +.Ar file +to stdout. +If no +.Ar file +is given, .Nm reads from stdin. .Sh OPTIONS .Bl -tag -width Ds .It Fl a -Scan files in their entirety. This is the default. -.It Fl n Ar len -Only print sequences that are at least -.Ar len -characters. The default is 4 characters. +Scan each +.Ar file +entirely. This is the default. +.It Fl n Ar num +Print sequences of at least +.Ar num +characters. The default is 4. +.It Fl t Ar format +Prepend each string with its byte offset, with +.Ar format +being one of +.Sy d , o , x +for decimal, octal or hexadecimal numbers. .El .Sh STANDARDS +The .Nm -mirrors the semantics of Plan9 -.Xr strings 1 . +utility is compliant with the +.St -p1003.1-2008 +specification. +.Pp +The +.Op Fl t +output format has been changed from "%F %s" to "%8lF: %s", with +.Sy F +being one of +.Sy d , o , x . diff --git a/strings.c b/strings.c @@ -1,50 +1,75 @@ /* See LICENSE file for copyright and license details. 
*/ -#include <ctype.h> #include <limits.h> #include <stdio.h> #include <stdlib.h> +#include "utf.h" #include "util.h" +static char *format = ""; + static void -strings(FILE *fp, const char *fname, int len) +strings(FILE *fp, const char *fname, size_t len) { - unsigned char buf[BUFSIZ]; - int c, i = 0; - off_t offset = 0; + Rune r, *rbuf; + size_t i, bread; + off_t off; + + rbuf = emalloc(len * sizeof(*rbuf)); - do { - offset++; - if (isprint(c = getc(fp))) - buf[i++] = c; - if ((!isprint(c) && i >= len) || i == sizeof(buf) - 1) { - buf[i] = '\0'; - printf("%8ld: %s\n", (long)offset - i - 1, buf); + for (off = 0, i = 0; (bread = efgetrune(&r, fp, fname)); ) { + off += bread; + if (r == Runeerror) + continue; + else if (!isprintrune(r)) { + if (i > len) + putchar('\n'); i = 0; + continue; + } + if (i < len) { + rbuf[i++] = r; + continue; + } else if (i > len) { + efputrune(&r, stdout, "<stdout>"); + continue; } - } while (c != EOF); - if (ferror(fp)) - eprintf("%s: read error:", fname); + printf(format, (long)off - i); + for (i = 0; i < len; i++) { + efputrune(rbuf + i, stdout, "<stdout>"); + } + i++; + } + free(rbuf); } static void usage(void) { - eprintf("usage: %s [-a] [-n len] [file ...]\n", argv0); + eprintf("usage: %s [-a] [-n num] [-t format] [file ...]\n", argv0); } int main(int argc, char *argv[]) { FILE *fp; + size_t len = 4; int ret = 0; - int len = 4; + char f; ARGBEGIN { case 'a': break; case 'n': - len = estrtonum(EARGF(usage()), 1, INT_MAX); + len = estrtonum(EARGF(usage()), 1, LLONG_MAX); + break; + case 't': + format = estrdup("%8l#: "); + f = *EARGF(usage()); + if (f == 'd' || f == 'o' || f == 'x') + format[3] = f; + else + usage(); break; default: usage();