sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

commit 49e27c1b0cd40be4e8a5527cb1e06f8156434da0
parent 71adaed51930c72971e202701e3dce860291f178
Author: FRIGN <dev@frign.de>
Date:   Sun, 22 Mar 2015 23:37:37 +0100

Add -m and -o flags to sort(1)

The m-flag comes pretty much automatically: as no script relies on the
undefined behaviour of the input _not_ being sorted, we might as well
sort the already sorted input.
The only downside is memory usage, which can be an issue for large
files.
The o-flag was trivial to implement.

Diffstat:
MREADME | 2+-
Msort.1 | 30++++++++++++++++++++----------
Msort.c | 39++++++++++++++++++++++++++++-----------
3 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/README b/README @@ -66,7 +66,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support, =*| sha256sum non-posix none =*| sha512sum non-posix none =*| sleep yes none - sort no -m, -o, -d, -f, -i + sort no -d, -f, -i =*| split yes none =*| sponge non-posix none #*| strings yes none diff --git a/sort.1 b/sort.1 @@ -1,4 +1,4 @@ -.Dd January 30, 2015 +.Dd March 22, 2015 .Dt SORT 1 .Os sbase .Sh NAME @@ -6,17 +6,19 @@ .Nd sort lines .Sh SYNOPSIS .Nm -.Op Fl bnru +.Op Fl Cbcmnru +.Op Fl o Ar outfile .Op Fl t Ar delim .Op Fl k Ar key ... .Op Ar file ... .Sh DESCRIPTION .Nm -writes the sorted concatenation of the given -.Ar files -to stdout. If no +writes the sorted concatenation of each +.Ar file +to stdout. +If no .Ar file -is given, +is given .Nm reads from stdin. .Sh OPTIONS @@ -31,10 +33,10 @@ Skip leading whitespace of columns when sorting. .It Fl c The same as .Fl C -except that when disorder is detected, a message is printed to stderr +except that when disorder is detected, a message is written to stderr indicating the location of the disorder. .It Fl k Ar key -Specifies a key definition of the form +Specify a key definition of the form .Sm off .Sy S .No [. @@ -67,12 +69,20 @@ can be used to specify options that only apply to this key definition. .Sy b is special in that it only applies to the column that it was specified after. +.It Fl m +Assume sorted input, merge only. .It Fl n Perform a numeric sort. +.It Fl o Ar outfile +Write output to +.Ar outfile +rather than stdout. .It Fl r Reverses the sort. .It Fl t Ar delim -Specifies the field delimiter. +Set +.Ar delim +as the field delimiter. .It Fl u -Prints equal lines only once. +Print equal lines only once. 
.El diff --git a/sort.c b/sort.c @@ -107,6 +107,7 @@ linecmp(const char **a, const char **b) free(s1); free(s2); } + return res; } @@ -127,6 +128,8 @@ parse_flags(char **s, int *flags, int bflag) default: return -1; } + } + return 0; } @@ -163,9 +166,8 @@ parse_keydef(struct keydef *kd, char *s, int flags) if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0) return -1; } - if (*rest != '\0') - return -1; - return 0; + + return -(*rest); } static char * @@ -173,6 +175,7 @@ skipblank(char *s) { while (*s && isblank(*s)) s++; + return s; } @@ -224,16 +227,17 @@ columns(char *line, const struct keydef *kd) static void usage(void) { - enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0); + enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0); } int main(int argc, char *argv[]) { - size_t i; - FILE *fp; + FILE *fp, *ofp = stdout; struct linebuf linebuf = EMPTY_LINEBUF; + size_t i; int global_flags = 0; + char *outfile = NULL; ARGBEGIN { case 'C': @@ -248,9 +252,19 @@ main(int argc, char *argv[]) case 'k': addkeydef(EARGF(usage()), global_flags); break; + case 'm': + /* more or less for free, but for perfomance-reasons, + * we should keep this flag in mind and maybe some later + * day implement it properly so we don't run out of memory + * while merging large sorted files. 
+ */ + break; case 'n': global_flags |= MOD_N; break; + case 'o': + outfile = EARGF(usage()); + break; case 'r': global_flags |= MOD_R; break; @@ -270,15 +284,15 @@ main(int argc, char *argv[]) addkeydef("1", global_flags); addkeydef("1", global_flags & MOD_R); - if (argc == 0) { + if (!argc) { if (Cflag || cflag) { check(stdin); } else { getlines(stdin, &linebuf); } - } else for (; argc > 0; argc--, argv++) { - if (!(fp = fopen(argv[0], "r"))) { - enprintf(2, "fopen %s:", argv[0]); + } else for (; *argv; argc--, argv++) { + if (!(fp = fopen(*argv, "r"))) { + enprintf(2, "fopen %s:", *argv); continue; } if (Cflag || cflag) { @@ -290,13 +304,16 @@ main(int argc, char *argv[]) } if (!Cflag && !cflag) { + if (outfile && !(ofp = fopen(outfile, "w"))) + eprintf("fopen %s:", outfile); + qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines, (int (*)(const void *, const void *))linecmp); for (i = 0; i < linebuf.nlines; i++) { if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i], (const char **)&linebuf.lines[i-1])) { - fputs(linebuf.lines[i], stdout); + fputs(linebuf.lines[i], ofp); } } }