commit 49e27c1b0cd40be4e8a5527cb1e06f8156434da0
parent 71adaed51930c72971e202701e3dce860291f178
Author: FRIGN <dev@frign.de>
Date: Sun, 22 Mar 2015 23:37:37 +0100
Add -m and -o flags to sort(1)
Sort comes pretty much automatically: as no script relies on the
undefined behaviour of the input _not_ being sorted, we might as well
sort the sorted input already.
The only downside is memory usage, which can be an issue for large
files.
The o-flag was trivial to implement.
Diffstat:
M | README | | | 2 | +- |
M | sort.1 | | | 30 | ++++++++++++++++++++---------- |
M | sort.c | | | 39 | ++++++++++++++++++++++++++++----------- |
3 files changed, 49 insertions(+), 22 deletions(-)
diff --git a/README b/README
@@ -66,7 +66,7 @@ The following tools are implemented ('*' == finished, '#' == UTF-8 support,
=*| sha256sum non-posix none
=*| sha512sum non-posix none
=*| sleep yes none
- sort no -m, -o, -d, -f, -i
+ sort no -d, -f, -i
=*| split yes none
=*| sponge non-posix none
#*| strings yes none
diff --git a/sort.1 b/sort.1
@@ -1,4 +1,4 @@
-.Dd January 30, 2015
+.Dd March 22, 2015
.Dt SORT 1
.Os sbase
.Sh NAME
@@ -6,17 +6,19 @@
.Nd sort lines
.Sh SYNOPSIS
.Nm
-.Op Fl bnru
+.Op Fl Cbcmnru
+.Op Fl o Ar outfile
.Op Fl t Ar delim
.Op Fl k Ar key ...
.Op Ar file ...
.Sh DESCRIPTION
.Nm
-writes the sorted concatenation of the given
-.Ar files
-to stdout. If no
+writes the sorted concatenation of each
+.Ar file
+to stdout.
+If no
.Ar file
-is given,
+is given
.Nm
reads from stdin.
.Sh OPTIONS
@@ -31,10 +33,10 @@ Skip leading whitespace of columns when sorting.
.It Fl c
The same as
.Fl C
-except that when disorder is detected, a message is printed to stderr
+except that when disorder is detected, a message is written to stderr
indicating the location of the disorder.
.It Fl k Ar key
-Specifies a key definition of the form
+Specify a key definition of the form
.Sm off
.Sy S
.No [.
@@ -67,12 +69,20 @@ can be used to specify options
that only apply to this key definition.
.Sy b
is special in that it only applies to the column that it was specified after.
+.It Fl m
+Assume sorted input, merge only.
.It Fl n
Perform a numeric sort.
+.It Fl o Ar outfile
+Write output to
+.Ar outfile
+rather than stdout.
.It Fl r
Reverses the sort.
.It Fl t Ar delim
-Specifies the field delimiter.
+Set
+.Ar delim
+as the field delimiter.
.It Fl u
-Prints equal lines only once.
+Print equal lines only once.
.El
diff --git a/sort.c b/sort.c
@@ -107,6 +107,7 @@ linecmp(const char **a, const char **b)
free(s1);
free(s2);
}
+
return res;
}
@@ -127,6 +128,8 @@ parse_flags(char **s, int *flags, int bflag)
default:
return -1;
}
+ }
+
return 0;
}
@@ -163,9 +166,8 @@ parse_keydef(struct keydef *kd, char *s, int flags)
if (parse_flags(&rest, &kd->flags, MOD_ENDB) < 0)
return -1;
}
- if (*rest != '\0')
- return -1;
- return 0;
+
+ return -(*rest);
}
static char *
@@ -173,6 +175,7 @@ skipblank(char *s)
{
while (*s && isblank(*s))
s++;
+
return s;
}
@@ -224,16 +227,17 @@ columns(char *line, const struct keydef *kd)
static void
usage(void)
{
- enprintf(2, "usage: %s [-Cbcnru] [-t delim] [-k def]... [file...]\n", argv0);
+ enprintf(2, "usage: %s [-Cbcmnru] [-o outfile] [-t delim] [-k def]... [file ...]\n", argv0);
}
int
main(int argc, char *argv[])
{
- size_t i;
- FILE *fp;
+ FILE *fp, *ofp = stdout;
struct linebuf linebuf = EMPTY_LINEBUF;
+ size_t i;
int global_flags = 0;
+ char *outfile = NULL;
ARGBEGIN {
case 'C':
@@ -248,9 +252,19 @@ main(int argc, char *argv[])
case 'k':
addkeydef(EARGF(usage()), global_flags);
break;
+ case 'm':
+ /* more or less for free, but for performance reasons,
+ * we should keep this flag in mind and maybe some later
+ * day implement it properly so we don't run out of memory
+ * while merging large sorted files.
+ */
+ break;
case 'n':
global_flags |= MOD_N;
break;
+ case 'o':
+ outfile = EARGF(usage());
+ break;
case 'r':
global_flags |= MOD_R;
break;
@@ -270,15 +284,15 @@ main(int argc, char *argv[])
addkeydef("1", global_flags);
addkeydef("1", global_flags & MOD_R);
- if (argc == 0) {
+ if (!argc) {
if (Cflag || cflag) {
check(stdin);
} else {
getlines(stdin, &linebuf);
}
- } else for (; argc > 0; argc--, argv++) {
- if (!(fp = fopen(argv[0], "r"))) {
- enprintf(2, "fopen %s:", argv[0]);
+ } else for (; *argv; argc--, argv++) {
+ if (!(fp = fopen(*argv, "r"))) {
+ enprintf(2, "fopen %s:", *argv);
continue;
}
if (Cflag || cflag) {
@@ -290,13 +304,16 @@ main(int argc, char *argv[])
}
if (!Cflag && !cflag) {
+ if (outfile && !(ofp = fopen(outfile, "w")))
+ eprintf("fopen %s:", outfile);
+
qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines,
(int (*)(const void *, const void *))linecmp);
for (i = 0; i < linebuf.nlines; i++) {
if (!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i],
(const char **)&linebuf.lines[i-1])) {
- fputs(linebuf.lines[i], stdout);
+ fputs(linebuf.lines[i], ofp);
}
}
}