sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

commit 98a12cc14637306f8ed9522643229c889cff038e
parent 02d91696b4bb162325ea3e313cc6f0d0f126951a
Author: Jakob Kramer <jakob.kramer@gmx.de>
Date:   Sat, 12 Apr 2014 17:53:10 +0200

sort: add -k, fix -u

Options that are specific to a single key definition are not supported
(e.g. "sort -k 2,3n -k 4,4"). If you specify such a definition, sort
exits with a failure status and an error message. All key definitions
use the global settings exclusively.

sort always behaves as if -b were set.

Diffstat:
Msort.1 | 19++++++++++++++++++-
Msort.c | 167++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 175 insertions(+), 11 deletions(-)

diff --git a/sort.1 b/sort.1 @@ -4,6 +4,9 @@ sort \- sort lines .SH SYNOPSIS .B sort .RB [ \-nru ] +.RB [ \-k +.I key +.R ]... .RI [ file ...] .SH DESCRIPTION .B sort @@ -18,4 +21,18 @@ perform a numeric sort. reverses the sort. .TP .B \-u -prints repeated lines only once. +prints equal lines only once. +.TP +.B \-k key +specifies a key definition of the form \fBS\fR[.\fBs\fR][,\fBE\fR[.\fBe\fR]], +where +.B S, +.B s, +.B E, +and +.B e +are the starting column, starting character in that column, ending column and +the ending character of that column respectively. If they are not specified, +s refers to the first character of the specified starting column, E refers to +the last column of every line, and e refers to the last character of that last +column. diff --git a/sort.c b/sort.c @@ -1,4 +1,5 @@ /* See LICENSE file for copyright and license details. */ +#include <ctype.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> @@ -7,7 +8,28 @@ #include "text.h" #include "util.h" +struct keydef { + unsigned start_column; + unsigned end_column; + unsigned start_char; + unsigned end_char; +}; + +struct kdlist { + struct keydef keydef; + struct kdlist *next; +}; + +static struct kdlist *head = NULL; +static struct kdlist *curr = NULL; + +static void addkeydef(char *); +static void freelist(void); static int linecmp(const char **, const char **); +static char *next_nonblank(char *); +static char *next_blank(char *); +static int parse_keydef(struct keydef *, char *); +static char *columns(char *, const struct keydef *); static bool rflag = false; static bool uflag = false; @@ -18,7 +40,7 @@ static struct linebuf linebuf = EMPTY_LINEBUF; static void usage(void) { - eprintf("usage: %s [-nru] [file...]\n", argv0); + enprintf(2, "usage: %s [-nru] [-k def]... 
[file...]\n", argv0); } int @@ -37,15 +59,20 @@ main(int argc, char *argv[]) case 'u': uflag = true; break; + case 'k': + addkeydef(EARGF(usage())); + break; default: usage(); } ARGEND; + addkeydef("1"); + if(argc == 0) { getlines(stdin, &linebuf); } else for(; argc > 0; argc--, argv++) { if(!(fp = fopen(argv[0], "r"))) { - weprintf("fopen %s:", argv[0]); + enprintf(2, "fopen %s:", argv[0]); continue; } getlines(fp, &linebuf); @@ -55,24 +82,144 @@ main(int argc, char *argv[]) (int (*)(const void *, const void *))linecmp); for(i = 0; i < linebuf.nlines; i++) { - if(!uflag || i == 0 || strcmp(linebuf.lines[i], - linebuf.lines[i-1]) != 0) { + if(!uflag || i == 0 || linecmp((const char **)&linebuf.lines[i], + (const char **)&linebuf.lines[i-1])) { fputs(linebuf.lines[i], stdout); } } + freelist(); return EXIT_SUCCESS; } -int +static void +addkeydef(char *def) +{ + struct kdlist *node; + + node = malloc(sizeof(*node)); + if(!node) + enprintf(2, "malloc:"); + if(!head) + head = node; + if(parse_keydef(&node->keydef, def)) + enprintf(2, "parse_keydef:"); + if(curr) + curr->next = node; + node->next = NULL; + curr = node; +} + +static void +freelist(void) +{ + struct kdlist *node; + struct kdlist *tmp; + + for(node = head; node; node = tmp) { + tmp = node->next; + free(node); + } +} + +static int linecmp(const char **a, const char **b) { - if (nflag) { - if (rflag) - return strtoul(*b, 0, 10) - strtoul(*a, 0, 10); + char *s1, *s2; + int res = 0; + struct kdlist *node; + + for(node = head; node && res == 0; node = node->next) { + s1 = columns((char *)*a, &node->keydef); + s2 = columns((char *)*b, &node->keydef); + + /* don't consider modifiers if it's the default key + * definition that was implicitly added */ + /* if -u is given, don't use default */ + if(uflag && !(node == head) && !node->next) + res = 0; + else if(!(node == head) && !node->next) + res = strcmp(s1, s2); + else if(nflag) + res = strtoul(s1, 0, 10) - strtoul(s2, 0, 10); else - return strtoul(*a, 0, 10) - 
strtoul(*b, 0, 10); + res = strcmp(s1, s2); + + free(s1); + free(s2); + } + return rflag ? -res : res; +} + +static int +parse_keydef(struct keydef *kd, char *s) +{ + char *rest = s; + kd->start_column = 1; + kd->start_char = 1; + /* 0 means end of line */ + kd->end_column = 0; + kd->end_char = 0; + + kd->start_column = strtoul(rest, &rest, 10); + if(!kd->start_column) + enprintf(2, "starting column cannot be 0\n"); + if(*rest == '.') + kd->start_char = strtoul(rest+1, &rest, 10); + if(*rest == ',') { + kd->end_column = strtoul(rest+1, &rest, 10); + if(kd->end_column < kd->start_column) + enprintf(2, ",%u is too small\n", kd->end_column); } - return strcmp(*a, *b) * (rflag ? -1 : +1); + if(*rest == '.') + kd->end_char = strtoul(rest+1, &rest, 10); + if(*rest != '\0') + return -1; + return 0; } +static char * +next_nonblank(char *s) +{ + for(; *s && isblank(*s); s++); + return s; +} + +static char * +next_blank(char *s) +{ + for(; *s && !isblank(*s); s++); + return s; +} + +static char * +columns(char *line, const struct keydef *kd) +{ + char *rest; + char *start, *end; + unsigned i; + for(rest = line, i = 0; i < kd->start_column; i++) { + if(i != 0) + rest = next_blank(rest); + rest = next_nonblank(rest); + } + for(i = 1; i < kd->start_char && !isblank(*rest); i++, rest++); + start = rest; + + if(kd->end_column) { + for(rest = line, i = 0; i < kd->end_column; i++) { + if(i != 0) + rest = next_blank(rest); + rest = next_nonblank(rest); + } + if(kd->end_char) { + for(i = 1; i < kd->end_char && *rest && !isblank(*rest); i++, rest++); + } else { + rest = next_blank(rest); + } + end = rest; + } else { + end = rest + strlen(rest); + } + return strndup(start, end - start); +}