sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

commit eb9bda878736344d1bef06d42e57e96de542a663
parent e4810f1cdbcdfdce506563200ea99334e82bd03c
Author: FRIGN <dev@frign.de>
Date:   Mon, 29 Feb 2016 00:47:10 +0100

Support NUL-containing lines in sort(1)

For sort(1) we need memmem(), which I imported from OpenBSD.
Inside sort(1), the changes involved working with the explicit lengths
given by getlines() earlier and rewriting some of the functions.

Now we can handle NUL characters in the input just fine.

Diffstat:
M Makefile | 1 +
M README | 2 +-
A libutil/memmem.c | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M sort.c | 189 ++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
M util.h | 2 ++
5 files changed, 176 insertions(+), 84 deletions(-)

diff --git a/Makefile b/Makefile @@ -58,6 +58,7 @@ LIBUTILSRC =\ libutil/getlines.c\ libutil/human.c\ libutil/md5.c\ + libutil/memmem.c\ libutil/mkdirp.c\ libutil/mode.c\ libutil/parseoffset.c\ diff --git a/README b/README @@ -79,7 +79,7 @@ The following tools are implemented: 0=* x sha512-224sum . 0=* x sha512-256sum . 0=*|o sleep . - #*|o sort . +0#*|o sort . 0=*|o split . 0=*|x sponge . 0#*|o strings . diff --git a/libutil/memmem.c b/libutil/memmem.c @@ -0,0 +1,66 @@ +/* $OpenBSD: memmem.c,v 1.4 2015/08/31 02:53:57 guenther Exp $ */ + +/* + * Copyright (c) 2005 Pascal Gloor <pascal.gloor@spale.com> + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <string.h> + +#include "../util.h" + +/* + * Find the first occurrence of the byte string s in byte string l. + */ + +void * +memmem(const void *l, size_t l_len, const void *s, size_t s_len) +{ + const char *cur, *last; + const char *cl = l; + const char *cs = s; + + /* a zero length needle should just return the haystack */ + if (s_len == 0) + return (void *)cl; + + /* "s" must be smaller or equal to "l" */ + if (l_len < s_len) + return NULL; + + /* special case where s_len == 1 */ + if (s_len == 1) + return memchr(l, *cs, l_len); + + /* the last position where its possible to find "s" in "l" */ + last = cl + l_len - s_len; + + for (cur = cl; cur <= last; cur++) + if (cur[0] == cs[0] && memcmp(cur, cs, s_len) == 0) + return (void *)cur; + + return NULL; +} diff --git a/sort.c b/sort.c @@ -33,119 +33,133 @@ static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead); static int Cflag = 0, cflag = 0, uflag = 0; static char *fieldsep = NULL; static size_t fieldseplen = 0; -static char *col1, *col2; -static size_t col1siz, col2siz; +static struct linebufline col1, col2; -static char * -skipblank(char *s) +static void +skipblank(struct linebufline *a) { - while (*s == ' ' || *s == '\t') - s++; - - return s; + while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) { + a->data++; + a->len--; + } } -static char * -skipnonblank(char *s) +static void +skipnonblank(struct linebufline *a) { - while (*s && *s != '\n' 
&& *s != ' ' && *s != '\t') - s++; - - return s; + while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' && + *(a->data) != '\t')) { + a->data++; + a->len--; + } } -static char * -skipcolumn(char *s, char *eol, int skip_to_next_col) +static void +skipcolumn(struct linebufline *a, int skip_to_next_col) { + char *s; + if (fieldsep) { - if ((s = strstr(s, fieldsep))) { - if (skip_to_next_col) - s += fieldseplen; + if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) { + if (skip_to_next_col) { + a->len = a->len - (s - a->data); + a->data = s; + } } else { - s = eol; + a->data += a->len - 1; + a->len = 1; } } else { - s = skipblank(s); - s = skipnonblank(s); + skipblank(a); + skipnonblank(a); } - - return s; } static size_t -columns(char *line, const struct keydef *kd, char **col, size_t *colsiz) +columns(struct linebufline *line, const struct keydef *kd, struct linebufline *col) { Rune r; - char *start, *end, *eol = strchr(line, '\n'); + struct linebufline start, end; size_t len, utflen, rlen; int i; - for (i = 1, start = line; i < kd->start_column; i++) - start = skipcolumn(start, eol, 1); + start.data = line->data; + start.len = line->len; + for (i = 1; i < kd->start_column; i++) + skipcolumn(&start, 1); if (kd->flags & MOD_STARTB) - start = skipblank(start); - for (utflen = 0; start < eol && utflen < kd->start_char - 1;) { - rlen = chartorune(&r, start); - start += rlen; + skipblank(&start); + for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) { + rlen = chartorune(&r, start.data); + start.data += rlen; + start.len -= rlen; utflen++; } + end.data = line->data; + end.len = line->len; if (kd->end_column) { - for (i = 1, end = line; i < kd->end_column; i++) - end = skipcolumn(end, eol, 1); + for (i = 1; i < kd->end_column; i++) + skipcolumn(&end, 1); if (kd->flags & MOD_ENDB) - end = skipblank(end); + skipblank(&end); if (kd->end_char) { - for (utflen = 0; end < eol && utflen < kd->end_char;) { - rlen = chartorune(&r, end); - end += rlen; + for 
(utflen = 0; end.len > 1 && utflen < kd->end_char;) { + rlen = chartorune(&r, end.data); + end.data += rlen; + end.len -= rlen; utflen++; } } else { - end = skipcolumn(end, eol, 0); + skipcolumn(&end, 0); } + printf("end.data = '%s'\n", end.data); } else { - end = eol; + end.data += end.len - 1; + end.len = 1; } - len = (start > end) ? 0 : (end - start); - if (!*col || *colsiz < len) - *col = erealloc(*col, len + 1); - memcpy(*col, start, len); - (*col)[len] = '\0'; - if (*colsiz < len) - *colsiz = len; + len = MAX(0, end.data - start.data); + if (!(col->data) || col->len < len) + col->data = erealloc(col->data, len + 1); + memcpy(col->data, start.data, len); + col->data[len] = '\0'; + if (col->len < len) + col->len = len; return len; } static int -skipmodcmp(const char *s1, const char *s2, int flags) +skipmodcmp(struct linebufline *a, struct linebufline *b, int flags) { Rune r1, r2; + size_t offa = 0, offb = 0; do { - s1 += chartorune(&r1, s1); - s2 += chartorune(&r2, s2); + offa += chartorune(&r1, a->data + offa); + offb += chartorune(&r2, b->data + offb); if (flags & MOD_D && flags & MOD_I) { - while (*s1 && ((!isblankrune(r1) && !isalnumrune(r1)) || - (!isprintrune(r1)))) - s1 += chartorune(&r1, s1); - while (*s2 && ((!isblankrune(r2) && !isalnumrune(r2)) || - (!isprintrune(r2)))) - s2 += chartorune(&r2, s2); + while (offa < a->len && ((!isblankrune(r1) && + !isalnumrune(r1)) || (!isprintrune(r1)))) + offa += chartorune(&r1, a->data + offa); + while (offb < b->len && ((!isblankrune(r2) && + !isalnumrune(r2)) || (!isprintrune(r2)))) + offb += chartorune(&r2, b->data + offb); } else if (flags & MOD_D) { - while (*s1 && !isblankrune(r1) && !isalnumrune(r1)) - s1 += chartorune(&r1, s1); - while (*s2 && !isblankrune(r2) && !isalnumrune(r2)) - s2 += chartorune(&r2, s2); + while (offa < a->len && !isblankrune(r1) && + !isalnumrune(r1)) + offa += chartorune(&r1, a->data + offa); + while (offb < b->len && !isblankrune(r2) && + !isalnumrune(r2)) + offb += chartorune(&r2, 
b->data + offb); } else if (flags & MOD_I) { - while (*s1 && !isprintrune(r1)) - s1 += chartorune(&r1, s1); - while (*s2 && !isprintrune(r2)) - s2 += chartorune(&r2, s2); + while (offa < a->len && !isprintrune(r1)) + offa += chartorune(&r1, a->data + offa); + while (offb < b->len && !isprintrune(r2)) + offb += chartorune(&r2, b->data + offb); } if (flags & MOD_F) { r1 = toupperrune(r1); @@ -157,15 +171,15 @@ skipmodcmp(const char *s1, const char *s2, int flags) } static int -linecmp(const char **a, const char **b) +linecmp(struct linebufline *a, struct linebufline *b) { int res = 0; long double x, y; struct keydef *kd; TAILQ_FOREACH(kd, &kdhead, entry) { - columns((char *)*a, kd, &col1, &col1siz); - columns((char *)*b, kd, &col2, &col2siz); + columns(a, kd, &col1); + columns(b, kd, &col2); /* if -u is given, don't use default key definition * unless it is the only one */ @@ -173,13 +187,17 @@ linecmp(const char **a, const char **b) TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) { res = 0; } else if (kd->flags & MOD_N) { - x = strtold(col1, NULL); - y = strtold(col2, NULL); + x = strtold(col1.data, NULL); + y = strtold(col2.data, NULL); res = (x < y) ? 
-1 : (x > y); } else if (kd->flags & (MOD_D | MOD_F | MOD_I)) { - res = skipmodcmp(col1, col2, kd->flags); + res = skipmodcmp(&col1, &col2, kd->flags); } else { - res = strcmp(col1, col2); + if (!(res = memcmp(col1.data, col2.data, + MIN(col1.len, col2.len)))) { + res += col1.data[MIN(col1.len, col2.len)] - + col2.data[MIN(col1.len, col2.len)]; + } } if (kd->flags & MOD_R) @@ -194,20 +212,25 @@ linecmp(const char **a, const char **b) static int check(FILE *fp, const char *fname) { - static struct { char *buf; size_t size; } prev, cur, tmp; + static struct linebufline prev, cur, tmp; + static size_t prevsize, cursize, tmpsize; - if (!prev.buf && getline(&prev.buf, &prev.size, fp) < 0) + if (!prev.data && (prev.len = getline(&prev.data, &prevsize, fp)) < 0) eprintf("getline:"); - while (getline(&cur.buf, &cur.size, fp) > 0) { - if (uflag > linecmp((const char **)&cur.buf, - (const char **)&prev.buf)) { - if (!Cflag) - weprintf("disorder %s: %s", fname, cur.buf); + while ((cur.len = getline(&cur.data, &cursize, fp)) > 0) { + if (uflag > linecmp(&cur, &prev)) { + if (!Cflag) { + weprintf("disorder %s: ", fname); + fwrite(cur.data, 1, cur.len, stderr); + } return 1; } tmp = cur; + tmpsize = cursize; cur = prev; + cursize = prevsize; prev = tmp; + prevsize = tmpsize; } return 0; @@ -345,7 +368,7 @@ main(int argc, char *argv[]) break; case 't': fieldsep = EARGF(usage()); - fieldseplen = strlen(fieldsep); + fieldseplen = unescape(fieldsep); break; case 'u': uflag = 1; @@ -388,14 +411,14 @@ main(int argc, char *argv[]) if (outfile && !(ofp = fopen(outfile, "w"))) eprintf("fopen %s:", outfile); - qsort(linebuf.lines, linebuf.nlines, sizeof *linebuf.lines, + qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines), (int (*)(const void *, const void *))linecmp); for (i = 0; i < linebuf.nlines; i++) { if (!uflag || i == 0 || - linecmp((const char **)&linebuf.lines[i], - (const char **)&linebuf.lines[i - 1])) { - fputs(linebuf.lines[i], ofp); + linecmp(&linebuf.lines[i], 
&linebuf.lines[i - 1])) { + fwrite(linebuf.lines[i].data, 1, + linebuf.lines[i].len, ofp); } } } diff --git a/util.h b/util.h @@ -76,3 +76,5 @@ long long enstrtonum(int, const char *, long long, long long); long long estrtonum(const char *, long long, long long); size_t unescape(char *); int mkdirp(const char *); +#undef memmem +void *memmem(const void *, size_t, const void *, size_t);