commit 576a5ce55ed18bb8617b9d9dac11f8b6cdc0db1d
parent 9eb15ff2326a5114644521cb406e3729bddb94d8
Author: Truls Becken <truls.becken@gmail.com>
Date: Tue, 8 Oct 2013 20:39:08 +0100
Add cut(1)
Diffstat:
M | LICENSE | | | 1 | + |
M | Makefile | | | 1 | + |
A | cut.1 | | | 60 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | cut.c | | | 164 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 226 insertions(+), 0 deletions(-)
diff --git a/LICENSE b/LICENSE
@@ -14,6 +14,7 @@ MIT/X Consortium License
© 2012 Robert Ransom <rransom.8774@gmail.com>
© 2013 Jakob Kramer <jakob.kramer@gmx.de>
© 2013 Anselm R Garbe <anselm@garbe.us>
+© 2013 Truls Becken <truls.becken@gmail.com>
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
diff --git a/Makefile b/Makefile
@@ -36,6 +36,7 @@ SRC = \
cmp.c \
comm.c \
cp.c \
+ cut.c \
date.c \
dirname.c \
echo.c \
diff --git a/cut.1 b/cut.1
@@ -0,0 +1,60 @@
+.TH CUT 1 sbase\-VERSION
+.SH NAME
+cut \- extract columns of data
+.SH SYNOPSIS
+.B cut \-b
+.I list
+.RB [ \-n ]
+.RI [ file ...]
+.br
+.B cut \-c
+.I list
+.RI [ file ...]
+.br
+.B cut \-f
+.I list
+.RB [ \-d
+.IR delim ]
+.RB [ \-s ]
+.RI [ file ...]
+.SH DESCRIPTION
+.B cut
+extracts bytes, characters, or delimited fields from each line of the
+given files and writes them to stdout. With no file, or when file is
+`-', cut reads from stdin.
+.P
+.I list
+is a comma- or space-separated list of numbers and ranges, where numbering
+starts from 1. Ranges are of the form `N-M'. If N or M is missing, the
+beginning or end of the line is assumed, respectively. Numbers and ranges
+may be repeated, overlapping, and in any order. Selected input is written
+in the same order that it is read, and is written exactly once.
+.SH OPTIONS
+.TP
+.BI \-b \ list
+The
+.I list
+specifies byte positions.
+.TP
+.BI \-c \ list
+The
+.I list
+specifies character positions.
+.TP
+.BI \-d \ delim
+Use the first byte of
+.I delim
+as the field delimiter instead of tab.
+.TP
+.BI \-f \ list
+The
+.I list
+specifies field numbers. Lines not containing field delimiters are
+passed through untouched.
+.TP
+.B \-n
+Do not split characters. A character is output if its last byte is
+selected.
+.TP
+.B \-s
+Suppress lines not containing field delimiters.
diff --git a/cut.c b/cut.c
@@ -0,0 +1,164 @@
+/* See LICENSE file for copyright and license details. */
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "text.h"
+#include "util.h"
+
+/*
+ * Report the three accepted invocation forms through eprintf().
+ */
+static void
+usage(void)
+{
+	static const char synopsis[] =
+		"usage: cut -b list [-n] [file...]\n"
+		" cut -c list [file...]\n"
+		" cut -f list [-d delim] [-s] [file...]\n";
+
+	eprintf("%s", synopsis);
+}
+
+/*
+ * One selected span of positions (bytes, characters or fields,
+ * depending on mode). Positions count from 1. A max of 0 marks a span
+ * with no upper bound, i.e. one that runs to the end of the line.
+ * Spans are chained into a singly linked list through next.
+ */
+typedef struct Range Range;
+struct Range {
+	size_t min;
+	size_t max;
+	Range *next;
+};
+
+/* Program state filled in while the options are parsed. */
+static Range *list;        /* head of the range chain built by insert() */
+static char mode;          /* option letter 'b', 'c' or 'f'; 0 until one is seen */
+static char delim = '\t';  /* field separator used in 'f' mode */
+static bool nflag;         /* set by -n */
+static bool sflag;         /* set by -s */
+
+/*
+ * Add range r to the global list, merging it with every range it
+ * overlaps or directly adjoins.
+ *
+ * The list is walked from the head. r is either linked in front of the
+ * first entry that lies beyond it with a gap, folded into the first
+ * entry it touches, or appended at the tail. A max of 0 is treated as
+ * "no upper bound" throughout.
+ */
+static void
+insert(Range *r)
+{
+ Range *l, *p, *t;
+
+ /* p trails l so that r can be linked in after it */
+ for(p = NULL, l = list; l; p = l, l = l->next) {
+ /* r is bounded and leaves a gap before l starts: r goes in front of l */
+ if(r->max && r->max+1 < l->min) {
+ r->next = l;
+ break;
+ /* l is unbounded, or r starts no later than one past l's end: fold r into l */
+ } else if(!l->max || r->min < l->max+2) {
+ l->min = MIN(r->min, l->min);
+ /* skip every following entry that r also reaches; p ends on the last one absorbed */
+ for(p = l, t = l->next; t; p = t, t = t->next)
+ if(r->max && r->max+1 < t->min) break;
+ /* the merged range is unbounded if r or the last absorbed entry is */
+ l->max = (p->max && r->max) ? MAX(p->max, r->max) : 0;
+ /* drop the absorbed entries from the chain; r itself is not linked in */
+ l->next = t;
+ return;
+ }
+ }
+ /* link r after its predecessor (the tail if the loop ran out), or make it the head */
+ if(p) p->next = r; else list = r;
+}
+
+/*
+ * Parse a list argument such as "1,3-5,7-" and add each element to the
+ * global range list through insert().
+ *
+ * str is modified in place: spaces are turned into commas so that both
+ * act as separators. Each element is N, N-M, N- or -M. A missing N
+ * means 1; a missing M is stored as max == 0 (no upper bound).
+ *
+ * Numbers must start with a digit. strtoul() on its own would also
+ * accept leading whitespace and a sign, which let input like "1--3"
+ * through with a wrapped-around upper bound.
+ *
+ * A missing argument (str == NULL) gets the usage message; a malformed
+ * or out-of-range element is reported with eprintf().
+ * NOTE(review): this relies on usage()/eprintf() not returning, as the
+ * unguarded use of r after the malloc check already does -- confirm.
+ *
+ * The Range array allocated here is linked into the list and is never
+ * freed.
+ */
+static void
+parselist(char *str)
+{
+	/* largest value for which the max+1 / max+2 arithmetic used on
+	 * ranges elsewhere in this file cannot wrap around */
+	const size_t limit = (size_t)-1 - 2;
+	char *s;
+	size_t n = 1;
+	Range *r;
+
+	if(!str)
+		usage();
+	/* normalise separators and count the elements */
+	for(s = str; *s; s++) {
+		if(*s == ' ')
+			*s = ',';
+		if(*s == ',')
+			n++;
+	}
+	if(!(r = malloc(n * sizeof *r)))
+		eprintf("malloc:");
+	/* the s++ in the loop header steps over the ',' after each element */
+	for(s = str; n; n--, s++) {
+		if(*s == '-') {
+			r->min = 1;
+		} else {
+			if(*s < '0' || *s > '9')
+				eprintf("cut: bad list value\n");
+			r->min = strtoul(s, &s, 10);
+		}
+		if(*s == '-') {
+			s++;
+			/* no digits after '-': open-ended range */
+			r->max = (*s >= '0' && *s <= '9') ? strtoul(s, &s, 10) : 0;
+		} else {
+			r->max = r->min;
+		}
+		r->next = NULL;
+		if(!r->min || r->min > limit || r->max > limit ||
+		   (r->max && r->max < r->min) || (*s && *s != ','))
+			eprintf("cut: bad list value\n");
+		insert(r++);
+	}
+}
+
+/*
+ * Return how many bytes to advance from s to reach list position pos.
+ *
+ * s     - current place in the NUL-terminated line
+ * pos   - target position, counted in the unit selected by mode
+ * prev  - in/out: the position s corresponds to on entry; updated to
+ *         the position reached (left untouched in 'b' mode when the
+ *         line ends first)
+ * count - number of seek() calls already made for this line; only
+ *         field mode looks at it
+ *
+ * The scan never moves past the terminating NUL.
+ */
+static size_t
+seek(const char *s, size_t pos, size_t *prev, size_t count)
+{
+	const char *t;
+	size_t n = pos - *prev;
+
+	if(mode == 'b') {
+		/* the line ends within the next n bytes: stop at its NUL */
+		if((t = memchr(s, 0, n)))
+			return t - s;
+		/* -n: back the cut point up until UTF8_POINT() accepts the
+		 * byte there (presumably the first byte of a UTF-8 sequence
+		 * -- confirm against its definition) */
+		if(nflag)
+			while(n && !UTF8_POINT(s[n])) n--;
+		*prev += n;
+		return n;
+	} else if(mode == 'c') {
+		/* stop on the (n+1)th byte accepted by UTF8_POINT(), or at the NUL */
+		for(n++, t = s; *t; t++)
+			if(UTF8_POINT(*t) && !--n) break;
+	} else {
+		/*
+		 * From the third call on, s rests on the delimiter where the
+		 * previous scan stopped, so start one byte further on to avoid
+		 * counting it again. If s is already at the NUL there is
+		 * nothing to skip: stepping to s+1 would read past the
+		 * terminator and could pick up stale bytes left in the reused
+		 * line buffer by an earlier, longer line.
+		 *
+		 * On the very first call (count == 0) the scan runs on past
+		 * the nth delimiter, so output starts at the field itself;
+		 * on later calls it stops on the delimiter.
+		 */
+		for(t = (count < 2 || !*s) ? s : s+1; n && *t; t++)
+			if(*t == delim && !--n && count) break;
+	}
+	*prev = pos;
+	return t - s;
+}
+
+/*
+ * Apply the global range list to every line read from fp and write the
+ * selected parts to stdout, one output line per input line.
+ *
+ * Lines are fetched with afgets() into a buffer that is kept across
+ * calls. NOTE(review): afgets() is declared outside this file; it is
+ * assumed to return a NUL-terminated line that may end in '\n' --
+ * confirm.
+ *
+ * In 'f' mode a line without the delimiter is printed unchanged, or
+ * dropped when sflag is set.
+ */
+static void
+cut(FILE *fp)
+{
+	static char *buf = NULL;
+	static size_t size = 0;
+	char *s;
+	size_t i, n, p;
+	Range *r;
+
+	while(afgets(&buf, &size, fp)) {
+		/* strip one trailing newline; guard the empty string, where
+		 * strlen()-1 would wrap and index far outside the buffer */
+		i = strlen(buf);
+		if(i && buf[i-1] == '\n')
+			buf[i-1] = 0;
+		if(mode == 'f' && !strchr(buf, delim)) {
+			if(!sflag)
+				puts(buf);
+			continue;
+		}
+		/* i counts seek() calls on this line, p is the list position
+		 * that s currently corresponds to */
+		for(i = 0, p = 1, s = buf, r = list; r; r = r->next, s += n) {
+			s += seek(s, r->min, &p, i++);
+			if(!*s)
+				break;
+			/* open-ended range: the rest of the line is selected */
+			if(!r->max) {
+				fputs(s, stdout);
+				break;
+			}
+			n = seek(s, r->max + 1, &p, i++);
+			if(fwrite(s, 1, n, stdout) != n)
+				eprintf("write error:");
+		}
+		putchar('\n');
+	}
+}
+
+/*
+ * Parse the options, then run cut() over each file operand, or over
+ * stdin when there is none. An operand of "-" also means stdin.
+ *
+ * One of -b, -c or -f is required; the usage message is printed
+ * otherwise, and also when an option that needs an argument has none.
+ * NOTE(review): ARGF() is assumed to evaluate to NULL when the option
+ * argument is missing -- confirm against the ARGBEGIN macro header.
+ */
+int
+main(int argc, char *argv[])
+{
+	FILE *fp;
+	char *arg;
+
+	ARGBEGIN {
+	case 'b':
+	case 'c':
+	case 'f':
+		mode = ARGC();
+		if(!(arg = ARGF()))
+			usage();
+		parselist(arg);
+		break;
+	case 'd':
+		if(!(arg = ARGF()))
+			usage();
+		delim = *arg;
+		break;
+	case 'n':
+		nflag = true;
+		break;
+	case 's':
+		sflag = true;
+		break;
+	default:
+		usage();
+	} ARGEND;
+
+	if(!mode)
+		usage();
+	if(!argc)
+		cut(stdin);
+	else for(; argc--; argv++) {
+		if(!(fp = strcmp(*argv, "-") ? fopen(*argv, "r") : stdin))
+			eprintf("fopen %s:", *argv);
+		cut(fp);
+		/* leave stdin open: a later "-" operand would otherwise
+		 * read from a closed stream */
+		if(fp != stdin)
+			fclose(fp);
+	}
+	return EXIT_SUCCESS;
+}