commit c451683c3f49239526f64fab629fd29c3e60cafd
parent e509d56bae7fd9799ae47d081d8b5dbaf6a3efe8
Author: Lorenzo Cogotti <miciamail@hotmail.it>
Date: Mon, 29 Apr 2013 18:54:36 +0200
Added POSIX paste(1) command implementation.
This commit adds a simple implementation of the POSIX
standard paste(1) command, and its man page.
TODO and Makefile have been updated accordingly.
Signed-off-by: Christoph Lohmann <20h@r-36.net>
Diffstat:
M | Makefile | | | 1 | + |
M | TODO | | | 2 | -- |
A | paste.1 | | | 122 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | paste.c | | | 231 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
4 files changed, 354 insertions(+), 2 deletions(-)
diff --git a/Makefile b/Makefile
@@ -43,6 +43,7 @@ SRC = \
mv.c \
nl.c \
nohup.c \
+ paste.c \
pwd.c \
rm.c \
sleep.c \
diff --git a/TODO b/TODO
@@ -28,8 +28,6 @@ md5sum [-c] [file...]
nice [-n N] [command]
-paste [-s] [-d list] [file...]
-
printenv [variable...]
printf [format] [data...]
diff --git a/paste.1 b/paste.1
@@ -0,0 +1,122 @@
+.TH PASTE 1 paste-VERSION "Apr 2013"
+.SH NAME
+paste \- merge corresponding or subsequent lines of files
+.SH "SYNOPSIS"
+.PP
+.B paste
+[
+.B \-s
+]
+[
+.B \-d
+.I list
+]
+.I file...
+.SH DESCRIPTION
+The
+.B paste
+utility concatenates the corresponding lines of the given input files,
+and writes the resulting lines to standard output. The default operation
+of
+.B paste
+concatenates the corresponding lines of the input files.
+The newline of every line except the line from the last input file is
+replaced with a tab.
+If an end-of-file condition is detected on one or more input files,
+but not all input files,
+.B paste
+behaves as though empty lines were read from the files on which
+end-of-file was detected, unless the
+.B \-s
+option is specified.
+.SH OPTIONS
+.TP
+.B \-d list
+unless a backslash character appears in
+.I list
+each character is an element specifying a delimiter.
+If a backslash character appears, that and one or more characters
+following it are an element specifying a delimiter.
+These elements specify one or more characters to use,
+instead of the default tab, to replace the newline of the input
+lines. The elements in
+.I list
+are used circularly; that is, when the
+.I list
+is exhausted the first element from the list is reused.
+When the
+.B \-s
+option is specified, the last newline in a file is not modified.
+The delimiter is reset to the first element of list after each file
+operand is processed.
+If a backslash character appears in list, it and the character following
+it represent one of the following delimiters:
+.RS
+.TP
+.I \en
+newline character
+.TP
+.I \et
+tab character
+.TP
+.I \e\e
+backslash character
+.TP
+.I \e0
+empty string (not a null character)
+.PP
+If any other character follows the backslash, the results are unspecified.
+.RE
+.TP
+.B \-s
+concatenate all of the lines of each separate input file in command line
+order. The newline of every line except the last line in each input file
+is replaced with a tab, unless otherwise specified by the
+.B \-d
+option.
+.PP
+If '\-' is specified for one or more input files, the standard input is
+used; standard input is read one line at a time, circularly for each
+instance of '\-'.
+.SH EXIT VALUES
+The
+.B paste
+utility exits 0 on successful completion, and >0 if an error
+occurs.
+.SH ENVIRONMENT VARIABLES
+The following environment variables affect the execution:
+.TP
+.B LANG
+provide a default value for the internationalization variables
+that are unset or null.
+.TP
+.B LC_ALL
+if set to a non-empty string value, override the values of all the
+other internationalization variables.
+.TP
+.B LC_CTYPE
+determine the locale for the interpretation of sequences of bytes
+of text data as characters (for example, single-byte as opposed to
+multi-byte characters in arguments and input files).
+.TP
+.B LC_MESSAGES
+determine the locale that should be used to affect the format and
+contents of diagnostic messages written to standard error.
+.SH CONFORMING TO
+The
+.B paste
+utility is IEEE Std 1003.2 (POSIX.2) compatible.
+.SH EXAMPLES
+.TP
+.I "ls | paste - - - -"
+.PP
+Write out a directory in four columns.
+.TP
+.I "paste -s -d '\et\en' file"
+.PP
+Combine pairs of lines from a file into single lines.
+.SH AUTHOR
+Written by Lorenzo Cogotti.
+.SH SEE ALSO
+.BR cut (1),
+.BR lam (1)
diff --git a/paste.c b/paste.c
@@ -0,0 +1,231 @@
+/* See LICENSE file for copyright and license details. */
+#include <locale.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <wchar.h>
+#include "util.h"
+
+/* An input stream paired with the operand name it was opened from. */
+typedef struct {
+ FILE *fp; /* stream the characters are read from */
+ const char *name; /* operand as given on the command line; used in read error messages */
+} Fdescr;
+
+/* report the command synopsis through eprintf() */
+static void eusage(void);
+/* resolve backslash escapes in place; returns the number of resulting elements */
+static size_t unescape(wchar_t *);
+/* read one wide character; read errors are reported through eprintf() */
+static wint_t in(Fdescr *);
+/* write one wide character to stdout; write errors are reported through eprintf() */
+static void out(wchar_t);
+/* -s mode: join the lines of each file, one file after the other */
+static void sequential(Fdescr *, int, const wchar_t *, size_t);
+/* default mode: join the corresponding lines of all files */
+static void parallel(Fdescr *, int, const wchar_t *, size_t);
+
+/*
+ * paste: merge corresponding (default) or subsequent (-s) lines of files.
+ *
+ * Parses the -s and -d options, converts the delimiter list to wide
+ * characters and resolves its escape sequences, opens every file operand
+ * ("-" selects stdin) and hands the descriptors to sequential() or
+ * parallel(). Returns 0 once all input has been processed.
+ *
+ * NOTE(review): eprintf() comes from util.h and presumably terminates the
+ * program; the error paths below rely on that -- confirm.
+ */
+int
+main(int argc, char **argv) {
+	const char *adelim = NULL;
+	bool seq = false;
+	wchar_t *delim;
+	size_t len;
+	Fdescr *dsc;
+	int i, c;
+
+	/* needed so that multibyte delimiters and input are decoded correctly */
+	setlocale(LC_CTYPE, "");
+
+	while((c = getopt(argc, argv, "sd:")) != -1)
+		switch(c) {
+		case 's':
+			seq = true;
+			break;
+		case 'd':
+			adelim = optarg;
+			break;
+		case '?':
+		default:
+			eusage();
+			break;
+		}
+
+	argc -= optind;
+	argv += optind;
+	if(argc == 0)
+		eusage();
+
+	/* populate delimiters */
+	if(!adelim)
+		adelim = "\t";
+
+	/* first pass only measures the list, in wide characters */
+	len = mbstowcs(NULL, adelim, 0);
+	if(len == (size_t)-1)
+		eprintf("invalid delimiter\n");
+
+	delim = malloc((len + 1) * sizeof(*delim));
+	if(!delim)
+		eprintf("out of memory\n");
+
+	/*
+	 * Convert len + 1 elements: with a limit of len, mbstowcs() stops
+	 * before storing the terminating null and unescape() would scan
+	 * uninitialised memory past the end of the list.
+	 */
+	mbstowcs(delim, adelim, len + 1);
+	len = unescape(delim);
+	if(len == 0)
+		eprintf("no delimiters specified\n");
+
+	/* populate file list */
+	dsc = malloc(argc * sizeof(*dsc));
+	if(!dsc)
+		eprintf("out of memory\n");
+
+	for(i = 0; i < argc; i++) {
+		const char *name = argv[i];
+
+		if(strcmp(name, "-") == 0)
+			dsc[i].fp = stdin;
+		else
+			dsc[i].fp = fopen(name, "r");
+
+		if(!dsc[i].fp)
+			eprintf("can't open '%s':", name);
+
+		dsc[i].name = name;
+	}
+
+	if(seq)
+		sequential(dsc, argc, delim, len);
+	else
+		parallel(dsc, argc, delim, len);
+
+	/* stdin may appear several times in the list; never close it */
+	for(i = 0; i < argc; i++) {
+		if(dsc[i].fp != stdin)
+			(void)fclose(dsc[i].fp);
+	}
+
+	free(delim);
+	free(dsc);
+	return 0;
+}
+
+/*
+ * Report the command synopsis through eprintf().
+ * NOTE(review): eprintf() is declared outside this file (util.h) and
+ * presumably terminates the program -- confirm.
+ */
+static void
+eusage(void) {
+ eprintf("usage: paste [-s][-d list] file...\n");
+}
+
+/*
+ * Resolve the backslash escapes of a delimiter list in place.
+ *
+ * \n, \t and \\ become newline, tab and backslash; \0 becomes a null
+ * element that still counts as a list entry. Processing stops at the
+ * first unknown escape or at a trailing backslash. Returns the number of
+ * elements stored at the start of delim; the result is not re-terminated,
+ * so callers must use the returned count.
+ */
+static size_t
+unescape(wchar_t *delim) {
+	const wchar_t *src = delim;
+	wchar_t *dst = delim;
+
+	while(*src != '\0') {
+		wchar_t c = *src++;
+
+		if(c == '\\') {
+			wchar_t esc = *src++;
+
+			if(esc == 'n')
+				c = '\n';
+			else if(esc == 't')
+				c = '\t';
+			else if(esc == '0')
+				c = '\0';
+			else if(esc != '\\')
+				/* POSIX: unspecified results */
+				return (size_t)(dst - delim);
+		}
+
+		/* the write position never overtakes the read position */
+		*dst++ = c;
+	}
+
+	return (size_t)(dst - delim);
+}
+
+/*
+ * Read the next wide character from f.
+ * Returns the character, or WEOF when nothing more can be read; a WEOF
+ * caused by a stream error is reported through eprintf() together with
+ * the operand name.
+ */
+static wint_t
+in(Fdescr *f) {
+	wint_t wc;
+
+	wc = fgetwc(f->fp);
+	if(wc != WEOF)
+		return wc;
+
+	if(ferror(f->fp))
+		eprintf("'%s' read error:", f->name);
+
+	return WEOF;
+}
+
+/*
+ * Write one wide character to standard output and report a stream error
+ * through eprintf() if stdout is in an error state afterwards.
+ */
+static void
+out(wchar_t c) {
+	putwchar(c);
+
+	if(!ferror(stdout))
+		return;
+
+	eprintf("write error:");
+}
+
+/*
+ * -s mode: write the lines of each file as one output line.
+ *
+ * dsc/len is the list of inputs, delim/cnt the delimiter list. Every
+ * newline that is followed by more input is replaced by the next
+ * delimiter (a null element writes nothing); the list is used circularly
+ * and restarts at its first element for each file. A newline that ends
+ * the file is written unchanged.
+ */
+static void
+sequential(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+	Fdescr *f;
+
+	for(f = dsc; f < dsc + len; f++) {
+		size_t next = 0;
+		bool held = false; /* a newline was read but not yet written */
+		wint_t c;
+
+		for(;;) {
+			c = in(f);
+			if(c == WEOF)
+				break;
+
+			if(held) {
+				/* more input follows: the held newline becomes a delimiter */
+				wchar_t sep = delim[next];
+
+				next = (next + 1) % cnt;
+				if(sep != '\0')
+					out(sep);
+			}
+
+			held = (c == '\n');
+			if(!held)
+				out((wchar_t)c);
+		}
+
+		/* the final newline of a file is passed through untouched */
+		if(held)
+			out('\n');
+	}
+}
+
+/*
+ * Default mode: merge the corresponding lines of all inputs.
+ *
+ * dsc/len is the list of inputs, delim/cnt the delimiter list. For every
+ * output line one input line is read from each file in order; column i is
+ * terminated by delim[i % cnt], the last column by a newline. A null
+ * delimiter element stands for the empty string and writes nothing.
+ * Files that are already exhausted contribute an empty column as long as
+ * at least one file still delivers input; processing stops once a whole
+ * pass over the files yields no input.
+ */
+static void
+parallel(Fdescr *dsc, int len, const wchar_t *delim, size_t cnt) {
+	bool started;
+
+	do {
+		int i;
+
+		/* becomes true once any file delivered input for this line */
+		started = false;
+		for(i = 0; i < len; i++) {
+			wchar_t d = delim[(size_t)i % cnt];
+			wint_t c;
+
+			while((c = in(&dsc[i])) != WEOF) {
+				if(!started) {
+					int k;
+
+					/*
+					 * Earlier files were exhausted: emit
+					 * their empty columns, each with its
+					 * own delimiter.
+					 */
+					for(k = 0; k < i; k++) {
+						wchar_t pad = delim[(size_t)k % cnt];
+
+						if(pad != '\0')
+							out(pad);
+					}
+					started = true;
+				}
+
+				if(c == '\n')
+					break;
+
+				out((wchar_t)c);
+			}
+
+			/*
+			 * Nothing written on this line yet: keep the column
+			 * pending, it is padded above if a later file still
+			 * has input.
+			 */
+			if(!started)
+				continue;
+
+			/* close the column, also for exhausted or unterminated input */
+			if(i == len - 1)
+				out('\n');
+			else if(d != '\0')
+				out(d);
+		}
+	} while(started);
+}