sbase

suckless unix tools
git clone git@git.2f30.org/sbase.git
Log | Files | Refs | README | LICENSE

join.c (9795B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <ctype.h>
      3 #include <stdint.h>
      4 #include <stdio.h>
      5 #include <stdlib.h>
      6 #include <string.h>
      7 
      8 #include "text.h"
      9 #include "utf.h"
     10 #include "util.h"
     11 
     12 enum {
     13 	INIT = 1,
     14 	GROW = 2,
     15 };
     16 
     17 enum {
     18 	EXPAND = 0,
     19 	RESET  = 1,
     20 };
     21 
     22 enum { FIELD_ERROR = -2, };
     23 
     24 struct field {
     25 	char *s;
     26 	size_t len;
     27 };
     28 
     29 struct jline {
     30 	struct line text;
     31 	size_t nf;
     32 	size_t maxf;
     33 	struct field *fields;
     34 };
     35 
     36 struct spec {
     37 	size_t fileno;
     38 	size_t fldno;
     39 };
     40 
     41 struct outlist {
     42 	size_t ns;
     43 	size_t maxs;
     44 	struct spec **specs;
     45 };
     46 
     47 struct span {
     48 	size_t nl;
     49 	size_t maxl;
     50 	struct jline **lines;
     51 };
     52 
     53 static char *sep = NULL;
     54 static char *replace = NULL;
     55 static const char defaultofs = ' ';
     56 static const int jfield = 1;            /* POSIX default join field */
     57 static int unpairsa = 0, unpairsb = 0;
     58 static int oflag = 0;
     59 static int pairs = 1;
     60 static size_t seplen;
     61 static struct outlist output;
     62 
     63 static void
     64 usage(void)
     65 {
     66 	eprintf("usage: %s [-1 field] [-2 field] [-o list] [-e string] "
     67 	        "[-a | -v fileno] [-t delim] file1 file2\n", argv0);
     68 }
     69 
     70 static void
     71 prfield(struct field *fp)
     72 {
     73 	if (fwrite(fp->s, 1, fp->len, stdout) != fp->len)
     74 		eprintf("fwrite:");
     75 }
     76 
     77 static void
     78 prsep(void)
     79 {
     80 	if (sep)
     81 		fwrite(sep, 1, seplen, stdout);
     82 	else
     83 		putchar(defaultofs);
     84 }
     85 
     86 static void
     87 swaplines(struct jline *la, struct jline *lb)
     88 {
     89 	struct jline tmp;
     90 
     91 	tmp = *la;
     92 	*la = *lb;
     93 	*lb = tmp;
     94 }
     95 
     96 static void
     97 prjoin(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
     98 {
     99 	struct spec *sp;
    100 	struct field *joinfield;
    101 	size_t i;
    102 
    103 	if (jfa >= la->nf || jfb >= lb->nf)
    104 		return;
    105 
    106 	joinfield = &la->fields[jfa];
    107 
    108 	if (oflag) {
    109 		for (i = 0; i < output.ns; i++) {
    110 			sp = output.specs[i];
    111 
    112 			if (sp->fileno == 1) {
    113 				if (sp->fldno < la->nf)
    114 					prfield(&la->fields[sp->fldno]);
    115 				else if (replace)
    116 					fputs(replace, stdout);
    117 			} else if (sp->fileno == 2) {
    118 				if (sp->fldno < lb->nf)
    119 					prfield(&lb->fields[sp->fldno]);
    120 				else if (replace)
    121 					fputs(replace, stdout);
    122 			} else if (sp->fileno == 0) {
    123 				prfield(joinfield);
    124 			}
    125 
    126 			if (i < output.ns - 1)
    127 				prsep();
    128 		}
    129 	} else {
    130 		prfield(joinfield);
    131 		prsep();
    132 
    133 		for (i = 0; i < la->nf; i++) {
    134 			if (i != jfa) {
    135 				prfield(&la->fields[i]);
    136 				prsep();
    137 			}
    138 		}
    139 		for (i = 0; i < lb->nf; i++) {
    140 			if (i != jfb) {
    141 				prfield(&lb->fields[i]);
    142 				if (i < lb->nf - 1)
    143 					prsep();
    144 			}
    145 		}
    146 	}
    147 	putchar('\n');
    148 }
    149 
    150 static void
    151 prline(struct jline *lp)
    152 {
    153 	if (fwrite(lp->text.data, 1, lp->text.len, stdout) != lp->text.len)
    154 		eprintf("fwrite:");
    155 	putchar('\n');
    156 }
    157 
    158 static int
    159 jlinecmp(struct jline *la, struct jline *lb, size_t jfa, size_t jfb)
    160 {
    161 	int status;
    162 
    163 	/* return FIELD_ERROR if both lines are short */
    164 	if (jfa >= la->nf) {
    165 		status = (jfb >= lb->nf) ? FIELD_ERROR : -1;
    166 	} else if (jfb >= lb->nf) {
    167 		status = 1;
    168 	} else {
    169 		status = memcmp(la->fields[jfa].s, lb->fields[jfb].s,
    170 		                MAX(la->fields[jfa].len, lb->fields[jfb].len));
    171 		LIMIT(status, -1, 1);
    172 	}
    173 
    174 	return status;
    175 }
    176 
    177 static void
    178 addfield(struct jline *lp, char *sp, size_t len)
    179 {
    180 	if (lp->nf >= lp->maxf) {
    181 		lp->fields = ereallocarray(lp->fields, (GROW * lp->maxf),
    182 		        sizeof(struct field));
    183 		lp->maxf *= GROW;
    184 	}
    185 	lp->fields[lp->nf].s = sp;
    186 	lp->fields[lp->nf].len = len;
    187 	lp->nf++;
    188 }
    189 
    190 static void
    191 prspanjoin(struct span *spa, struct span *spb, size_t jfa, size_t jfb)
    192 {
    193 	size_t i, j;
    194 
    195 	for (i = 0; i < (spa->nl - 1); i++)
    196 		for (j = 0; j < (spb->nl - 1); j++)
    197 			prjoin(spa->lines[i], spb->lines[j], jfa, jfb);
    198 }
    199 
    200 static struct jline *
    201 makeline(char *s, size_t len)
    202 {
    203 	struct jline *lp;
    204 	char *tmp;
    205 	size_t i, end;
    206 
    207 	if (s[len - 1] == '\n')
    208 		s[--len] = '\0';
    209 
    210 	lp = ereallocarray(NULL, INIT, sizeof(struct jline));
    211 	lp->text.data = s;
    212 	lp->text.len = len;
    213 	lp->fields = ereallocarray(NULL, INIT, sizeof(struct field));
    214 	lp->nf = 0;
    215 	lp->maxf = INIT;
    216 
    217 	for (i = 0; i < lp->text.len && isblank(lp->text.data[i]); i++)
    218 		;
    219 	while (i < lp->text.len) {
    220 		if (sep) {
    221 			if ((lp->text.len - i) < seplen ||
    222 			    !(tmp = memmem(lp->text.data + i,
    223 			                   lp->text.len - i, sep, seplen))) {
    224 				goto eol;
    225 			}
    226 			end = tmp - lp->text.data;
    227 			addfield(lp, lp->text.data + i, end - i);
    228 			i = end + seplen;
    229 		} else {
    230 			for (end = i; !(isblank(lp->text.data[end])); end++) {
    231 				if (end + 1 == lp->text.len)
    232 					goto eol;
    233 			}
    234 			addfield(lp, lp->text.data + i, end - i);
    235 			for (i = end; isblank(lp->text.data[i]); i++)
    236 				;
    237 		}
    238 	}
    239 eol:
    240 	addfield(lp, lp->text.data + i, lp->text.len - i);
    241 
    242 	return lp;
    243 }
    244 
    245 static int
    246 addtospan(struct span *sp, FILE *fp, int reset)
    247 {
    248 	char *newl = NULL;
    249 	ssize_t len;
    250 	size_t size = 0;
    251 
    252 	if ((len = getline(&newl, &size, fp)) < 0) {
    253 		if (ferror(fp))
    254 			eprintf("getline:");
    255 		else
    256 			return 0;
    257 	}
    258 
    259 	if (reset)
    260 		sp->nl = 0;
    261 
    262 	if (sp->nl >= sp->maxl) {
    263 		sp->lines = ereallocarray(sp->lines, (GROW * sp->maxl),
    264 		        sizeof(struct jline *));
    265 		sp->maxl *= GROW;
    266 	}
    267 
    268 	sp->lines[sp->nl] = makeline(newl, len);
    269 	sp->nl++;
    270 	return 1;
    271 }
    272 
    273 static void
    274 initspan(struct span *sp)
    275 {
    276 	sp->nl = 0;
    277 	sp->maxl = INIT;
    278 	sp->lines = ereallocarray(NULL, INIT, sizeof(struct jline *));
    279 }
    280 
    281 static void
    282 freespan(struct span *sp)
    283 {
    284 	size_t i;
    285 
    286 	for (i = 0; i < sp->nl; i++) {
    287 		free(sp->lines[i]->fields);
    288 		free(sp->lines[i]->text.data);
    289 	}
    290 	free(sp->lines);
    291 }
    292 
    293 static void
    294 initolist(struct outlist *olp)
    295 {
    296 	olp->ns = 0;
    297 	olp->maxs = 1;
    298 	olp->specs = ereallocarray(NULL, INIT, sizeof(struct spec *));
    299 }
    300 
    301 static void
    302 addspec(struct outlist *olp, struct spec *sp)
    303 {
    304 	if (olp->ns >= olp->maxs) {
    305 		olp->specs = ereallocarray(olp->specs, (GROW * olp->maxs),
    306 		        sizeof(struct spec *));
    307 		olp->maxs *= GROW;
    308 	}
    309 	olp->specs[olp->ns] = sp;
    310 	olp->ns++;
    311 }
    312 
    313 static struct spec *
    314 makespec(char *s)
    315 {
    316 	struct spec *sp;
    317 	int fileno;
    318 	size_t fldno;
    319 
    320 	if (!strcmp(s, "0")) {   /* join field must be 0 and nothing else */
    321 		fileno = 0;
    322 		fldno = 0;
    323 	} else if ((s[0] == '1' || s[0] == '2') && s[1] == '.') {
    324 		fileno = s[0] - '0';
    325 		fldno = estrtonum(&s[2], 1, MIN(LLONG_MAX, SIZE_MAX)) - 1;
    326 	} else {
    327 		eprintf("%s: invalid format\n", s);
    328 	}
    329 
    330 	sp = ereallocarray(NULL, INIT, sizeof(struct spec));
    331 	sp->fileno = fileno;
    332 	sp->fldno = fldno;
    333 	return sp;
    334 }
    335 
    336 static void
    337 makeolist(struct outlist *olp, char *s)
    338 {
    339 	char *item, *sp;
    340 	sp = s;
    341 
    342 	while (sp) {
    343 		item = sp;
    344 		sp = strpbrk(sp, ", \t");
    345 		if (sp)
    346 			*sp++ = '\0';
    347 		addspec(olp, makespec(item));
    348 	}
    349 }
    350 
    351 static void
    352 freespecs(struct outlist *olp)
    353 {
    354 	size_t i;
    355 
    356 	for (i = 0; i < olp->ns; i++)
    357 		free(olp->specs[i]);
    358 }
    359 
    360 static void
    361 join(FILE *fa, FILE *fb, size_t jfa, size_t jfb)
    362 {
    363 	struct span spa, spb;
    364 	int cmp, eofa, eofb;
    365 
    366 	initspan(&spa);
    367 	initspan(&spb);
    368 	cmp = eofa = eofb = 0;
    369 
    370 	addtospan(&spa, fa, RESET);
    371 	addtospan(&spb, fb, RESET);
    372 
    373 	while (spa.nl && spb.nl) {
    374 		if ((cmp = jlinecmp(spa.lines[0], spb.lines[0], jfa, jfb)) < 0) {
    375 			if (unpairsa)
    376 				prline(spa.lines[0]);
    377 			if (!addtospan(&spa, fa, RESET)) {
    378 				if (unpairsb) {    /* a is EOF'd; print the rest of b */
    379 					do
    380 						prline(spb.lines[0]);
    381 					while (addtospan(&spb, fb, RESET));
    382 				}
    383 				eofa = eofb = 1;
    384 			} else {
    385 				continue;
    386 			}
    387 		} else if (cmp > 0) {
    388 			if (unpairsb)
    389 				prline(spb.lines[0]);
    390 			if (!addtospan(&spb, fb, RESET)) {
    391 				if (unpairsa) {    /* b is EOF'd; print the rest of a */
    392 					do
    393 						prline(spa.lines[0]);
    394 					while (addtospan(&spa, fa, RESET));
    395 				}
    396 				eofa = eofb = 1;
    397 			} else {
    398 				continue;
    399 			}
    400 		} else if (cmp == 0) {
    401 			/* read all consecutive matching lines from a */
    402 			do {
    403 				if (!addtospan(&spa, fa, EXPAND)) {
    404 					eofa = 1;
    405 					spa.nl++;
    406 					break;
    407 				}
    408 			} while (jlinecmp(spa.lines[spa.nl-1], spb.lines[0], jfa, jfb) == 0);
    409 
    410 			/* read all consecutive matching lines from b */
    411 			do {
    412 				if (!addtospan(&spb, fb, EXPAND)) {
    413 					eofb = 1;
    414 					spb.nl++;
    415 					break;
    416 				}
    417 			} while (jlinecmp(spa.lines[0], spb.lines[spb.nl-1], jfa, jfb) == 0);
    418 
    419 			if (pairs)
    420 				prspanjoin(&spa, &spb, jfa, jfb);
    421 
    422 		} else {      /* FIELD_ERROR: both lines lacked join fields */
    423 			if (unpairsa)
    424 				prline(spa.lines[0]);
    425 			if (unpairsb)
    426 				prline(spb.lines[0]);
    427 			eofa = addtospan(&spa, fa, RESET) ? 0 : 1;
    428 			eofb = addtospan(&spb, fb, RESET) ? 0 : 1;
    429 			if (!eofa && !eofb)
    430 				continue;
    431 		}
    432 
    433 		if (eofa) {
    434 			spa.nl = 0;
    435 		} else {
    436 			swaplines(spa.lines[0], spa.lines[spa.nl - 1]);   /* ugly */
    437 			spa.nl = 1;
    438 		}
    439 
    440 		if (eofb) {
    441 			spb.nl = 0;
    442 		} else {
    443 			swaplines(spb.lines[0], spb.lines[spb.nl - 1]);   /* ugly */
    444 			spb.nl = 1;
    445 		}
    446 	}
    447 	freespan(&spa);
    448 	freespan(&spb);
    449 }
    450 
    451 
    452 int
    453 main(int argc, char *argv[])
    454 {
    455 	size_t jf[2] = { jfield, jfield, };
    456 	FILE *fp[2];
    457 	int ret = 0, n;
    458 	char *fno;
    459 
    460 	ARGBEGIN {
    461 	case '1':
    462 		jf[0] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
    463 		break;
    464 	case '2':
    465 		jf[1] = estrtonum(EARGF(usage()), 1, MIN(LLONG_MAX, SIZE_MAX));
    466 		break;
    467 	case 'a':
    468 		fno = EARGF(usage());
    469 		if (strcmp(fno, "1") == 0)
    470 			unpairsa = 1;
    471 		else if (strcmp(fno, "2") == 0)
    472 			unpairsb = 1;
    473 		else
    474 			usage();
    475 		break;
    476 	case 'e':
    477 		replace = EARGF(usage());
    478 		break;
    479 	case 'o':
    480 		oflag = 1;
    481 		initolist(&output);
    482 		makeolist(&output, EARGF(usage()));
    483 		break;
    484 	case 't':
    485 		sep = EARGF(usage());
    486 		break;
    487 	case 'v':
    488 		pairs = 0;
    489 		fno = EARGF(usage());
    490 		if (strcmp(fno, "1") == 0)
    491 			unpairsa = 1;
    492 		else if (strcmp(fno, "2") == 0)
    493 			unpairsb = 1;
    494 		else
    495 			usage();
    496 		break;
    497 	default:
    498 		usage();
    499 	} ARGEND
    500 
    501 	if (sep)
    502 		seplen = unescape(sep);
    503 
    504 	if (argc != 2)
    505 		usage();
    506 
    507 	for (n = 0; n < 2; n++) {
    508 		if (!strcmp(argv[n], "-")) {
    509 			argv[n] = "<stdin>";
    510 			fp[n] = stdin;
    511 		} else if (!(fp[n] = fopen(argv[n], "r"))) {
    512 			eprintf("fopen %s:", argv[n]);
    513 		}
    514 	}
    515 
    516 	jf[0]--;
    517 	jf[1]--;
    518 
    519 	join(fp[0], fp[1], jf[0], jf[1]);
    520 
    521 	if (oflag)
    522 		freespecs(&output);
    523 
    524 	if (fshut(fp[0], argv[0]) | (fp[0] != fp[1] && fshut(fp[1], argv[1])) |
    525 	    fshut(stdout, "<stdout>"))
    526 		ret = 2;
    527 
    528 	return ret;
    529 }