sbase

suckless unix tools
git clone git://git.2f30.org/sbase
Log | Files | Refs | README | LICENSE

sort.c (9405B)


      1 /* See LICENSE file for copyright and license details. */
      2 #include <ctype.h>
      3 #include <stdio.h>
      4 #include <stdlib.h>
      5 #include <string.h>
      6 
      7 #include "queue.h"
      8 #include "text.h"
      9 #include "utf.h"
     10 #include "util.h"
     11 
     12 struct keydef {
     13 	int start_column;
     14 	int end_column;
     15 	int start_char;
     16 	int end_char;
     17 	int flags;
     18 	TAILQ_ENTRY(keydef) entry;
     19 };
     20 
     21 struct column {
     22 	struct line line;
     23 	size_t cap;
     24 };
     25 
     26 enum {
     27 	MOD_N      = 1 << 0,
     28 	MOD_STARTB = 1 << 1,
     29 	MOD_ENDB   = 1 << 2,
     30 	MOD_R      = 1 << 3,
     31 	MOD_D      = 1 << 4,
     32 	MOD_F      = 1 << 5,
     33 	MOD_I      = 1 << 6,
     34 };
     35 
     36 static TAILQ_HEAD(kdhead, keydef) kdhead = TAILQ_HEAD_INITIALIZER(kdhead);
     37 
     38 static int Cflag = 0, cflag = 0, uflag = 0;
     39 static char *fieldsep = NULL;
     40 static size_t fieldseplen = 0;
     41 static struct column col1, col2;
     42 
     43 static void
     44 skipblank(struct line *a)
     45 {
     46 	while (a->len && (*(a->data) == ' ' || *(a->data) == '\t')) {
     47 		a->data++;
     48 		a->len--;
     49 	}
     50 }
     51 
     52 static void
     53 skipnonblank(struct line *a)
     54 {
     55 	while (a->len && (*(a->data) != '\n' && *(a->data) != ' ' &&
     56 	                  *(a->data) != '\t')) {
     57 		a->data++;
     58 		a->len--;
     59 	}
     60 }
     61 
     62 static void
     63 skipcolumn(struct line *a, int skip_to_next_col)
     64 {
     65 	char *s;
     66 
     67 	if (fieldsep) {
     68 		if ((s = memmem(a->data, a->len, fieldsep, fieldseplen))) {
     69 			if (skip_to_next_col) {
     70 				s += fieldseplen;
     71 				a->data = s;
     72 				a->len = a->len - (s - a->data);
     73 			}
     74 		} else {
     75 			a->data += a->len - 1;
     76 			a->len = 1;
     77 		}
     78 	} else {
     79 		skipblank(a);
     80 		skipnonblank(a);
     81 	}
     82 }
     83 
     84 static void
     85 columns(struct line *line, const struct keydef *kd, struct column *col)
     86 {
     87 	Rune r;
     88 	struct line start, end;
     89 	size_t utflen, rlen;
     90 	int i;
     91 
     92 	start.data = line->data;
     93 	start.len = line->len;
     94 	for (i = 1; i < kd->start_column; i++)
     95 		skipcolumn(&start, 1);
     96 	if (kd->flags & MOD_STARTB)
     97 		skipblank(&start);
     98 	for (utflen = 0; start.len > 1 && utflen < kd->start_char - 1;) {
     99 		rlen = chartorune(&r, start.data);
    100 		start.data += rlen;
    101 		start.len -= rlen;
    102 		utflen++;
    103 	}
    104 
    105 	end.data = line->data;
    106 	end.len = line->len;
    107 	if (kd->end_column) {
    108 		for (i = 1; i < kd->end_column; i++)
    109 			skipcolumn(&end, 1);
    110 		if (kd->flags & MOD_ENDB)
    111 			skipblank(&end);
    112 		if (kd->end_char) {
    113 			for (utflen = 0; end.len > 1 && utflen < kd->end_char;) {
    114 				rlen = chartorune(&r, end.data);
    115 				end.data += rlen;
    116 				end.len -= rlen;
    117 				utflen++;
    118 			}
    119 		} else {
    120 			skipcolumn(&end, 0);
    121 		}
    122 	} else {
    123 		end.data += end.len - 1;
    124 		end.len = 1;
    125 	}
    126 	col->line.len = MAX(0, end.data - start.data);
    127 	if (!(col->line.data) || col->cap < col->line.len + 1) {
    128 		free(col->line.data);
    129 		col->line.data = emalloc(col->line.len + 1);
    130 	}
    131 	memcpy(col->line.data, start.data, col->line.len);
    132 	col->line.data[col->line.len] = '\0';
    133 }
    134 
    135 static int
    136 skipmodcmp(struct line *a, struct line *b, int flags)
    137 {
    138 	Rune r1, r2;
    139 	size_t offa = 0, offb = 0;
    140 
    141 	do {
    142 		offa += chartorune(&r1, a->data + offa);
    143 		offb += chartorune(&r2, b->data + offb);
    144 
    145 		if (flags & MOD_D && flags & MOD_I) {
    146 			while (offa < a->len && ((!isblankrune(r1) &&
    147 			       !isalnumrune(r1)) || (!isprintrune(r1))))
    148 				offa += chartorune(&r1, a->data + offa);
    149 			while (offb < b->len && ((!isblankrune(r2) &&
    150 			       !isalnumrune(r2)) || (!isprintrune(r2))))
    151 				offb += chartorune(&r2, b->data + offb);
    152 		}
    153 		else if (flags & MOD_D) {
    154 			while (offa < a->len && !isblankrune(r1) &&
    155 			       !isalnumrune(r1))
    156 				offa += chartorune(&r1, a->data + offa);
    157 			while (offb < b->len && !isblankrune(r2) &&
    158 			       !isalnumrune(r2))
    159 				offb += chartorune(&r2, b->data + offb);
    160 		}
    161 		else if (flags & MOD_I) {
    162 			while (offa < a->len && !isprintrune(r1))
    163 				offa += chartorune(&r1, a->data + offa);
    164 			while (offb < b->len && !isprintrune(r2))
    165 				offb += chartorune(&r2, b->data + offb);
    166 		}
    167 		if (flags & MOD_F) {
    168 			r1 = toupperrune(r1);
    169 			r2 = toupperrune(r2);
    170 		}
    171 	} while (r1 && r1 == r2);
    172 
    173 	return r1 - r2;
    174 }
    175 
    176 static int
    177 slinecmp(struct line *a, struct line *b)
    178 {
    179 	int res = 0;
    180 	double x, y;
    181 	struct keydef *kd;
    182 
    183 	TAILQ_FOREACH(kd, &kdhead, entry) {
    184 		columns(a, kd, &col1);
    185 		columns(b, kd, &col2);
    186 
    187 		/* if -u is given, don't use default key definition
    188 		 * unless it is the only one */
    189 		if (uflag && kd == TAILQ_LAST(&kdhead, kdhead) &&
    190 		    TAILQ_LAST(&kdhead, kdhead) != TAILQ_FIRST(&kdhead)) {
    191 			res = 0;
    192 		} else if (kd->flags & MOD_N) {
    193 			x = strtod(col1.line.data, NULL);
    194 			y = strtod(col2.line.data, NULL);
    195 			res = (x < y) ? -1 : (x > y);
    196 		} else if (kd->flags & (MOD_D | MOD_F | MOD_I)) {
    197 			res = skipmodcmp(&col1.line, &col2.line, kd->flags);
    198 		} else {
    199 			res = linecmp(&col1.line, &col2.line);
    200 		}
    201 
    202 		if (kd->flags & MOD_R)
    203 			res = -res;
    204 		if (res)
    205 			break;
    206 	}
    207 
    208 	return res;
    209 }
    210 
    211 static int
    212 check(FILE *fp, const char *fname)
    213 {
    214 	static struct line prev, cur, tmp;
    215 	static size_t prevsize, cursize, tmpsize;
    216 	ssize_t len;
    217 
    218 	if (!prev.data) {
    219 		if ((len = getline(&prev.data, &prevsize, fp)) < 0)
    220 			eprintf("getline:");
    221 		prev.len = len;
    222 	}
    223 	while ((len = getline(&cur.data, &cursize, fp)) > 0) {
    224 		cur.len = len;
    225 		if (uflag > slinecmp(&cur, &prev)) {
    226 			if (!Cflag) {
    227 				weprintf("disorder %s: ", fname);
    228 				fwrite(cur.data, 1, cur.len, stderr);
    229 			}
    230 			return 1;
    231 		}
    232 		tmp = cur;
    233 		tmpsize = cursize;
    234 		cur = prev;
    235 		cursize = prevsize;
    236 		prev = tmp;
    237 		prevsize = tmpsize;
    238 	}
    239 
    240 	return 0;
    241 }
    242 
    243 static int
    244 parse_flags(char **s, int *flags, int bflag)
    245 {
    246 	while (isalpha((int)**s)) {
    247 		switch (*((*s)++)) {
    248 		case 'b':
    249 			*flags |= bflag;
    250 			break;
    251 		case 'd':
    252 			*flags |= MOD_D;
    253 			break;
    254 		case 'f':
    255 			*flags |= MOD_F;
    256 			break;
    257 		case 'i':
    258 			*flags |= MOD_I;
    259 			break;
    260 		case 'n':
    261 			*flags |= MOD_N;
    262 			break;
    263 		case 'r':
    264 			*flags |= MOD_R;
    265 			break;
    266 		default:
    267 			return -1;
    268 		}
    269 	}
    270 
    271 	return 0;
    272 }
    273 
    274 static void
    275 addkeydef(char *kdstr, int flags)
    276 {
    277 	struct keydef *kd;
    278 
    279 	kd = enmalloc(2, sizeof(*kd));
    280 
    281 	/* parse key definition kdstr with format
    282 	 * start_column[.start_char][flags][,end_column[.end_char][flags]]
    283 	 */
    284 	kd->start_column = 1;
    285 	kd->start_char = 1;
    286 	kd->end_column = 0; /* 0 means end of line */
    287 	kd->end_char = 0;   /* 0 means end of column */
    288 	kd->flags = flags;
    289 
    290 	if ((kd->start_column = strtol(kdstr, &kdstr, 10)) < 1)
    291 		enprintf(2, "invalid start column in key definition\n");
    292 
    293 	if (*kdstr == '.') {
    294 		if ((kd->start_char = strtol(kdstr + 1, &kdstr, 10)) < 1)
    295 			enprintf(2, "invalid start character in key "
    296 			         "definition\n");
    297 	}
    298 	if (parse_flags(&kdstr, &kd->flags, MOD_STARTB) < 0)
    299 		enprintf(2, "invalid start flags in key definition\n");
    300 
    301 	if (*kdstr == ',') {
    302 		if ((kd->end_column = strtol(kdstr + 1, &kdstr, 10)) < 0)
    303 			enprintf(2, "invalid end column in key definition\n");
    304 		if (*kdstr == '.') {
    305 			if ((kd->end_char = strtol(kdstr + 1, &kdstr, 10)) < 0)
    306 				enprintf(2, "invalid end character in key "
    307 				         "definition\n");
    308 		}
    309 		if (parse_flags(&kdstr, &kd->flags, MOD_ENDB) < 0)
    310 			enprintf(2, "invalid end flags in key definition\n");
    311 	}
    312 
    313 	if (*kdstr != '\0')
    314 		enprintf(2, "invalid key definition\n");
    315 
    316 	TAILQ_INSERT_TAIL(&kdhead, kd, entry);
    317 }
    318 
    319 static void
    320 usage(void)
    321 {
    322 	enprintf(2, "usage: %s [-Cbcdfimnru] [-o outfile] [-t delim] "
    323 	         "[-k def]... [file ...]\n", argv0);
    324 }
    325 
    326 int
    327 main(int argc, char *argv[])
    328 {
    329 	FILE *fp, *ofp = stdout;
    330 	struct linebuf linebuf = EMPTY_LINEBUF;
    331 	size_t i;
    332 	int global_flags = 0, ret = 0;
    333 	char *outfile = NULL;
    334 
    335 	ARGBEGIN {
    336 	case 'C':
    337 		Cflag = 1;
    338 		break;
    339 	case 'b':
    340 		global_flags |= MOD_STARTB | MOD_ENDB;
    341 		break;
    342 	case 'c':
    343 		cflag = 1;
    344 		break;
    345 	case 'd':
    346 		global_flags |= MOD_D;
    347 		break;
    348 	case 'f':
    349 		global_flags |= MOD_F;
    350 		break;
    351 	case 'i':
    352 		global_flags |= MOD_I;
    353 		break;
    354 	case 'k':
    355 		addkeydef(EARGF(usage()), global_flags);
    356 		break;
    357 	case 'm':
    358 		/* more or less for free, but for performance-reasons,
    359 		 * we should keep this flag in mind and maybe some later
    360 		 * day implement it properly so we don't run out of memory
    361 		 * while merging large sorted files.
    362 		 */
    363 		break;
    364 	case 'n':
    365 		global_flags |= MOD_N;
    366 		break;
    367 	case 'o':
    368 		outfile = EARGF(usage());
    369 		break;
    370 	case 'r':
    371 		global_flags |= MOD_R;
    372 		break;
    373 	case 't':
    374 		fieldsep = EARGF(usage());
    375 		if (!*fieldsep)
    376 			eprintf("empty delimiter\n");
    377 		fieldseplen = unescape(fieldsep);
    378 		break;
    379 	case 'u':
    380 		uflag = 1;
    381 		break;
    382 	default:
    383 		usage();
    384 	} ARGEND
    385 
    386 	/* -b shall only apply to custom key definitions */
    387 	if (TAILQ_EMPTY(&kdhead) && global_flags)
    388 		addkeydef("1", global_flags & ~(MOD_STARTB | MOD_ENDB));
    389 	addkeydef("1", global_flags & MOD_R);
    390 
    391 	if (!argc) {
    392 		if (Cflag || cflag) {
    393 			if (check(stdin, "<stdin>") && !ret)
    394 				ret = 1;
    395 		} else {
    396 			getlines(stdin, &linebuf);
    397 		}
    398 	} else for (; *argv; argc--, argv++) {
    399 		if (!strcmp(*argv, "-")) {
    400 			*argv = "<stdin>";
    401 			fp = stdin;
    402 		} else if (!(fp = fopen(*argv, "r"))) {
    403 			enprintf(2, "fopen %s:", *argv);
    404 			continue;
    405 		}
    406 		if (Cflag || cflag) {
    407 			if (check(fp, *argv) && !ret)
    408 				ret = 1;
    409 		} else {
    410 			getlines(fp, &linebuf);
    411 		}
    412 		if (fp != stdin && fshut(fp, *argv))
    413 			ret = 2;
    414 	}
    415 
    416 	if (!Cflag && !cflag) {
    417 		if (outfile && !(ofp = fopen(outfile, "w")))
    418 			eprintf("fopen %s:", outfile);
    419 
    420 		qsort(linebuf.lines, linebuf.nlines, sizeof(*linebuf.lines),
    421 				(int (*)(const void *, const void *))slinecmp);
    422 
    423 		for (i = 0; i < linebuf.nlines; i++) {
    424 			if (!uflag || i == 0 ||
    425 			    slinecmp(&linebuf.lines[i], &linebuf.lines[i - 1])) {
    426 				fwrite(linebuf.lines[i].data, 1,
    427 				       linebuf.lines[i].len, ofp);
    428 			}
    429 		}
    430 	}
    431 
    432 	if (fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>") |
    433 	    fshut(stderr, "<stderr>"))
    434 		ret = 2;
    435 
    436 	return ret;
    437 }