hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

fmt.c (14590B)


      1 /*
      2  * This code contains changes by
      3  * Gunnar Ritter, Freiburg i. Br., Germany, April 2003. All rights reserved.
      4  *
      5  * Conditions 1, 2, and 4 and the no-warranty notice below apply
      6  * to these changes.
      7  *
      8  *
      9  * Copyright (c) 1991
     10  * 	The Regents of the University of California.  All rights reserved.
     11  *
     12  * Redistribution and use in source and binary forms, with or without
     13  * modification, are permitted provided that the following conditions
     14  * are met:
     15  * 1. Redistributions of source code must retain the above copyright
     16  *    notice, this list of conditions and the following disclaimer.
     17  * 2. Redistributions in binary form must reproduce the above copyright
     18  *    notice, this list of conditions and the following disclaimer in the
     19  *    documentation and/or other materials provided with the distribution.
     20  * 3. All advertising materials mentioning features or use of this software
     21  *    must display the following acknowledgement:
     22  * 	This product includes software developed by the University of
     23  * 	California, Berkeley and its contributors.
     24  * 4. Neither the name of the University nor the names of its contributors
     25  *    may be used to endorse or promote products derived from this software
     26  *    without specific prior written permission.
     27  *
     28  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     29  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     31  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     32  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     33  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     34  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     35  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     36  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     37  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     38  * SUCH DAMAGE.
     39  */
     40 /*
     41  * Copyright (c) 1980 Regents of the University of California.
     42  * All rights reserved.  The Berkeley software License Agreement
     43  * specifies the terms and conditions for redistribution.
     44  */
     45 
     46 /*	from 4.3BSD fmt.c	5.2 (Berkeley) 6/21/85	*/
     47 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4
     48 #define	USED	__attribute__ ((used))
     49 #elif defined __GNUC__
     50 #define	USED	__attribute__ ((unused))
     51 #else
     52 #define	USED
     53 #endif
     54 static const char sccsid[] USED = "@(#)fmt.sl	1.9 (gritter) 5/29/05";
     55 
     56 #include <stdio.h>
     57 #include <string.h>
     58 #include <wchar.h>
     59 #include <wctype.h>
     60 #include <ctype.h>
     61 #include <stdlib.h>
     62 #include <libgen.h>
     63 #include <locale.h>
     64 
     65 #ifdef	__GLIBC__
     66 #ifdef	_IO_putc_unlocked
     67 #undef	putchar
     68 #define	putchar(c)	_IO_putc_unlocked(c, stdout)
     69 #endif
     70 #endif
     71 
     72 #include <iblok.h>
     73 #include <asciitype.h>
     74 
     75 /*
     76  * fmt -- format the concatenation of input files or standard input
     77  * onto standard output.  Designed for use with Mail ~|
     78  *
     79  * Syntax: fmt [ -width ] [ name ... ]
     80  * Author: Kurt Shoens (UCB) 12/7/78
     81  */
     82 
     /* Formatter state shared by the input and output halves. */
     static int pfx;                 /* leading blank count in effect */
     static long long lineno;        /* input line counter */
     static int mark;                /* set once a From_ line was seen */
     static long width = 72;         /* output width that is not exceeded */
     static int cflag;               /* -c: crown margin mode */
     static int sflag;               /* -s: split only */
     static const char *progname;    /* basename of argv[0] */
     static int mb_cur_max;          /* MB_CUR_MAX sampled at startup */

     /* Header field names looked for once a From_ line has set mark. */
     static const char *headnames[] = {
         "To", "Subject", "Cc", "Bcc", "bcc", NULL
     };

     static void setwidth(const char *s);
     static void usage(void);
     static void fmt(struct iblok *fi);
     static void prefix(const wchar_t *line);
     static void split(const wchar_t *line);
     static void setout(void);
     static void pack(const wchar_t *word);
     static void oflush(void);
     static void tabulate(wchar_t *line);
     static void leadin(void);
     static int chkhead(const char *s1, const wchar_t *s2);
     static int fromline(const wchar_t *cp);
     static size_t colwidth(const wchar_t *cp);
     static size_t colwidthn(const wchar_t *bot, const wchar_t *top);
     static void growibuf(void);
     static void growobuf(void);
    111 
    112 /*
    113  * Drive the whole formatter by managing input files.  Also,
    114  * cause initialization of the output stuff and flush it out
    115  * at the end.
    116  */
    117 
    118 int
    119 main(int argc, char **argv)
    120 {
    121 	register struct iblok *fi;
    122 	register int errs = 0, i;
    123 
    124 	progname = basename(argv[0]);
    125 	setlocale(LC_CTYPE, "");
    126 	mb_cur_max = MB_CUR_MAX;
    127 	setout();
    128 	lineno = 1;
    129 	for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) {
    130 		if (argv[i][1] == '-' && argv[i][2] == '\0') {
    131 			i++;
    132 			break;
    133 		}
    134 	nopt:	switch (argv[i][1]) {
    135 		case '\0':
    136 			continue;
    137 		case 'c':
    138 			cflag = 1;
    139 			break;
    140 		case 's':
    141 			sflag = 1;
    142 			break;
    143 		case 'w':
    144 			if (argv[i][2]) {
    145 				setwidth(&argv[i][2]);
    146 				continue;
    147 			} else if (i < argc) {
    148 				setwidth(argv[++i]);
    149 				continue;
    150 			} else
    151 				setwidth(NULL);
    152 			break;
    153 		case '0':
    154 		case '1': case '2': case '3':
    155 		case '4': case '5': case '6':
    156 		case '7': case '8': case '9':
    157 			setwidth(&argv[i][1]);
    158 			continue;
    159 		default:
    160 			usage();
    161 			exit(2);
    162 		}
    163 		argv[i]++;
    164 		goto nopt;
    165 	}
    166 	if (i < argc) {
    167 		while (i < argc) {
    168 			if ((fi = ib_open(argv[i], 0)) == NULL) {
    169 				perror(argv[i]);
    170 				errs |= 1;
    171 			} else
    172 				fmt(fi);
    173 			i++;
    174 		}
    175 	} else {
    176 		if ((fi = ib_alloc(0, 0)) == NULL) {
    177 			perror("stdin");
    178 			errs |= 1;
    179 		} else
    180 			fmt(fi);
    181 	}
    182 	oflush();
    183 	exit(errs);
    184 }
    185 
    186 static void
    187 setwidth(const char *s)
    188 {
    189 	char	*x;
    190 
    191 	if (s == NULL || (width = strtol(s, &x, 10),
    192 				width <= 0 ||
    193 				*x != '\0' || *s == '+' || *s == '-')) {
    194 		usage();
    195 		fprintf(stderr, "       Non-numeric character found "
    196 				"in width specification\n");
    197 		exit(2);
    198 	}
    199 }
    200 
    201 static void
    202 usage(void)
    203 {
    204 	fprintf(stderr,
    205 		"usage: %s [-c] [-s] [-w width | -width] [inputfile...]\n",
    206 		progname);
    207 }
    208 
    /*
     * Read characters from ip with ib_getw() until one arrives whose
     * wide value is not WEOF, or until ib_getw() returns a null pointer.
     * The wide character is stored through wp and mp is handed through
     * to ib_getw() unchanged.  Returns the pointer from the last
     * ib_getw() call.
     */
    static char *
    getvalid(struct iblok *ip, wint_t *wp, int *mp)
    {
        char *bytes;

        for (;;) {
            bytes = ib_getw(ip, wp, mp);
            if (bytes == NULL || *wp != WEOF)
                return bytes;
        }
    }
    219 
    /*
     * get(mp, fi, c, m, b): read the next input character from fi into c.
     *
     * When mb_cur_max > 1 the character comes from getvalid(fi, &c, &m)
     * and mp receives its return value.  Otherwise ib_get(fi) is stored
     * in both c and b, m is set to 1, and mp becomes &b, or 0 when c
     * equals (wint_t)EOF.
     *
     * mp, c, m and b are assigned to and are not parenthesized in the
     * expansion, so each must be a plain variable name.
     */
    #define	get(mp, fi, c, m, b)	(mp = mb_cur_max > 1 ? getvalid(fi, &c, &m) : \
    		(b = c = ib_get(fi), m = 1, c != (wint_t)EOF ? &b : 0))
    222 
    223 static int	ibufsize;
    224 static wchar_t	*linebuf;
    225 static wchar_t	*canonb;
    226 
    227 /*
    228  * Read up characters from the passed input file, forming lines,
    229  * doing ^H processing, expanding tabs, stripping trailing blanks,
    230  * and sending each line down for analysis.
    231  */
    232 static void
    233 fmt(struct iblok *fi)
    234 {
    235 	register int p, p2;
    236 	wint_t c;
    237 	register long col;
    238 	char	*mp;
    239 	int m;
    240 	char	b;
    241 
    242 	get(mp, fi, c, m, b);
    243 	while (c != (wint_t)EOF) {
    244 		
    245 		/*
    246 		 * Collect a line, doing ^H processing.
    247 		 * Leave tabs for now.
    248 		 */
    249 
    250 		p = 0;
    251 		while (c != '\n' && c != (wint_t)EOF) {
    252 			if (c == '\b') {
    253 				get(mp, fi, c, m, b);
    254 				continue;
    255 			}
    256 			if (!(mb_cur_max > 1 ? iswprint(c) : isprint(c)) &&
    257 					c != '\t') {
    258 				get(mp, fi, c, m, b);
    259 				continue;
    260 			}
    261 			if (p >= ibufsize)
    262 				growibuf();
    263 			linebuf[p++] = c;
    264 			get(mp, fi, c, m, b);
    265 		}
    266 		if (p >= ibufsize)
    267 			growibuf();
    268 		linebuf[p] = '\0';
    269 
    270 		/*
    271 		 * Toss anything remaining on the input line.
    272 		 */
    273 
    274 		while (c != '\n' && c != (wint_t)EOF)
    275 			get(mp, fi, c, m, b);
    276 		
    277 		/*
    278 		 * Expand tabs on the way to canonb.
    279 		 */
    280 
    281 		col = 0;
    282 		p = p2 = 0;
    283 		while (c = linebuf[p++]) {
    284 			if (c != '\t') {
    285 				if (mb_cur_max > 1)
    286 					col += wcwidth(c);
    287 				else
    288 					col++;
    289 				if (p2 >= ibufsize)
    290 					growibuf();
    291 				canonb[p2++] = c;
    292 				continue;
    293 			}
    294 			do {
    295 				if (p2 >= ibufsize)
    296 					growibuf();
    297 				canonb[p2++] = ' ';
    298 				col++;
    299 			} while ((col & 07) != 0);
    300 		}
    301 
    302 		/*
    303 		 * Swipe trailing blanks from the line.
    304 		 */
    305 
    306 		for (p2--; p2 >= 0 && canonb[p2] == ' '; p2--)
    307 			;
    308 		if (p2 >= ibufsize-1)
    309 			growibuf();
    310 		canonb[++p2] = '\0';
    311 		prefix(canonb);
    312 		if (c != (wint_t)EOF)
    313 			get(mp, fi, c, m, b);
    314 	}
    315 }
    316 
    317 /*
    318  * Take a line devoid of tabs and other garbage and determine its
    319  * blank prefix.  If the indent changes, call for a linebreak.
    320  * If the input line is blank, echo the blank line on the output.
    321  * Finally, if the line minus the prefix is a mail header, try to keep
    322  * it on a line by itself.
    323  */
    324 
    325 static void
    326 prefix(const wchar_t *line)
    327 {
    328 	register const wchar_t *cp;
    329 	register const char **hp;
    330 	register long np;
    331 	register int h;
    332 	static int	nlpp;	/* number of lines on current paragraph */
    333 
    334 	if (wcslen(line) == 0) {
    335 		nlpp = 0;
    336 		oflush();
    337 		putchar('\n');
    338 		mark = 0;
    339 		return;
    340 	}
    341 	for (cp = line; *cp == ' '; cp++)
    342 		;
    343 	np = cp - line;
    344 
    345 	/*
    346 	 * The following horrible expression attempts to avoid linebreaks
    347 	 * when the indent changes due to a paragraph.
    348 	 */
    349 
    350 	if (!cflag && np != pfx && (np > pfx || abs(pfx-np) > 8))
    351 		oflush();
    352 	if (h = fromline(cp))
    353 		oflush(), mark = 1;
    354 	else if (mark) {
    355 		for (hp = &headnames[0]; *hp != NULL; hp++)
    356 			if (chkhead(*hp, cp)) {
    357 				h = 1;
    358 				oflush();
    359 				break;
    360 			}
    361 	}
    362 	if (!h && (h = (*cp == '.' || sflag)))
    363 		oflush();
    364 	if (!cflag || nlpp < 2)
    365 		pfx = np;
    366 	split(cp);
    367 	if (h)
    368 		oflush();
    369 	nlpp++;
    370 	lineno++;
    371 }
    372 
    373 /*
    374  * Split up the passed line into output "words" which are
    375  * maximal strings of non-blanks with the blank separation
    376  * attached at the end.  Pass these words along to the output
    377  * line packer.
    378  */
    379 
    380 static wchar_t	*word;
    381 
    382 static void
    383 split(const wchar_t *line)
    384 {
    385 	register const wchar_t *cp;
    386 	register wchar_t *cp2;
    387 
    388 	cp = line;
    389 	while (*cp) {
    390 		cp2 = word;
    391 
    392 		/*
    393 		 * Collect a 'word,' allowing it to contain escaped
    394 		 * white space.
    395 		 */
    396 
    397 		while (*cp && *cp != ' ') {
    398 			if (*cp == '\\' && iswspace(cp[1]))
    399 				*cp2++ = *cp++;
    400 			*cp2++ = *cp++;
    401 		}
    402 
    403 		/*
    404 		 * Guarantee a space at end of line.
    405 		 * Two spaces after end of sentence punctuation.
    406 		 */
    407 
    408 		if (*cp == '\0') {
    409 			*cp2++ = ' ';
    410 			if (strchr(".:!?", cp[-1]))
    411 				*cp2++ = ' ';
    412 		}
    413 		while (*cp == ' ')
    414 			*cp2++ = *cp++;
    415 		*cp2 = '\0';
    416 		pack(word);
    417 	}
    418 }
    419 
    420 /*
    421  * Output section.
    422  * Build up line images from the words passed in.  Prefix
    423  * each line with correct number of blanks.  The buffer "outbuf"
    424  * contains the current partial line image, including prefixed blanks.
    425  * "outp" points to the next available space therein.  When outp is NULL,
    426  * there ain't nothing in there yet.  At the bottom of this whole mess,
    427  * leading tabs are reinserted.
    428  */
    429 
    static int obufsize;        /* elements allocated for outbuf */
    static wchar_t *outbuf;     /* image of the output line being built */
    static wchar_t *outp;       /* next free slot in outbuf, or NULL */

    /*
     * Put the output section into its initial state: no line is being
     * built.
     */

    static void
    setout(void)
    {
        outp = NULL;
    }
    443 
    444 /*
    445  * Pack a word onto the output line.  If this is the beginning of
    446  * the line, push on the appropriately-sized string of blanks first.
    447  * If the word won't fit on the current line, flush and begin a new
    448  * line.  If the word is too long to fit all by itself on a line,
    449  * just give it its own and hope for the best.
    450  */
    451 
    452 static void
    453 pack(const wchar_t *word)
    454 {
    455 	register const wchar_t *cp;
    456 	register long s, t;
    457 
    458 	if (outp == NULL)
    459 		leadin();
    460 	t = colwidth(word);
    461 	s = colwidthn(outbuf, outp);
    462 	if (t+s <= width) {
    463 		
    464 		/*
    465 		 * In like flint!
    466 		 */
    467 
    468 		for (cp = word; *cp; cp++) {
    469 			if (outp >= &outbuf[obufsize])
    470 				growobuf();
    471 			*outp++ = *cp;
    472 		}
    473 		return;
    474 	}
    475 	if (s > pfx) {
    476 		oflush();
    477 		leadin();
    478 	}
    479 	for (cp = word; *cp; cp++) {
    480 		if (outp >= &outbuf[obufsize])
    481 			growobuf();
    482 		*outp++ = *cp;
    483 	}
    484 }
    485 
    486 /*
    487  * If there is anything on the current output line, send it on
    488  * its way.  Set outp to NULL to indicate the absence of the current
    489  * line prefix.
    490  */
    491 
    492 static void
    493 oflush(void)
    494 {
    495 	if (outp == NULL)
    496 		return;
    497 	if (outp >= &outbuf[obufsize])
    498 		growobuf();
    499 	*outp = '\0';
    500 	tabulate(outbuf);
    501 	outp = NULL;
    502 }
    503 
    504 /*
    505  * Take the passed line buffer, insert leading tabs where possible, and
    506  * output on standard output (finally).
    507  */
    508 
    509 static void
    510 tabulate(wchar_t *line)
    511 {
    512 	register wchar_t *cp;
    513 	register int b, t;
    514 
    515 	/*
    516 	 * Toss trailing blanks in the output line.
    517 	 */
    518 
    519 	cp = line + wcslen(line) - 1;
    520 	while (cp >= line && *cp == ' ')
    521 		cp--;
    522 	*++cp = '\0';
    523 	
    524 	/*
    525 	 * Count the leading blank space and tabulate.
    526 	 */
    527 
    528 	for (cp = line; *cp == ' '; cp++)
    529 		;
    530 	b = cp-line;
    531 	t = b >> 3;
    532 	b &= 07;
    533 	if (t > 0)
    534 		do
    535 			putchar('\t');
    536 		while (--t);
    537 	if (b > 0)
    538 		do
    539 			putchar(' ');
    540 		while (--b);
    541 	while (*cp) {
    542 		if (mb_cur_max > 1 && *cp & ~(wchar_t)0177) {
    543 			char	mb[MB_LEN_MAX];
    544 			int	i, n;
    545 			n = wctomb(mb, *cp);
    546 			for (i = 0; i < n; i++)
    547 				putchar(mb[i]);
    548 		} else
    549 			putchar(*cp);
    550 		cp++;
    551 	}
    552 	putchar('\n');
    553 }
    554 
    555 /*
    556  * Initialize the output line with the appropriate number of
    557  * leading blanks.
    558  */
    559 
    560 static void
    561 leadin(void)
    562 {
    563 	register long b;
    564 
    565 	if (outbuf == 0)
    566 		growobuf();
    567 	for (b = 0; b < pfx; b++) {
    568 		if (b >= obufsize)
    569 			growobuf();
    570 		outbuf[b] = ' ';
    571 	}
    572 	outp = &outbuf[b];
    573 }
    574 
    /*
     * Does the wide string s2 begin with the mail header field name s1?
     *
     * Returns 1 if every character of s1 equals the character at the
     * same position in s2, and 0 otherwise, including when s2 ends
     * before s1 does.  Only the prefix is compared; what follows the
     * name in s2 is not examined.
     */

    static int
    chkhead(register const char *s1, register const wchar_t *s2)
    {
        /* advance only after a successful comparison */
        for (; *s1 != '\0'; s1++, s2++)
            if (*s2 != (wchar_t)*s1)
                return 0;
        return 1;
    }
    588 
    /*
     * Sloppy recognition of Unix From_ lines (not according to the POSIX.2
     * mailx specification, but oriented on actual Unix tradition).  The
     * line has to start with "From ", followed by a blank-free token and
     * a blank, followed by
     *   [A-Z][a-z][a-z] [A-Z][a-z][a-z] <day> [0-9][0-9]:[0-9][0-9]:[0-9][0-9]
     * where <day> is a digit, optionally preceded by a digit or a blank
     * that is always consumed when present.  Returns 1 on a match, else 0.
     */

    static int
    fromline(const wchar_t *cp)
    {
        static const char lead[] = "From ";
        int i;

        for (i = 0; lead[i] != '\0'; i++)
            if (cp[i] != lead[i])
                return 0;
        cp += 5;

        /* skip the token after "From " up to the next blank */
        while (*cp != '\0' && *cp != ' ')
            cp++;
        if (*cp++ != ' ')
            return 0;

        /* two capitalized three-letter names, each followed by a blank */
        if (!(upperchar(cp[0]) && lowerchar(cp[1]) && lowerchar(cp[2]) &&
                cp[3] == ' ' &&
                upperchar(cp[4]) && lowerchar(cp[5]) && lowerchar(cp[6]) &&
                cp[7] == ' '))
            return 0;
        cp += 8;

        /* first position of the day: a digit or a blank is consumed */
        if (digitchar(*cp) || *cp == ' ')
            cp++;

        return digitchar(cp[0]) && cp[1] == ' ' &&
            digitchar(cp[2]) && digitchar(cp[3]) && cp[4] == ':' &&
            digitchar(cp[5]) && digitchar(cp[6]) && cp[7] == ':' &&
            digitchar(cp[8]) && digitchar(cp[9]);
    }
    625 
    626 static size_t
    627 colwidth(const wchar_t *cp)
    628 {
    629 	size_t	n = 0;
    630 
    631 	if (mb_cur_max > 1)
    632 		while (*cp)
    633 			n += wcwidth(*cp++);
    634 	else
    635 		n = wcslen(cp);
    636 	return n;
    637 }
    638 
    639 static size_t
    640 colwidthn(const wchar_t *bot, const wchar_t *top)
    641 {
    642 	size_t	n = 0;
    643 
    644 	if (mb_cur_max > 1)
    645 		while (bot < top)
    646 			n += wcwidth(*bot++);
    647 	else
    648 		n = top - bot;
    649 	return n;
    650 }
    651 
    652 static void
    653 growibuf(void)
    654 {
    655 	ibufsize += 128;
    656 	if ((word = realloc(word, ibufsize * sizeof *word)) == 0 ||
    657 	    (linebuf = realloc(linebuf, ibufsize * sizeof *linebuf)) == 0 ||
    658 	    (canonb = realloc(canonb, ibufsize * sizeof *canonb)) == 0) {
    659 		fprintf(stderr, "%s: input line too long\n", progname);
    660 		exit(1);
    661 	}
    662 }
    663 
    664 static void
    665 growobuf(void)
    666 {
    667 	int	diff = 0;
    668 
    669 	if (outp != NULL)
    670 		diff = outp - outbuf;
    671 	obufsize += 128;
    672 	if ((outbuf = realloc(outbuf, obufsize * sizeof *outbuf)) == 0) {
    673 		fprintf(stderr, "%s: output line too long\n", progname);
    674 		exit(1);
    675 	}
    676 	if (outp != NULL)
    677 		outp = &outbuf[diff];
    678 }