fmt.c (14590B)
1 /* 2 * This code contains changes by 3 * Gunnar Ritter, Freiburg i. Br., Germany, April 2003. All rights reserved. 4 * 5 * Conditions 1, 2, and 4 and the no-warranty notice below apply 6 * to these changes. 7 * 8 * 9 * Copyright (c) 1991 10 * The Regents of the University of California. All rights reserved. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 */ 40 /* 41 * Copyright (c) 1980 Regents of the University of California. 42 * All rights reserved. The Berkeley software License Agreement 43 * specifies the terms and conditions for redistribution. 44 */ 45 46 /* from 4.3BSD fmt.c 5.2 (Berkeley) 6/21/85 */ 47 #if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 48 #define USED __attribute__ ((used)) 49 #elif defined __GNUC__ 50 #define USED __attribute__ ((unused)) 51 #else 52 #define USED 53 #endif 54 static const char sccsid[] USED = "@(#)fmt.sl 1.9 (gritter) 5/29/05"; 55 56 #include <stdio.h> 57 #include <string.h> 58 #include <wchar.h> 59 #include <wctype.h> 60 #include <ctype.h> 61 #include <stdlib.h> 62 #include <libgen.h> 63 #include <locale.h> 64 65 #ifdef __GLIBC__ 66 #ifdef _IO_putc_unlocked 67 #undef putchar 68 #define putchar(c) _IO_putc_unlocked(c, stdout) 69 #endif 70 #endif 71 72 #include <iblok.h> 73 #include <asciitype.h> 74 75 /* 76 * fmt -- format the concatenation of input files or standard input 77 * onto standard output. Designed for use with Mail ~| 78 * 79 * Syntax: fmt [ -width ] [ name ... ] 80 * Author: Kurt Shoens (UCB) 12/7/78 81 */ 82 83 static int pfx; /* Current leading blank count */ 84 static long long lineno; /* Current input line */ 85 static int mark; /* we saw a head line */ 86 static long width = 72; /* Width that we will not exceed */ 87 static int cflag; /* crown margin mode */ 88 static int sflag; /* split only */ 89 static const char *progname; /* argv0 */ 90 static int mb_cur_max; 91 92 93 static const char *headnames[] = {"To", "Subject", "Cc", "Bcc", "bcc", 0}; 94 95 static void setwidth(const char *); 96 static void usage(void); 97 static void fmt(struct iblok *); 98 static void prefix(const wchar_t *); 99 static void split(const wchar_t *); 100 static void setout(void); 101 static void pack(const wchar_t *); 102 static void oflush(void); 103 static void tabulate(wchar_t *); 104 static void leadin(void); 105 static int chkhead(const char *, const wchar_t *); 106 static int fromline(const wchar_t *); 107 static size_t colwidth(const wchar_t *); 108 static size_t colwidthn(const wchar_t *, const wchar_t *); 109 static void growibuf(void); 110 static void growobuf(void); 111 112 /* 113 * Drive the whole formatter by managing input files. Also, 114 * cause initialization of the output stuff and flush it out 115 * at the end. 116 */ 117 118 int 119 main(int argc, char **argv) 120 { 121 register struct iblok *fi; 122 register int errs = 0, i; 123 124 progname = basename(argv[0]); 125 setlocale(LC_CTYPE, ""); 126 mb_cur_max = MB_CUR_MAX; 127 setout(); 128 lineno = 1; 129 for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) { 130 if (argv[i][1] == '-' && argv[i][2] == '\0') { 131 i++; 132 break; 133 } 134 nopt: switch (argv[i][1]) { 135 case '\0': 136 continue; 137 case 'c': 138 cflag = 1; 139 break; 140 case 's': 141 sflag = 1; 142 break; 143 case 'w': 144 if (argv[i][2]) { 145 setwidth(&argv[i][2]); 146 continue; 147 } else if (i < argc) { 148 setwidth(argv[++i]); 149 continue; 150 } else 151 setwidth(NULL); 152 break; 153 case '0': 154 case '1': case '2': case '3': 155 case '4': case '5': case '6': 156 case '7': case '8': case '9': 157 setwidth(&argv[i][1]); 158 continue; 159 default: 160 usage(); 161 exit(2); 162 } 163 argv[i]++; 164 goto nopt; 165 } 166 if (i < argc) { 167 while (i < argc) { 168 if ((fi = ib_open(argv[i], 0)) == NULL) { 169 perror(argv[i]); 170 errs |= 1; 171 } else 172 fmt(fi); 173 i++; 174 } 175 } else { 176 if ((fi = ib_alloc(0, 0)) == NULL) { 177 perror("stdin"); 178 errs |= 1; 179 } else 180 fmt(fi); 181 } 182 oflush(); 183 exit(errs); 184 } 185 186 static void 187 setwidth(const char *s) 188 { 189 char *x; 190 191 if (s == NULL || (width = strtol(s, &x, 10), 192 width <= 0 || 193 *x != '\0' || *s == '+' || *s == '-')) { 194 usage(); 195 fprintf(stderr, " Non-numeric character found " 196 "in width specification\n"); 197 exit(2); 198 } 199 } 200 201 static void 202 usage(void) 203 { 204 fprintf(stderr, 205 "usage: %s [-c] [-s] [-w width | -width] [inputfile...]\n", 206 progname); 207 } 208 209 static char * 210 getvalid(struct iblok *ip, wint_t *wp, int *mp) 211 { 212 char *cp; 213 214 do 215 cp = ib_getw(ip, wp, mp); 216 while (cp && *wp == WEOF); 217 return cp; 218 } 219 220 #define get(mp, fi, c, m, b) (mp = mb_cur_max > 1 ? getvalid(fi, &c, &m) : \ 221 (b = c = ib_get(fi), m = 1, c != (wint_t)EOF ? &b : 0)) 222 223 static int ibufsize; 224 static wchar_t *linebuf; 225 static wchar_t *canonb; 226 227 /* 228 * Read up characters from the passed input file, forming lines, 229 * doing ^H processing, expanding tabs, stripping trailing blanks, 230 * and sending each line down for analysis. 231 */ 232 static void 233 fmt(struct iblok *fi) 234 { 235 register int p, p2; 236 wint_t c; 237 register long col; 238 char *mp; 239 int m; 240 char b; 241 242 get(mp, fi, c, m, b); 243 while (c != (wint_t)EOF) { 244 245 /* 246 * Collect a line, doing ^H processing. 247 * Leave tabs for now. 248 */ 249 250 p = 0; 251 while (c != '\n' && c != (wint_t)EOF) { 252 if (c == '\b') { 253 get(mp, fi, c, m, b); 254 continue; 255 } 256 if (!(mb_cur_max > 1 ? iswprint(c) : isprint(c)) && 257 c != '\t') { 258 get(mp, fi, c, m, b); 259 continue; 260 } 261 if (p >= ibufsize) 262 growibuf(); 263 linebuf[p++] = c; 264 get(mp, fi, c, m, b); 265 } 266 if (p >= ibufsize) 267 growibuf(); 268 linebuf[p] = '\0'; 269 270 /* 271 * Toss anything remaining on the input line. 272 */ 273 274 while (c != '\n' && c != (wint_t)EOF) 275 get(mp, fi, c, m, b); 276 277 /* 278 * Expand tabs on the way to canonb. 279 */ 280 281 col = 0; 282 p = p2 = 0; 283 while (c = linebuf[p++]) { 284 if (c != '\t') { 285 if (mb_cur_max > 1) 286 col += wcwidth(c); 287 else 288 col++; 289 if (p2 >= ibufsize) 290 growibuf(); 291 canonb[p2++] = c; 292 continue; 293 } 294 do { 295 if (p2 >= ibufsize) 296 growibuf(); 297 canonb[p2++] = ' '; 298 col++; 299 } while ((col & 07) != 0); 300 } 301 302 /* 303 * Swipe trailing blanks from the line. 304 */ 305 306 for (p2--; p2 >= 0 && canonb[p2] == ' '; p2--) 307 ; 308 if (p2 >= ibufsize-1) 309 growibuf(); 310 canonb[++p2] = '\0'; 311 prefix(canonb); 312 if (c != (wint_t)EOF) 313 get(mp, fi, c, m, b); 314 } 315 } 316 317 /* 318 * Take a line devoid of tabs and other garbage and determine its 319 * blank prefix. If the indent changes, call for a linebreak. 320 * If the input line is blank, echo the blank line on the output. 321 * Finally, if the line minus the prefix is a mail header, try to keep 322 * it on a line by itself. 323 */ 324 325 static void 326 prefix(const wchar_t *line) 327 { 328 register const wchar_t *cp; 329 register const char **hp; 330 register long np; 331 register int h; 332 static int nlpp; /* number of lines on current paragraph */ 333 334 if (wcslen(line) == 0) { 335 nlpp = 0; 336 oflush(); 337 putchar('\n'); 338 mark = 0; 339 return; 340 } 341 for (cp = line; *cp == ' '; cp++) 342 ; 343 np = cp - line; 344 345 /* 346 * The following horrible expression attempts to avoid linebreaks 347 * when the indent changes due to a paragraph. 348 */ 349 350 if (!cflag && np != pfx && (np > pfx || abs(pfx-np) > 8)) 351 oflush(); 352 if (h = fromline(cp)) 353 oflush(), mark = 1; 354 else if (mark) { 355 for (hp = &headnames[0]; *hp != NULL; hp++) 356 if (chkhead(*hp, cp)) { 357 h = 1; 358 oflush(); 359 break; 360 } 361 } 362 if (!h && (h = (*cp == '.' || sflag))) 363 oflush(); 364 if (!cflag || nlpp < 2) 365 pfx = np; 366 split(cp); 367 if (h) 368 oflush(); 369 nlpp++; 370 lineno++; 371 } 372 373 /* 374 * Split up the passed line into output "words" which are 375 * maximal strings of non-blanks with the blank separation 376 * attached at the end. Pass these words along to the output 377 * line packer. 378 */ 379 380 static wchar_t *word; 381 382 static void 383 split(const wchar_t *line) 384 { 385 register const wchar_t *cp; 386 register wchar_t *cp2; 387 388 cp = line; 389 while (*cp) { 390 cp2 = word; 391 392 /* 393 * Collect a 'word,' allowing it to contain escaped 394 * white space. 395 */ 396 397 while (*cp && *cp != ' ') { 398 if (*cp == '\\' && iswspace(cp[1])) 399 *cp2++ = *cp++; 400 *cp2++ = *cp++; 401 } 402 403 /* 404 * Guarantee a space at end of line. 405 * Two spaces after end of sentence punctuation. 406 */ 407 408 if (*cp == '\0') { 409 *cp2++ = ' '; 410 if (strchr(".:!?", cp[-1])) 411 *cp2++ = ' '; 412 } 413 while (*cp == ' ') 414 *cp2++ = *cp++; 415 *cp2 = '\0'; 416 pack(word); 417 } 418 } 419 420 /* 421 * Output section. 422 * Build up line images from the words passed in. Prefix 423 * each line with correct number of blanks. The buffer "outbuf" 424 * contains the current partial line image, including prefixed blanks. 425 * "outp" points to the next available space therein. When outp is NOSTR, 426 * there ain't nothing in there yet. At the bottom of this whole mess, 427 * leading tabs are reinserted. 428 */ 429 430 static int obufsize; 431 static wchar_t *outbuf; /* Sandbagged output line image */ 432 static wchar_t *outp; /* Pointer in above */ 433 434 /* 435 * Initialize the output section. 436 */ 437 438 static void 439 setout(void) 440 { 441 outp = NULL; 442 } 443 444 /* 445 * Pack a word onto the output line. If this is the beginning of 446 * the line, push on the appropriately-sized string of blanks first. 447 * If the word won't fit on the current line, flush and begin a new 448 * line. If the word is too long to fit all by itself on a line, 449 * just give it its own and hope for the best. 450 */ 451 452 static void 453 pack(const wchar_t *word) 454 { 455 register const wchar_t *cp; 456 register long s, t; 457 458 if (outp == NULL) 459 leadin(); 460 t = colwidth(word); 461 s = colwidthn(outbuf, outp); 462 if (t+s <= width) { 463 464 /* 465 * In like flint! 466 */ 467 468 for (cp = word; *cp; cp++) { 469 if (outp >= &outbuf[obufsize]) 470 growobuf(); 471 *outp++ = *cp; 472 } 473 return; 474 } 475 if (s > pfx) { 476 oflush(); 477 leadin(); 478 } 479 for (cp = word; *cp; cp++) { 480 if (outp >= &outbuf[obufsize]) 481 growobuf(); 482 *outp++ = *cp; 483 } 484 } 485 486 /* 487 * If there is anything on the current output line, send it on 488 * its way. Set outp to NULL to indicate the absence of the current 489 * line prefix. 490 */ 491 492 static void 493 oflush(void) 494 { 495 if (outp == NULL) 496 return; 497 if (outp >= &outbuf[obufsize]) 498 growobuf(); 499 *outp = '\0'; 500 tabulate(outbuf); 501 outp = NULL; 502 } 503 504 /* 505 * Take the passed line buffer, insert leading tabs where possible, and 506 * output on standard output (finally). 507 */ 508 509 static void 510 tabulate(wchar_t *line) 511 { 512 register wchar_t *cp; 513 register int b, t; 514 515 /* 516 * Toss trailing blanks in the output line. 517 */ 518 519 cp = line + wcslen(line) - 1; 520 while (cp >= line && *cp == ' ') 521 cp--; 522 *++cp = '\0'; 523 524 /* 525 * Count the leading blank space and tabulate. 526 */ 527 528 for (cp = line; *cp == ' '; cp++) 529 ; 530 b = cp-line; 531 t = b >> 3; 532 b &= 07; 533 if (t > 0) 534 do 535 putchar('\t'); 536 while (--t); 537 if (b > 0) 538 do 539 putchar(' '); 540 while (--b); 541 while (*cp) { 542 if (mb_cur_max > 1 && *cp & ~(wchar_t)0177) { 543 char mb[MB_LEN_MAX]; 544 int i, n; 545 n = wctomb(mb, *cp); 546 for (i = 0; i < n; i++) 547 putchar(mb[i]); 548 } else 549 putchar(*cp); 550 cp++; 551 } 552 putchar('\n'); 553 } 554 555 /* 556 * Initialize the output line with the appropriate number of 557 * leading blanks. 558 */ 559 560 static void 561 leadin(void) 562 { 563 register long b; 564 565 if (outbuf == 0) 566 growobuf(); 567 for (b = 0; b < pfx; b++) { 568 if (b >= obufsize) 569 growobuf(); 570 outbuf[b] = ' '; 571 } 572 outp = &outbuf[b]; 573 } 574 575 /* 576 * Is s2 the mail header field name s1? 577 */ 578 579 static int 580 chkhead(register const char *s1, register const wchar_t *s2) 581 { 582 583 while (*s1 && *s1++ == *s2++); 584 if (*s1 != '\0') 585 return 0; 586 return 1; 587 } 588 589 /* 590 * Sloppy recognition of Unix From_ lines (not according to the POSIX.2 591 * mailx specification, but oriented on actual Unix tradition). We match 592 * the ERE 593 * ^From .* [A-Z][a-z][a-z] [A-Z][a-z][a-z] \ 594 * [0-9 ]?[0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] 595 */ 596 597 static int 598 fromline(const wchar_t *cp) 599 { 600 if (cp[0] != 'F' || cp[1] != 'r' || cp[2] != 'o' || cp[3] != 'm' || 601 cp[4] != ' ') 602 return 0; 603 cp += 5; 604 while (*cp && *cp != ' ') 605 cp++; 606 if (*cp++ != ' ') 607 return 0; 608 if (!upperchar(cp[0]) || !lowerchar(cp[1]) || !lowerchar(cp[2]) || 609 cp[3] != ' ' || 610 !upperchar(cp[4]) || !lowerchar(cp[5]) || !lowerchar(cp[6]) || 611 cp[7] != ' ') 612 return 0; 613 cp += 8; 614 if (digitchar(*cp) || *cp == ' ') 615 cp++; 616 if (!digitchar(cp[0]) || cp[1] != ' '|| 617 !digitchar(cp[2]) || !digitchar(cp[3]) || 618 cp[4] != ':' || 619 !digitchar(cp[5]) || !digitchar(cp[6]) || 620 cp[7] != ':' || 621 !digitchar(cp[8]) || !digitchar(cp[9])) 622 return 0; 623 return 1; 624 } 625 626 static size_t 627 colwidth(const wchar_t *cp) 628 { 629 size_t n = 0; 630 631 if (mb_cur_max > 1) 632 while (*cp) 633 n += wcwidth(*cp++); 634 else 635 n = wcslen(cp); 636 return n; 637 } 638 639 static size_t 640 colwidthn(const wchar_t *bot, const wchar_t *top) 641 { 642 size_t n = 0; 643 644 if (mb_cur_max > 1) 645 while (bot < top) 646 n += wcwidth(*bot++); 647 else 648 n = top - bot; 649 return n; 650 } 651 652 static void 653 growibuf(void) 654 { 655 ibufsize += 128; 656 if ((word = realloc(word, ibufsize * sizeof *word)) == 0 || 657 (linebuf = realloc(linebuf, ibufsize * sizeof *linebuf)) == 0 || 658 (canonb = realloc(canonb, ibufsize * sizeof *canonb)) == 0) { 659 fprintf(stderr, "%s: input line too long\n", progname); 660 exit(1); 661 } 662 } 663 664 static void 665 growobuf(void) 666 { 667 int diff = 0; 668 669 if (outp != NULL) 670 diff = outp - outbuf; 671 obufsize += 128; 672 if ((outbuf = realloc(outbuf, obufsize * sizeof *outbuf)) == 0) { 673 fprintf(stderr, "%s: output line too long\n", progname); 674 exit(1); 675 } 676 if (outp != NULL) 677 outp = &outbuf[diff]; 678 }