sbase

suckless unix tools
git clone git://git.2f30.org/sbase.git
Log | Files | Refs | README | LICENSE

sed.c (41933B)


      1 /* FIXME: summary
      2  * decide whether we enforce valid UTF-8, right now it's enforced in certain
      3  *     parts of the script, but not the input...
      4  * nul bytes cause explosions due to use of libc string functions. thoughts?
      5  * lack of newline at end of file, currently we add one. what should we do?
      6  * allow "\\t" for "\t" etc. in regex? in replacement text?
      7  * POSIX says don't flush on N when out of input, but GNU and busybox do.
      8  */
      9 
     10 #include <ctype.h>
     11 #include <errno.h>
     12 #include <regex.h>
     13 #include <stdlib.h>
     14 #include <string.h>
     15 
     16 #include "utf.h"
     17 #include "util.h"
     18 
     19 /* Types */
     20 
/* Growable array of untyped pointers.
 * used as queue for writes and stack for {,:,b,t
 */
typedef struct {
	void **data; /* element storage; grown by push() via resize() */
	size_t size; /* number of elements currently stored */
	size_t cap;  /* number of elements data has room for */
} Vec;
     27 
/* used for arbitrary growth, str is a C string
 * FIXME: does it make sense to keep track of length? or just rely on libc
 *        string functions? If we want to support nul bytes everything changes
 */
typedef struct {
	char  *str; /* nul terminated contents, NULL while cap is 0 */
	size_t cap; /* bytes allocated for str; 0 is treated as "not yet allocated" by stracat/strnacat */
} String;
     36 
typedef struct Cmd Cmd;
/* Per-function entry of the fns[] dispatch table, indexed by command character */
typedef struct {
	void  (*fn)(Cmd *);             /* the sed function itself; NULL marks an unused table slot */
	char *(*getarg)(Cmd *, char *); /* parses the argument while compiling, returns pointer past it; NULL if none */
	void  (*freearg)(Cmd *);        /* cleanup counterpart of getarg; NULL if there is nothing to free */
	unsigned char naddr;            /* maximum number of addresses compile() accepts for this function */
} Fninfo;
     44 
/* A single address; which member of u is meaningful depends on type */
typedef struct {
	union {
		size_t   lineno; /* valid when type == LINE  */
		regex_t *re;     /* valid when type == REGEX */
	} u;
	enum {
		IGNORE, /* empty address, ignore        */
		EVERY , /* every line                   */
		LINE  , /* line number                  */
		LAST  , /* last line ($)                */
		REGEX , /* use included regex           */
		LASTRE, /* use most recently used regex */
	} type;
} Addr;
     59 
/* Address range of a command, filled in by make_range().
 * DISCUSS: naddr is not strictly necessary, but very helpful
 * naddr == 0 iff beg.type == EVERY  && end.type == IGNORE
 * naddr == 1 iff beg.type != IGNORE && end.type == IGNORE
 * naddr == 2 iff beg.type != IGNORE && end.type != IGNORE
 */
typedef struct {
	Addr          beg;   /* first address */
	Addr          end;   /* second address, IGNORE when no ',' followed the first */
	unsigned char naddr; /* number of addresses given, see table above */
} Range;
     70 
/* Argument of the s command, built by get_s_arg() */
typedef struct {
	regex_t      *re; /* if NULL use last regex */
	String        repl; /* replacement text */
	FILE         *file; /* target of the w flag, NULL if the flag was not given */
	size_t        occurrence; /* 0 for all (g flag) */
	Rune          delim; /* delimiter rune, kept so a continued replacement can resume parsing */
	unsigned int  p:1; /* p flag given */
} Sarg;
     79 
/* Argument of the y command: two Rune strings (see fns[]: runes in set1 are
 * replaced with runes in set2) */
typedef struct {
	Rune *set1;
	Rune *set2;
} Yarg;
     84 
/* Argument shared by the a, c, i and r commands */
typedef struct {
	String str; /* a,c,i text. r file path */
	void  (*print)(char *, FILE *); /* check_puts for a, write_file for r, unused for c,i */
} ACIRarg;
     89 
/* One compiled script command. prog is an array of these and pc walks it. */
struct Cmd {
	Range   range;  /* addresses the command applies to */
	Fninfo *fninfo; /* entry of fns[] for the command character */
	union {
		Cmd      *jump;   /* used for   b,t when running  */
		char     *label;  /* used for :,b,t when building */
		ptrdiff_t offset; /* used for { (pointers break during realloc) */
		FILE     *file;   /* used for w */

		/* FIXME: Should the following be in the union? or pointers and malloc? */
		Sarg      s;
		Yarg      y;
		ACIRarg   acir;
	} u; /* I find your lack of anonymous unions disturbing */
	unsigned int in_match:1; /* cleared by compile(); NOTE(review): presumably tracks being inside a 2 address range */
	unsigned int negate  :1; /* set by compile() when the function is preceded by '!' */
};
    107 
/* Files for w command (and s' w flag) */
typedef struct {
	char *path; /* file name as written in the script */
	FILE *file; /* stream associated with path */
} Wfile;
    113 
    114 /*
    115  * Function Declarations
    116  */
    117 
    118 /* Dynamically allocated arrays and strings */
static void resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next);
static void *pop(Vec *v);
static void push(Vec *v, void *p);
static void stracat(String *dst, char *src);
static void strnacat(String *dst, char *src, size_t n);
static void stracpy(String *dst, char *src);

/* Cleanup and errors
 * (leprintf() has no prototype here, it is defined before its first use) */
static void usage(void);

/* Parsing functions and related utilities */
static void compile(char *s, int isfile);
static int read_line(FILE *f, String *s);
static char *make_range(Range *range, char *s);
static char *make_addr(Addr *addr, char *s);
static char *find_delim(char *s, Rune delim, int do_brackets);
static char *chompr(char *s, Rune rune);
static char *chomp(char *s);
static Rune *strtorunes(char *s, size_t nrunes);
static long stol(char *s, char **endp);
static size_t escapes(char *beg, char *end, Rune delim, int n_newline);
static size_t echarntorune(Rune *r, char *s, size_t n);
static void insert_labels(void);

/* Get and Free arg and related utilities
 * get functions take the command being built and the text after the function
 * character, and return a pointer past the consumed argument */
static char *get_aci_arg(Cmd *c, char *s);
static void aci_append(Cmd *c, char *s);
static void free_acir_arg(Cmd *c);
static char *get_bt_arg(Cmd *c, char *s);
static char *get_r_arg(Cmd *c, char *s);
static char *get_s_arg(Cmd *c, char *s);
static void free_s_arg(Cmd *c);
static char *get_w_arg(Cmd *c, char *s);
static char *get_y_arg(Cmd *c, char *s);
static void free_y_arg(Cmd *c);
static char *get_colon_arg(Cmd *c, char *s);
static char *get_lbrace_arg(Cmd *c, char *s);
static char *get_rbrace_arg(Cmd *c, char *s);
static char *semicolon_arg(char *s);

/* Running */
static void run(void);
static int in_range(Cmd *c);
static int match_addr(Addr *a);
static int next_file(void);
static int is_eof(FILE *f);
static void do_writes(void);
static void write_file(char *path, FILE *out);
static void check_puts(char *s, FILE *f);
static void update_ranges(Cmd *beg, Cmd *end);

/* Sed functions, one per command character, wired up in fns[] */
static void cmd_y(Cmd *c);
static void cmd_x(Cmd *c);
static void cmd_w(Cmd *c);
static void cmd_t(Cmd *c);
static void cmd_s(Cmd *c);
static void cmd_r(Cmd *c);
static void cmd_q(Cmd *c);
static void cmd_P(Cmd *c);
static void cmd_p(Cmd *c);
static void cmd_N(Cmd *c);
static void cmd_n(Cmd *c);
static void cmd_l(Cmd *c);
static void cmd_i(Cmd *c);
static void cmd_H(Cmd *c);
static void cmd_h(Cmd *c);
static void cmd_G(Cmd *c);
static void cmd_g(Cmd *c);
static void cmd_D(Cmd *c);
static void cmd_d(Cmd *c);
static void cmd_c(Cmd *c);
static void cmd_b(Cmd *c);
static void cmd_a(Cmd *c);
static void cmd_colon(Cmd *c);
static void cmd_equal(Cmd *c);
static void cmd_lbrace(Cmd *c);
static void cmd_rbrace(Cmd *c);
static void cmd_last(Cmd *c);

/* Actions */
static void new_line(void);
static void app_line(void);
static void new_next(void);
static void old_next(void);
    204 
    205 /*
    206  * Globals
    207  */
static Vec braces, labels, branches; /* hold offsets into prog (ptrdiff_t stored as void *) of {, :, bt */
static Vec writes; /* holds cmd*. writes scheduled by a and r commands */
static Vec wfiles; /* holds Wfile*. files for w and s///w commands */

static Cmd   *prog, *pc; /* Program, program counter */
static size_t pcap;      /* number of Cmd slots allocated in prog */
static size_t lineno;    /* incremented per script line by compile(), printed by leprintf() */

static regex_t *lastre; /* last used regex for empty regex search */
static char   **files;  /* list of file names from argv */
static FILE    *file;   /* current file we are reading */

/* genbuf is the line buffer compile() reads the script into;
 * NOTE(review): patt and hold are presumably the pattern and hold spaces */
static String patt, hold, genbuf;

static struct {
	unsigned int n       :1; /* -n (no print) */
	unsigned int E       :1; /* -E (extended re) */
	unsigned int s       :1; /* s/// replacement happened */
	unsigned int aci_cont:1; /* a,c,i text continuation */
	unsigned int s_cont  :1; /* s/// replacement text continuation */
	unsigned int halt    :1; /* halt execution */
} gflags;
    230 
/* Dispatch table indexed by the (ASCII) function character.
 * Columns: function, argument parser, argument destructor, maximum number of
 * addresses. Slots without a designated initializer are zero, so their fn is
 * NULL, which compile() reports as "bad sed function".
 * FIXME: move character inside Fninfo and only use 26*sizeof(Fninfo) instead of 127*sizeof(Fninfo) bytes */
static Fninfo fns[] = {
	['a'] = { cmd_a     , get_aci_arg   , free_acir_arg , 1 }, /* schedule write of text for later                                                      */
	['b'] = { cmd_b     , get_bt_arg    , NULL          , 2 }, /* branch to label char *label when building, Cmd *jump when running                     */
	['c'] = { cmd_c     , get_aci_arg   , free_acir_arg , 2 }, /* delete pattern space. at 0 or 1 addr or end of 2 addr, write text                     */
	['d'] = { cmd_d     , NULL          , NULL          , 2 }, /* delete pattern space                                                                  */
	['D'] = { cmd_D     , NULL          , NULL          , 2 }, /* delete to first newline and start new cycle without reading (if no newline, d)        */
	['g'] = { cmd_g     , NULL          , NULL          , 2 }, /* replace pattern space with hold space                                                 */
	['G'] = { cmd_G     , NULL          , NULL          , 2 }, /* append newline and hold space to pattern space                                        */
	['h'] = { cmd_h     , NULL          , NULL          , 2 }, /* replace hold space with pattern space                                                 */
	['H'] = { cmd_H     , NULL          , NULL          , 2 }, /* append newline and pattern space to hold space                                        */
	['i'] = { cmd_i     , get_aci_arg   , free_acir_arg , 1 }, /* write text                                                                            */
	['l'] = { cmd_l     , NULL          , NULL          , 2 }, /* write pattern space in 'visually unambiguous form'                                    */
	['n'] = { cmd_n     , NULL          , NULL          , 2 }, /* write pattern space (unless -n) read to replace pattern space (if no input, quit)     */
	['N'] = { cmd_N     , NULL          , NULL          , 2 }, /* append to pattern space separated by newline, line number changes (if no input, quit) */
	['p'] = { cmd_p     , NULL          , NULL          , 2 }, /* write pattern space                                                                   */
	['P'] = { cmd_P     , NULL          , NULL          , 2 }, /* write pattern space up to first newline                                               */
	['q'] = { cmd_q     , NULL          , NULL          , 1 }, /* quit                                                                                  */
	['r'] = { cmd_r     , get_r_arg     , free_acir_arg , 1 }, /* write contents of file (unable to open/read treated as empty file)                    */
	['s'] = { cmd_s     , get_s_arg     , free_s_arg    , 2 }, /* find/replace/all that crazy s stuff                                                   */
	['t'] = { cmd_t     , get_bt_arg    , NULL          , 2 }, /* if s/// succeeded (since input or last t) branch to label (branch to end if no label) */
	['w'] = { cmd_w     , get_w_arg     , NULL          , 2 }, /* append pattern space to file                                                          */
	['x'] = { cmd_x     , NULL          , NULL          , 2 }, /* exchange pattern and hold spaces                                                      */
	['y'] = { cmd_y     , get_y_arg     , free_y_arg    , 2 }, /* replace runes in set1 with runes in set2                                              */
	[':'] = { cmd_colon , get_colon_arg , NULL          , 0 }, /* defines label for later b and t commands                                              */
	['='] = { cmd_equal , NULL          , NULL          , 1 }, /* printf("%d\n", line_number);                                                          */
	['{'] = { cmd_lbrace, get_lbrace_arg, NULL          , 2 }, /* if we match, run commands, otherwise jump to close                                    */
	['}'] = { cmd_rbrace, get_rbrace_arg, NULL          , 0 }, /* noop, hold onto open for ease of building scripts                                     */

	[0x7f] = { NULL, NULL, NULL, 0 }, /* index is checked with isascii(3p). fill out rest of array */
};
    262 
    263 /*
    264  * Function Definitions
    265  */
    266 
    267 /* given memory pointed to by *ptr that currently holds *nmemb members of size
    268  * size, realloc to hold new_nmemb members, return new_nmemb in *memb and one
    269  * past old end in *next. if realloc fails...explode
    270  */
    271 static void
    272 resize(void **ptr, size_t *nmemb, size_t size, size_t new_nmemb, void **next)
    273 {
    274 	void *n, *tmp;
    275 
    276 	if (new_nmemb) {
    277 		tmp = ereallocarray(*ptr, new_nmemb, size);
    278 	} else { /* turns out realloc(*ptr, 0) != free(*ptr) */
    279 		free(*ptr);
    280 		tmp = NULL;
    281 	}
    282 	n = (char *)tmp + *nmemb * size;
    283 	*nmemb = new_nmemb;
    284 	*ptr   = tmp;
    285 	if (next)
    286 		*next = n;
    287 }
    288 
    289 static void *
    290 pop(Vec *v)
    291 {
    292 	if (!v->size)
    293 		return NULL;
    294 	return v->data[--v->size];
    295 }
    296 
    297 static void
    298 push(Vec *v, void *p)
    299 {
    300 	if (v->size == v->cap)
    301 		resize((void **)&v->data, &v->cap, sizeof(*v->data), v->cap * 2 + 1, NULL);
    302 	v->data[v->size++] = p;
    303 }
    304 
    305 static void
    306 stracat(String *dst, char *src)
    307 {
    308 	int new = !dst->cap;
    309 	size_t len;
    310 
    311 	len = (new ? 0 : strlen(dst->str)) + strlen(src) + 1;
    312 	if (dst->cap < len)
    313 		resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
    314 	if (new)
    315 		*dst->str = '\0';
    316 	strcat(dst->str, src);
    317 }
    318 
    319 static void
    320 strnacat(String *dst, char *src, size_t n)
    321 {
    322 	int new = !dst->cap;
    323 	size_t len;
    324 
    325 	len = strlen(src);
    326 	len = (new ? 0 : strlen(dst->str)) + MIN(n, len) + 1;
    327 	if (dst->cap < len)
    328 		resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
    329 	if (new)
    330 		*dst->str = '\0';
    331 	strlcat(dst->str, src, len);
    332 }
    333 
    334 static void
    335 stracpy(String *dst, char *src)
    336 {
    337 	size_t len;
    338 
    339 	len = strlen(src) + 1;
    340 	if (dst->cap < len)
    341 		resize((void **)&dst->str, &dst->cap, 1, len * 2, NULL);
    342 	strcpy(dst->str, src);
    343 }
    344 
    345 static void
    346 leprintf(char *s)
    347 {
    348 	if (errno)
    349 		eprintf("%zu: %s: %s\n", lineno, s, strerror(errno));
    350 	else
    351 		eprintf("%zu: %s\n", lineno, s);
    352 }
    353 
/* print the three accepted invocation forms through eprintf() */
static void
usage(void)
{
	eprintf("usage: sed [-nrE] script [file ...]\n"
	        "       sed [-nrE] -e script [-e script] ... [-f scriptfile] ... [file ...]\n"
	        "       sed [-nrE] [-e script] ... -f scriptfile [-f scriptfile] ... [file ...]\n");
}
    362 
    363 /* Differences from POSIX
    364  * we allows semicolons and trailing blanks inside {}
    365  * we allow spaces after ! (and in between !s)
    366  * we allow extended regular expressions (-E)
    367  */
    368 static void
    369 compile(char *s, int isfile)
    370 {
    371 	FILE *f;
    372 
    373 	if (isfile) {
    374 		f = fopen(s, "r");
    375 		if (!f)
    376 			eprintf("fopen %s:", s);
    377 	} else {
    378 		if (!*s) /* empty string script */
    379 			return;
    380 		f = fmemopen(s, strlen(s), "r");
    381 		if (!f)
    382 			eprintf("fmemopen:");
    383 	}
    384 
    385 	/* NOTE: get arg functions can't use genbuf */
    386 	while (read_line(f, &genbuf) != EOF) {
    387 		s = genbuf.str;
    388 
    389 		/* if the first two characters of the script are "#n" default output shall be suppressed */
    390 		if (++lineno == 1 && *s == '#' && s[1] == 'n') {
    391 			gflags.n = 1;
    392 			continue;
    393 		}
    394 
    395 		if (gflags.aci_cont) {
    396 			aci_append(pc - 1, s);
    397 			continue;
    398 		}
    399 		if (gflags.s_cont)
    400 			s = (pc - 1)->fninfo->getarg(pc - 1, s);
    401 
    402 		while (*s) {
    403 			s = chompr(s, ';');
    404 			if (!*s || *s == '#')
    405 				break;
    406 
    407 			if ((size_t)(pc - prog) == pcap)
    408 				resize((void **)&prog, &pcap, sizeof(*prog), pcap * 2 + 1, (void **)&pc);
    409 
    410 			pc->range.beg.type = pc->range.end.type = IGNORE;
    411 			pc->fninfo = NULL;
    412 			pc->in_match = 0;
    413 
    414 			s = make_range(&pc->range, s);
    415 			s = chomp(s);
    416 			pc->negate = *s == '!';
    417 			s = chompr(s, '!');
    418 
    419 			if (!isascii(*s) || !(pc->fninfo = &fns[(unsigned)*s])->fn)
    420 				leprintf("bad sed function");
    421 			if (pc->range.naddr > pc->fninfo->naddr)
    422 				leprintf("wrong number of addresses");
    423 			s++;
    424 
    425 			if (pc->fninfo->getarg)
    426 				s = pc->fninfo->getarg(pc, s);
    427 
    428 			pc++;
    429 		}
    430 	}
    431 
    432 	fshut(f, s);
    433 }
    434 
    435 /* FIXME: if we decide to honor lack of trailing newline, set/clear a global
    436  * flag when reading a line
    437  */
    438 static int
    439 read_line(FILE *f, String *s)
    440 {
    441 	ssize_t len;
    442 
    443 	if (!f)
    444 		return EOF;
    445 
    446 	if ((len = getline(&s->str, &s->cap, f)) < 0) {
    447 		if (ferror(f))
    448 			eprintf("getline:");
    449 		return EOF;
    450 	}
    451 	if (s->str[--len] == '\n')
    452 		s->str[len] = '\0';
    453 	return 0;
    454 }
    455 
    456 /* read first range from s, return pointer to one past end of range */
    457 static char *
    458 make_range(Range *range, char *s)
    459 {
    460 	s = make_addr(&range->beg, s);
    461 
    462 	if (*s == ',')
    463 		s = make_addr(&range->end, s + 1);
    464 	else
    465 		range->end.type = IGNORE;
    466 
    467 	if      (range->beg.type == EVERY  && range->end.type == IGNORE) range->naddr = 0;
    468 	else if (range->beg.type != IGNORE && range->end.type == IGNORE) range->naddr = 1;
    469 	else if (range->beg.type != IGNORE && range->end.type != IGNORE) range->naddr = 2;
    470 	else leprintf("this is impossible...");
    471 
    472 	return s;
    473 }
    474 
/* read first addr from s, return pointer to one past end of addr
 *
 * recognized forms: $ (LAST), a decimal number (LINE), /re/ or \cREc for any
 * delimiter c other than backslash (REGEX, or LASTRE if the regex is empty).
 * anything else consumes nothing and yields EVERY.
 * s is modified in place: escapes in a regex are collapsed and its closing
 * delimiter is overwritten with a nul byte.
 * s must not be empty, echarntorune() rejects a zero length read.
 */
static char *
make_addr(Addr *addr, char *s)
{
	Rune r;
	char *p = s + strlen(s);
	size_t rlen = echarntorune(&r, s, p - s);

	if (r == '$') {
		addr->type = LAST;
		s += rlen;
	} else if (isdigitrune(r)) {
		addr->type = LINE;
		addr->u.lineno = stol(s, &s);
	} else if (r == '/' || r == '\\') {
		Rune delim;
		if (r == '\\') { /* custom delimiter is the rune after the backslash */
			s += rlen;
			rlen = echarntorune(&r, s, p - s);
		}
		if (r == '\\')
			leprintf("bad delimiter '\\'");
		delim = r;
		s += rlen;
		rlen = echarntorune(&r, s, p - s);
		if (r == delim) { /* two delimiters in a row: empty regex */
			addr->type = LASTRE;
			s += rlen;
		} else {
			addr->type = REGEX;
			p = find_delim(s, delim, 1);
			if (!*p)
				leprintf("unclosed regex");
			/* escapes() shortens the text, so the delimiter moved back */
			p -= escapes(s, p, delim, 0);
			*p++ = '\0';
			addr->u.re = emalloc(sizeof(*addr->u.re));
			eregcomp(addr->u.re, s, gflags.E ? REG_EXTENDED : 0);
			s = p;
		}
	} else {
		addr->type = EVERY;
	}

	return s;
}
    520 
/* return pointer to first delim in s that is not escaped
 * and if do_brackets is set, not in [] (note possible [::], [..], [==], inside [])
 * return pointer to trailing nul byte if no delim found
 *
 * backslash escapes are only tracked outside of brackets
 * any escaped character that is not special is just itself (POSIX undefined)
 * FIXME: pull out into some util thing, will be useful for ed as well
 */
static char *
find_delim(char *s, Rune delim, int do_brackets)
{
	enum {
		OUTSIDE         , /* not in brackets */
		BRACKETS_OPENING, /* last char was first [ or last two were first [^ */
		BRACKETS_INSIDE , /* inside [] */
		INSIDE_OPENING  , /* inside [] and last char was [ */
		CLASS_INSIDE    , /* inside class [::], or collating element [..] or [==], inside [] */
		CLASS_CLOSING   , /* inside class [::], or collating element [..] or [==], and last character was the respective : . or = */
	} state = OUTSIDE;

	Rune r, c = 0; /* no c won't be used uninitialized, shutup -Wall */
	size_t rlen;
	int escape = 0;
	char *end = s + strlen(s);

	for (; *s; s += rlen) {
		rlen = echarntorune(&r, s, end - s);

		/* right after the opening [ a ^ or ] is literal; any other rune falls
		 * through to the second chain as the first rune inside the brackets */
		if      (state == BRACKETS_OPENING       &&  r == '^'  ) {                            continue; }
		else if (state == BRACKETS_OPENING       &&  r == ']'  ) { state  = BRACKETS_INSIDE ; continue; }
		else if (state == BRACKETS_OPENING                     ) { state  = BRACKETS_INSIDE ;           }

		/* c remembers which of : . = opened the class so only the same one closes it */
		if      (state == CLASS_CLOSING          &&  r == ']'  ) { state  = BRACKETS_INSIDE ;           }
		else if (state == CLASS_CLOSING                        ) { state  = CLASS_INSIDE    ;           }
		else if (state == CLASS_INSIDE           &&  r ==  c   ) { state  = CLASS_CLOSING   ;           }
		else if (state == INSIDE_OPENING         && (r == ':'  ||
		                                             r == '.'  ||
		                                             r == '=') ) { state  = CLASS_INSIDE    ; c = r;    }
		else if (state == INSIDE_OPENING         &&  r == ']'  ) { state  = OUTSIDE         ;           }
		else if (state == INSIDE_OPENING                       ) { state  = BRACKETS_INSIDE ;           }
		else if (state == BRACKETS_INSIDE        &&  r == '['  ) { state  = INSIDE_OPENING  ;           }
		else if (state == BRACKETS_INSIDE        &&  r == ']'  ) { state  = OUTSIDE         ;           }
		else if (state == OUTSIDE                &&  escape    ) { escape = 0               ;           }
		else if (state == OUTSIDE                &&  r == '\\' ) { escape = 1               ;           }
		else if (state == OUTSIDE                &&  r == delim) return s;
		else if (state == OUTSIDE && do_brackets &&  r == '['  ) { state  = BRACKETS_OPENING;           }
	}
	return s;
}
    569 
/* eat all leading whitespace.
 * rune 0 never matches in chompr() because its loop stops at the nul byte
 */
static char *
chomp(char *s)
{
	return chompr(s, 0);
}
    575 
    576 /* eat all leading whitespace and occurrences of rune */
    577 static char *
    578 chompr(char *s, Rune rune)
    579 {
    580 	Rune   r;
    581 	size_t rlen;
    582 	char  *end = s + strlen(s);
    583 
    584 	while (*s && (rlen = echarntorune(&r, s, end - s)) && (isspacerune(r) || r == rune))
    585 		s += rlen;
    586 	return s;
    587 }
    588 
    589 /* convert first nrunes Runes from UTF-8 string s in allocated Rune*
    590  * NOTE: sequence must be valid UTF-8, check first */
    591 static Rune *
    592 strtorunes(char *s, size_t nrunes)
    593 {
    594 	Rune *rs, *rp;
    595 
    596 	rp = rs = ereallocarray(NULL, nrunes + 1, sizeof(*rs));
    597 
    598 	while (nrunes--)
    599 		s += chartorune(rp++, s);
    600 
    601 	*rp = '\0';
    602 	return rs;
    603 }
    604 
    605 static long
    606 stol(char *s, char **endp)
    607 {
    608 	long n;
    609 	errno = 0;
    610 	n = strtol(s, endp, 10);
    611 
    612 	if (errno)
    613 		leprintf("strtol:");
    614 	if (*endp == s)
    615 		leprintf("strtol: invalid number");
    616 
    617 	return n;
    618 }
    619 
/* from beg to end replace "\\d" with "d" and "\\n" with "\n" (where d is delim)
 * if delim is 'n' and n_newline is 0 then "\\n" is replaced with "n" (normal)
 * if delim is 'n' and n_newline is 1 then "\\n" is replaced with "\n" (y command)
 * if delim is 0 all escaped characters represent themselves (aci text)
 * memmove rest of string (beyond end) into place
 * return the number of converted escapes (backslashes removed)
 * NOTE: backslashes dropped in the delim == 0 case are not counted
 * FIXME: this has had too many corner cases slapped on and is ugly. rewrite better
 */
static size_t
escapes(char *beg, char *end, Rune delim, int n_newline)
{
	size_t num = 0;
	char *src = beg, *dst = beg; /* read and write positions, dst never passes src */

	while (src < end) {
		/* handle escaped backslash specially so we don't think the second
		 * backslash is escaping something */
		if (*src == '\\' && src[1] == '\\') {
			*dst++ = *src++;
			if (delim)
				*dst++ = *src++; /* keep both, the regex/replacement needs them */
			else
				src++;           /* aci text: collapse to one backslash */
		} else if (*src == '\\' && !delim) {
			src++; /* aci text: drop the backslash, next character is copied as is */
		} else if (*src == '\\' && src[1]) {
			Rune r;
			size_t rlen;
			num++;
			src++;
			rlen = echarntorune(&r, src, end - src);

			if (r == 'n' && delim == 'n') {
				*src = n_newline ? '\n' : 'n'; /* src so we can still memmove() */
			} else if (r == 'n') {
				*src = '\n';
			} else if (r != delim) {
				/* not an escape we convert: keep the backslash, undo the count */
				*dst++ = '\\';
				num--;
			}

			memmove(dst, src, rlen);
			dst += rlen;
			src += rlen;
		} else {
			*dst++ = *src++;
		}
	}
	memmove(dst, src, strlen(src) + 1);
	return num;
}
    671 
    672 static size_t
    673 echarntorune(Rune *r, char *s, size_t n)
    674 {
    675 	size_t rlen = charntorune(r, s, n);
    676 	if (!rlen || *r == Runeerror)
    677 		leprintf("invalid UTF-8");
    678 	return rlen;
    679 }
    680 
    681 static void
    682 insert_labels(void)
    683 {
    684 	size_t i;
    685 	Cmd *from, *to;
    686 
    687 	while (branches.size) {
    688 		from = prog + (ptrdiff_t)pop(&branches);
    689 
    690 		if (!from->u.label) {/* no label branch to end of script */
    691 			from->u.jump = pc - 1;
    692 		} else {
    693 			for (i = 0; i < labels.size; i++) {
    694 				to = prog + (ptrdiff_t)labels.data[i];
    695 				if (!strcmp(from->u.label, to->u.label)) {
    696 					from->u.jump = to;
    697 					break;
    698 				}
    699 			}
    700 			if (i == labels.size)
    701 				leprintf("bad label");
    702 		}
    703 	}
    704 }
    705 
    706 /*
    707  * Getargs / Freeargs
    708  * Read argument from s, return pointer to one past last character of argument
    709  */
    710 
    711 /* POSIX compliant
    712  * i\
    713  * foobar
    714  *
    715  * also allow the following non POSIX compliant
    716  * i        # empty line
    717  * ifoobar
    718  * ifoobar\
    719  * baz
    720  *
    721  * FIXME: GNU and busybox discard leading spaces
    722  * i  foobar
    723  * i foobar
    724  * ifoobar
    725  * are equivalent in GNU and busybox. We don't. Should we?
    726  */
    727 static char *
    728 get_aci_arg(Cmd *c, char *s)
    729 {
    730 	c->u.acir.print = check_puts;
    731 	c->u.acir.str = (String){ NULL, 0 };
    732 
    733 	gflags.aci_cont = !!*s; /* no continue flag if empty string */
    734 
    735 	/* neither empty string nor POSIX compliant */
    736 	if (*s && !(*s == '\\' && !s[1]))
    737 		aci_append(c, s);
    738 
    739 	return s + strlen(s);
    740 }
    741 
    742 static void
    743 aci_append(Cmd *c, char *s)
    744 {
    745 	char *end = s + strlen(s), *p = end;
    746 
    747 	gflags.aci_cont = 0;
    748 	while (--p >= s && *p == '\\')
    749 		gflags.aci_cont = !gflags.aci_cont;
    750 
    751 	if (gflags.aci_cont)
    752 		*--end = '\n';
    753 
    754 	escapes(s, end, 0, 0);
    755 	stracat(&c->u.acir.str, s);
    756 }
    757 
/* release the text (a,c,i) or file path (r) held by c */
static void
free_acir_arg(Cmd *c)
{
	free(c->u.acir.str.str);
}
    763 
    764 /* POSIX dictates that label is rest of line, including semicolons, trailing
    765  * whitespace, closing braces, etc. and can be limited to 8 bytes
    766  *
    767  * I allow a semicolon or closing brace to terminate a label name, it's not
    768  * POSIX compliant, but it's useful and every sed version I've tried to date
    769  * does the same.
    770  *
    771  * FIXME: POSIX dictates that leading whitespace is ignored but trailing
    772  * whitespace is not. This is annoying and we should probably get rid of it.
    773  */
    774 static char *
    775 get_bt_arg(Cmd *c, char *s)
    776 {
    777 	char *p = semicolon_arg(s = chomp(s));
    778 
    779 	if (p != s) {
    780 		c->u.label = estrndup(s, p - s);
    781 	} else {
    782 		c->u.label = NULL;
    783 	}
    784 
    785 	push(&branches, (void *)(c - prog));
    786 
    787 	return p;
    788 }
    789 
    790 /* POSIX dictates file name is rest of line including semicolons, trailing
    791  * whitespace, closing braces, etc. and file name must be preceded by a space
    792  *
    793  * I allow a semicolon or closing brace to terminate a file name and don't
    794  * enforce leading space.
    795  *
    796  * FIXME: decide whether trailing whitespace should be included and fix
    797  * accordingly
    798  */
    799 static char *
    800 get_r_arg(Cmd *c, char *s)
    801 {
    802 	char *p = semicolon_arg(s = chomp(s));
    803 
    804 	if (p == s)
    805 		leprintf("no file name");
    806 
    807 	c->u.acir.str.str = estrndup(s, p - s);
    808 	c->u.acir.print = write_file;
    809 
    810 	return p;
    811 }
    812 
    813 /* we allow "\\n" in replacement text to mean "\n" (undefined in POSIX)
    814  *
    815  * FIXME: allow other escapes in regex and replacement? if so change escapes()
    816  */
    817 static char *
    818 get_s_arg(Cmd *c, char *s)
    819 {
    820 	Rune delim, r;
    821 	Cmd buf;
    822 	char *p;
    823 	int esc, lastre;
    824 
    825 	/* s/Find/Replace/Flags */
    826 
    827 	/* Find */
    828 	if (!gflags.s_cont) { /* NOT continuing from literal newline in replacement text */
    829 		lastre = 0;
    830 		c->u.s.repl = (String){ NULL, 0 };
    831 		c->u.s.occurrence = 1;
    832 		c->u.s.file = NULL;
    833 		c->u.s.p = 0;
    834 
    835 		if (!*s || *s == '\\')
    836 			leprintf("bad delimiter");
    837 
    838 		p = s + strlen(s);
    839 		s += echarntorune(&delim, s, p - s);
    840 		c->u.s.delim = delim;
    841 
    842 		echarntorune(&r, s, p - s);
    843 		if (r == delim) /* empty regex */
    844 			lastre = 1;
    845 
    846 		p = find_delim(s, delim, 1);
    847 		if (!*p)
    848 			leprintf("missing second delimiter");
    849 		p -= escapes(s, p, delim, 0);
    850 		*p = '\0';
    851 
    852 		if (lastre) {
    853 			c->u.s.re = NULL;
    854 		} else {
    855 			c->u.s.re = emalloc(sizeof(*c->u.s.re));
    856 			/* FIXME: different eregcomp that calls fatal */
    857 			eregcomp(c->u.s.re, s, gflags.E ? REG_EXTENDED : 0);
    858 		}
    859 		s = p + runelen(delim);
    860 	}
    861 
    862 	/* Replace */
    863 	delim = c->u.s.delim;
    864 
    865 	p = find_delim(s, delim, 0);
    866 	p -= escapes(s, p, delim, 0);
    867 	if (!*p) { /* no third delimiter */
    868 		/* FIXME: same backslash counting as aci_append() */
    869 		if (p[-1] != '\\')
    870 			leprintf("missing third delimiter or <backslash><newline>");
    871 		p[-1] = '\n';
    872 		gflags.s_cont = 1;
    873 	} else {
    874 		gflags.s_cont = 0;
    875 	}
    876 
    877 	/* check for bad references in replacement text */
    878 	*p = '\0';
    879 	for (esc = 0, p = s; *p; p++) {
    880 		if (esc) {
    881 			esc = 0;
    882 			if (isdigit(*p) && c->u.s.re && (size_t)(*p - '0') > c->u.s.re->re_nsub)
    883 				leprintf("back reference number greater than number of groups");
    884 		} else if (*p == '\\') {
    885 			esc = 1;
    886 		}
    887 	}
    888 	stracat(&c->u.s.repl, s);
    889 
    890 	if (gflags.s_cont)
    891 		return p;
    892 
    893 	s = p + runelen(delim);
    894 
    895 	/* Flags */
    896 	p = semicolon_arg(s = chomp(s));
    897 
    898 	/* FIXME: currently for simplicity take last of g or occurrence flags and
    899 	 *        ignore multiple p flags. need to fix that */
    900 	for (; s < p; s++) {
    901 		if (isdigit(*s)) {
    902 			c->u.s.occurrence = stol(s, &s);
    903 			s--; /* for loop will advance pointer */
    904 		} else {
    905 			switch (*s) {
    906 			case 'g': c->u.s.occurrence = 0; break;
    907 			case 'p': c->u.s.p = 1;          break;
    908 			case 'w':
    909 				/* must be last flag, take everything up to newline/semicolon
    910 				 * s == p after this */
    911 				s = get_w_arg(&buf, chomp(s+1));
    912 				c->u.s.file = buf.u.file;
    913 				break;
    914 			}
    915 		}
    916 	}
    917 	return p;
    918 }
    919 
    920 static void
    921 free_s_arg(Cmd *c)
    922 {
    923 	if (c->u.s.re)
    924 		regfree(c->u.s.re);
    925 	free(c->u.s.re);
    926 	free(c->u.s.repl.str);
    927 }
    928 
    929 /* see get_r_arg notes */
    930 static char *
    931 get_w_arg(Cmd *c, char *s)
    932 {
    933 	char *p = semicolon_arg(s = chomp(s));
    934 	Wfile *w, **wp;
    935 
    936 	if (p == s)
    937 		leprintf("no file name");
    938 
    939 	for (wp = (Wfile **)wfiles.data; (size_t)(wp - (Wfile **)wfiles.data) < wfiles.size; wp++) {
    940 		if (strlen((*wp)->path) == (size_t)(p - s) && !strncmp(s, (*wp)->path, p - s)) {
    941 			c->u.file = (*wp)->file;
    942 			return p;
    943 		}
    944 	}
    945 
    946 	w = emalloc(sizeof(*w));
    947 	w->path = estrndup(s, p - s);
    948 
    949 	if (!(w->file = fopen(w->path, "w")))
    950 		leprintf("fopen failed");
    951 
    952 	c->u.file = w->file;
    953 
    954 	push(&wfiles, w);
    955 	return p;
    956 }
    957 
    958 static char *
    959 get_y_arg(Cmd *c, char *s)
    960 {
    961 	Rune delim;
    962 	char *p = s + strlen(s);
    963 	size_t rlen = echarntorune(&delim, s, p - s);
    964 	size_t nrunes1, nrunes2;
    965 
    966 	c->u.y.set1 = c->u.y.set2 = NULL;
    967 
    968 	s += rlen;
    969 	p = find_delim(s, delim, 0);
    970 	p -= escapes(s, p, delim, 1);
    971 	nrunes1 = utfnlen(s, p - s);
    972 	c->u.y.set1 = strtorunes(s, nrunes1);
    973 
    974 	s = p + rlen;
    975 	p = find_delim(s, delim, 0);
    976 	p -= escapes(s, p, delim, 1);
    977 	nrunes2 = utfnlen(s, p - s);
    978 
    979 	if (nrunes1 != nrunes2)
    980 		leprintf("different set lengths");
    981 
    982 	c->u.y.set2 = strtorunes(s, utfnlen(s, p - s));
    983 
    984 	return p + rlen;
    985 }
    986 
    987 static void
    988 free_y_arg(Cmd *c)
    989 {
    990 	free(c->u.y.set1);
    991 	free(c->u.y.set2);
    992 }
    993 
    994 /* see get_bt_arg notes */
    995 static char *
    996 get_colon_arg(Cmd *c, char *s)
    997 {
    998 	char *p = semicolon_arg(s = chomp(s));
    999 
   1000 	if (p == s)
   1001 		leprintf("no label name");
   1002 
   1003 	c->u.label = estrndup(s, p - s);
   1004 	push(&labels, (void *)(c - prog));
   1005 	return p;
   1006 }
   1007 
   1008 static char *
   1009 get_lbrace_arg(Cmd *c, char *s)
   1010 {
   1011 	push(&braces, (void *)(c - prog));
   1012 	return s;
   1013 }
   1014 
   1015 static char *
   1016 get_rbrace_arg(Cmd *c, char *s)
   1017 {
   1018 	Cmd *lbrace;
   1019 
   1020 	if (!braces.size)
   1021 		leprintf("extra }");
   1022 
   1023 	lbrace = prog + (ptrdiff_t)pop(&braces);
   1024 	lbrace->u.offset = c - prog;
   1025 	return s;
   1026 }
   1027 
   /* s points to the beginning of an argument that may be semicolon terminated.
    * Return a pointer to the first semicolon or closing brace in s (the brace
    * so that ; is not forced before }), or to the terminating nul byte if
    * neither occurs.
    * FIXME: decide whether or not to eat trailing whitespace for arguments that
    *        we allow semicolon/brace termination that POSIX doesn't
    *        b, r, t, w, :
    *        POSIX says trailing whitespace is part of label name, file name, etc.
    *        we should probably eat it
    */
   static char *
   semicolon_arg(char *s)
   {
   	/* length of the leading run that contains neither ';' nor '}' */
   	size_t n = strcspn(s, ";}");

   	return s + n;
   }
   1045 
   1046 static void
   1047 run(void)
   1048 {
   1049 	lineno = 0;
   1050 	if (braces.size)
   1051 		leprintf("extra {");
   1052 
   1053 	/* genbuf has already been initialized, patt will be in new_line
   1054 	 * (or we'll halt) */
   1055 	stracpy(&hold, "");
   1056 
   1057 	insert_labels();
   1058 	next_file();
   1059 	new_line();
   1060 
   1061 	for (pc = prog; !gflags.halt; pc++)
   1062 		pc->fninfo->fn(pc);
   1063 }
   1064 
   1065 /* return true if we are in range for c, set c->in_match appropriately */
   1066 static int
   1067 in_range(Cmd *c)
   1068 {
   1069 	if (match_addr(&c->range.beg)) {
   1070 		if (c->range.naddr == 2) {
   1071 			if (c->range.end.type == LINE && c->range.end.u.lineno <= lineno)
   1072 				c->in_match = 0;
   1073 			else
   1074 				c->in_match = 1;
   1075 		}
   1076 		return !c->negate;
   1077 	}
   1078 	if (c->in_match && match_addr(&c->range.end)) {
   1079 		c->in_match = 0;
   1080 		return !c->negate;
   1081 	}
   1082 	return c->in_match ^ c->negate;
   1083 }
   1084 
   1085 /* return true if addr matches current line */
   1086 static int
   1087 match_addr(Addr *a)
   1088 {
   1089 	switch (a->type) {
   1090 	default:
   1091 	case IGNORE: return 0;
   1092 	case EVERY: return 1;
   1093 	case LINE: return lineno == a->u.lineno;
   1094 	case LAST:
   1095 		while (is_eof(file) && !next_file())
   1096 			;
   1097 		return !file;
   1098 	case REGEX:
   1099 		lastre = a->u.re;
   1100 		return !regexec(a->u.re, patt.str, 0, NULL, 0);
   1101 	case LASTRE:
   1102 		if (!lastre)
   1103 			leprintf("no previous regex");
   1104 		return !regexec(lastre, patt.str, 0, NULL, 0);
   1105 	}
   1106 }
   1107 
   1108 /* move to next input file
   1109  * stdin if first call and no files
   1110  * return 0 for success and 1 for no more files
   1111  */
   1112 static int
   1113 next_file(void)
   1114 {
   1115 	static unsigned char first = 1;
   1116 
   1117 	if (file == stdin)
   1118 		clearerr(file);
   1119 	else if (file)
   1120 		fshut(file, "<file>");
   1121 	file = NULL;
   1122 
   1123 	do {
   1124 		if (!*files) {
   1125 			if (first) /* given no files, default to stdin */
   1126 				file = stdin;
   1127 			/* else we've used all our files, leave file = NULL */
   1128 		} else if (!strcmp(*files, "-")) {
   1129 			file = stdin;
   1130 			files++;
   1131 		} else if (!(file = fopen(*files++, "r"))) {
   1132 			/* warn this file didn't open, but move on to next */
   1133 			weprintf("fopen:");
   1134 		}
   1135 	} while (!file && *files);
   1136 	first = 0;
   1137 
   1138 	return !file;
   1139 }
   1140 
   /* Test whether stream f is at EOF by reading one byte ahead and pushing it
    * back. A NULL stream counts as EOF.
    */
   static int
   is_eof(FILE *f)
   {
   	int c;

   	if (!f || feof(f))
   		return 1;

   	c = fgetc(f);
   	if (c == EOF) {
   		if (ferror(f))
   			eprintf("fgetc:");
   		return 1;
   	}

   	/* not at EOF: put the byte back for the next reader */
   	if (ungetc(c, f) == EOF)
   		eprintf("ungetc EOF\n");

   	return 0;
   }
   1158 
   1159 /* perform writes that were scheduled
   1160  * for aci this is check_puts(string, stdout)
   1161  * for r this is write_file(path, stdout)
   1162  */
   1163 static void
   1164 do_writes(void)
   1165 {
   1166 	Cmd *c;
   1167 	size_t i;
   1168 
   1169 	for (i = 0; i < writes.size; i++) {
   1170 		c = writes.data[i];
   1171 		c->u.acir.print(c->u.acir.str.str, stdout);
   1172 	}
   1173 	writes.size = 0;
   1174 }
   1175 
   1176 /* used for r's u.acir.print()
   1177  * FIXME: something like util's concat() would be better
   1178  */
   1179 static void
   1180 write_file(char *path, FILE *out)
   1181 {
   1182 	FILE *in = fopen(path, "r");
   1183 	if (!in) /* no file is treated as empty file */
   1184 		return;
   1185 
   1186 	while (read_line(in, &genbuf) != EOF)
   1187 		check_puts(genbuf.str, out);
   1188 
   1189 	fshut(in, path);
   1190 }
   1191 
   /* Write s (skipped when NULL) followed by a newline to f, reporting any
    * write failure through eprintf().
    */
   static void
   check_puts(char *s, FILE *f)
   {
   	if (s != NULL) {
   		if (fputs(s, f) == EOF)
   			eprintf("fputs:");
   	}

   	if (fputs("\n", f) == EOF)
   		eprintf("fputs:");
   }
   1200 
   1201 /* iterate from beg to end updating ranges so we don't miss any commands
   1202  * e.g. sed -n '1d;1,3p' should still print lines 2 and 3
   1203  */
   1204 static void
   1205 update_ranges(Cmd *beg, Cmd *end)
   1206 {
   1207 	while (beg < end)
   1208 		in_range(beg++);
   1209 }
   1210 
   1211 /*
   1212  * Sed functions
   1213  */
   1214 static void
   1215 cmd_a(Cmd *c)
   1216 {
   1217 	if (in_range(c))
   1218 		push(&writes, c);
   1219 }
   1220 
   1221 static void
   1222 cmd_b(Cmd *c)
   1223 {
   1224 	if (!in_range(c))
   1225 		return;
   1226 
   1227 	/* if we jump backwards update to end, otherwise update to destination */
   1228 	update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
   1229 	pc = c->u.jump;
   1230 }
   1231 
   1232 static void
   1233 cmd_c(Cmd *c)
   1234 {
   1235 	if (!in_range(c))
   1236 		return;
   1237 
   1238 	/* write the text on the last line of the match */
   1239 	if (!c->in_match)
   1240 		check_puts(c->u.acir.str.str, stdout);
   1241 	/* otherwise start the next cycle without printing pattern space
   1242 	 * effectively deleting the text */
   1243 	new_next();
   1244 }
   1245 
   1246 static void
   1247 cmd_d(Cmd *c)
   1248 {
   1249 	if (!in_range(c))
   1250 		return;
   1251 
   1252 	new_next();
   1253 }
   1254 
   1255 static void
   1256 cmd_D(Cmd *c)
   1257 {
   1258 	char *p;
   1259 
   1260 	if (!in_range(c))
   1261 		return;
   1262 
   1263 	if ((p = strchr(patt.str, '\n'))) {
   1264 		p++;
   1265 		memmove(patt.str, p, strlen(p) + 1);
   1266 		old_next();
   1267 	} else {
   1268 		new_next();
   1269 	}
   1270 }
   1271 
   1272 static void
   1273 cmd_g(Cmd *c)
   1274 {
   1275 	if (in_range(c))
   1276 		stracpy(&patt, hold.str);
   1277 }
   1278 
   1279 static void
   1280 cmd_G(Cmd *c)
   1281 {
   1282 	if (!in_range(c))
   1283 		return;
   1284 
   1285 	stracat(&patt, "\n");
   1286 	stracat(&patt, hold.str);
   1287 }
   1288 
   1289 static void
   1290 cmd_h(Cmd *c)
   1291 {
   1292 	if (in_range(c))
   1293 		stracpy(&hold, patt.str);
   1294 }
   1295 
   1296 static void
   1297 cmd_H(Cmd *c)
   1298 {
   1299 	if (!in_range(c))
   1300 		return;
   1301 
   1302 	stracat(&hold, "\n");
   1303 	stracat(&hold, patt.str);
   1304 }
   1305 
   1306 static void
   1307 cmd_i(Cmd *c)
   1308 {
   1309 	if (in_range(c))
   1310 		check_puts(c->u.acir.str.str, stdout);
   1311 }
   1312 
   1313 /* I think it makes sense to print invalid UTF-8 sequences in octal to satisfy
   1314  * the "visually unambiguous form" sed(1p)
   1315  */
   1316 static void
   1317 cmd_l(Cmd *c)
   1318 {
   1319 	Rune   r;
   1320 	char  *p, *end;
   1321 	size_t rlen;
   1322 
   1323 	char *escapes[] = { /* FIXME: 7 entries and search instead of 127 */
   1324 		['\\'] = "\\\\", ['\a'] = "\\a", ['\b'] = "\\b",
   1325 		['\f'] = "\\f" , ['\r'] = "\\r", ['\t'] = "\\t",
   1326 		['\v'] = "\\v" , [0x7f] = NULL, /* fill out the table */
   1327 	};
   1328 
   1329 	if (!in_range(c))
   1330 		return;
   1331 
   1332 	/* FIXME: line wrapping. sed(1p) says "length at which folding occurs is
   1333 	 * unspecified, but should be appropraite for the output device"
   1334 	 * just wrap at 80 Runes?
   1335 	 */
   1336 	for (p = patt.str, end = p + strlen(p); p < end; p += rlen) {
   1337 		if (isascii(*p) && escapes[(unsigned int)*p]) {
   1338 			fputs(escapes[(unsigned int)*p], stdout);
   1339 			rlen = 1;
   1340 		} else if (!(rlen = charntorune(&r, p, end - p))) {
   1341 			/* ran out of chars, print the bytes of the short sequence */
   1342 			for (; p < end; p++)
   1343 				printf("\\%03hho", (unsigned char)*p);
   1344 			break;
   1345 		} else if (r == Runeerror) {
   1346 			for (; rlen; rlen--, p++)
   1347 				printf("\\%03hho", (unsigned char)*p);
   1348 		} else {
   1349 			while (fwrite(p, rlen, 1, stdout) < 1 && errno == EINTR)
   1350 				;
   1351 			if (ferror(stdout))
   1352 				eprintf("fwrite:");
   1353 		}
   1354 	}
   1355 	check_puts("$", stdout);
   1356 }
   1357 
   1358 static void
   1359 cmd_n(Cmd *c)
   1360 {
   1361 	if (!in_range(c))
   1362 		return;
   1363 
   1364 	if (!gflags.n)
   1365 		check_puts(patt.str, stdout);
   1366 	do_writes();
   1367 	new_line();
   1368 }
   1369 
   1370 static void
   1371 cmd_N(Cmd *c)
   1372 {
   1373 	if (!in_range(c))
   1374 		return;
   1375 	do_writes();
   1376 	app_line();
   1377 }
   1378 
   1379 static void
   1380 cmd_p(Cmd *c)
   1381 {
   1382 	if (in_range(c))
   1383 		check_puts(patt.str, stdout);
   1384 }
   1385 
   1386 static void
   1387 cmd_P(Cmd *c)
   1388 {
   1389 	char *p;
   1390 
   1391 	if (!in_range(c))
   1392 		return;
   1393 
   1394 	if ((p = strchr(patt.str, '\n')))
   1395 		*p = '\0';
   1396 
   1397 	check_puts(patt.str, stdout);
   1398 
   1399 	if (p)
   1400 		*p = '\n';
   1401 }
   1402 
   1403 static void
   1404 cmd_q(Cmd *c)
   1405 {
   1406 	if (!in_range(c))
   1407 		return;
   1408 
   1409 	if (!gflags.n)
   1410 		check_puts(patt.str, stdout);
   1411 	do_writes();
   1412 	gflags.halt = 1;
   1413 }
   1414 
   1415 static void
   1416 cmd_r(Cmd *c)
   1417 {
   1418 	if (in_range(c))
   1419 		push(&writes, c);
   1420 }
   1421 
   1422 static void
   1423 cmd_s(Cmd *c)
   1424 {
   1425 	String tmp;
   1426 	Rune r;
   1427 	size_t plen, rlen, len;
   1428 	char *p, *s, *end;
   1429 	unsigned int matches = 0, last_empty = 1, qflag = 0, cflags = 0;
   1430 	regex_t *re;
   1431 	regmatch_t *rm, *pmatch = NULL;
   1432 
   1433 	if (!in_range(c))
   1434 		return;
   1435 
   1436 	if (!c->u.s.re && !lastre)
   1437 		leprintf("no previous regex");
   1438 
   1439 	re = c->u.s.re ? c->u.s.re : lastre;
   1440 	lastre = re;
   1441 
   1442 	plen = re->re_nsub + 1;
   1443 	pmatch = ereallocarray(NULL, plen, sizeof(regmatch_t));
   1444 
   1445 	*genbuf.str = '\0';
   1446 	s = patt.str;
   1447 
   1448 	while (!qflag && !regexec(re, s, plen, pmatch, cflags)) {
   1449 		cflags = REG_NOTBOL; /* match against beginning of line first time, but not again */
   1450 		if (!*s) /* match against empty string first time, but not again */
   1451 			qflag = 1;
   1452 
   1453 		/* don't substitute if last match was not empty but this one is.
   1454 		 * s_a*_._g
   1455 		 * foobar -> .f.o.o.b.r.
   1456 		 */
   1457 		if ((last_empty || pmatch[0].rm_eo) &&
   1458 		    (++matches == c->u.s.occurrence || !c->u.s.occurrence)) {
   1459 			/* copy over everything before the match */
   1460 			strnacat(&genbuf, s, pmatch[0].rm_so);
   1461 
   1462 			/* copy over replacement text, taking into account &, backreferences, and \ escapes */
   1463 			for (p = c->u.s.repl.str, len = strcspn(p, "\\&"); *p; len = strcspn(++p, "\\&")) {
   1464 				strnacat(&genbuf, p, len);
   1465 				p += len;
   1466 				switch (*p) {
   1467 				default: leprintf("this shouldn't be possible");
   1468 				case '\0':
   1469 					/* we're at the end, back up one so the ++p will put us on
   1470 					 * the null byte to break out of the loop */
   1471 					--p;
   1472 					break;
   1473 				case '&':
   1474 					strnacat(&genbuf, s + pmatch[0].rm_so, pmatch[0].rm_eo - pmatch[0].rm_so);
   1475 					break;
   1476 				case '\\':
   1477 					if (isdigit(*++p)) { /* backreference */
   1478 						/* only need to check here if using lastre, otherwise we checked when building */
   1479 						if (!c->u.s.re && (size_t)(*p - '0') > re->re_nsub)
   1480 							leprintf("back reference number greater than number of groups");
   1481 						rm = &pmatch[*p - '0'];
   1482 						strnacat(&genbuf, s + rm->rm_so, rm->rm_eo - rm->rm_so);
   1483 					} else { /* character after backslash taken literally (well one byte, but it works) */
   1484 						strnacat(&genbuf, p, 1);
   1485 					}
   1486 					break;
   1487 				}
   1488 			}
   1489 		} else {
   1490 			/* not replacing, copy over everything up to and including the match */
   1491 			strnacat(&genbuf, s, pmatch[0].rm_eo);
   1492 		}
   1493 
   1494 		if (!pmatch[0].rm_eo) { /* empty match, advance one rune and add it to output */
   1495 			end = s + strlen(s);
   1496 			rlen = charntorune(&r, s, end - s);
   1497 
   1498 			if (!rlen) { /* ran out of bytes, copy short sequence */
   1499 				stracat(&genbuf, s);
   1500 				s = end;
   1501 			} else { /* copy whether or not it's a good rune */
   1502 				strnacat(&genbuf, s, rlen);
   1503 				s += rlen;
   1504 			}
   1505 		}
   1506 		last_empty = !pmatch[0].rm_eo;
   1507 		s += pmatch[0].rm_eo;
   1508 	}
   1509 	free(pmatch);
   1510 
   1511 	if (!(matches && matches >= c->u.s.occurrence)) /* no replacement */
   1512 		return;
   1513 
   1514 	gflags.s = 1;
   1515 
   1516 	stracat(&genbuf, s);
   1517 
   1518 	tmp    = patt;
   1519 	patt   = genbuf;
   1520 	genbuf = tmp;
   1521 
   1522 	if (c->u.s.p)
   1523 		check_puts(patt.str, stdout);
   1524 	if (c->u.s.file)
   1525 		check_puts(patt.str, c->u.s.file);
   1526 }
   1527 
   1528 static void
   1529 cmd_t(Cmd *c)
   1530 {
   1531 	if (!in_range(c) || !gflags.s)
   1532 		return;
   1533 
   1534 	/* if we jump backwards update to end, otherwise update to destination */
   1535 	update_ranges(c + 1, c->u.jump > c ? c->u.jump : prog + pcap);
   1536 	pc = c->u.jump;
   1537 	gflags.s = 0;
   1538 }
   1539 
   1540 static void
   1541 cmd_w(Cmd *c)
   1542 {
   1543 	if (in_range(c))
   1544 		check_puts(patt.str, c->u.file);
   1545 }
   1546 
   1547 static void
   1548 cmd_x(Cmd *c)
   1549 {
   1550 	String tmp;
   1551 
   1552 	if (!in_range(c))
   1553 		return;
   1554 
   1555 	tmp  = patt;
   1556 	patt = hold;
   1557 	hold = tmp;
   1558 }
   1559 
   1560 static void
   1561 cmd_y(Cmd *c)
   1562 {
   1563 	String tmp;
   1564 	Rune r, *rp;
   1565 	size_t n, rlen;
   1566 	char *s, *end, buf[UTFmax];
   1567 
   1568 	if (!in_range(c))
   1569 		return;
   1570 
   1571 	*genbuf.str = '\0';
   1572 	for (s = patt.str, end = s + strlen(s); *s; s += rlen) {
   1573 		if (!(rlen = charntorune(&r, s, end - s))) { /* ran out of chars, copy rest */
   1574 			stracat(&genbuf, s);
   1575 			break;
   1576 		} else if (r == Runeerror) { /* bad UTF-8 sequence, copy bytes */
   1577 			strnacat(&genbuf, s, rlen);
   1578 		} else {
   1579 			for (rp = c->u.y.set1; *rp; rp++)
   1580 				if (*rp == r)
   1581 					break;
   1582 			if (*rp) { /* found r in set1, replace with Rune from set2 */
   1583 				n = runetochar(buf, c->u.y.set2 + (rp - c->u.y.set1));
   1584 				strnacat(&genbuf, buf, n);
   1585 			} else {
   1586 				strnacat(&genbuf, s, rlen);
   1587 			}
   1588 		}
   1589 	}
   1590 	tmp    = patt;
   1591 	patt   = genbuf;
   1592 	genbuf = tmp;
   1593 }
   1594 
   1595 static void
   1596 cmd_colon(Cmd *c)
   1597 {
   1598 }
   1599 
   1600 static void
   1601 cmd_equal(Cmd *c)
   1602 {
   1603 	if (in_range(c))
   1604 		printf("%zu\n", lineno);
   1605 }
   1606 
   1607 static void
   1608 cmd_lbrace(Cmd *c)
   1609 {
   1610 	Cmd *jump;
   1611 
   1612 	if (in_range(c))
   1613 		return;
   1614 
   1615 	/* update ranges on all commands we skip */
   1616 	jump = prog + c->u.offset;
   1617 	update_ranges(c + 1, jump);
   1618 	pc = jump;
   1619 }
   1620 
   1621 static void
   1622 cmd_rbrace(Cmd *c)
   1623 {
   1624 }
   1625 
   1626 /* not actually a sed function, but acts like one, put in last spot of script */
   1627 static void
   1628 cmd_last(Cmd *c)
   1629 {
   1630 	if (!gflags.n)
   1631 		check_puts(patt.str, stdout);
   1632 	do_writes();
   1633 	new_next();
   1634 }
   1635 
   1636 /*
   1637  * Actions
   1638  */
   1639 
   1640 /* read new line, continue current cycle */
   1641 static void
   1642 new_line(void)
   1643 {
   1644 	while (read_line(file, &patt) == EOF) {
   1645 		if (next_file()) {
   1646 			gflags.halt = 1;
   1647 			return;
   1648 		}
   1649 	}
   1650 	gflags.s = 0;
   1651 	lineno++;
   1652 }
   1653 
   1654 /* append new line, continue current cycle
   1655  * FIXME: used for N, POSIX specifies do not print pattern space when out of
   1656  *        input, but GNU does so busybox does as well. Currently we don't.
   1657  *        Should we?
   1658  */
   1659 static void
   1660 app_line(void)
   1661 {
   1662 	while (read_line(file, &genbuf) == EOF) {
   1663 		if (next_file()) {
   1664 			gflags.halt = 1;
   1665 			return;
   1666 		}
   1667 	}
   1668 
   1669 	stracat(&patt, "\n");
   1670 	stracat(&patt, genbuf.str);
   1671 	gflags.s = 0;
   1672 	lineno++;
   1673 }
   1674 
   1675 /* read new line, start new cycle */
   1676 static void
   1677 new_next(void)
   1678 {
   1679 	*patt.str = '\0';
   1680 	update_ranges(pc + 1, prog + pcap);
   1681 	new_line();
   1682 	pc = prog - 1;
   1683 }
   1684 
   1685 /* keep old pattern space, start new cycle */
   1686 static void
   1687 old_next(void)
   1688 {
   1689 	update_ranges(pc + 1, prog + pcap);
   1690 	pc = prog - 1;
   1691 }
   1692 
   1693 int
   1694 main(int argc, char *argv[])
   1695 {
   1696 	char *arg;
   1697 	int ret = 0, script = 0;
   1698 
   1699 	ARGBEGIN {
   1700 	case 'n':
   1701 		gflags.n = 1;
   1702 		break;
   1703 	case 'r':
   1704 	case 'E':
   1705 		gflags.E = 1;
   1706 		break;
   1707 	case 'e':
   1708 		arg = EARGF(usage());
   1709 		compile(arg, 0);
   1710 		script = 1;
   1711 		break;
   1712 	case 'f':
   1713 		arg = EARGF(usage());
   1714 		compile(arg, 1);
   1715 		script = 1;
   1716 		break;
   1717 	default : usage();
   1718 	} ARGEND
   1719 
   1720 	/* no script to run */
   1721 	if (!script && !argc)
   1722 		usage();
   1723 
   1724 	/* no script yet, next argument is script */
   1725 	if (!script)
   1726 		compile(*argv++, 0);
   1727 
   1728 	/* shrink/grow memory to fit and add our last instruction */
   1729 	resize((void **)&prog, &pcap, sizeof(*prog), pc - prog + 1, NULL);
   1730 	pc = prog + pcap - 1;
   1731 	pc->fninfo = &(Fninfo){ cmd_last, NULL, NULL, 0 };
   1732 
   1733 	files = argv;
   1734 	run();
   1735 
   1736 	ret |= fshut(stdin, "<stdin>") | fshut(stdout, "<stdout>");
   1737 
   1738 	return ret;
   1739 }