parse.c (11606B)
1 /* $OpenBSD: parse.c,v 1.17 2009/10/27 23:59:39 deraadt Exp $ */ 2 /* $NetBSD: parse.c,v 1.12 2001/12/07 13:37:39 bjh21 Exp $ */ 3 4 /* 5 * Copyright (c) 1989, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the University nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/types.h> 34 #include <sys/file.h> 35 36 #include <ctype.h> 37 #include <err.h> 38 #include <errno.h> 39 #include <fcntl.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 44 #include "hexdump.h" 45 #include "util.h" 46 47 FU *endfu; /* format at end-of-data */ 48 49 void 50 addfile(char *name) 51 { 52 FILE *fp; 53 size_t len; 54 char *buf, *lbuf, *p; 55 56 if ((fp = fopen(name, "r")) == NULL) 57 err(1, "fopen %s", name); 58 59 lbuf = NULL; 60 while ((buf = fgetln(fp, &len))) { 61 if (buf[len - 1] == '\n') 62 buf[len - 1] = '\0'; 63 else { 64 /* EOF without EOL, copy and add the NUL */ 65 if ((lbuf = malloc(len + 1)) == NULL) 66 err(1, NULL); 67 memcpy(lbuf, buf, len); 68 lbuf[len] = '\0'; 69 buf = lbuf; 70 } 71 for (p = buf; isspace((unsigned char)*p); ++p); 72 if (!*p || *p == '#') 73 continue; 74 add(p); 75 } 76 free(lbuf); 77 (void)fclose(fp); 78 } 79 80 void 81 add(const char *fmt) 82 { 83 const char *p; 84 static FS **nextfs; 85 FS *tfs; 86 FU *tfu, **nextfu; 87 const char *savep; 88 89 /* start new linked list of format units */ 90 tfs = emalloc(sizeof(FS)); 91 if (!fshead) 92 fshead = tfs; 93 else 94 *nextfs = tfs; 95 nextfs = &tfs->nextfs; 96 nextfu = &tfs->nextfu; 97 98 /* take the format string and break it up into format units */ 99 for (p = fmt;;) { 100 /* skip leading white space */ 101 for (; isspace((unsigned char)*p); ++p); 102 if (!*p) 103 break; 104 105 /* allocate a new format unit and link it in */ 106 tfu = emalloc(sizeof(FU)); 107 *nextfu = tfu; 108 nextfu = &tfu->nextfu; 109 tfu->reps = 1; 110 111 /* if leading digit, repetition count */ 112 if (isdigit((unsigned char)*p)) { 113 for (savep = p; isdigit((unsigned char)*p); ++p); 114 if (!isspace((unsigned char)*p) && *p != '/') 115 badfmt(fmt); 116 /* may overwrite either white space or slash */ 117 tfu->reps = atoi(savep); 118 tfu->flags = F_SETREP; 119 /* skip trailing white space */ 120 for (++p; isspace((unsigned char)*p); ++p); 121 } 122 123 /* skip slash and trailing white space */ 124 if (*p == '/') 125 while (isspace((unsigned char)*++p)); 126 127 /* byte count */ 128 if (isdigit((unsigned char)*p)) { 129 for (savep = p; isdigit((unsigned char)*p); ++p); 130 if (!isspace((unsigned char)*p)) 131 badfmt(fmt); 132 tfu->bcnt = atoi(savep); 133 /* skip trailing white space */ 134 for (++p; isspace((unsigned char)*p); ++p); 135 } 136 137 /* format */ 138 if (*p != '"') 139 badfmt(fmt); 140 for (savep = ++p; *p != '"';) 141 if (*p++ == 0) 142 badfmt(fmt); 143 if (!(tfu->fmt = malloc(p - savep + 1))) 144 nomem(); 145 (void) strncpy(tfu->fmt, savep, p - savep); 146 tfu->fmt[p - savep] = '\0'; 147 escape(tfu->fmt); 148 p++; 149 } 150 } 151 152 static const char *spec = ".#-+ 0123456789"; 153 154 int 155 size(FS *fs) 156 { 157 FU *fu; 158 int bcnt, cursize; 159 char *fmt; 160 int prec; 161 162 /* figure out the data block size needed for each format unit */ 163 for (cursize = 0, fu = fs->nextfu; fu; fu = fu->nextfu) { 164 if (fu->bcnt) { 165 cursize += fu->bcnt * fu->reps; 166 continue; 167 } 168 for (bcnt = prec = 0, fmt = fu->fmt; *fmt; ++fmt) { 169 if (*fmt != '%') 170 continue; 171 /* 172 * skip any special chars -- save precision in 173 * case it's a %s format. 174 */ 175 while (*++fmt && strchr(spec + 1, *fmt)); 176 if (*fmt == '.' && isdigit((unsigned char)*++fmt)) { 177 prec = atoi(fmt); 178 while (isdigit((unsigned char)*++fmt)); 179 } 180 switch(*fmt) { 181 case 'c': 182 bcnt += 1; 183 break; 184 case 'd': case 'i': case 'o': case 'u': 185 case 'x': case 'X': 186 bcnt += 4; 187 break; 188 case 'e': case 'E': case 'f': case 'g': case 'G': 189 bcnt += 8; 190 break; 191 case 's': 192 bcnt += prec; 193 break; 194 case '_': 195 switch(*++fmt) { 196 case 'c': case 'p': case 'u': 197 bcnt += 1; 198 break; 199 } 200 } 201 } 202 cursize += bcnt * fu->reps; 203 } 204 return (cursize); 205 } 206 207 void 208 rewrite(FS *fs) 209 { 210 enum { NOTOKAY, USEBCNT, USEPREC } sokay; 211 PR *pr, **nextpr; 212 FU *fu; 213 char *p1, *p2; 214 char savech, *fmtp, cs[3]; 215 int nconv, prec; 216 size_t len; 217 218 nextpr = NULL; 219 prec = 0; 220 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 221 /* 222 * Break each format unit into print units; each conversion 223 * character gets its own. 224 */ 225 for (nconv = 0, fmtp = fu->fmt; *fmtp; nextpr = &pr->nextpr) { 226 pr = emalloc(sizeof(PR)); 227 if (!fu->nextpr) 228 fu->nextpr = pr; 229 else 230 *nextpr = pr; 231 232 /* Skip preceding text and up to the next % sign. */ 233 for (p1 = fmtp; *p1 && *p1 != '%'; ++p1); 234 235 /* Only text in the string. */ 236 if (!*p1) { 237 pr->fmt = fmtp; 238 pr->flags = F_TEXT; 239 break; 240 } 241 242 /* 243 * Get precision for %s -- if have a byte count, don't 244 * need it. 245 */ 246 if (fu->bcnt) { 247 sokay = USEBCNT; 248 /* Skip to conversion character. */ 249 for (++p1; *p1 && strchr(spec, *p1); ++p1); 250 } else { 251 /* Skip any special chars, field width. */ 252 while (*++p1 && strchr(spec + 1, *p1)); 253 if (*p1 == '.' && 254 isdigit((unsigned char)*++p1)) { 255 sokay = USEPREC; 256 prec = atoi(p1); 257 while (isdigit((unsigned char)*++p1)) 258 continue; 259 } else 260 sokay = NOTOKAY; 261 } 262 263 p2 = *p1 ? p1 + 1 : p1; /* Set end pointer. */ 264 cs[0] = *p1; /* Set conversion string. */ 265 cs[1] = '\0'; 266 267 /* 268 * Figure out the byte count for each conversion; 269 * rewrite the format as necessary, set up blank- 270 * padding for end of data. 271 */ 272 switch(cs[0]) { 273 case 'c': 274 pr->flags = F_CHAR; 275 switch(fu->bcnt) { 276 case 0: case 1: 277 pr->bcnt = 1; 278 break; 279 default: 280 p1[1] = '\0'; 281 badcnt(p1); 282 } 283 break; 284 case 'd': case 'i': 285 case 'o': case 'u': case 'x': case 'X': 286 if (cs[0] == 'd' || cs[0] == 'i') 287 pr->flags = F_INT; 288 else 289 pr->flags = F_UINT; 290 291 cs[2] = '\0'; 292 cs[1] = cs[0]; 293 cs[0] = 'q'; 294 switch(fu->bcnt) { 295 case 0: case 4: 296 pr->bcnt = 4; 297 break; 298 case 1: 299 pr->bcnt = 1; 300 break; 301 case 2: 302 pr->bcnt = 2; 303 break; 304 case 8: 305 pr->bcnt = 8; 306 break; 307 default: 308 p1[1] = '\0'; 309 badcnt(p1); 310 } 311 break; 312 case 'e': case 'E': case 'f': case 'g': case 'G': 313 pr->flags = F_DBL; 314 switch(fu->bcnt) { 315 case 0: case 8: 316 pr->bcnt = 8; 317 break; 318 case 4: 319 pr->bcnt = 4; 320 break; 321 default: 322 p1[1] = '\0'; 323 badcnt(p1); 324 } 325 break; 326 case 's': 327 pr->flags = F_STR; 328 switch(sokay) { 329 case NOTOKAY: 330 badsfmt(); 331 case USEBCNT: 332 pr->bcnt = fu->bcnt; 333 break; 334 case USEPREC: 335 pr->bcnt = prec; 336 break; 337 } 338 break; 339 case '_': 340 ++p2; 341 switch(p1[1]) { 342 case 'A': 343 endfu = fu; 344 fu->flags |= F_IGNORE; 345 /* FALLTHROUGH */ 346 case 'a': 347 pr->flags = F_ADDRESS; 348 ++p2; 349 switch(p1[2]) { 350 case 'd': case 'o': case'x': 351 cs[0] = 'q'; 352 cs[1] = p1[2]; 353 cs[2] = '\0'; 354 break; 355 default: 356 if (p1[2]) 357 p1[3] = '\0'; 358 badconv(p1); 359 } 360 break; 361 case 'c': 362 case 'p': 363 case 'u': 364 if (p1[1] == 'c') { 365 pr->flags = F_C; 366 /* cs[0] = 'c'; set in conv_c */ 367 } else if (p1[1] == 'p') { 368 pr->flags = F_P; 369 cs[0] = 'c'; 370 } else { 371 pr->flags = F_U; 372 /* cs[0] = 'c'; set in conv_u */ 373 } 374 375 switch(fu->bcnt) { 376 case 0: case 1: 377 pr->bcnt = 1; 378 break; 379 default: 380 p1[2] = '\0'; 381 badcnt(p1); 382 } 383 break; 384 default: 385 if (p1[1]) 386 p1[2] = '\0'; 387 badconv(p1); 388 } 389 break; 390 default: 391 if (cs[0]) 392 p1[1] = '\0'; 393 badconv(p1); 394 } 395 396 /* 397 * Copy to PR format string, set conversion character 398 * pointer, update original. 399 */ 400 savech = *p2; 401 p1[0] = '\0'; 402 len = strlen(fmtp) + strlen(cs) + 1; 403 pr->fmt = emalloc(len); 404 snprintf(pr->fmt, len, "%s%s", fmtp, cs); 405 *p2 = savech; 406 pr->cchar = pr->fmt + (p1 - fmtp); 407 fmtp = p2; 408 409 /* Only one conversion character if byte count. */ 410 if (!(pr->flags&F_ADDRESS) && fu->bcnt && nconv++) 411 errx(1, 412 "byte count with multiple conversion characters"); 413 } 414 /* 415 * If format unit byte count not specified, figure it out 416 * so can adjust rep count later. 417 */ 418 if (!fu->bcnt) 419 for (pr = fu->nextpr; pr; pr = pr->nextpr) 420 fu->bcnt += pr->bcnt; 421 } 422 /* 423 * If the format string interprets any data at all, and it's 424 * not the same as the blocksize, and its last format unit 425 * interprets any data at all, and has no iteration count, 426 * repeat it as necessary. 427 * 428 * If, rep count is greater than 1, no trailing whitespace 429 * gets output from the last iteration of the format unit. 430 */ 431 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 432 if (!fu->nextfu && fs->bcnt < blocksize && 433 !(fu->flags&F_SETREP) && fu->bcnt) 434 fu->reps += (blocksize - fs->bcnt) / fu->bcnt; 435 if (fu->reps > 1) { 436 if (!fu->nextpr) 437 break; 438 for (pr = fu->nextpr;; pr = pr->nextpr) 439 if (!pr->nextpr) 440 break; 441 for (p1 = pr->fmt, p2 = NULL; *p1; ++p1) 442 p2 = isspace((unsigned char)*p1) ? p1 : NULL; 443 if (p2) 444 pr->nospace = p2; 445 } 446 } 447 #ifdef DEBUG 448 for (fu = fs->nextfu; fu; fu = fu->nextfu) { 449 (void)printf("fmt:"); 450 for (pr = fu->nextpr; pr; pr = pr->nextpr) 451 (void)printf(" {%s}", pr->fmt); 452 (void)printf("\n"); 453 } 454 #endif 455 } 456 457 void 458 escape(char *p1) 459 { 460 char *p2; 461 462 /* alphabetic escape sequences have to be done in place */ 463 for (p2 = p1;; ++p1, ++p2) { 464 if (!*p1) { 465 *p2 = *p1; 466 break; 467 } 468 if (*p1 == '\\') { 469 switch(*++p1) { 470 case '\0': 471 *p2++ = '\\'; 472 *p2 = '\0'; 473 return; /* incomplete escape sequence */ 474 case 'a': 475 /* *p2 = '\a'; */ 476 *p2 = '\007'; 477 break; 478 case 'b': 479 *p2 = '\b'; 480 break; 481 case 'f': 482 *p2 = '\f'; 483 break; 484 case 'n': 485 *p2 = '\n'; 486 break; 487 case 'r': 488 *p2 = '\r'; 489 break; 490 case 't': 491 *p2 = '\t'; 492 break; 493 case 'v': 494 *p2 = '\v'; 495 break; 496 default: 497 *p2 = *p1; 498 break; 499 } 500 } else 501 *p2 = *p1; 502 } 503 } 504 505 void 506 badcnt(char *s) 507 { 508 errx(1, "%s: bad byte count", s); 509 } 510 511 void 512 badsfmt(void) 513 { 514 errx(1, "%%s: requires a precision or a byte count"); 515 } 516 517 void 518 badfmt(const char *fmt) 519 { 520 errx(1, "\"%s\": bad format", fmt); 521 } 522 523 void 524 badconv(char *ch) 525 { 526 errx(1, "%%%s: bad conversion character", ch); 527 }