hbase

heirloom base
git clone git://git.2f30.org/hbase
Log | Files | Refs | README

commit d672d713160ed2c05d1d631d809ba17cb5a806ae
Author: Daniel Bainton <dpb@driftaway.org>
Date:   Wed, 26 Mar 2014 14:44:38 +0000

Initial commit

Diffstat:
ALICENSE/COPYING | 340+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/COPYING.LGPL | 504+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/LICENSE | 354+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/LUCENT | 258+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/OPENSOLARIS.LICENSE | 385+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ALICENSE/README | 27+++++++++++++++++++++++++++
AREADME | 5+++++
A_install/install.1b | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A_install/install.c | 436+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A_install/mkfile | 12++++++++++++
Abc/bc.1 | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abc/bc.y | 743+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abc/lib.b | 241+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Abc/mkfile | 11+++++++++++
Abc/yyval.sed | 22++++++++++++++++++++++
Acp/cp.1 | 218+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acp/cp.c | 1264+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acp/ln.1 | 113+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acp/mkfile | 9+++++++++
Acp/mv.1 | 179+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/dc.1 | 231+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/dc.c | 2061+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/dc.h | 203+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adc/mkfile | 8++++++++
Adc/version.c | 13+++++++++++++
Add/dd.1 | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Add/dd.c | 1035+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Add/mkfile | 7+++++++
Adiff/diff.1 | 493+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diff.c | 473+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diff.h | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffdir.c | 993+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffh.c | 410+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffreg.c | 1629+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Adiff/diffver.c | 15+++++++++++++++
Adiff/mkfile | 14++++++++++++++
Aed/depsinc.mk | 1+
Aed/ed.1 | 1033+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aed/ed.c | 2822+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aed/mkfile | 8++++++++
Aexpr/expr.1 | 211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexpr/expr.y | 546+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aexpr/mkfile | 10++++++++++
Afind/find.1 | 558+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afind/find.c | 1554+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afind/mkfile | 8++++++++
Afmt/fmt.1 | 115+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afmt/fmt.c | 678+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Afmt/mkfile | 7+++++++
Agrep/ac.c | 578++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/alloc.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/alloc.h | 34++++++++++++++++++++++++++++++++++
Agrep/config.h | 4++++
Agrep/egrep.1 | 388+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/fgrep.1 | 179+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grep.1 | 297+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grep.c | 727+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grep.h | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/grid.c | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/mkfile | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/plist.c | 213+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/rcomp.c | 350+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agrep/sus.c | 133+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahd/hd.1 | 160+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahd/hd.c | 715+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahd/mkfile | 7+++++++
Alex/allprint.c | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/depsinc.mk | 1+
Alex/getopt.c | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/header.c | 409+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/ldefs.c | 309+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/lex.1 | 131+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/libmain.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alex/lsearch.c | 71+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/main.c | 364+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/mkfile | 26++++++++++++++++++++++++++
Alex/nceucform | 480+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/ncform | 290+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/nrform | 188+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/once.h | 166+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/parser.y | 978+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/reject.c | 158+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/search.h | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sgs.h | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sub1.c | 1017+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sub2.c | 1217+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/sub3.c | 395+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/wcio.c | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/yyless.c | 137+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/yywrap.c | 41+++++++++++++++++++++++++++++++++++++++++
Alibcommon/CHECK.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/_alloca.h | 27+++++++++++++++++++++++++++
Alibcommon/_malloc.h | 26++++++++++++++++++++++++++
Alibcommon/_utmpx.h | 89+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/asciitype.c | 59+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/asciitype.h | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/atoll.h | 8++++++++
Alibcommon/blank.h | 38++++++++++++++++++++++++++++++++++++++
Alibcommon/depsinc.mk | 2++
Alibcommon/getdir.c | 197+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/getdir.h | 33+++++++++++++++++++++++++++++++++
Alibcommon/getopt.c | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/gmatch.c | 136+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_alloc.c | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_close.c | 36++++++++++++++++++++++++++++++++++++
Alibcommon/ib_free.c | 33+++++++++++++++++++++++++++++++++
Alibcommon/ib_getlin.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_getw.c | 81+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_open.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_popen.c | 87+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_read.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/ib_seek.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/iblok.h | 135+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/mbtowi.h | 22++++++++++++++++++++++
Alibcommon/memalign.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/memalign.h | 35+++++++++++++++++++++++++++++++++++
Alibcommon/mkfile | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/msgselect.h | 30++++++++++++++++++++++++++++++
Alibcommon/oblok.c | 260+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/oblok.h | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/pathconf.c | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/pathconf.h | 29+++++++++++++++++++++++++++++
Alibcommon/pfmt.c | 39+++++++++++++++++++++++++++++++++++++++
Alibcommon/pfmt.h | 46++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/pfmt_label.c | 1+
Alibcommon/regexp.h | 1211+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/regexpr.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/regexpr.h | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/setlabel.c | 40++++++++++++++++++++++++++++++++++++++++
Alibcommon/setuxlabel.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sfile.c | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sfile.h | 40++++++++++++++++++++++++++++++++++++++++
Alibcommon/sighold.c | 41+++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigignore.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/signal.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigpause.c | 48++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigrelse.c | 41+++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigset.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sigset.h | 38++++++++++++++++++++++++++++++++++++++
Alibcommon/strtol.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/sysv3.c | 2++
Alibcommon/utmpx.c | 252+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibcommon/vpfmt.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/COPYING.LGPL | 504+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/NOTES | 14++++++++++++++
Alibuxre/_collelem.c | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/_collmult.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/bracket.c | 829+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/colldata.h | 226+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/depsinc.mk | 2++
Alibuxre/mkfile | 19+++++++++++++++++++
Alibuxre/onefile.c | 38++++++++++++++++++++++++++++++++++++++
Alibuxre/re.h | 228+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regcomp.c | 77+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regdfa.c | 877+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regdfa.h | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regerror.c | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regex.h | 153+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regexec.c | 68++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regfree.c | 42++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regnfa.c | 1070+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/regparse.c | 1091+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/stubs.c | 82+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alibuxre/wcharm.h | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/NOTICE | 34++++++++++++++++++++++++++++++++++
Amk/libbio/README | 5+++++
Amk/libbio/bbuffered.c | 20++++++++++++++++++++
Amk/libbio/bcat.c | 46++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bfildes.c | 9+++++++++
Amk/libbio/bflush.c | 33+++++++++++++++++++++++++++++++++
Amk/libbio/bgetc.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bgetd.c | 36++++++++++++++++++++++++++++++++++++
Amk/libbio/bgetrune.c | 47+++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/binit.c | 153+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bio.3 | 371+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bio.h | 91+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/boffset.c | 25+++++++++++++++++++++++++
Amk/libbio/bprint.c | 14++++++++++++++
Amk/libbio/bputc.c | 20++++++++++++++++++++
Amk/libbio/bputrune.c | 23+++++++++++++++++++++++
Amk/libbio/brdline.c | 94+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/brdstr.c | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bread.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bseek.c | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libbio/bvprint.c | 38++++++++++++++++++++++++++++++++++++++
Amk/libbio/bwrite.c | 38++++++++++++++++++++++++++++++++++++++
Amk/libbio/depsinc.mk | 2++
Amk/libbio/lib9.h | 26++++++++++++++++++++++++++
Amk/libbio/mkfile | 23+++++++++++++++++++++++
Amk/libfmt/NOTICE | 25+++++++++++++++++++++++++
Amk/libfmt/README | 5+++++
Amk/libfmt/charstod.c | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/depsinc.mk | 2++
Amk/libfmt/dofmt.c | 617+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/dorfmt.c | 50++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/errfmt.c | 16++++++++++++++++
Amk/libfmt/fltfmt.c | 668+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmt.c | 220+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmt.h | 116+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtdef.h | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtfd.c | 36++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtfdflush.c | 22++++++++++++++++++++++
Amk/libfmt/fmtinstall.3 | 379+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtlocale.c | 55+++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtlock.c | 15+++++++++++++++
Amk/libfmt/fmtnull.c | 33+++++++++++++++++++++++++++++++++
Amk/libfmt/fmtprint.c | 36++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtquote.c | 259+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/fmtrune.c | 28++++++++++++++++++++++++++++
Amk/libfmt/fmtstr.c | 16++++++++++++++++
Amk/libfmt/fmtvprint.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libfmt/fprint.c | 17+++++++++++++++++
Amk/libfmt/mkfile | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/nan.h | 4++++
Amk/libfmt/nan64.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/plan9.h | 38++++++++++++++++++++++++++++++++++++++
Amk/libfmt/pow10.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/print.3 | 482+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/print.c | 17+++++++++++++++++
Amk/libfmt/runefmtstr.c | 16++++++++++++++++
Amk/libfmt/runeseprint.c | 18++++++++++++++++++
Amk/libfmt/runesmprint.c | 18++++++++++++++++++
Amk/libfmt/runesnprint.c | 19+++++++++++++++++++
Amk/libfmt/runesprint.c | 18++++++++++++++++++
Amk/libfmt/runevseprint.c | 29+++++++++++++++++++++++++++++
Amk/libfmt/runevsmprint.c | 86+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/runevsnprint.c | 28++++++++++++++++++++++++++++
Amk/libfmt/seprint.c | 17+++++++++++++++++
Amk/libfmt/smprint.c | 17+++++++++++++++++
Amk/libfmt/snprint.c | 18++++++++++++++++++
Amk/libfmt/sprint.c | 30++++++++++++++++++++++++++++++
Amk/libfmt/strtod.c | 520+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/test.c | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/test2.c | 9+++++++++
Amk/libfmt/test3.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/vfprint.c | 21+++++++++++++++++++++
Amk/libfmt/vseprint.c | 28++++++++++++++++++++++++++++
Amk/libfmt/vsmprint.c | 83+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libfmt/vsnprint.c | 28++++++++++++++++++++++++++++
Amk/libregexp/NOTICE | 25+++++++++++++++++++++++++
Amk/libregexp/README | 5+++++
Amk/libregexp/depsinc.mk | 2++
Amk/libregexp/lib9.h | 10++++++++++
Amk/libregexp/mkfile | 15+++++++++++++++
Amk/libregexp/regaux.c | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regcomp.c | 555+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regcomp.h | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regerror.c | 14++++++++++++++
Amk/libregexp/regexec.c | 231+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regexp9.3 | 220+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regexp9.7 | 141+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regexp9.h | 96+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/regsub.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/rregexec.c | 212+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/rregsub.c | 63+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/test.c | 46++++++++++++++++++++++++++++++++++++++++++++++
Amk/libregexp/test2.c | 20++++++++++++++++++++
Amk/libutf/NOTICE | 25+++++++++++++++++++++++++
Amk/libutf/README | 5+++++
Amk/libutf/depsinc.mk | 2++
Amk/libutf/isalpharune.3 | 57+++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/mkfile | 27+++++++++++++++++++++++++++
Amk/libutf/plan9.h | 29+++++++++++++++++++++++++++++
Amk/libutf/rune.3 | 194+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/rune.c | 217+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcat.3 | 74++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcat.c | 25+++++++++++++++++++++++++
Amk/libutf/runestrchr.c | 35+++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcmp.c | 35+++++++++++++++++++++++++++++++++++
Amk/libutf/runestrcpy.c | 28++++++++++++++++++++++++++++
Amk/libutf/runestrdup.c | 30++++++++++++++++++++++++++++++
Amk/libutf/runestrecpy.c | 32++++++++++++++++++++++++++++++++
Amk/libutf/runestrlen.c | 24++++++++++++++++++++++++
Amk/libutf/runestrncat.c | 32++++++++++++++++++++++++++++++++
Amk/libutf/runestrncmp.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libutf/runestrncpy.c | 33+++++++++++++++++++++++++++++++++
Amk/libutf/runestrrchr.c | 30++++++++++++++++++++++++++++++
Amk/libutf/runestrstr.c | 44++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/runetype.c | 1151+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utf.7 | 99+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utf.h | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfdef.h | 33+++++++++++++++++++++++++++++++++
Amk/libutf/utfecpy.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libutf/utflen.c | 37+++++++++++++++++++++++++++++++++++++
Amk/libutf/utfnlen.c | 41+++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfrrune.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfrune.c | 44++++++++++++++++++++++++++++++++++++++++++++
Amk/libutf/utfutf.c | 41+++++++++++++++++++++++++++++++++++++++++
Amk/mk/NOTICE | 34++++++++++++++++++++++++++++++++++
Amk/mk/README | 5+++++
Amk/mk/arc.c | 52++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/archive.c | 253+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/bufblock.c | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/env.c | 149+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/file.c | 90+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/fns.h | 88+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/graph.c | 279+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/job.c | 33+++++++++++++++++++++++++++++++++
Amk/mk/lex.c | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/main.c | 287+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/match.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mk.1 | 693+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mk.c | 234+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mk.h | 185+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/mkfile | 35+++++++++++++++++++++++++++++++++++
Amk/mk/parse.c | 318+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/rc.c | 194+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/recipe.c | 117+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/rule.c | 112+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/run.c | 296+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/sh.c | 206+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/shell.c | 80+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/shprint.c | 125+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/symtab.c | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/sys.h | 27+++++++++++++++++++++++++++
Amk/mk/unix.c | 341+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/var.c | 41+++++++++++++++++++++++++++++++++++++++++
Amk/mk/varsub.c | 252+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mk/word.c | 189+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amk/mkfile | 4++++
Amkfile | 5+++++
Anawk/COPYING | 340+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/NOTES | 20++++++++++++++++++++
Anawk/awk.g.y | 468+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/awk.h | 387+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/awk.lx.l | 383+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/b.c | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/lib.c | 852+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/main.c | 215+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/maketab.c | 177+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/mkfile | 53+++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/nawk.1 | 585+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/parse.c | 248+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/run.c | 1962+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/tran.c | 483+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anawk/version.c | 25+++++++++++++++++++++++++
Aod/mkfile | 8++++++++
Aod/od.1 | 291++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aod/od.c | 1078+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/backupfile.c | 246+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/backupfile.h | 39+++++++++++++++++++++++++++++++++++++++
Apatch/common.h | 119+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/inp.c | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/inp.h | 32++++++++++++++++++++++++++++++++
Apatch/mkfile | 7+++++++
Apatch/mkpath.c | 78++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/patch.1 | 700+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/patch.c | 1074+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/pathnames.h | 12++++++++++++
Apatch/pch.c | 1596+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/pch.h | 56++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/util.c | 432+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apatch/util.h | 51+++++++++++++++++++++++++++++++++++++++++++++++++++
Apgrep/mkfile | 11+++++++++++
Apgrep/pgrep.1 | 258+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Apgrep/pgrep.c | 1748+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aprintf/mkfile | 7+++++++
Aprintf/printf.1 | 254+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aprintf/printf.c | 402+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/NOTES | 62++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/mkfile | 8++++++++
Aps/ps.1 | 488+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/ps.1b | 421+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/ps.c | 5043+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aps/ps.dfl | 39+++++++++++++++++++++++++++++++++++++++
Ased/mkfile | 7+++++++
Ased/sed.1 | 369+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/sed.h | 191+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/sed0.c | 1266+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/sed1.c | 917+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ased/version.c | 22++++++++++++++++++++++
Astty/mkfile | 8++++++++
Astty/stty.1 | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astty/stty.1b | 345+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astty/stty.c | 1490+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/NOTES | 61+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/mkfile | 9+++++++++
Atar/tar.1 | 473+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/tar.c | 3204+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atar/tar.dfl | 9+++++++++
Ayacc/depsinc.mk | 1+
Ayacc/dextern | 319+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/getopt.c | 222+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/libmai.c | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/libzer.c | 45+++++++++++++++++++++++++++++++++++++++++++++
Ayacc/mkfile | 15+++++++++++++++
Ayacc/sgs.h | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y1.c | 1098+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y2.c | 1758+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y3.c | 568+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y4.c | 485+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/y5.c | 70++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/yacc.1 | 169+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ayacc/yaccpar | 565+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
394 files changed, 93648 insertions(+), 0 deletions(-)

diff --git a/LICENSE/COPYING b/LICENSE/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. 
+ + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/LICENSE/COPYING.LGPL b/LICENSE/COPYING.LGPL @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. 
Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. + + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. 
Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. + + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. 
+ + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. + + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. 
(Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. 
+ + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. + + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. 
+ +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. + + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". 
Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. +Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. 
+ + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. 
+ + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. 
You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. 
For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. 
+Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. 
+ + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/LICENSE/LICENSE b/LICENSE/LICENSE @@ -0,0 +1,354 @@ +******************************************************************************** +The license for newly written code and changes to existing code is: + + Copyright (c) 2003 Gunnar Ritter + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute + it freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. 
+ +The following tools have been rewritten from scratch: + + basename cat chown cmp copy cp + cpio* csplit cut date dd df dirname + du env expand false fold groups hd + head hostname id install kill line + listusers logins logname man mesg mkdir + mkfifo mknod mt newform news nice nl + nohup od paste pathchk pg pgrep printenv + printf priocntl ps psrinfo pwd rm + rmdir sdiff setpgrp shl sleep split stty + su sync tabs tape tapecntl tee + time touch true tty uname unexpand + users wc who whoami whodo xargs yes + + * See below for the licenses on compression codes. + +All source code and documentation has been changed intensively, thus +the above license applies to all material distributed here, further +restricted by the original licenses. + +******************************************************************************** +Caldera's License for Unix 6th Edition, Unix 7th Edition, and Unix 32V +applies to nearly all manual pages and to the utilities based on these +Unix versions: + + Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + Redistributions of source code and documentation must retain the + above copyright notice, this list of conditions and the following + disclaimer. + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed or owned by Caldera + International, Inc. + Neither the name of Caldera International, Inc. nor the names of + other contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +These utilities have been derived from Ancient Unix code: + + banner bc cal calendar chmod cksum + col comm dc deroff(1b) diff diff3 + ed egrep expr factor fgrep file find + grep join mkdir oawk pr random sed + sort sum tail tar tr tsort uniq + unit + +as well as the 'gmatch' and 'regexp' parts of libcommon. + +******************************************************************************** +Some utilities and manual pages are based on various releases of +4BSD, governed by the following license: + + Copyright (c) 1980, 1993 + The Regents of the University of California. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + 3. 
All advertising materials mentioning features or use of this software + must display the following acknowledgement: + This product includes software developed by the University of + California, Berkeley and its contributors. + 4. Neither the name of the University nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS + BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The following utilities include 4BSD code: + + bc ching dc deroff(1b) diff diff3 fmt + ln(1b) more nawk oawk renice tcopy ul + +******************************************************************************** +The following utilities are based on Sun's OpenSolaris code; the file +OPENSOLARIS.LICENSE contains the licensing conditions for them: + + bdiff bfs dircmp echo fmtmsg getconf getopt mail + mvdir spell test what + +Some manual pages have also been derived from OpenSolaris code; see +the header of the respective page. +Changes to these programs are also subject to the original license. 
+******************************************************************************** +One utility is based on the MINIX 2.0 sources, to which the +following license applies: + + Copyright (c) 1987,1997, Prentice Hall All rights reserved. + + Redistribution and use of the MINIX operating system in source and + binary forms, with or without modification, are permitted provided + that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + Neither the name of Prentice Hall nor the names of the software + authors or contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS, AUTHORS, AND + CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF + MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL PRENTICE HALL OR ANY AUTHORS OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +The utility derived from MINIX sources is: + + ls + +******************************************************************************** +The utility 'nawk' and the library 'libuxre' are based on the Unix tools +made available by Caldera at <http://unixtools.sourceforge.net/>. +GNU GPL 2.0 applies to 'nawk' (see the file COPYING); GNU LGPL 2.1 applies +to 'libuxre' (see COPYING.LGPL). Changes to these tools are subject to these +licenses also. + +******************************************************************************** +The 'deroff' utility is derived from Plan 9 <http://cm.bell-labs.com/plan9dist/> +and is distributed under the terms of the Lucent Public License Version 1.02; +see the file LUCENT. + +******************************************************************************** +The CRC-32 function for cpio was derived from zlib 1.1.4: + + Copyright (C) 1995-2002 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. 
+ + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +******************************************************************************** +The inflate decompression code for the zip support has been derived from +Info-ZIP's zip 5.50: + + This is version 2002-Feb-16 of the Info-ZIP copyright and license. + The definitive version of this document should be available at + ftp://ftp.info-zip.org/pub/infozip/license.html indefinitely. + + + Copyright (c) 1990-2002 Info-ZIP. All rights reserved. + + For the purposes of this copyright and license, "Info-ZIP" is defined as + the following set of individuals: + + Mark Adler, John Bush, Karl Davis, Harald Denker, Jean-Michel + Dubois, Jean-loup Gailly, Hunter Goatley, Ian Gorman, Chris + Herborth, Dirk Haase, Greg Hartwig, Robert Heath, Jonathan Hudson, + Paul Kienitz, David Kirschbaum, Johnny Lee, Onno van der Linden, + Igor Mandrichenko, Steve P. Miller, Sergio Monesi, Keith Owens, + George Petrov, Greg Roelofs, Kai Uwe Rommel, Steve Salisbury, Dave + Smith, Christian Spieler, Antoine Verheijen, Paul von Behren, Rich + Wales, Mike White + + This software is provided "as is," without warranty of any kind, + express or implied. In no event shall Info-ZIP or its contributors + be held liable for any direct, indirect, incidental, special or + consequential damages arising out of the use of or inability to use + this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute + it freely, subject to the following restrictions: + + 1. Redistributions of source code must retain the above copyright + notice, definition, disclaimer, and this list of conditions. + + 2. Redistributions in binary form (compiled executables) must + reproduce the above copyright notice, definition, disclaimer, + and this list of conditions in documentation and/or other + materials provided with the distribution. 
The sole exception + to this condition is redistribution of a standard UnZipSFX + binary as part of a self-extracting archive; that is permitted + without inclusion of this license, as long as the normal + UnZipSFX banner has not been removed from the binary or + disabled. + + 3. Altered versions--including, but not limited to, ports to new + operating systems, existing ports with new graphical + interfaces, and dynamic, shared, or static library + versions--must be plainly marked as such and must not be + misrepresented as being the original source. Such altered + versions also must not be misrepresented as being Info-ZIP + releases--including, but not limited to, labeling of the + altered versions with the names "Info-ZIP" (or any variation + thereof, including, but not limited to, different + capitalizations), "Pocket UnZip," "WiZ" or "MacZip" without + the explicit permission of Info-ZIP. Such altered versions + are further prohibited from misrepresentative use of the + Zip-Bugs or Info-ZIP e-mail addresses or of the Info-ZIP + URL(s). + + 4. Info-ZIP retains the right to use the names "Info-ZIP," "Zip," + "UnZip," "UnZipSFX," "WiZ," "Pocket UnZip," "Pocket Zip," and + "MacZip" for its own source and binary releases. + +******************************************************************************** +The unshrink decompression code is derived from Info-ZIP's unzip 5.40: + + * Copyright (c) 1994 Greg Roelofs. + * Permission is granted to any individual/institution/corporate + * entity to use, copy, redistribute or modify this software for + * any purpose whatsoever, subject to the conditions noted in the + * Frequently Asked Questions section below, plus one additional + * condition: namely, that my name not be removed from the source + * code. (Other names may, of course, be added as modifications + * are made.) Corporate legal staff (like at IBM :-) ) who have + * problems understanding this can contact me through Zip-Bugs... + + + Q. 
Can I use the source code of Zip and UnZip in my commercial + application? + + A. Yes, so long as you include in your product an acknowledgment; a + pointer to the original, free compression sources; and a statement + making it clear that there are no extra or hidden charges resulting + from the use of our compression code in your product (see below for + an example). The acknowledgment should appear in at least one piece + of human-readable documentation (e.g., a README file or man page), + although additionally putting it in the executable(s) is OK, too. + In other words, you are allowed to sell only your own work, not ours, + and we'd like a little credit. (Note the additional restrictions + above on the code in unreduce.c, unshrink.c, vms.c, time_lib.c, and + everything in the wince and windll subdirectories.) Contact us at + Zip-Bugs@lists.wku.edu if you have special requirements. We also + like to hear when our code is being used, but we don't require that. + + <Product> incorporates compression code from the Info-ZIP group. + There are no extra charges or costs due to the use of this code, + and the original compression sources are freely available from + http://www.cdrom.com/pub/infozip/ or ftp://ftp.cdrom.com/pub/infozip/ + on the Internet. + + If you only need compression capability, not full zipfile support, + you might want to look at zlib instead; it has fewer restrictions + on commercial use. See http://www.cdrom.com/pub/infozip/zlib/ . + +******************************************************************************** +The blast decompression code (for DCL imploded zip archive entries) was +derived from code by Mark Adler distributed with zlib 1.2.1: + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. 
+ + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + +******************************************************************************** + +******************************************************************************** +The explode decompression code is derived from unzip 5.40; this version +of this code was put in the public domain by Mark Adler. + +******************************************************************************** + +Gunnar Ritter 2/3/07 diff --git a/LICENSE/LUCENT b/LICENSE/LUCENT @@ -0,0 +1,258 @@ +The Plan 9 software is provided under the terms of the +Lucent Public License, Version 1.02, reproduced below, +with the following exceptions: + +1. No right is granted to create derivative works of or + to redistribute (other than with the Plan 9 Operating System) + the screen imprinter fonts identified in subdirectory + /lib/font/bit/lucida and printer fonts (Lucida Sans Unicode, Lucida + Sans Italic, Lucida Sans Demibold, Lucida Typewriter, Lucida Sans + Typewriter83), identified in subdirectory /sys/lib/postscript/font. + These directories contain material copyrights by B&H Inc. and Y&Y Inc. + +2. The printer fonts identified in subdirectory /sys/lib/ghostscript/font + are subject to the GNU GPL, reproduced in the file /LICENSE.gpl. + +3. 
The ghostscript program in the subdirectory /sys/src/cmd/gs is + covered by the Aladdin Free Public License, reproduced in the file + /LICENSE.afpl. + +=================================================================== + +Lucent Public License Version 1.02 + +THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS PUBLIC +LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE +PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. + +1. DEFINITIONS + +"Contribution" means: + + a. in the case of Lucent Technologies Inc. ("LUCENT"), the Original + Program, and + b. in the case of each Contributor, + + i. changes to the Program, and + ii. additions to the Program; + + where such changes and/or additions to the Program were added to the + Program by such Contributor itself or anyone acting on such + Contributor's behalf, and the Contributor explicitly consents, in + accordance with Section 3C, to characterization of the changes and/or + additions as Contributions. + +"Contributor" means LUCENT and any other entity that has Contributed a +Contribution to the Program. + +"Distributor" means a Recipient that distributes the Program, +modifications to the Program, or any part thereof. + +"Licensed Patents" mean patent claims licensable by a Contributor +which are necessarily infringed by the use or sale of its Contribution +alone or when combined with the Program. + +"Original Program" means the original version of the software +accompanying this Agreement as released by LUCENT, including source +code, object code and documentation, if any. + +"Program" means the Original Program and Contributions or any part +thereof + +"Recipient" means anyone who receives the Program under this +Agreement, including all Contributors. + +2. GRANT OF RIGHTS + + a. 
Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free copyright + license to reproduce, prepare derivative works of, publicly display, + publicly perform, distribute and sublicense the Contribution of such + Contributor, if any, and such derivative works, in source code and + object code form. + + b. Subject to the terms of this Agreement, each Contributor hereby + grants Recipient a non-exclusive, worldwide, royalty-free patent + license under Licensed Patents to make, use, sell, offer to sell, + import and otherwise transfer the Contribution of such Contributor, if + any, in source code and object code form. The patent license granted + by a Contributor shall also apply to the combination of the + Contribution of that Contributor and the Program if, at the time the + Contribution is added by the Contributor, such addition of the + Contribution causes such combination to be covered by the Licensed + Patents. The patent license granted by a Contributor shall not apply + to (i) any other combinations which include the Contribution, nor to + (ii) Contributions of other Contributors. No hardware per se is + licensed hereunder. + + c. Recipient understands that although each Contributor grants the + licenses to its Contributions set forth herein, no assurances are + provided by any Contributor that the Program does not infringe the + patent or other intellectual property rights of any other entity. Each + Contributor disclaims any liability to Recipient for claims brought by + any other entity based on infringement of intellectual property rights + or otherwise. As a condition to exercising the rights and licenses + granted hereunder, each Recipient hereby assumes sole responsibility + to secure any other intellectual property rights needed, if any. 
For + example, if a third party patent license is required to allow + Recipient to distribute the Program, it is Recipient's responsibility + to acquire that license before distributing the Program. + + d. Each Contributor represents that to its knowledge it has sufficient + copyright rights in its Contribution, if any, to grant the copyright + license set forth in this Agreement. + +3. REQUIREMENTS + +A. Distributor may choose to distribute the Program in any form under +this Agreement or under its own license agreement, provided that: + + a. it complies with the terms and conditions of this Agreement; + + b. if the Program is distributed in source code or other tangible + form, a copy of this Agreement or Distributor's own license agreement + is included with each copy of the Program; and + + c. if distributed under Distributor's own license agreement, such + license agreement: + + i. effectively disclaims on behalf of all Contributors all warranties + and conditions, express and implied, including warranties or + conditions of title and non-infringement, and implied warranties or + conditions of merchantability and fitness for a particular purpose; + ii. effectively excludes on behalf of all Contributors all liability + for damages, including direct, indirect, special, incidental and + consequential damages, such as lost profits; and + iii. states that any provisions which differ from this Agreement are + offered by that Contributor alone and not by any other party. + +B. Each Distributor must include the following in a conspicuous + location in the Program: + + Copyright (C) 2003, Lucent Technologies Inc. and others. All Rights + Reserved. + +C. In addition, each Contributor must identify itself as the +originator of its Contribution in a manner that reasonably allows +subsequent Recipients to identify the originator of the Contribution. +Also, each Contributor must agree that the additions and/or changes +are intended to be a Contribution. 
Once a Contribution is contributed, +it may not thereafter be revoked. + +4. COMMERCIAL DISTRIBUTION + +Commercial distributors of software may accept certain +responsibilities with respect to end users, business partners and the +like. While this license is intended to facilitate the commercial use +of the Program, the Distributor who includes the Program in a +commercial product offering should do so in a manner which does not +create potential liability for Contributors. Therefore, if a +Distributor includes the Program in a commercial product offering, +such Distributor ("Commercial Distributor") hereby agrees to defend +and indemnify every Contributor ("Indemnified Contributor") against +any losses, damages and costs (collectively"Losses") arising from +claims, lawsuits and other legal actions brought by a third party +against the Indemnified Contributor to the extent caused by the acts +or omissions of such Commercial Distributor in connection with its +distribution of the Program in a commercial product offering. The +obligations in this section do not apply to any claims or Losses +relating to any actual or alleged intellectual property infringement. +In order to qualify, an Indemnified Contributor must: a) promptly +notify the Commercial Distributor in writing of such claim, and b) +allow the Commercial Distributor to control, and cooperate with the +Commercial Distributor in, the defense and any related settlement +negotiations. The Indemnified Contributor may participate in any such +claim at its own expense. + +For example, a Distributor might include the Program in a commercial +product offering, Product X. That Distributor is then a Commercial +Distributor. If that Commercial Distributor then makes performance +claims, or offers warranties related to Product X, those performance +claims and warranties are such Commercial Distributor's responsibility +alone. 
Under this section, the Commercial Distributor would have to +defend claims against the Contributors related to those performance +claims and warranties, and if a court requires any Contributor to pay +any damages as a result, the Commercial Distributor must pay those +damages. + +5. NO WARRANTY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS +PROVIDED ON AN"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY +WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY +OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely +responsible for determining the appropriateness of using and +distributing the Program and assumes all risks associated with its +exercise of rights under this Agreement, including but not limited to +the risks and costs of program errors, compliance with applicable +laws, damage to or loss of data, programs or equipment, and +unavailability or interruption of operations. + +6. DISCLAIMER OF LIABILITY + +EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR +ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING +WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR +DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED +HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +7. EXPORT CONTROL + +Recipient agrees that Recipient alone is responsible for compliance +with the United States export administration regulations (and the +export control laws and regulation of any other countries). + +8. 
GENERAL + +If any provision of this Agreement is invalid or unenforceable under +applicable law, it shall not affect the validity or enforceability of +the remainder of the terms of this Agreement, and without further +action by the parties hereto, such provision shall be reformed to the +minimum extent necessary to make such provision valid and enforceable. + +If Recipient institutes patent litigation against a Contributor with +respect to a patent applicable to software (including a cross-claim or +counterclaim in a lawsuit), then any patent licenses granted by that +Contributor to such Recipient under this Agreement shall terminate as +of the date such litigation is filed. In addition, if Recipient +institutes patent litigation against any entity (including a +cross-claim or counterclaim in a lawsuit) alleging that the Program +itself (excluding combinations of the Program with other software or +hardware) infringes such Recipient's patent(s), then such Recipient's +rights granted under Section 2(b) shall terminate as of the date such +litigation is filed. + +All Recipient's rights under this Agreement shall terminate if it +fails to comply with any of the material terms or conditions of this +Agreement and does not cure such failure in a reasonable period of +time after becoming aware of such noncompliance. If all Recipient's +rights under this Agreement terminate, Recipient agrees to cease use +and distribution of the Program as soon as reasonably practicable. +However, Recipient's obligations under this Agreement and any licenses +granted by Recipient relating to the Program shall continue and +survive. + +LUCENT may publish new versions (including revisions) of this +Agreement from time to time. Each new version of the Agreement will be +given a distinguishing version number. The Program (including +Contributions) may always be distributed subject to the version of the +Agreement under which it was received. 
In addition, after a new +version of the Agreement is published, Contributor may elect to +distribute the Program (including its Contributions) under the new +version. No one other than LUCENT has the right to modify this +Agreement. Except as expressly stated in Sections 2(a) and 2(b) above, +Recipient receives no rights or licenses to the intellectual property +of any Contributor under this Agreement, whether expressly, by +implication, estoppel or otherwise. All rights in the Program not +expressly granted under this Agreement are reserved. + +This Agreement is governed by the laws of the State of New York and +the intellectual property laws of the United States of America. No +party to this Agreement will bring a legal action under this Agreement +more than one year after the cause of action arose. Each party waives +its rights to a jury trial in any resulting litigation. + diff --git a/LICENSE/OPENSOLARIS.LICENSE b/LICENSE/OPENSOLARIS.LICENSE @@ -0,0 +1,385 @@ +Unless otherwise noted, all files in this distribution are released +under the Common Development and Distribution License (CDDL), +Version 1.0 only. Exceptions are noted within the associated +source files. + +-------------------------------------------------------------------- + + +COMMON DEVELOPMENT AND DISTRIBUTION LICENSE Version 1.0 + +1. Definitions. + + 1.1. "Contributor" means each individual or entity that creates + or contributes to the creation of Modifications. + + 1.2. "Contributor Version" means the combination of the Original + Software, prior Modifications used by a Contributor (if any), + and the Modifications made by that particular Contributor. + + 1.3. "Covered Software" means (a) the Original Software, or (b) + Modifications, or (c) the combination of files containing + Original Software with files containing Modifications, in + each case including portions thereof. + + 1.4. "Executable" means the Covered Software in any form other + than Source Code. + + 1.5. 
"Initial Developer" means the individual or entity that first + makes Original Software available under this License. + + 1.6. "Larger Work" means a work which combines Covered Software or + portions thereof with code not governed by the terms of this + License. + + 1.7. "License" means this document. + + 1.8. "Licensable" means having the right to grant, to the maximum + extent possible, whether at the time of the initial grant or + subsequently acquired, any and all of the rights conveyed + herein. + + 1.9. "Modifications" means the Source Code and Executable form of + any of the following: + + A. Any file that results from an addition to, deletion from or + modification of the contents of a file containing Original + Software or previous Modifications; + + B. Any new file that contains any part of the Original + Software or previous Modifications; or + + C. Any new file that is contributed or otherwise made + available under the terms of this License. + + 1.10. "Original Software" means the Source Code and Executable + form of computer software code that is originally released + under this License. + + 1.11. "Patent Claims" means any patent claim(s), now owned or + hereafter acquired, including without limitation, method, + process, and apparatus claims, in any patent Licensable by + grantor. + + 1.12. "Source Code" means (a) the common form of computer software + code in which modifications are made and (b) associated + documentation included in or with such code. + + 1.13. "You" (or "Your") means an individual or a legal entity + exercising rights under, and complying with all of the terms + of, this License. For legal entities, "You" includes any + entity which controls, is controlled by, or is under common + control with You. 
For purposes of this definition, + "control" means (a) the power, direct or indirect, to cause + the direction or management of such entity, whether by + contract or otherwise, or (b) ownership of more than fifty + percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants. + + 2.1. The Initial Developer Grant. + + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, the Initial + Developer hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Initial Developer, to use, + reproduce, modify, display, perform, sublicense and + distribute the Original Software (or portions thereof), + with or without Modifications, and/or as part of a Larger + Work; and + + (b) under Patent Claims infringed by the making, using or + selling of Original Software, to make, have made, use, + practice, sell, and offer for sale, and/or otherwise + dispose of the Original Software (or portions thereof). + + (c) The licenses granted in Sections 2.1(a) and (b) are + effective on the date Initial Developer first distributes + or otherwise makes the Original Software available to a + third party under the terms of this License. + + (d) Notwithstanding Section 2.1(b) above, no patent license is + granted: (1) for code that You delete from the Original + Software, or (2) for infringements caused by: (i) the + modification of the Original Software, or (ii) the + combination of the Original Software with other software + or devices. + + 2.2. Contributor Grant. 
+ + Conditioned upon Your compliance with Section 3.1 below and + subject to third party intellectual property claims, each + Contributor hereby grants You a world-wide, royalty-free, + non-exclusive license: + + (a) under intellectual property rights (other than patent or + trademark) Licensable by Contributor to use, reproduce, + modify, display, perform, sublicense and distribute the + Modifications created by such Contributor (or portions + thereof), either on an unmodified basis, with other + Modifications, as Covered Software and/or as part of a + Larger Work; and + + (b) under Patent Claims infringed by the making, using, or + selling of Modifications made by that Contributor either + alone and/or in combination with its Contributor Version + (or portions of such combination), to make, use, sell, + offer for sale, have made, and/or otherwise dispose of: + (1) Modifications made by that Contributor (or portions + thereof); and (2) the combination of Modifications made by + that Contributor with its Contributor Version (or portions + of such combination). + + (c) The licenses granted in Sections 2.2(a) and 2.2(b) are + effective on the date Contributor first distributes or + otherwise makes the Modifications available to a third + party. + + (d) Notwithstanding Section 2.2(b) above, no patent license is + granted: (1) for any code that Contributor has deleted + from the Contributor Version; (2) for infringements caused + by: (i) third party modifications of Contributor Version, + or (ii) the combination of Modifications made by that + Contributor with other software (except as part of the + Contributor Version) or other devices; or (3) under Patent + Claims infringed by Covered Software in the absence of + Modifications made by that Contributor. + +3. Distribution Obligations. + + 3.1. Availability of Source Code. 
+ + Any Covered Software that You distribute or otherwise make + available in Executable form must also be made available in Source + Code form and that Source Code form must be distributed only under + the terms of this License. You must include a copy of this + License with every copy of the Source Code form of the Covered + Software You distribute or otherwise make available. You must + inform recipients of any such Covered Software in Executable form + as to how they can obtain such Covered Software in Source Code + form in a reasonable manner on or through a medium customarily + used for software exchange. + + 3.2. Modifications. + + The Modifications that You create or to which You contribute are + governed by the terms of this License. You represent that You + believe Your Modifications are Your original creation(s) and/or + You have sufficient rights to grant the rights conveyed by this + License. + + 3.3. Required Notices. + + You must include a notice in each of Your Modifications that + identifies You as the Contributor of the Modification. You may + not remove or alter any copyright, patent or trademark notices + contained within the Covered Software, or any notices of licensing + or any descriptive text giving attribution to any Contributor or + the Initial Developer. + + 3.4. Application of Additional Terms. + + You may not offer or impose any terms on any Covered Software in + Source Code form that alters or restricts the applicable version + of this License or the recipients' rights hereunder. You may + choose to offer, and to charge a fee for, warranty, support, + indemnity or liability obligations to one or more recipients of + Covered Software. However, you may do so only on Your own behalf, + and not on behalf of the Initial Developer or any Contributor. 
+ You must make it absolutely clear that any such warranty, support, + indemnity or liability obligation is offered by You alone, and You + hereby agree to indemnify the Initial Developer and every + Contributor for any liability incurred by the Initial Developer or + such Contributor as a result of warranty, support, indemnity or + liability terms You offer. + + 3.5. Distribution of Executable Versions. + + You may distribute the Executable form of the Covered Software + under the terms of this License or under the terms of a license of + Your choice, which may contain terms different from this License, + provided that You are in compliance with the terms of this License + and that the license for the Executable form does not attempt to + limit or alter the recipient's rights in the Source Code form from + the rights set forth in this License. If You distribute the + Covered Software in Executable form under a different license, You + must make it absolutely clear that any terms which differ from + this License are offered by You alone, not by the Initial + Developer or Contributor. You hereby agree to indemnify the + Initial Developer and every Contributor for any liability incurred + by the Initial Developer or such Contributor as a result of any + such terms You offer. + + 3.6. Larger Works. + + You may create a Larger Work by combining Covered Software with + other code not governed by the terms of this License and + distribute the Larger Work as a single product. In such a case, + You must make sure the requirements of this License are fulfilled + for the Covered Software. + +4. Versions of the License. + + 4.1. New Versions. + + Sun Microsystems, Inc. is the initial license steward and may + publish revised and/or new versions of this License from time to + time. Each version will be given a distinguishing version number. + Except as provided in Section 4.3, no one other than the license + steward has the right to modify this License. + + 4.2. 
Effect of New Versions. + + You may always continue to use, distribute or otherwise make the + Covered Software available under the terms of the version of the + License under which You originally received the Covered Software. + If the Initial Developer includes a notice in the Original + Software prohibiting it from being distributed or otherwise made + available under any subsequent version of the License, You must + distribute and make the Covered Software available under the terms + of the version of the License under which You originally received + the Covered Software. Otherwise, You may also choose to use, + distribute or otherwise make the Covered Software available under + the terms of any subsequent version of the License published by + the license steward. + + 4.3. Modified Versions. + + When You are an Initial Developer and You want to create a new + license for Your Original Software, You may create and use a + modified version of this License if You: (a) rename the license + and remove any references to the name of the license steward + (except to note that the license differs from this License); and + (b) otherwise make it clear that the license contains terms which + differ from this License. + +5. DISCLAIMER OF WARRANTY. + + COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" + BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, + INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED + SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR + PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND + PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY + COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE + INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF ANY + NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF + WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF + ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS + DISCLAIMER. + +6. 
TERMINATION. + + 6.1. This License and the rights granted hereunder will terminate + automatically if You fail to comply with terms herein and fail to + cure such breach within 30 days of becoming aware of the breach. + Provisions which, by their nature, must remain in effect beyond + the termination of this License shall survive. + + 6.2. If You assert a patent infringement claim (excluding + declaratory judgment actions) against Initial Developer or a + Contributor (the Initial Developer or Contributor against whom You + assert such claim is referred to as "Participant") alleging that + the Participant Software (meaning the Contributor Version where + the Participant is a Contributor or the Original Software where + the Participant is the Initial Developer) directly or indirectly + infringes any patent, then any and all rights granted directly or + indirectly to You by such Participant, the Initial Developer (if + the Initial Developer is not the Participant) and all Contributors + under Sections 2.1 and/or 2.2 of this License shall, upon 60 days + notice from Participant terminate prospectively and automatically + at the expiration of such 60 day notice period, unless if within + such 60 day period You withdraw Your claim with respect to the + Participant Software against such Participant either unilaterally + or pursuant to a written agreement with Participant. + + 6.3. In the event of termination under Sections 6.1 or 6.2 above, + all end user licenses that have been validly granted by You or any + distributor hereunder prior to termination (excluding licenses + granted to You by any distributor) shall survive termination. + +7. LIMITATION OF LIABILITY. 
+ + UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT + (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE + INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF + COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE + LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR + CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT + LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK + STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER + COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN + INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF + LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL + INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT + APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO + NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR + CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT + APPLY TO YOU. + +8. U.S. GOVERNMENT END USERS. + + The Covered Software is a "commercial item," as that term is + defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial + computer software" (as that term is defined at 48 + C.F.R. 252.227-7014(a)(1)) and "commercial computer software + documentation" as such terms are used in 48 C.F.R. 12.212 + (Sept. 1995). Consistent with 48 C.F.R. 12.212 and 48 + C.F.R. 227.7202-1 through 227.7202-4 (June 1995), all + U.S. Government End Users acquire Covered Software with only those + rights set forth herein. This U.S. Government Rights clause is in + lieu of, and supersedes, any other FAR, DFAR, or other clause or + provision that addresses Government rights in computer software + under this License. + +9. MISCELLANEOUS. + + This License represents the complete agreement concerning subject + matter hereof. If any provision of this License is held to be + unenforceable, such provision shall be reformed only to the extent + necessary to make it enforceable. 
This License shall be governed + by the law of the jurisdiction specified in a notice contained + within the Original Software (except to the extent applicable law, + if any, provides otherwise), excluding such jurisdiction's + conflict-of-law provisions. Any litigation relating to this + License shall be subject to the jurisdiction of the courts located + in the jurisdiction and venue specified in a notice contained + within the Original Software, with the losing party responsible + for costs, including, without limitation, court costs and + reasonable attorneys' fees and expenses. The application of the + United Nations Convention on Contracts for the International Sale + of Goods is expressly excluded. Any law or regulation which + provides that the language of a contract shall be construed + against the drafter shall not apply to this License. You agree + that You alone are responsible for compliance with the United + States export administration regulations (and the export control + laws and regulation of any other countries) when You use, + distribute or otherwise make available any Covered Software. + +10. RESPONSIBILITY FOR CLAIMS. + + As between Initial Developer and the Contributors, each party is + responsible for claims and damages arising, directly or + indirectly, out of its utilization of rights under this License + and You agree to work with Initial Developer and Contributors to + distribute such responsibility on an equitable basis. Nothing + herein is intended or shall be deemed to constitute any admission + of liability. + +-------------------------------------------------------------------- + +NOTICE PURSUANT TO SECTION 9 OF THE COMMON DEVELOPMENT AND +DISTRIBUTION LICENSE (CDDL) + +For Covered Software in this distribution, this License shall +be governed by the laws of the State of California (excluding +conflict-of-law provisions). 
+ +Any litigation relating to this License shall be subject to the +jurisdiction of the Federal Courts of the Northern District of +California and the state courts of the State of California, with +venue lying in Santa Clara County, California. diff --git a/LICENSE/README b/LICENSE/README @@ -0,0 +1,27 @@ +README for license conditions of the Heirloom Toolchest +======================================================= + +The Heirloom Toolchest is derived from a variety of sources; the +respective licensing terms can be found in the other files in this +directory; in addition, each source file contains the license terms +of the original author at its top. + +All newly written code is put under a zlib-style license (except for +additions to the GPL and LGPL code in awk and libuxre). The rationale +is that for something distributed as widely as Unix code, any license +that requires more than naming the author would only cause annoyance. + +In effect, this means that commercial Unix vendors who already have a +Unix source code license can use nearly all of this code without being +forced to mention it in other places than the source code files. + +However, if you work for such a vendor, don't do so. Instead, convince +the management to release at least the utility source. There is really +nothing to keep secret about it to have an advantage over competitors, +as any person or company can simply use the source of this or another +toolchest to have comparable functionality. So by releasing the source +to your version, you lose nothing, but you will make your users happy +since they can use it as a reference. And happy users also mean more +money in the end. + +Gunnar Ritter 9/22/03 diff --git a/README b/README @@ -0,0 +1,5 @@ +hbase is a collection of programs that complements sbase and ubase. It's meant +to be a temporary project that will shrink and die once sbase and ubase get +implementations of most of the programs included. 
hbase mostly contains +programs taken from the Heirloom project, but also has other programs, such as +patch taken from FreeBSD/OpenBSD and mk taken from plan9port. diff --git a/_install/install.1b b/_install/install.1b @@ -0,0 +1,113 @@ +.\" +.\" Copyright (c) 2003 Gunnar Ritter +.\" +.\" This software is provided 'as-is', without any express or implied +.\" warranty. In no event will the authors be held liable for any damages +.\" arising from the use of this software. +.\" +.\" Permission is granted to anyone to use this software for any purpose, +.\" including commercial applications, and to alter it and redistribute +.\" it freely, subject to the following restrictions: +.\" +.\" 1. The origin of this software must not be misrepresented; you must not +.\" claim that you wrote the original software. If you use this software +.\" in a product, an acknowledgment in the product documentation would be +.\" appreciated but is not required. +.\" +.\" 2. Altered source versions must be plainly marked as such, and must not be +.\" misrepresented as being the original software. +.\" +.\" 3. This notice may not be removed or altered from any source distribution. +.\" Sccsid @(#)install.1b 1.3 (gritter) 4/17/03 +.TH INSTALL 1B "4/17/03" "Heirloom Toolchest" "BSD System Compatibility" +.SH NAME +install \- (BSD) install files +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/ucb/install\fR [\fB\-cs\fR] [\fB-g\fI\ group\fR] [\fB\-m\fI\ mode\fR] +[\fB-o\fI\ owner\fR] +\fIfile1 file2\fR +.HP +.ad l +.nh +\fB/usr/ucb/install\fR [\fB\-cs\fR] [\fB-g\fI\ group\fR] [\fB\-m\fI\ mode\fR] +[\fB-o\fI\ owner\fR] +\fIfile\fR\ .\ .\ .\ \fIdirectory\fR +.HP +.ad l +.nh +\fB/usr/ucb/install\fR \fB\-d\fR [\fB\-g\fI\ group\fR] [\fB\-m\fI\ mode\fR] +[\fB\-o\fI\ owner\fR] \fIdirectory\fR +.br +.ad b +.hy 1 +.SH DESCRIPTION +The +.I install +command copies one regular file to a destination file +or one or more regular files into a destination directory. 
+It is commonly used within Makefiles +to install newly created software components. +.PP +If the +.B \-d +option is present, +.I install +creates the named +.IR directory , +also creating non-existent parent directories. +It is not an error if the directory already exists. +The +.BR \-g , +.BR \-m , +and +.B \-o +options +apply to the last pathname component only; +attributes are set whether the directory is newly created or not. +Parent directories are always created using a default mode of 777 +minus umask +and default ownerships. +.PP +The following options are also accepted: +.TP 10 +.B \-c +This option is ignored and exists for compatibility only. +Ancient versions of this command removed the source file +unless this option was present. +.TP 10 +.B \-s +Strip the target files +(i.\|e. execute the +.IR strip (1) +command on them). +.TP 10 +\fB\-g\fI group\fR +Use the given +.I group +ownership for target files. +By default, +the group of the invoking user is used. +.TP 10 +\fB\-m\fI mode\fR +Set the access permissions of target files to octal +.IR mode . +By default, +mode 755 is used. +.TP 10 +\fB\-o\fI owner\fR +Specifies the +.I owner +of target files. +By default, +target files are owned by the invoking user. +.SH "SEE ALSO" +cp(1), +chgrp(1), +chmod(1), +chown(1), +make(1), +mkdir(1), +strip(1) diff --git a/_install/install.c b/_install/install.c @@ -0,0 +1,436 @@ +/* + * install - (BSD style) install files + * + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)/usr/ucb/install.sl 1.12 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <limits.h> +#include <pwd.h> +#include <grp.h> + +enum okay { + OKAY = 0, + STOP = 1 +}; + +static int mflag; /* -m option present */ +static int sflag; /* strip files */ +static mode_t mode = 0755; /* mode to set */ +static int dflag; /* create directories */ +static int gflag; /* set group */ +static gid_t group; /* group to set */ +static int oflag; /* set owner */ +static uid_t owner; /* owner to set */ +static int errcnt; /* count of errors */ +static char *progname; /* argv[0] to main */ + +void * +srealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} + +void * +smalloc(size_t size) +{ + return srealloc(NULL, size); +} + +uid_t +getowner(const char *string) +{ + struct passwd *pwd; + char *x; + long val; + + if ((pwd = getpwnam(string)) != NULL) + return pwd->pw_uid; + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, "%s: unknown user %s.\n", progname, string); + 
exit(1); + } + return val; +} + +gid_t +getgroup(const char *string) +{ + struct group *grp; + char *x; + long val; + + if ((grp = getgrnam(string)) != NULL) + return grp->gr_gid; + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, "%s: unknown group %s.\n", progname, string); + exit(1); + } + return val; +} + +void +getpath(const char *path, char **file, char **filend, size_t *sz, size_t *slen) +{ + *sz = 14 + strlen(path) + 2; + *file = smalloc(*sz); + *filend = *file; + if (path[0] == '/' && path[1] == '\0') + *(*filend)++ = '/'; + else { + const char *cp = path; + while ((*(*filend)++ = *cp++) != '\0'); + (*filend)[-1] = '/'; + } + *slen = *filend - *file; +} + +void +setpath(const char *base, char **file, char **filend, + size_t slen, size_t *sz, size_t *ss) +{ + if (slen + (*ss = strlen(base)) >= *sz) { + *sz += slen + *ss + 15; + *file = srealloc(*file, *sz); + *filend = &(*file)[slen]; + } + strcpy(*filend, base); +} + +void +fdcopy(const char *src, const struct stat *ssp, const int sfd, + const char *tgt, const struct stat *dsp, const int dfd) +{ + char *buf; + size_t bufsize; + ssize_t rsz, wo, wt; + + if ((bufsize = ssp->st_blksize) < dsp->st_blksize) + if ((bufsize = dsp->st_blksize) <= 0) + bufsize = 512; + buf = smalloc(bufsize); + while ((rsz = read(sfd, buf, bufsize)) > 0) { + wt = 0; + do { + if ((wo = write(dfd, buf + wt, rsz - wt)) < 0) { + fprintf(stderr, "%s: write: %s: %s\n", + progname, tgt, + strerror(errno)); + errcnt |= 01; + unlink(tgt); + free(buf); + return; + } + wt += wo; + } while (wt < rsz); + } + if (rsz < 0) { + fprintf(stderr, "%s: read: %s: %s\n", + progname, src, strerror(errno)); + errcnt |= 01; + unlink(tgt); + } + free(buf); +} + +static void +usage(void) +{ + fprintf(stderr, "\ +usage: %s [-cs] [-g group] [-m mode] [-o owner] file ... 
destination\n\ + %s -d [-g group] [-m mode] [-o owner] dir\n", + progname, progname); + exit(2); +} + +static void +strip(const char *file) +{ + const char cpr[] = "strip "; + char *cmd, *cp; + const char *sp; + + cp = cmd = smalloc(strlen(cpr) + strlen(file) + 1); + for (sp = cpr; *sp; sp++) + *cp++ = *sp; + for (sp = file; *sp; sp++) + *cp++ = *sp; + *cp = '\0'; + system(cmd); + free(cmd); +} + +static enum okay +chgown(const char *fn, struct stat *sp) +{ + struct stat st; + + if (sp == NULL) { + if (stat(fn, &st) < 0) { + fprintf(stderr, "%s: stat: %s: %s\n", + progname, fn, strerror(errno)); + errcnt |= 1; + return STOP; + } + sp = &st; + } + if (!oflag) + owner = sp->st_uid; + if (!gflag) + group = sp->st_gid; + if (chown(fn, owner, group) < 0) { + fprintf(stderr, "%s: chown: %s: %s\n", progname, fn, + strerror(errno)); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static enum okay +check(const char *src, const char *tgt, const struct stat *dsp, + struct stat *ssp) +{ + if (stat(src, ssp) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, src, + strerror(errno)); + errcnt |= 01; + return STOP; + } + if ((ssp->st_mode&S_IFMT) != S_IFREG && strcmp(src, "/dev/null")) { + fprintf(stderr, "%s: %s isn't a regular file.\n", + progname, src); + errcnt |= 01; + return STOP; + } + if (dsp && (ssp->st_dev == dsp->st_dev && ssp->st_ino == dsp->st_ino)) { + fprintf(stderr, "%s: %s and %s are the same file.\n", + progname, src, tgt); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static void +cp(const char *src, const char *tgt, struct stat *dsp) +{ + struct stat sst, nst; + int sfd, dfd; + + if (check(src, tgt, dsp, &sst) != OKAY) + return; + unlink(tgt); + if ((dfd = creat(tgt, 0700)) < 0 || fchmod(dfd, 0700) < 0 || + fstat(dfd, &nst) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, src, + strerror(errno)); + errcnt |= 01; + if (dfd >= 0) + close(dfd); + return; + } + if ((sfd = open(src, O_RDONLY)) < 0) { + fprintf(stderr, "%s: open: %s: %s\n", 
progname, src, + strerror(errno)); + errcnt |= 01; + return; + } + fdcopy(src, &sst, sfd, tgt, &nst, dfd); + close(dfd); + close(sfd); + if (sflag) + strip(tgt); + if (oflag || gflag) + chgown(tgt, &nst); + if (chmod(tgt, mode) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, tgt, strerror(errno)); + errcnt |= 01; + } +} + +static void +installf(int ac, char **av) +{ + struct stat dst, ust; + + if (lstat(av[ac-1], &dst) == 0) { + if ((dst.st_mode&S_IFMT) != S_IFLNK || + stat(av[ac-1], &ust) < 0) + ust = dst; + if ((ust.st_mode&S_IFMT) == S_IFDIR) { + char *copy, *cend; + size_t sz, slen, ss; + int i; + + getpath(av[ac-1], &copy, &cend, &sz, &slen); + for (i = 0; i < ac-1; i++) { + setpath(basename(av[i]), &copy, &cend, + slen, &sz, &ss); + cp(av[i], copy, stat(copy, &dst) < 0 ? + NULL : &dst); + } + } else if (ac > 2) + usage(); + else + cp(av[0], av[1], &ust); + } else if (ac > 2) + usage(); + else + cp(av[0], av[1], NULL); +} + +static enum okay +makedir(const char *dir) +{ + struct stat st; + + if (mkdir(dir, 0777) < 0) { + if (errno == EEXIST) { + if (stat(dir, &st) < 0 || + (st.st_mode&S_IFMT) != S_IFDIR){ + fprintf(stderr, "%s: %s is not a directory\n", + progname, dir); + errcnt |= 01; + return STOP; + } + } else { + fprintf(stderr, "%s: mkdir: %s: %s\n", + progname, dir, strerror(errno)); + errcnt |= 01; + return STOP; + } + } + return OKAY; +} +static void +installd(char *dir) +{ + struct stat st; + int sgid_bit; + char *slash; + char c; + + slash = dir; + do { + while (*slash == '/') + slash++; + while (*slash != '/' && *slash != '\0') + slash++; + c = *slash; + *slash = '\0'; + if (makedir(dir) != OKAY) + return; + if (c == '\0') { + if (oflag || gflag) + if (chgown(dir, NULL) != OKAY) + return; + if (mflag) { + sgid_bit = stat(dir, &st) == 0 && + st.st_mode&S_ISGID ? 
S_ISGID : 0; + if (chmod(dir, mode | sgid_bit) < 0) { + fprintf(stderr, "%s: chmod: %s: %s\n", + progname, dir, + strerror(errno)); + errcnt |= 01; + return; + } + } + } + *slash = c; + } while (c != '\0'); +} + +int +main(int argc, char **argv) +{ + const char optstring[] = "csg:m:o:d"; + int i; + + progname = basename(argv[0]); + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'c': + /* no-op */ + break; + case 's': + sflag = 1; + break; + case 'g': + gflag = 1; + group = getgroup(optarg); + break; + case 'm': + mflag = 1; + mode = strtol(optarg, NULL, 8); + break; + case 'o': + oflag = 1; + owner = getowner(optarg); + break; + case 'd': + dflag = 1; + break; + default: + usage(); + } + } + if (dflag) { + if (argc == optind || argc > optind + 1) + usage(); + if (mflag) + mode &= ~(mode_t)S_ISGID; + installd(argv[optind]); + } else { + if (argc < optind + 2) + usage(); + installf(argc - optind, &argv[optind]); + } + return errcnt; +} diff --git a/_install/mkfile b/_install/mkfile @@ -0,0 +1,12 @@ +BIN = _install +TARG = __install +OBJ = install.o +INSTALL_BIN = install +INSTALL_MAN1b = install.1b +CLEAN_FILES = install + +<$mkbuild/mk.default + +__install:QV: $BIN + mv _install install + diff --git a/bc/bc.1 b/bc/bc.1 @@ -0,0 +1,222 @@ +.\" +.\" Sccsid @(#)bc.1 1.7 (gritter) 10/11/03 +.\" Derived from bc(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH BC 1 "10/11/03" "Heirloom Toolchest" "User Commands" +.SH NAME +bc \- arbitrary-precision arithmetic language +.SH SYNOPSIS +\fBbc\fR [\fB\-c\fR] [\fB\-l\fR] [\fIfile\fR ... ] +.SH DESCRIPTION +.I Bc +is an interactive processor for a language which resembles +C but provides unlimited precision arithmetic. +It takes input from any files given, then reads +the standard input. +The +.B \-l +argument stands for the name +of an arbitrary precision math library. +The syntax for +.I bc +programs is as follows; +L means letter a-z, +E means expression, S means statement. +.HP 6 +Comments +.br +are enclosed in /* and */. 
+.HP 6 +Names +.br +simple variables: L +.br +array elements: L [ E ] +.br +The words `ibase', `obase', and `scale' +.HP 6 +Other operands +.br +arbitrarily long numbers with optional sign and decimal point. +.br +( E ) +.br +sqrt ( E ) +.br +length ( E ) number of significant decimal digits +.br +scale ( E ) number of digits right of decimal point +.br +L ( E , ... , E ) +.HP 6 +Operators +.br ++ \- * / % ^ +(% is remainder; ^ is power) +.br +++ \-\- (prefix and postfix; apply to names) +.br +== <= >= != < > +.br += =+ =\- =* =/ =% =^ +.br +.HP 6 +Statements +.br +E +.br +{ S ; ... ; S } +.br +if ( E ) S +.br +while ( E ) S +.br +for ( E ; E ; E ) S +.br +null statement +.br +break +.br +quit +.HP 6 +Function definitions +.br +define L ( L ,..., L ) { +.br + auto L, ... , L +.br + S; ... S +.br + return ( E ) +.br +} +.HP 6 +Functions in +.B \-l +math library +.br +s(x) sine +.br +c(x) cosine +.br +e(x) exponential +.br +l(x) log +.br +a(x) arctangent +.br +j(n,x) Bessel function +.PP +.DT +All function arguments are passed by value. +.PP +The value of a statement that is an expression is printed +unless the main operator is an assignment. +Either semicolons or newlines may separate statements. +Assignment to +.I scale +influences the number of digits to be retained on arithmetic +operations in the manner of +.IR dc (1). +Assignments to +.I ibase +or +.I obase +set the input and output number radix respectively. +.PP +The same letter may be used as an array, a function, +and a simple variable simultaneously. +All variables are global to the program. +`Auto' variables are pushed down during function calls. +When using arrays as function arguments +or defining them as automatic variables +empty square brackets must follow the array name. 
+.PP +For example +.PP +.nf +scale = 20 +define e(x){ + auto a, b, c, i, s + a = 1 + b = 1 + s = 1 + for(i=1; 1==1; i++){ + a = a*x + b = b*i + c = a/b + if(c == 0) return(s) + s = s+c + } +} +.PP +.fi +defines a function to compute an approximate value of +the exponential function and +.PP +.nf + for(i=1; i<=10; i++) e(i) +.fi +.PP +prints approximate values of the exponential function of +the first ten integers. +.PP +.I Bc +is actually a preprocessor for +.IR dc (1), +which it invokes automatically, unless the +.B \-c +(compile only) +option is present. +In this case the +.I dc +input is sent to the standard output instead. +.SH FILES +.ta \w'/usr/5lib/lib.b 'u +/usr/5lib/lib.b mathematical library +.br +dc(1) desk calculator proper +.SH "SEE ALSO" +dc(1) +.br +L. L. Cherry and R. Morris, +.I +BC \- An arbitrary precision desk-calculator language +.SH BUGS +No &&, \(or\|\(or, or ! operators. +.br +.I For +statement must have all three E's. +.br +.I Quit +is interpreted when read, not when executed. diff --git a/bc/bc.y b/bc/bc.y @@ -0,0 +1,743 @@ +%{ +/* from 4.4BSD /usr/src/usr.bin/bc/bc.y */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This module is believed to contain source code proprietary to AT&T. + * Use and redistribution is subject to the Berkeley Software License + * Agreement and your Software Agreement with AT&T (Western Electric). + * + * from bc.y 8.1 (Berkeley) 6/6/93 + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)bc.sl 1.24 (gritter) 7/3/05"; +#include <unistd.h> +#include <signal.h> +#include <limits.h> +#include <inttypes.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +typedef intptr_t YYSTYPE; +#define YYSTYPE YYSTYPE + static int cpeek(int c, int yes, int no); + static int getch(void); + static intptr_t bundle(int a, ...); + static void routput(intptr_t *p); + static void output(intptr_t *p); + static void conout(intptr_t p, intptr_t s); + static void pp(intptr_t); + static void tp(intptr_t); + static void yyinit(int argc, char *argv[]); + static intptr_t *getout(void); + static intptr_t *getf(intptr_t); + static intptr_t *geta(intptr_t); + static void yyerror(const char *); + static void cantopen(const char *); + extern int yylex(void); + +#if defined (__GLIBC__) && defined (_IO_getc_unlocked) +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +%} +%right '=' +%left '+' '-' +%left '*' '/' '%' +%right '^' +%left UMINUS + +%term LETTER DIGIT SQRT LENGTH _IF FFF EQ +%term _WHILE _FOR NE LE GE INCR DECR +%term _RETURN _BREAK _DEFINE BASE OBASE SCALE +%term EQPL EQMI EQMUL EQDIV EQREM EQEXP +%term _AUTO DOT +%term QSTR + +%{ +#define THIS_BC_STRING_MAX 1000 +static FILE *in; +static char cary[LINE_MAX + 1], *cp = { cary }; +static char string[THIS_BC_STRING_MAX + 3], *str = {string}; +static int crs = '0'; +static int rcrs = '0'; /* reset crs */ +static int bindx = 0; +static int lev = 0; +static int ln; +static char *ss; +static int bstack[10] = { 0 }; +static char *numb[15] = { + " 0", " 1", " 2", " 3", " 4", " 5", + " 6", " 7", " 8", " 9", " 10", " 11", + " 12", " 13", " 14" }; +static intptr_t *pre, *post; +%} +%% +start : + | start stat tail + { output( (intptr_t *)$2 );} + | start def dargs ')' '{' dlist slist '}' 
+ { bundle( 6,pre, $7, post ,"0",numb[lev],"Q"); + conout( $$, $2 ); + rcrs = crs; + output( (intptr_t *)"" ); + lev = bindx = 0; + } + ; + +dlist : tail + | dlist _AUTO dlets tail + ; + +stat : e + { bundle(2, $1, "ps." ); } + | + { bundle(1, "" ); } + | QSTR + { bundle(3,"[",$1,"]P");} + | LETTER '=' e + { bundle(3, $3, "s", $1 ); } + | LETTER '[' e ']' '=' e + { bundle(4, $6, $3, ":", geta($1)); } + | LETTER EQOP e + { bundle(6, "l", $1, $3, $2, "s", $1 ); } + | LETTER '[' e ']' EQOP e + { bundle(8,$3, ";", geta($1), $6, $5, $3, ":", geta($1));} + | _BREAK + { bundle(2, numb[lev-bstack[bindx-1]], "Q" ); } + | _RETURN '(' e ')' + { bundle(4, $3, post, numb[lev], "Q" ); } + | _RETURN '(' ')' + { bundle(4, "0", post, numb[lev], "Q" ); } + | _RETURN + { bundle(4,"0",post,numb[lev],"Q"); } + | SCALE '=' e + { bundle(2, $3, "k"); } + | SCALE EQOP e + { bundle(4,"K",$3,$2,"k"); } + | BASE '=' e + { bundle(2,$3, "i"); } + | BASE EQOP e + { bundle(4,"I",$3,$2,"i"); } + | OBASE '=' e + { bundle(2,$3,"o"); } + | OBASE EQOP e + { bundle(4,"O",$3,$2,"o"); } + | '{' slist '}' + { $$ = $2; } + | FFF + { bundle(1,"fY"); } + | error + { bundle(1,"c"); } + | _IF CRS BLEV '(' re ')' stat + { conout( $7, $2 ); + bundle(3, $5, $2, " " ); + } + | _WHILE CRS '(' re ')' stat BLEV + { bundle(3, $6, $4, $2 ); + conout( $$, $2 ); + bundle(3, $4, $2, " " ); + } + | fprefix CRS re ';' e ')' stat BLEV + { bundle(5, $7, $5, "s.", $3, $2 ); + conout( $$, $2 ); + bundle(5, $1, "s.", $3, $2, " " ); + } + | '~' LETTER '=' e + { bundle(3,$4,"S",$2); } + ; + +EQOP : EQPL + { $$ = (intptr_t)"+"; } + | EQMI + { $$ = (intptr_t)"-"; } + | EQMUL + { $$ = (intptr_t)"*"; } + | EQDIV + { $$ = (intptr_t)"/"; } + | EQREM + { $$ = (intptr_t)"%%"; } + | EQEXP + { $$ = (intptr_t)"^"; } + ; + +fprefix : _FOR '(' e ';' + { $$ = $3; } + ; + +BLEV : + { --bindx; } + ; + +slist : stat + | slist tail stat + { bundle(2, $1, $3 ); } + ; + +tail : '\n' + {ln++;} + | ';' + ; + +re : e EQ e + { bundle(3, $1, $3, "=" ); } 
+ | e '<' e + { bundle(3, $1, $3, ">" ); } + | e '>' e + { bundle(3, $1, $3, "<" ); } + | e NE e + { bundle(3, $1, $3, "!=" ); } + | e GE e + { bundle(3, $1, $3, "!>" ); } + | e LE e + { bundle(3, $1, $3, "!<" ); } + | e + { bundle(2, $1, " 0!=" ); } + ; + +e : e '+' e + { bundle(3, $1, $3, "+" ); } + | e '-' e + { bundle(3, $1, $3, "-" ); } + | '-' e %prec UMINUS + { bundle(3, " 0", $2, "-" ); } + | e '*' e + { bundle(3, $1, $3, "*" ); } + | e '/' e + { bundle(3, $1, $3, "/" ); } + | e '%' e + { bundle(3, $1, $3, "%%" ); } + | e '^' e + { bundle(3, $1, $3, "^" ); } + | LETTER '[' e ']' + { bundle(3,$3, ";", geta($1)); } + | LETTER INCR + { bundle(4, "l", $1, "d1+s", $1 ); } + | INCR LETTER + { bundle(4, "l", $2, "1+ds", $2 ); } + | DECR LETTER + { bundle(4, "l", $2, "1-ds", $2 ); } + | LETTER DECR + { bundle(4, "l", $1, "d1-s", $1 ); } + | LETTER '[' e ']' INCR + { bundle(7,$3,";",geta($1),"d1+",$3,":",geta($1)); } + | INCR LETTER '[' e ']' + { bundle(7,$4,";",geta($2),"1+d",$4,":",geta($2)); } + | LETTER '[' e ']' DECR + { bundle(7,$3,";",geta($1),"d1-",$3,":",geta($1)); } + | DECR LETTER '[' e ']' + { bundle(7,$4,";",geta($2),"1-d",$4,":",geta($2)); } + | SCALE INCR + { bundle(1,"Kd1+k"); } + | INCR SCALE + { bundle(1,"K1+dk"); } + | SCALE DECR + { bundle(1,"Kd1-k"); } + | DECR SCALE + { bundle(1,"K1-dk"); } + | BASE INCR + { bundle(1,"Id1+i"); } + | INCR BASE + { bundle(1,"I1+di"); } + | BASE DECR + { bundle(1,"Id1-i"); } + | DECR BASE + { bundle(1,"I1-di"); } + | OBASE INCR + { bundle(1,"Od1+o"); } + | INCR OBASE + { bundle(1,"O1+do"); } + | OBASE DECR + { bundle(1,"Od1-o"); } + | DECR OBASE + { bundle(1,"O1-do"); } + | LETTER '(' cargs ')' + { bundle(4, $3, "l", getf($1), "x" ); } + | LETTER '(' ')' + { bundle(3, "l", getf($1), "x" ); } + | cons + { bundle(2, " ", $1 ); } + | DOT cons + { bundle(2, " .", $2 ); } + | cons DOT cons + { bundle(4, " ", $1, ".", $3 ); } + | cons DOT + { bundle(3, " ", $1, "." 
); } + | DOT + { $$ = (intptr_t)"l."; } + | LETTER + { bundle(2, "l", $1 ); } + | LETTER '=' e + { bundle(3, $3, "ds", $1 ); } + | LETTER EQOP e %prec '=' + { bundle(6, "l", $1, $3, $2, "ds", $1 ); } + | LETTER '[' e ']' '=' e + { bundle(5,$6,"d",$3,":",geta($1)); } + | LETTER '[' e ']' EQOP e + { bundle(9,$3,";",geta($1),$6,$5,"d",$3,":",geta($1)); } + | LENGTH '(' e ')' + { bundle(2,$3,"Z"); } + | SCALE '(' e ')' + { bundle(2,$3,"X"); } /* must be before '(' e ')' */ + | '(' e ')' + { $$ = $2; } + | '?' + { bundle(1, "?" ); } + | SQRT '(' e ')' + { bundle(2, $3, "v" ); } + | '~' LETTER + { bundle(2,"L",$2); } + | SCALE '=' e + { bundle(2,$3,"dk"); } + | SCALE EQOP e %prec '=' + { bundle(4,"K",$3,$2,"dk"); } + | BASE '=' e + { bundle(2,$3,"di"); } + | BASE EQOP e %prec '=' + { bundle(4,"I",$3,$2,"di"); } + | OBASE '=' e + { bundle(2,$3,"do"); } + | OBASE EQOP e %prec '=' + { bundle(4,"O",$3,$2,"do"); } + | SCALE + { bundle(1,"K"); } + | BASE + { bundle(1,"I"); } + | OBASE + { bundle(1,"O"); } + ; + +cargs : eora + | cargs ',' eora + { bundle(2, $1, $3 ); } + ; +eora: e + | LETTER '[' ']' + {bundle(2,"l",geta($1)); } + ; + +cons : constant + { *cp++ = '\0'; } + +constant: + '_' + { $$ = (intptr_t)cp; *cp++ = '_'; } + | DIGIT + { $$ = (intptr_t)cp; *cp++ = $1; } + | constant DIGIT + { *cp++ = $2; } + ; + +CRS : + { $$ = (intptr_t)cp; *cp++ = crs++; *cp++ = '\0'; + if(crs == '[')crs+=3; + if(crs == 'a')crs='{'; + if(crs >= 0241){yyerror("program too big"); + getout(); + } + bstack[bindx++] = lev++; } + ; + +def : _DEFINE LETTER '(' + { $$ = (intptr_t)getf($2); + pre = (intptr_t *)""; + post = (intptr_t *)""; + lev = 1; + bstack[bindx=0] = 0; + } + ; + +dargs : + | lora + { pp( $1 ); } + | dargs ',' lora + { pp( $3 ); } + ; + +dlets : lora + { tp($1); } + | dlets ',' lora + { tp($3); } + ; +lora : LETTER + | LETTER '[' ']' + { $$ = (intptr_t)geta($1); } + ; + +%% +# define error 256 + +static int peekc = -1; +static int sargc; +static int ifile; +static char **sargv; 
+ +static char funtab[52] = { + 01,0,02,0,03,0,04,0,05,0,06,0,07,0,010,0,011,0,012,0,013,0,014,0,015,0,016,0,017,0, + 020,0,021,0,022,0,023,0,024,0,025,0,026,0,027,0,030,0,031,0,032,0 }; +static char atab[52] = { + 0241,0,0242,0,0243,0,0244,0,0245,0,0246,0,0247,0,0250,0,0251,0,0252,0,0253,0, + 0254,0,0255,0,0256,0,0257,0,0260,0,0261,0,0262,0,0263,0,0264,0,0265,0,0266,0, + 0267,0,0270,0,0271,0,0272,0}; +static char *letr[26] = { + "a","b","c","d","e","f","g","h","i","j", + "k","l","m","n","o","p","q","r","s","t", + "u","v","w","x","y","z" } ; +/*static char *dot = { "." };*/ + +int +yylex(void){ + int c, ch; +restart: + c = getch(); + peekc = -1; + while( c == ' ' || c == '\t' ) c = getch(); + if(c == '\\'){ + getch(); + goto restart; + } + if( c<= 'z' && c >= 'a' ) { + /* look ahead to look for reserved words */ + peekc = getch(); + if( peekc >= 'a' && peekc <= 'z' ){ /* must be reserved word */ + if( c=='i' && peekc=='f' ){ c=_IF; goto skip; } + if( c=='w' && peekc=='h' ){ c=_WHILE; goto skip; } + if( c=='f' && peekc=='o' ){ c=_FOR; goto skip; } + if( c=='s' && peekc=='q' ){ c=SQRT; goto skip; } + if( c=='r' && peekc=='e' ){ c=_RETURN; goto skip; } + if( c=='b' && peekc=='r' ){ c=_BREAK; goto skip; } + if( c=='d' && peekc=='e' ){ c=_DEFINE; goto skip; } + if( c=='s' && peekc=='c' ){ c= SCALE; goto skip; } + if( c=='b' && peekc=='a' ){ c=BASE; goto skip; } + if( c=='i' && peekc == 'b'){ c=BASE; goto skip; } + if( c=='o' && peekc=='b' ){ c=OBASE; goto skip; } + if( c=='d' && peekc=='i' ){ c=FFF; goto skip; } + if( c=='a' && peekc=='u' ){ c=_AUTO; goto skip; } + if( c == 'l' && peekc=='e'){ c=LENGTH; goto skip; } + if( c == 'q' && peekc == 'u'){getout();} + /* could not be found */ + return( error ); + skip: /* skip over rest of word */ + peekc = -1; + while( (ch = getch()) >= 'a' && ch <= 'z' ); + peekc = ch; + return( c ); + } + + /* usual case; just one single letter */ + + yylval = (intptr_t)letr[c-'a']; + return( LETTER ); + } + if( c>= '0' && c <= '9' || c>= 
'A' && c<= 'F' ){ + yylval = c; + return( DIGIT ); + } + switch( c ){ + case '.': return( DOT ); + case '=': + switch( peekc = getch() ){ + case '=': c=EQ; goto gotit; + case '+': c=EQPL; goto gotit; + case '-': c=EQMI; goto gotit; + case '*': c=EQMUL; goto gotit; + case '/': c=EQDIV; goto gotit; + case '%': c=EQREM; goto gotit; + case '^': c=EQEXP; goto gotit; + default: return( '=' ); + gotit: peekc = -1; return(c); + } + case '+': return( cpeek( '+', INCR, cpeek( '=', EQPL, '+') ) ); + case '-': return( cpeek( '-', DECR, cpeek( '=', EQMI, '-') ) ) ; + case '<': return( cpeek( '=', LE, '<' ) ); + case '>': return( cpeek( '=', GE, '>' ) ); + case '!': return( cpeek( '=', NE, '!' ) ); + case '/': + if((peekc = getch()) == '*'){ + peekc = -1; + while((getch() != '*') || ((peekc = getch()) != '/')); + peekc = -1; + goto restart; + } + else if (peekc == '=') { + c=EQDIV; + goto gotit; + } + else return(c); + case '*': + return( cpeek( '=', EQMUL, '*' ) ); + case '%': + return( cpeek( '=', EQREM, '%' ) ); + case '^': + return( cpeek( '=', EQEXP, '^' ) ); + case '"': + yylval = (intptr_t)str; + while((c=getch()) != '"'){*str++ = c; + if(str >= &string[sizeof string - 1]){yyerror("string space exceeded"); + getout(); + } + } + *str++ = '\0'; + return(QSTR); + default: return( c ); + } +} + +static int +cpeek(int c, int yes, int no){ + if( (peekc=getch()) != c ) return( no ); + else { + peekc = -1; + return( yes ); + } +} + +static int +getch(void){ + int ch; +loop: + ch = (peekc < 0) ? 
getc(in) : peekc; + peekc = -1; + if(ch != EOF)return(ch); + if(++ifile > sargc){ + if(ifile >= sargc+2)getout(); + in = stdin; + ln = 0; + goto loop; + } + fclose(in); + if((in = fopen(sargv[ifile],"r")) != NULL){ + ln = 0; + ss = sargv[ifile]; + goto loop; + } + cantopen(sargv[ifile]); + return EOF; +} +# define b_sp_max 3000 +static intptr_t b_space [ b_sp_max ]; +static intptr_t * b_sp_nxt = { b_space }; + +static int bdebug = 0; + +static intptr_t +bundle(int a, ...){ + intptr_t i, *q; + va_list ap; + + i = a; + q = b_sp_nxt; + if( bdebug ) printf("bundle %ld elements at %lo\n",(long)i, (long)q ); + va_start(ap, a); + while(i-- > 0){ + if( b_sp_nxt >= & b_space[b_sp_max] ) yyerror( "bundling space exceeded" ); + * b_sp_nxt++ = va_arg(ap, intptr_t); + } + va_end(ap); + * b_sp_nxt++ = 0; + yyval = (intptr_t)q; + return( (intptr_t)q ); +} + +static void +routput(intptr_t *p) { + if( bdebug ) printf("routput(%lo)\n", (long)p ); + if( p >= &b_space[0] && p < &b_space[b_sp_max]){ + /* part of a bundle */ + while( *p != 0 ) routput( (intptr_t *)*p++ ); + } + else printf( (char *)p ); /* character string */ +} + +static void +output(intptr_t *p) { + routput( p ); + b_sp_nxt = & b_space[0]; + printf( "\n" ); + fflush(stdout); + cp = cary; + crs = rcrs; +} + +static void +conout(intptr_t p, intptr_t s) { + printf("["); + routput( (intptr_t *)p ); + printf("]s%s\n", (char *)s ); + fflush(stdout); + lev--; +} + +static void +yyerror(const char *s) { + if(ifile > sargc)ss="teletype"; + fprintf(stderr, "%s on line %d, %s\n", + s ,ss?ln+1:0,ss?ss:"command line"); + cp = cary; + crs = rcrs; + bindx = 0; + lev = 0; + b_sp_nxt = &b_space[0]; +} + +static void +cantopen(const char *fn) +{ + char spc[280]; + char *oss = ss; + + ss = 0; + snprintf(spc, sizeof spc, "can't open input file %s", fn); + yyerror(spc); + ss = oss; +} + +static void +pp(intptr_t s) { + /* puts the relevant stuff on pre and post for the letter s */ + + bundle(3, "S", s, pre ); + pre = (intptr_t *)yyval; + 
bundle(4, post, "L", s, "s." ); + post = (intptr_t *)yyval; +} + +static void +tp(intptr_t s) { /* same as pp, but for temps */ + bundle(3, "0S", s, pre ); + pre = (intptr_t *)yyval; + bundle(4, post, "L", s, "s." ); + post = (intptr_t *)yyval; +} + +static void +yyinit(int argc,char **argv) { + signal(SIGINT, SIG_IGN); + sargv=argv; + sargc= -- argc; + if(sargc == 0)in=stdin; + else if((in = fopen(sargv[1],"r")) == NULL) { + cantopen(sargv[1]); + exit(0); + } + ifile = 1; + ln = 0; + ss = sargv[1]; +} + +static intptr_t * +getout(void){ + printf("q"); + fflush(stdout); + exit(0); + /*NOTREACHED*/ + return(NULL); +} + +static intptr_t * +getf(intptr_t p) { + return(intptr_t *)(&funtab[2*(*((char *)p) -0141)]); +} + +static intptr_t * +geta(intptr_t p) { + return(intptr_t *)(&atab[2*(*((char *)p) - 0141)]); +} + +int +main(int argc, char **argv) +{ + extern int yyparse(void); + const char optstring[] = "cdl"; + int p[2]; + int i; + int cflag = 0, lflag = 0; + + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'd': + case 'c': + cflag = 1; + break; + case 'l': + lflag = 1; + break; + default: + exit(2); + } + } + argv += optind - 1, argc -= optind - 1; + if (cflag) { + yyinit(argc, argv); + yyparse(); + exit(0); + } + if (lflag) { + *argv-- = LIBB; + argc++; + } + pipe(p); + if (fork()==0) { + close(1); + dup(p[1]); + close(p[0]); + close(p[1]); + yyinit(argc, argv); + yyparse(); + exit(0); + } + close(0); + dup(p[0]); + close(p[0]); + close(p[1]); + execl(DC, "dc", "-", NULL); + execl("/usr/5bin/dc", "dc", "-", NULL); + execl("/usr/local/bin/dc", "dc", "-", NULL); + execl("/usr/contrib/bin/dc", "dc", "-", NULL); + execl("/usr/bin/dc", "dc", "-", NULL); + return(1); +} diff --git a/bc/lib.b b/bc/lib.b @@ -0,0 +1,241 @@ +/* from 4.4BSD /usr/src/usr.bin/bc/bc.library */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. 
+ * + * This module is believed to contain source code proprietary to AT&T. + * Use and redistribution is subject to the Berkeley Software License + * Agreement and your Software Agreement with AT&T (Western Electric). + * + * from bc.library 8.1 (Berkeley) 6/6/93 + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)lib.b 1.4 (gritter) 8/26/02 */ + +scale = 20 +define e(x){ + auto a, b, c, d, e, g, t, w, y + + t = scale + scale = t + .434*x + 1 + + w = 0 + if(x<0){ + x = -x + w = 1 + } + y = 0 + while(x>2){ + x = x/2 + y = y + 1 + } + + a=1 + b=1 + c=b + d=1 + e=1 + for(a=1;1==1;a++){ + b=b*x + c=c*a+b + d=d*a + g = c/d + if(g == e){ + g = g/1 + while(y--){ + g = g*g + } + scale = t + if(w==1) return(1/g) + return(g/1) + } + e=g + } +} + +define l(x){ + auto a, b, c, d, e, f, g, u, s, t + if(x <=0) return(1-10^scale) + t = scale + + f=1 + scale = scale + scale(x) - length(x) + 1 + s=scale + while(x > 2){ + s = s + (length(x)-scale(x))/2 + 1 + if(s>0) scale = s + x = sqrt(x) + f=f*2 + } + while(x < .5){ + s = s + (length(x)-scale(x))/2 + 1 + if(s>0) scale = s + x = sqrt(x) + f=f*2 + } + + scale = t + length(f) - scale(f) + 1 + u = (x-1)/(x+1) + + scale = scale + 1.1*length(t) - 1.1*scale(t) + s = u*u + b = 2*f + c = b + d = 1 + e = 1 + for(a=3;1==1;a=a+2){ + b=b*s + c=c*a+d*b + d=d*a + g=c/d + if(g==e){ + scale = t + return(u*c/d) + } + e=g + } +} + +define s(x){ + auto a, b, c, s, t, y, p, n, i + t = scale + y = x/.7853 + s = t + length(y) - scale(y) + if(s<t) s=t + scale = s + p = a(1) + + scale = 0 + if(x>=0) n = (x/(2*p)+1)/2 + if(x<0) n = (x/(2*p)-1)/2 + x = x - 4*n*p + if(n%2!=0) x = -x + + scale = t + length(1.2*t) - scale(1.2*t) + y = -x*x + a = x + b = 1 + s = x + for(i=3; 1==1; i=i+2){ + a = a*y + b = b*i*(i-1) + c = a/b + if(c==0){scale=t; return(s/1)} + s = s+c + } 
+} + +define c(x){ + auto t + t = scale + scale = scale+1 + x = s(x+2*a(1)) + scale = t + return(x/1) +} + +define a(x){ + auto a, b, c, d, e, f, g, s, t + if(x==0) return(0) + if(x==1) { + if(scale<52) { +return(.7853981633974483096156608458198757210492923498437764/1) + } + } + t = scale + f=1 + while(x > .5){ + scale = scale + 1 + x= -(1-sqrt(1.+x*x))/x + f=f*2 + } + while(x < -.5){ + scale = scale + 1 + x = -(1-sqrt(1.+x*x))/x + f=f*2 + } + s = -x*x + b = f + c = f + d = 1 + e = 1 + for(a=3;1==1;a=a+2){ + b=b*s + c=c*a+d*b + d=d*a + g=c/d + if(g==e){ + scale = t + return(x*c/d) + } + e=g + } +} + +define j(n,x){ +auto a,b,c,d,e,g,i,s,k,t + + t = scale + k = 1.36*x + 1.16*t - n + k = length(k) - scale(k) + if(k>0) scale = scale + k + +s= -x*x/4 +if(n<0){ + n= -n + x= -x + } +a=1 +c=1 +for(i=1;i<=n;i++){ + a=a*x + c = c*2*i + } +b=a +d=1 +e=1 +for(i=1;1;i++){ + a=a*s + b=b*i*(n+i) + a + c=c*i*(n+i) + g=b/c + if(g==e){ + scale = t + return(g/1) + } + e=g + } +} diff --git a/bc/mkfile b/bc/mkfile @@ -0,0 +1,11 @@ +BIN = bc +OBJ = bc.o +LOCAL_CFLAGS = -DDC=\"$BINDIR/dc\" -DLIBB=\"$LIBDIR/lib.b\" +CLEAN_FILES = bc.c +INSTALL_BIN = bc +INSTALL_LIB = lib.b +INSTALL_MAN1 = bc.1 +DEPS = yacc + +<$mkbuild/mk.default + diff --git a/bc/yyval.sed b/bc/yyval.sed @@ -0,0 +1,22 @@ +# +# Sccsid @(#)yyval.sed 1.3 (gritter) 4/27/04 +# +# bison has a yacc-compatible yyval, but it is a local variable inside +# yyparse(). Making the variable global is necessary to make bc work +# with a bison-generated parser. +1,2 { + /Bison/ { + :look + /YYSTYPE/ { + a\ + YYSTYPE yyval; + :repl + s/^[ ]*YYSTYPE[ ]*yyval;// + n + t + b repl + } + n + b look + } +} diff --git a/cp/cp.1 b/cp/cp.1 @@ -0,0 +1,218 @@ +.\" +.\" Sccsid @(#)cp.1 1.26 (gritter) 5/3/05 +.\" Parts taken from cp(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.TH CP 1 "5/3/05" "Heirloom Toolchest" "User Commands" +.SH NAME +cp \- copy files +.SH SYNOPSIS +\fBcp\fR +[\fB\-adDfiHLpPrRs\fR] [\fB\-b\ \fIsize\fR] +\fIfile1\fR [\fIfile2\fR .\ .\ .\ ] \fItarget\fR +.SH DESCRIPTION +.I File1 +is copied onto +.IR target . +If +.I target +is an existing regular file, +its content is overwritten. +Its mode and owner are preserved; +the mode of the source file is used otherwise. +.PP +If +.I target +is a directory, +one or more files are copied +into the directory with their original file-names. +.PP +.I Cp +refuses to copy a file onto itself. +.PP +The +.I cp +command accepts the following options: +.TP +.B \-i +.I Cp +will ask for confirmation +before overwriting an existing target file. +For +.B /usr/5bin/cp +and +.BR /usr/5bin/s42/cp , +.I cp +will also ask for confirmation before overwriting a directory +with the +.I \-r +or +.I \-R +option. +For +.BR /usr/5bin/cp , +this flag will be automatically disabled +if standard input is not a terminal. +.TP +.B \-p +.I Cp +will try to preserve access and modification times, +user and group ownerships, +and file permission bits. +Failing to preserve these modes +is always considered an error, +but only +.BR /usr/5bin/s42/cp , +.BR /usr/5bin/posix2001/cp , +and +.B /usr/5bin/posix/cp +will print an error message. +.TP +.B \-r +The source file operands may be directories +that will be copied recursively. +Symbolic links are followed. +For every non-directory file encountered, +an attempt is made to reproduce its content in a regular file. +.PP +The following options have been added by POSIX.2: +.TP +.B \-f +If overwriting a target file fails, +.I cp +will try to unlink that file and proceed. +.TP +.B \-R +The source file operands may be directories +that will be copied recursively. +Special files +such as block and character devices, +FIFOs, +and symbolic links encountered during traversal +are recreated in the target hierarchy. +If a symbolic link is given as an operand, +its target is copied. 
+.PP +The following options have been added by POSIX.1-2001: +.TP +.B \-H +With the +.I \-R +option, follow symbolic links given as operands, +but do not follow symbolic links encountered during traversal +of the source hierarchy. +This is the default. +.TP +.B \-L +With the +.I \-R +option, follow all symbolic links. +.TP +.B \-P +With the +.I \-R +option, do not follow any symbolic links. +.PP +The following options are extensions: +.TP +.B \-a +Perform a recursive copy and, if possible, +preserve hard links as well as any attributes. +This is the same as the combination of the +.I \-Rdp +options. +.TP +\fB\-b\ \fIsize\fR +With this option given, +.I cp +performs input and output in units of +.I size +bytes. +The default size depends on the current input file. +.TP +.B \-d +With the +.I \-r +or +.I \-R +options, +hard links between copied files are usually split, +i.\|e. each copied file is assigned to a separate i-node. +When this option is given, +hard links between copied files +are reproduced in the destination hierarchy. +.TP +.B \-D +Causes +.I cp +to use direct i/o +when copying file data. +See the description of `O_DIRECT' in +.IR open (2) +for more information. +.TP +.B \-s +With this option, +.I cp +prints i/o statistics for each single file +of which data was copied. +.SH "SEE ALSO" +cat(1), +cpio(1), +mv(1), +pr(1), +rm(1) +.SH NOTES +Use either +.RS +.sp +.B cp +.B \-\- +.I \-file +.I target +.sp +.RE +or +.RS +.sp +.B cp +.I ./\-file +.I target +.sp +.RE +to copy files that begin with a hyphen character. +.PP +A copy of a symbolic link +contains the same pathname as the original. +Symbolic links with relative pathnames +may thus change or lose their target +if copied to a different level in the file hierarchy. diff --git a/cp/cp.c b/cp/cp.c @@ -0,0 +1,1264 @@ +/* + * cp - copy files + * + * Gunnar Ritter, Freiburg i. Br., Germany, July 2002. 
+ */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SUS) +static const char sccsid[] USED = "@(#)cp_sus.sl 1.84 (gritter) 3/4/06"; +#elif defined (S42) +static const char sccsid[] USED = "@(#)cp_s42.sl 1.84 (gritter) 3/4/06"; +#else +static const char sccsid[] USED = "@(#)cp.sl 1.84 (gritter) 3/4/06"; +#endif + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <malloc.h> +#include <errno.h> +#include <libgen.h> +#include <limits.h> +#include <dirent.h> +#include <utime.h> +#include "sfile.h" +#include "memalign.h" +#include "alloca.h" + +#ifndef S_IFDOOR +#define S_IFDOOR 0xD000 /* Solaris door */ +#endif +#ifndef S_IFNAM +#define S_IFNAM 0x5000 /* XENIX special named file */ +#endif +#ifndef S_IFNWK 
+#define S_IFNWK 0x9000 /* HP-UX network special file */ +#endif + +static enum { + PERS_CP, + PERS_MV, + PERS_LN +} pers; + +enum okay { + OKAY = 0, + STOP = 1 +}; + +struct islot { + struct islot *i_lln; + struct islot *i_rln; + char *i_name; + ino_t i_ino; +}; + +struct dslot { + struct dslot *d_nxt; + struct islot *d_isl; + dev_t d_dev; +}; + +static struct dslot *d0; + +static unsigned errcnt; /* count of errors */ +static long bflag; /* buffer size */ +static int dflag; /* preserve hard links */ +#ifdef O_DIRECT +static int Dflag; /* use direct i/o */ +#endif /* O_DIRECT */ +static int fflag; /* force */ +static int iflag; /* ask before overwriting */ +static int nflag; /* ln: do not remove links */ +static int pflag; /* preserve owner and times */ +static int rflag; /* recursive, read FIFOs */ +static int Rflag; /* recursive, recreate FIFOs */ +static int sflag; /* make symlinks / show statistics */ +static int HLPflag; /* -H, -L, or -P */ +static int ontty; /* stdin is a terminal */ +static mode_t umsk; /* current umask */ +static uid_t myuid; /* current uid */ +static gid_t mygid; /* current gid */ +static char *progname; /* argv[0] to main() */ +static struct islot *inull; /* inode tree null element */ +static void (*go)(const char *, const char *, struct stat *, int, + int (*statfn)(const char *, struct stat *)); + +static mode_t +check_suid(const struct stat *sp, mode_t mode) +{ + if (sp->st_uid != myuid || sp->st_gid != mygid) { + mode &= ~(mode_t)S_ISUID; + if ((sp->st_mode&S_IFMT) != S_IFDIR || sp->st_mode&0010) + mode &= ~(mode_t)S_ISGID; + if ((sp->st_mode&S_IFMT) == S_IFDIR || sp->st_gid != mygid) + mode &= ~(mode_t)S_ISGID; + } + return mode; +} + +static void +nomem(void) +{ + write(2, progname, strlen(progname)); + write(2, ": Insufficient memory space.\n", 29); + _exit(077); +} + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) + nomem(); + return p; +} + +static void * +smalloc(size_t 
nbytes) +{ + return srealloc(NULL, nbytes); +} + +static void * +scalloc(size_t nelem, size_t nbytes) +{ + void *p; + + if ((p = calloc(nelem, nbytes)) == NULL) + nomem(); + return p; +} + +static void +usage(void) +{ + switch (pers) { + case PERS_CP: + fprintf(stderr, "\ +Usage: %s [-i] [-p] f1 f2\n\ + %s [-i] [-p] f1 ... fn d1\n\ + %s [-i] [-p] [-r] d1 d2\n", + progname, progname, progname); + break; + case PERS_MV: + fprintf(stderr, "\ +Usage: %s [-f] [-i] f1 f2\n\ + %s [-f] [-i] f1 ... fn d1\n\ + %s [-f] [-i] d1 d2\n", + progname, progname, progname); + break; + case PERS_LN: + { +#if defined (SUS) + const char nstr[] = ""; +#else /* !SUS */ + const char nstr[] = "[-n] "; +#endif /* !SUS */ + fprintf(stderr, "\ +Usage: %s [-f] %s[-s] f1 f2\n\ + %s [-f] %s[-s] f1 ... fn d1\n\ + %s [-f] %s[-s] d1 d2\n", + progname, nstr, progname, nstr, + progname, nstr); + } + break; + } + exit(2); +} + +static void +freeislots(struct islot *ip) +{ + if (ip == inull) + return; + freeislots(ip->i_lln); + freeislots(ip->i_rln); + free(ip->i_name); + free(ip); +} + +static void +freedslots(void) +{ + struct dslot *dp, *dn; + + for (dp = d0; dp; dp = dn) { + dn = dp->d_nxt; + freeislots(dp->d_isl); + free(dp); + } + d0 = NULL; +} + +static struct islot * +isplay(ino_t ino, struct islot *x) +{ + struct islot hdr; + struct islot *leftmax, *rightmin; + struct islot *y; + + hdr.i_lln = hdr.i_rln = inull; + leftmax = rightmin = &hdr; + inull->i_ino = ino; + while (ino != x->i_ino) { + if (ino < x->i_ino) { + if (ino < x->i_lln->i_ino) { + y = x->i_lln; + x->i_lln = y->i_rln; + y->i_rln = x; + x = y; + } + if (x->i_lln == inull) + break; + rightmin->i_lln = x; + rightmin = x; + x = x->i_lln; + } else { + if (ino > x->i_rln->i_ino) { + y = x->i_rln; + x->i_rln = y->i_lln; + y->i_lln = x; + x = y; + } + if (x->i_rln == inull) + break; + leftmax->i_rln = x; + leftmax = x; + x = x->i_rln; + } + } + leftmax->i_rln = x->i_lln; + rightmin->i_lln = x->i_rln; + x->i_lln = hdr.i_rln; + x->i_rln = 
hdr.i_lln; + inull->i_ino = !ino; + return x; +} + +static struct islot * +ifind(ino_t ino, struct islot **it) +{ + if (*it == NULL) + return NULL; + *it = isplay(ino, *it); + return (*it)->i_ino == ino ? *it : NULL; +} + +static void +iput(struct islot *ik, struct islot **it) +{ + if ((*it) == NULL) { + ik->i_lln = ik->i_rln = inull; + (*it) = ik; + } else { + /*(*it) = isplay(ik->i_ino, (*it));*/ + /* ifind() is always called before */ + if (ik->i_ino < (*it)->i_ino) { + ik->i_lln = (*it)->i_lln; + ik->i_rln = (*it); + (*it)->i_lln = inull; + (*it) = ik; + } else if ((*it)->i_ino < ik->i_ino) { + ik->i_rln = (*it)->i_rln; + ik->i_lln = (*it); + (*it)->i_rln = inull; + (*it) = ik; + } + } +} +static int +canlink(const char *path, const struct stat *sp) +{ + struct dslot *ds, *dp; + struct islot *ip; + + for (ds = d0, dp = NULL; ds; dp = ds, ds = ds->d_nxt) + if (ds->d_dev == sp->st_dev) + break; + if (ds == NULL) { + ds = scalloc(1, sizeof *ds); + ds->d_dev = sp->st_dev; + if (d0 == NULL) + d0 = ds; + else + dp->d_nxt = ds; + } + if ((ip = ifind(sp->st_ino, &ds->d_isl)) == NULL) { + ip = scalloc(1, sizeof *ip); + ip->i_name = smalloc(strlen(path) + 1); + strcpy(ip->i_name, path); + ip->i_ino = sp->st_ino; + iput(ip, &ds->d_isl); + } else { + if (link(ip->i_name, path) == 0) + return 1; + } + return 0; +} + +static enum okay +confirm(void) +{ + enum okay yes = STOP; + char c; + + if (read(0, &c, 1) == 1) { + yes = (c == 'y' || c == 'Y') ? 
OKAY : STOP; + while (c != '\n' && read(0, &c, 1) == 1); + } + return yes; +} + +static void +permissions(const char *path, const struct stat *ssp) +{ + mode_t mode; + + mode = ssp->st_mode & 07777; + if (pflag) { + struct utimbuf ut; + ut.actime = ssp->st_atime; + ut.modtime = ssp->st_mtime; + if (utime(path, &ut) < 0) { +#if defined (SUS) || defined (S42) + fprintf(stderr, "%s: cannot set times for %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); +#endif /* SUS || S42 */ + if (pers != PERS_MV) + errcnt |= 010; + } + if (myuid == 0) { + if (chown(path, ssp->st_uid, ssp->st_gid) < 0) { +#if defined (SUS) || defined (S42) + fprintf(stderr, + "%s: cannot change owner and group of %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); +#endif /* SUS || S42 */ + if (pers != PERS_MV) + errcnt |= 010; + mode &= ~(mode_t)(S_ISUID|S_ISGID); + } + } else + mode = check_suid(ssp, mode); + } else + mode = check_suid(ssp, mode & ~umsk); + if (chmod(path, mode) < 0) { +#if defined (SUS) || defined (S42) + fprintf(stderr, "%s: cannot set permissions for %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); +#endif /* SUS || S42 */ + if (pers != PERS_MV) + errcnt |= 010; + } +} + +static size_t +balign(const struct stat *ssp, const struct stat *dsp, + long long size, size_t prefd) +{ + int n, m; + size_t s; + + n = (ssp->st_mode&S_IFMT) == S_IFREG && ssp->st_blksize >= 0 ? + ssp->st_blksize : 512; + m = (dsp->st_mode&S_IFMT) == S_IFREG && dsp->st_blksize >= 0 ? 
+ dsp->st_blksize : 512; + if (prefd <= size && prefd % n == 0 && prefd % m == 0) + return prefd; + else if (n % m == 0) + return n; + else if (m % n == 0) + return m; + else { + s = n; + while (s % m) + s *= 2; + return s; + } +} + +/*ARGSUSED*/ +void +writerr(void *vp, int count, int written) +{ +} + +static long long +fdcopy(const char *src, const struct stat *ssp, const int sfd, + const char *tgt, const struct stat *dsp, const int dfd) +{ + static long pagesize; + static char *buf = NULL; + static size_t bufsize; + ssize_t rsz, wo, wt; + size_t blksize; + long long copied = 0; +#ifdef O_DIRECT + int sfl = 0, dfl = 0, haverest = 0, dioen = 0; + off_t remsz = 0; +#endif + +#ifdef __linux__ + if (!bflag && !Dflag && ssp->st_size > 0) { + long long sent; + + if ((sent = sfile(dfd, sfd, ssp->st_mode, ssp->st_size)) == + ssp->st_size) + return sent; + if (sent < 0) + goto err; + } +#endif /* __linux__ */ + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if (bflag) + blksize = bflag; +#ifdef O_DIRECT + else if (Dflag) + blksize = balign(ssp, dsp, ssp->st_size, 1048576); +#endif /* O_DIRECT */ + else + blksize = balign(ssp, dsp, ssp->st_size, 4096); + if (blksize > bufsize) { + if (buf) + free(buf); + if ((buf = memalign(pagesize, bufsize = blksize)) == 0) + nomem(); + } +#ifdef O_DIRECT + if (Dflag) { + if ((ssp->st_mode&S_IFMT) == S_IFREG && + ssp->st_size > blksize || + (ssp->st_mode&S_IFMT) == S_IFBLK) { + sfl = fcntl(sfd, F_GETFL); + fcntl(sfd, F_SETFL, sfl | O_DIRECT); + remsz = ssp->st_size; + } + if ((dsp->st_mode&S_IFMT) == S_IFREG || + (dsp->st_mode&S_IFMT) == S_IFBLK) { + dfl = fcntl(dfd, F_GETFL); + fcntl(dfd, F_SETFL, dfl | O_DIRECT); + dioen = 1; + } + } +#endif /* O_DIRECT */ + while ((rsz = read(sfd, buf, blksize)) > 0) { +#ifdef O_DIRECT + if (Dflag && rsz < blksize && dioen != 0) { + fcntl(dfd, F_SETFL, dfl); + dioen = 0; + } +#endif /* O_DIRECT */ + wt = 0; + do { + if ((wo = write(dfd, buf + wt, rsz - wt)) < 0) 
{ +#ifdef __linux__ + err: +#endif /* __linux__ */ + fprintf(stderr, "%s: %s: write: %s\n", + progname, tgt, + strerror(errno)); + errcnt |= 04; +#ifdef notdef + if ((dsp->st_mode&S_IFMT) == S_IFREG) + unlink(tgt); +#endif /* notdef */ + return copied; + } + wt += wo; + copied += wo; + } while (wt < rsz); +#ifdef O_DIRECT + if (Dflag && ssp->st_size > blksize && + (ssp->st_mode&S_IFMT) == S_IFREG) { + remsz -= rsz; + if (remsz > 0 && remsz < blksize && + haverest == 0 && (bflag || + remsz<(blksize=balign(ssp, dsp, + ssp->st_size, 4096)))) { + fcntl(sfd, F_SETFL, sfl); + haverest = 1; + } + } +#endif /* O_DIRECT */ + } + if (rsz < 0) { + fprintf(stderr, "%s: %s: read: %s\n", + progname, src, strerror(errno)); + errcnt |= 04; +#ifdef notdef + if ((dsp->st_mode&S_IFMT) == S_IFREG) + unlink(tgt); +#endif /* notdef */ + } +#ifdef O_DIRECT + if (haverest) { +#if !defined (__FreeBSD__) && !defined (__DragonFly__) && !defined (__APPLE__) + fdatasync(dfd); +#else /* __FreeBSD__, __DragonFly__, __APPLE__ */ + fsync(dfd); +#endif /* __FreeBSD_, __DragonFly__, __APPLE__ */ + } +#endif /* O_DIRECT */ + return copied; +} + +static void +filecopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + struct stat stbuf; + mode_t mode; + int sfd, dfd; + float f, s, t; + struct timeval tv1, tv2; + struct rusage ru1, ru2; + long long copied = 0; + + if (sflag) { + gettimeofday(&tv1, NULL); + getrusage(RUSAGE_SELF, &ru1); + } + if ((sfd = open(src, O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s\n%s: %s\n", + progname, src, + src, strerror(errno)); + errcnt |= 01; + return; + } + mode = check_suid(ssp, ssp->st_mode & 07777); + if ((dfd = creat(tgt, mode)) < 0) + if (pers != PERS_MV && dsp != NULL && fflag && unlink(tgt) == 0) + dfd = creat(tgt, mode); + if (dfd < 0) { + fprintf(stderr, "%s: cannot create %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + goto end1; + } + if (fstat(dfd, &stbuf) < 0) { + 
fprintf(stderr, "%s: fstat for %s failed: %s\n", + progname, tgt, strerror(errno)); + errcnt |= 04; + goto end2; + } + copied = fdcopy(src, ssp, sfd, tgt, &stbuf, dfd); +end2: + if (pflag) + permissions(tgt, ssp); + if (close(dfd) < 0) { + fprintf(stderr, "%s: close error on %s: %s\n", + progname, tgt, strerror(errno)); + errcnt |= 04; + } + if (sflag) { + gettimeofday(&tv2, NULL); + getrusage(RUSAGE_SELF, &ru2); +#define tv2f(tv) ((tv).tv_sec + (float)(tv).tv_usec / 1000000) + f = tv2f(tv2) - tv2f(tv1); + s = (float)copied / (2<<19); + t = f ? s / f : s; + printf(" ****** %s File Information ******\n" + " Input file : %s\n" + " Output file : %s\n" + " Real Time (secs) : %14.6f\n" + " User Time (secs) : %14.6f\n" + " System Time (secs) : %14.6f\n" + " File Size (MB) : %14.6f\n" + " Transfer Rate (MB/s) : %14.6f\n", + progname, src, tgt, + f, + tv2f(ru2.ru_utime) - tv2f(ru1.ru_utime), + tv2f(ru2.ru_stime) - tv2f(ru1.ru_stime), + s, + t); + } +end1: + close(sfd); +} + +static void +ignoring(const char *type, const char *path) +{ + fprintf(stderr, "%s: %signoring %s %s\n", progname, +#if defined (SUS) + "", +#else /* !SUS */ + "warning: ", +#endif /* !SUS */ + type, path); +#if defined (SUS) + if (pers == PERS_MV) + errcnt |= 020; +#endif /* SUS */ +} + +static enum okay +do_unlink(const char *tgt, const struct stat *dsp) +{ + if (dsp && unlink(tgt) < 0) { + fprintf(stderr, "%s: cannot unlink %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static void +devicecopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + if (do_unlink(tgt, dsp) != OKAY) + return; + if (mknod(tgt, check_suid(ssp, ssp->st_mode & (07777|S_IFMT)), + ssp->st_rdev) < 0) { + fprintf(stderr, "%s: cannot create special file %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return; + } + if (pflag) + permissions(tgt, ssp); +} + +static void +symlinkcopy(const char 
*src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + static char *buf; + static size_t bufsize; + ssize_t sz; + + if (buf == NULL) + buf = smalloc(bufsize = 256); + for (;;) { + sz = readlink(src, buf, bufsize - 1); + if (sz < 0) { + fprintf(stderr, + "%s: cannot read symbolic link %s\n%s: %s\n", + progname, src, + progname, strerror(errno)); + errcnt |= 01; + return; + } + if (sz == bufsize - 1) { + buf = srealloc(buf, bufsize += 256); + continue; + } + buf[sz] = '\0'; + break; + } + if (do_unlink(tgt, dsp) != OKAY) + return; + if (symlink(buf, tgt) < 0) { + fprintf(stderr, "%s: cannot create symbolic link %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return; + } + if (myuid == 0 && lchown(tgt, ssp->st_uid, ssp->st_gid) < 0) { +#if defined (SUS) + fprintf(stderr, + "%s: cannot change owner and group of %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); +#endif /* SUS */ + if (pers != PERS_MV) + errcnt |= 010; + } +} + +static void +socketcopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + int fd, addrsz; + struct sockaddr_un addr; + size_t len; + + if (do_unlink(tgt, dsp) != OKAY) + return; + len = strlen(tgt); + memset(&addr, 0, sizeof addr); + addr.sun_family = AF_UNIX; + addrsz = sizeof addr - sizeof addr.sun_path + len; + if ((len >= sizeof addr.sun_path ? 
errno = ENAMETOOLONG, fd = -1, 1 : + (strncpy(addr.sun_path,tgt,sizeof addr.sun_path), 0)) || + (fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0 || + bind(fd, (struct sockaddr *)&addr, addrsz) < 0) { + fprintf(stderr, "%s: cannot create socket %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + if (fd >= 0) + close(fd); + errcnt |= 01; + return; + } + close(fd); + if (pflag) + permissions(tgt, ssp); +} + +static void +specialcopy(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + switch (ssp->st_mode & S_IFMT) { + case S_IFIFO: + case S_IFCHR: + case S_IFBLK: + case S_IFNAM: + case S_IFNWK: + devicecopy(src, ssp, tgt, dsp); + break; + case S_IFLNK: + symlinkcopy(src, ssp, tgt, dsp); + break; + case S_IFSOCK: + socketcopy(src, ssp, tgt, dsp); + break; + case S_IFDOOR: + ignoring("door", src); + break; + default: + fprintf(stderr, "%s: %s: unknown file type %o\n", + progname, src, (int)ssp->st_mode); + if (pers == PERS_MV) + errcnt |= 020; + } +} + +static void +getpath(const char *path, char **file, char **filend, size_t *sz, size_t *slen) +{ + *sz = 14 + strlen(path) + 2; + *file = smalloc(*sz); + *filend = *file; + if (path[0] == '/' && path[1] == '\0') + *(*filend)++ = '/'; + else { + register const char *cp = path; + while ((*(*filend)++ = *cp++) != '\0'); + (*filend)[-1] = '/'; + } + *slen = *filend - *file; +} + +static void +setpath(const char *base, char **file, char **filend, + size_t slen, size_t *sz, size_t *ss) +{ + if (slen + (*ss = strlen(base)) >= *sz) { + *sz += slen + *ss + 15; + *file = srealloc(*file, *sz); + *filend = &(*file)[slen]; + } + strcpy(*filend, base); +} + +static enum okay +trydelete(const char *path, int recursive) +{ + struct stat st; + enum okay val = OKAY; + + if (lstat(path, &st) < 0) { + fprintf(stderr, "%s: cannot stat %s for removal\n%s: %s\n", + progname, path, + progname, strerror(errno)); + errcnt |= 040; + val = STOP; + } else if ((st.st_mode & S_IFMT) == S_IFDIR) { + DIR *Dp; + 
+ if (recursive == 0) + goto do_rmdir; + if ((Dp = opendir(path)) != NULL) { + struct dirent *dp; + char *copy, *cend; + size_t sz, slen, ss; + + getpath(path, &copy, &cend, &sz, &slen); + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && + (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && + dp->d_name[2] == '\0'))) + continue; + setpath(dp->d_name, &copy, &cend, + slen, &sz, &ss); + if ((val = trydelete(copy, recursive)) == STOP) + break; + } + free(copy); + closedir(Dp); + if (val != STOP) { +do_rmdir: + if (rmdir(path) < 0) { + fprintf(stderr, + "%s: cannot remove directory %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); + val = STOP; + } + } + } else { + fprintf(stderr, + "%s: cannot open directory %s for removal\n%s: %s\n", + progname, path, + progname, strerror(errno)); + errcnt |= 040; + val = STOP; + } + } else { + if (unlink(path) < 0) { + fprintf(stderr, "%s: cannot unlink %s\n%s: %s\n", + progname, path, + progname, strerror(errno)); + errcnt |= 040; + val = STOP; + } + } + return val; +} + +static enum okay +tryrename(const char *src, const struct stat *ssp, + const char *tgt, const struct stat *dsp) +{ + if (dsp && !fflag) { + if (iflag) { + fprintf(stderr, "%s: overwrite %s? ", + progname, tgt); + if (confirm() != OKAY) + return STOP; + } else if (ontty && (dsp->st_mode&S_IFMT) != S_IFLNK && + access(tgt, W_OK) < 0) { + fprintf(stderr, "%s: %s: %o mode? 
", + progname, tgt, + (int)(dsp->st_mode & 0777)); + if (confirm() != OKAY) + return STOP; + } + } + if (rename(src, tgt) == 0) + return STOP; + if (errno != EXDEV) { + fprintf(stderr, "%s: cannot rename %s to %s\n%s: %s\n", + progname, src, tgt, + progname, strerror(errno)); + errcnt |= 01; + return STOP; + } + if (dsp) { + if ((dsp->st_mode & S_IFMT) == S_IFDIR && + (ssp->st_mode & S_IFMT) != S_IFDIR) { + fprintf(stderr, "%s: <%s> directory\n", + progname, tgt); + errcnt |= 01; + return STOP; + } + if ((dsp->st_mode & S_IFMT) != S_IFDIR && + (ssp->st_mode & S_IFMT) == S_IFDIR) { + fprintf(stderr, "%s: Target must be directory\n", + progname); + errcnt |= 01; + return STOP; + } + } + if (dsp == NULL || trydelete(tgt, 0) == OKAY) + return OKAY; + return STOP; +} + +static enum okay +commoncheck(const char *src, const char *tgt, const struct stat *dsp, + struct stat *ssp, + int (*statfn)(const char *, struct stat *)) +{ + if (statfn(src, ssp) < 0) { + if (pers == PERS_LN && sflag) + return OKAY; + fprintf(stderr, "%s: cannot access %s\n", progname, src); + errcnt |= 01; + return STOP; + } + if (dsp && (ssp->st_dev == dsp->st_dev && ssp->st_ino == dsp->st_ino)) { + fprintf(stderr, "%s: %s and %s are identical\n", + progname, src, tgt); + errcnt |= 01; + return STOP; + } + return OKAY; +} + +static void +cpmv(const char *src, const char *tgt, struct stat *dsp, int level, + int (*statfn)(const char *, struct stat *)) +{ + struct stat sst; + + if (commoncheck(src, tgt, dsp, &sst, + Rflag && level == 0 ? + pers == PERS_MV || HLPflag == 'P' ? 
+ lstat : stat : + statfn) != OKAY) + return; + if (pers == PERS_MV && level == 0) { + if (tryrename(src, &sst, tgt, dsp) == STOP) + return; + dsp = NULL; + } + if ((sst.st_mode & S_IFMT) == S_IFDIR) { + DIR *Dp; + struct dirent *dp; + char *scp, *send, *dcp, *dend; + size_t ssz, slen, sss, dsz, dlen, dss; + int destcreat = 0; + + if (rflag == 0) { + fprintf(stderr, "%s: <%s> directory\n", + progname, src); + errcnt |= 01; + return; + } + if (dsp && (dsp->st_mode & S_IFMT) != S_IFDIR) { + fprintf(stderr, "%s: %s: Not a directory.\n", + progname, tgt); + errcnt |= 01; + return; + } +#if !defined (SUS) + if (pers == PERS_CP && dsp != NULL && iflag) { + fprintf(stderr, "%s: overwrite %s? ", + progname, tgt); + if (confirm() != OKAY) + return; + } +#endif /* !SUS */ + if (dsp == NULL) { + if (mkdir(tgt, check_suid(&sst, + sst.st_mode&07777 | S_IRWXU)) < 0) { + fprintf(stderr, "%s: %s: %s\n", + progname, tgt, strerror(errno)); + errcnt |= 01; + return; + } + destcreat = 1; + } + if ((Dp = opendir(src)) == NULL) { + fprintf(stderr, "%s: %s: %s\n", + progname, src, + strerror(errno)); + errcnt |= 01; + return; + } + getpath(src, &scp, &send, &ssz, &slen); + getpath(tgt, &dcp, &dend, &dsz, &dlen); + while ((dp = readdir(Dp)) != NULL) { + struct stat xst; + if (dp->d_name[0] == '.' && + (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && + dp->d_name[2] == '\0'))) + continue; + setpath(dp->d_name, &scp, &send, slen, &ssz, &sss); + setpath(dp->d_name, &dcp, &dend, dlen, &dsz, &dss); + go(scp, dcp, stat(dcp, &xst) < 0 ? NULL : &xst, + level + 1, statfn); + } + free(scp); + free(dcp); + if (destcreat) + permissions(tgt, &sst); + closedir(Dp); + } else { + if (dsp != NULL && iflag) { + fprintf(stderr, "%s: overwrite %s? 
", + progname, tgt); + if (confirm() != OKAY) + return; + } + if (dflag && sst.st_nlink > 1) { + if (canlink(tgt, &sst)) + return; + } + if ((sst.st_mode & S_IFMT) == S_IFREG || Rflag == 0) + filecopy(src, &sst, tgt, dsp); + else + specialcopy(src, &sst, tgt, dsp); + } + if (pers == PERS_MV && errcnt == 0 && level == 0) + trydelete(src, 1); + if ((pers == PERS_CP || pers == PERS_MV) && level == 0 && d0) + freedslots(); +} + +/*ARGSUSED3*/ +static void +ln(const char *src, const char *tgt, struct stat *dsp, int level, + int (*statfn)(const char *, struct stat *)) +{ + struct stat sst; + int (*how)(const char *, const char *) = sflag ? symlink : link; + + if (commoncheck(src, tgt, dsp, &sst, statfn) != OKAY) + return; + if ((sst.st_mode&S_IFMT) == S_IFDIR && !sflag) { + fprintf(stderr, "%s: <%s> directory\n", progname, src); + errcnt |= 01; + return; + } +#if (defined (SUS) || defined (S42)) && (defined (__linux__) || defined (__sun)) + if (sflag == 0) { + char *rpbuf = alloca(PATH_MAX+1); + if (realpath(src, rpbuf) == NULL) { + fprintf(stderr, "%s: cannot access %s\n", + progname, src); + errcnt |= 01; + return; + } + src = rpbuf; + } +#endif /* (SUS || S42) && (__linux__ || __sun) */ + if (dsp +#if !defined (SUS) + && !sflag +#endif /* !SUS */ + ) { + if (nflag && !fflag) { + fprintf(stderr, "%s: %s: File exists\n", + progname, tgt); + errcnt |= 01; + return; + } + if (!fflag && ontty && (dsp->st_mode&S_IFMT) != S_IFLNK && + access(tgt, W_OK) < 0) { + fprintf(stderr, "%s: %s: %o mode? 
", + progname, tgt, (int)(dsp->st_mode & 0777)); + if (confirm() != OKAY) + return; + } + if (unlink(tgt) < 0) { + fprintf(stderr, "%s: cannot unlink %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + errcnt |= 01; + return; + } + } + if (how(src, tgt) < 0) { + if (sflag) + fprintf(stderr, "%s: cannot create %s\n%s: %s\n", + progname, tgt, + progname, strerror(errno)); + else if (errno == EXDEV) + fprintf(stderr, "%s: different file system\n", + progname); + else + fprintf(stderr, "%s: errno: %d no permission for %s\n", + progname, errno, tgt); + errcnt |= 01; + } +} + +static const char * +getfl(void) +{ + const char *optstring; + + if (progname[0] == 'm' && progname[1] == 'v') { + pers = PERS_MV; + optstring = "b:fi"; + dflag = pflag = rflag = Rflag = 1; + go = cpmv; + } else if (progname[0] == 'l' && progname[1] == 'n') { + pers = PERS_LN; + optstring = "fns"; +#if defined (SUS) + nflag = 1; +#endif /* SUS */ + go = ln; + } else { + pers = PERS_CP; + optstring = "ab:dDfiHLpPrRs"; + go = cpmv; + } + return optstring; +} + +int +main(int argc, char **argv) +{ + struct stat dst, ust; + const char *optstring; + int (*statfn)(const char *, struct stat *); + int i, illegal = 0; + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + progname = basename(argv[0]); + optstring = getfl(); + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'b': + bflag = atol(optarg); + break; +#ifdef O_DIRECT + case 'D': + Dflag = 1; + break; +#endif /* O_DIRECT */ + case 'd': + dflag = 1; + break; + case 'f': + fflag = 1; +#if defined (SUS) + if (pers == PERS_MV) + iflag = 0; +#endif /* SUS */ + break; + case 'i': + iflag = 1; +#if defined (SUS) + if (pers == PERS_MV) + fflag = 0; +#endif /* SUS */ + break; + case 'n': + nflag = 1; + break; + case 'p': + pflag = 1; + break; + case 'a': + dflag = pflag = 1; + /*FALLTHRU*/ + case 'R': + Rflag = 1; + /*FALLTHRU*/ + case 'r': + rflag = 1; + break; + case 's': + sflag = 1; + break; + case 'H': + 
case 'L': + case 'P': + HLPflag = i; + break; + default: + illegal = 1; + } + } + argv += optind, argc -= optind; + if (argc < 2) { + fprintf(stderr, "%s: Insufficient arguments (%d)\n", + progname, argc); + illegal = 1; + } + if (illegal) + usage(); + umask(umsk = umask(0)); + ontty = isatty(0); +#if defined (SUS) + /* nothing */ +#elif defined (S42) + if (pers == PERS_MV && !ontty) + iflag = 0; +#else /* !SUS, !S42 */ + if (!ontty) + iflag = 0; +#endif /* !SUS, !S42 */ + myuid = geteuid(); + mygid = getegid(); + inull = scalloc(1, sizeof *inull); + inull->i_lln = inull->i_rln = inull; + statfn = (Rflag && HLPflag != 'L' +#if !defined (SUS) && !defined (S42) + || pers == PERS_LN +#endif /* !SUS && !S42 */ + ? lstat : stat); + if (lstat(argv[argc-1], &dst) == 0) { + if ((dst.st_mode&S_IFMT) != S_IFLNK || + stat(argv[argc-1], &ust) < 0) + ust = dst; + if ((ust.st_mode&S_IFMT) == S_IFDIR) { + char *copy, *cend; + size_t sz, slen, ss; + unsigned saverrs = errcnt; + + getpath(argv[argc-1], &copy, &cend, &sz, &slen); + for (i = 0; i < argc-1; i++) { + errcnt = 0; + setpath(basename(argv[i]), &copy, &cend, + slen, &sz, &ss); + go(argv[i], copy, statfn(copy, &dst) < 0 ? + NULL : &dst, 0, statfn); + saverrs |= errcnt; + } + errcnt = saverrs; + } else if (argc > 2) { + fprintf(stderr, "%s: Target must be directory\n", + progname); + usage(); + } else + go(argv[0], argv[1], pers == PERS_CP ? &ust : &dst, + 0, statfn); + } else if (argc > 2) { + fprintf(stderr, "%s: %s not found\n", progname, argv[argc-1]); + errcnt |= 01; + } else + go(argv[0], argv[1], NULL, 0, statfn); + return errcnt; +} diff --git a/cp/ln.1 b/cp/ln.1 @@ -0,0 +1,113 @@ +.\" +.\" Sccsid @(#)ln.1 1.11 (gritter) 2/2/05 +.\" Parts taken from ln(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.TH LN 1 "2/2/05" "Heirloom Toolchest" "User Commands" +.SH NAME +ln \- make a link +.SH SYNOPSIS +\fBln\fR [\fB\-f\fR] [\fB\-n\fR] [\fB\-s\fR] +\fIname1\fR [\fIname2\fR .\ .\ .\ ] \fItarget\fR +.SH DESCRIPTION +A link is a directory entry referring to a file; +the same file +(together with its size, all its protection information, etc.) +may have several links to it. +There is no way to distinguish a link to a file +from its original directory entry; +any changes in the file +are effective independently of the name +by which the file is known. +.PP +.B Ln +creates a link named +.I target +to an existing file +.IR name1 . +If +.I target +is a directory, +more than one name may be given, +and the links are placed in that directory, +with the name of the last pathname component. +.PP +It is forbidden to link to a directory +or to link across file systems. +It is, however, possible +to create a +.I symbolic +.I link +even in this case; +see the +.B \-s +option below. +.PP +The +.B ln +command accepts the following options: +.TP +.B \-f +If the target file exists, +an attempt is made to unlink it +before the new link is created, +regardless of file permissions. +This option is ignored with +.B /usr/5bin/ln +and +.B /usr/5bin/s42/ln +if the +.I \-s +option is also given. +.TP +.B \-n +Do not remove an existing target file +even if the user has write permission on it. +This is the default for +.B /usr/5bin/posix/ln +and +.BR /usr/5bin/posix2001/ln . +.TP +.B \-s +Create a symbolic link, +that is a special file containing the pathname of the target file. +The system will resolve this pathname +when the symbolic link is accessed. +A symbolic link can refer to all types of files +and span device boundaries, +but will become stale if the target file is removed. +An existing target file will not be overwritten. 
+.SH "SEE ALSO" +cp(1), +mv(1), +link(2), +symlink(2) diff --git a/cp/mkfile b/cp/mkfile @@ -0,0 +1,9 @@ +BIN = cp +OBJ = cp.o +LOCAL_CFLAGS = -DSUS +INSTALL_BIN = cp +INSTALL_MAN1 = cp.1 +DEPS = libcommon + +<$mkbuild/mk.default + diff --git a/cp/mv.1 b/cp/mv.1 @@ -0,0 +1,179 @@ +.\" +.\" Sccsid @(#)mv.1 1.15 (gritter) 1/24/05 +.\" Parts taken from cp(1) and mv(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH MV 1 "1/24/05" "Heirloom Toolchest" "User Commands" +.SH NAME +mv \- move or rename files and directories +.SH SYNOPSIS +\fBmv\fR [\fB\-f\fR] [\fB\-i\fR] [\fB\-b\ \fIsize\fR] +\fIfile1\fR [\fIfile2\fR .\ .\ .\ ] \fItarget\fR +.SH DESCRIPTION +.B Mv +moves (changes the name of) +.I file1 +to +.IR target . +If +.I target +is an existing regular file, +its content is overwritten. +Its mode and owner are preserved; +the mode of the source file is used otherwise. +If +the mode of +.I target +forbids writing +(and standard input is a terminal for +.B /usr/5bin/mv +and +.BR /usr/5bin/s42/mv ), +.B mv +prints the mode +(see +.IR chmod (2)) +and reads the standard input +to obtain a line; +if the line begins with y, +the move takes place; +if not, +the file is not moved. +.PP +If +.I target +is a directory, +one or more files are moved +into the directory with their original file-names. +.PP +.B Mv +refuses to move a file onto itself. +.PP +The +.B mv +command accepts the following options: +.TP +.B \-f +.B Mv +will not ask for confirmation +even if the modes of the +.I target +file do not permit writing. +Overrides the +.B \-i +option in +.B /usr/5bin/posix/mv +and +.BR /usr/5bin/posix2001/mv . +.TP +.B \-i +.B mv +will ask for confirmation +before overwriting an existing target. +For +.B /usr/5bin/mv +and +.BR /usr/5bin/s42/mv , +this flag will be automatically disabled +if standard input is not a terminal. 
+Overrides the +.B \-f +option in +.B /usr/5bin/posix/mv +and +.BR /usr/5bin/posix2001/mv . +.PP +The following option is an extension: +.TP +\fB\-b\ \fIsize\fR +When a regular file is moved to another file system, +its data must be copied. +This option overrides the automatically determined +i/o buffer size for such copies; +.I size +is given in bytes. +.SH "SEE ALSO" +cp(1), +cat(1), +pr(1), +unlink(2) +.SH NOTES +Use either +.RS +.sp +.B mv +.B \-\- +.I \-file +.I target +.sp +.RE +or +.RS +.sp +.B mv +.I ./\-file +.I target +.sp +.RE +to move files that begin with a hyphen character. +.PP +If source and target +lie on different file systems, +.B mv +must copy the file and delete the original. +In this case +any linking relationship with other files is lost, +but +.B mv +will preserve linkage inside the moved tree. +.B Mv +will try to preserve access and modification times, +user and group ownerships, +and file permission bits. +Failing to preserve these modes +is not considered an error, +only +.B /usr/5bin/posix/mv +and +.B /usr/5bin/posix2001/mv +will print an error message. +Special files +such as block and character devices, +FIFOs, +and symbolic links +are recreated in the target hierarchy. +.PP +The pathname contained in a symbolic link +is not changed when the link is moved. +Symbolic links with relative pathnames +may thus change or lose their target +if moved to a different level in the file hierarchy. diff --git a/dc/dc.1 b/dc/dc.1 @@ -0,0 +1,231 @@ +.\" +.\" Sccsid @(#)dc.1 1.5 (gritter) 1/11/03 +.\" Derived from dc(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. 
+.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH DC 1 "1/11/03" "Heirloom Toolchest" "User Commands" +.SH NAME +dc \- desk calculator +.SH SYNOPSIS +\fBdc\fR [\fIfile\fR] +.SH DESCRIPTION +.I Dc +is an arbitrary precision arithmetic package. +Ordinarily it operates on decimal integers, +but one may specify an input base, output base, +and a number of fractional digits to be maintained. +The overall structure of +.I dc +is +a stacking (reverse Polish) calculator. 
+If an argument is given, +input is taken from that file until its end, +then from the standard input. +The following constructions are recognized: +.HP 6 +number +.br +The value of the number is pushed on the stack. +A number is an unbroken string of the digits 0-9. +It may be preceded by an underscore _ to input a +negative number. +Numbers may contain decimal points. +.HP 6 ++ \- / * % ^ +.br +The +top two values on the stack are added +(+), +subtracted +(\-), +multiplied (*), +divided (/), +remaindered (%), +or exponentiated (^). +The two entries are popped off the stack; +the result is pushed on the stack in their place. +Any fractional part of an exponent is ignored. +.TP +.BI s x +The +top of the stack is popped and stored into +a register named +.I x, +where +.I x +may be any character. +If +the +.B s +is capitalized, +.I x +is treated as a stack and the value is pushed on it. +.TP +.BI l x +The +value in register +.I x +is pushed on the stack. +The register +.I x +is not altered. +All registers start with zero value. +If the +.B l +is capitalized, +register +.I x +is treated as a stack and its top value is popped onto the main stack. +.TP +.B d +The +top value on the stack is duplicated. +.TP +.B p +The top value on the stack is printed. +The top value remains unchanged. +.B P +interprets the top of the stack as an ascii string, +removes it, and prints it. +.TP +.B f +All values on the stack and in registers are printed. +.TP +.B q +exits the program. +If executing a string, the recursion level is +popped by two. +If +.B q +is capitalized, +the top value on the stack is popped and the string execution level is popped +by that value. +.TP +.B x +treats the top element of the stack as a character string +and executes it as a string of dc commands. +.TP +.B X +replaces the number on the top of the stack with its scale factor. +.TP +.B "[ ... ]" +puts the bracketed ascii string onto the top of the stack. 
+.HP 6 +.I "<x >x =x" +.br +The +top two elements of the stack are popped and compared. +Register +.I x +is executed if they obey the stated +relation. +.TP +.B v +replaces the top element on the stack by its square root. +Any existing fractional part of the argument is taken +into account, but otherwise the scale factor is ignored. +.TP +.B ! +interprets the rest of the line as a UNIX command. +.TP +.B c +All values on the stack are popped. +.TP +.B i +The top value on the stack is popped and used as the +number radix for further input. +.B I +pushes the input base on the top of the stack. +.TP +.B o +The top value on the stack is popped and used as the +number radix for further output. +.TP +.SM +.B O +pushes the output base on the top of the stack. +.TP +.B k +the top of the stack is popped, and that value is used as +a non-negative scale factor: +the appropriate number of places +are printed on output, +and maintained during multiplication, division, and exponentiation. +The interaction of scale factor, +input base, and output base will be reasonable if all are changed +together. +.TP +.B z +The stack level is pushed onto the stack. +.TP +.SM +.B Z +replaces the number on the top of the stack with its length. +.TP +.B ? +A line of input is taken from the input source (usually the terminal) +and executed. +.TP +.B "; :" +are used by +.I bc +for array operations. +.PP +An example which prints the first ten values of n! is +.nf +.PP +.in +3 +[la1+dsa*pla10>y]sy +.br +0sa1 +.br +lyx +.fi +.SH "SEE ALSO" +bc(1), +which is a preprocessor for +.I dc +providing infix notation and a C-like syntax +which implements functions and reasonable control +structures for programs. +.SH DIAGNOSTICS +`x is unimplemented' where x is an octal number. +.br +`stack empty' for not enough elements on the stack to do what was asked. +.br +`Out of space' when the free list is exhausted (too many digits). +.br +`Out of headers' for too many numbers being kept around. 
+.br +`Out of pushdown' for too many items on the stack. +.br +`Nesting Depth' for too many levels of nested execution. diff --git a/dc/dc.c b/dc/dc.c @@ -0,0 +1,2061 @@ +/* from 4.4BSD /usr/src/usr.bin/dc/dc.c */ +/*- + * Copyright (c) 1991, 1993 + * The Regents of the University of California. All rights reserved. + * + * This module is believed to contain source code proprietary to AT&T. + * Use and redistribution is subject to the Berkeley Software License + * Agreement and your Software Agreement with AT&T (Western Electric). + * + * from dc.c 8.1 (Berkeley) 6/6/93" + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* Sccsid @(#)dc.c 1.21 (gritter) 12/25/06> */ + +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> +#include <stdio.h> +#include <signal.h> +#include "sigset.h" +#include <stdlib.h> +#include <inttypes.h> +#include <limits.h> + +#include "dc.h" + +int +main(int argc,char **argv) +{ + init(argc,argv); + commnds(); + /*NOTREACHED*/ + return(0); +} + +void +commnds(void){ + register int c; + register struct blk *p,*q; + long l; + int sign; + struct blk **ptr,*s,*t; + struct sym *sp; + int sk,sk1,sk2; + int n,d; + + while(1){ + if(((c = readc())>='0' && c <= '9')|| (c>='A' && c <='F') || c == '.'){ + unreadc(c); + p = readin(); + pushp(p); + continue; + } + switch(c){ + case ' ': + case '\n': + case 0377: + case EOF: + continue; + case 'Y': + sdump("stk",*stkptr); + printf("all %ld rel %ld headmor %ld\n",all,rel,headmor); + printf("nbytes %ld\n",nbytes); + continue; + case '_': + p = readin(); + savk = sunputc(p); + chsign(p); + sputc(p,savk); + pushp(p); + continue; + case '-': + subt(); + continue; + case '+': + if(eqk() != 0)continue; + binop('+'); + continue; + case '*': + arg1 = pop(); + EMPTY; + arg2 = pop(); + EMPTYR(arg1); + sk1 = sunputc(arg1); + sk2 = sunputc(arg2); + binop('*'); + p = pop(); + sunputc(p); + savk = n = sk1+sk2; + if(n>k && n>sk1 && n>sk2){ + sk = sk1; + if(sk<sk2)sk = sk2; + if(sk<k)sk = k; + p = removc(p,n-sk); + savk = sk; + } + sputc(p,savk); + pushp(p); + continue; + case '/': +casediv: + if(dscale() != 0)continue; + binop('/'); + 
if(irem != 0)release(irem); + release(rem); + continue; + case '%': + if(dscale() != 0)continue; + binop('/'); + p = pop(); + release(p); + if(irem == 0){ + sputc(rem,skr+k); + pushp(rem); + continue; + } + p = add0(rem,skd-(skr+k)); + q = add(p,irem); + release(p); + release(irem); + sputc(q,skd); + pushp(q); + continue; + case 'v': + p = pop(); + EMPTY; + savk = sunputc(p); + if(length(p) == 0){ + sputc(p,savk); + pushp(p); + continue; + } + if((c = sbackc(p))<0){ + error("sqrt of neg number\n"); + } + if(k<savk)n = savk; + else{ + n = k*2-savk; + savk = k; + } + arg1 = add0(p,n); + arg2 = dcsqrt(arg1); + sputc(arg2,savk); + pushp(arg2); + continue; + case '^': + neg = 0; + arg1 = pop(); + EMPTY; + if(sunputc(arg1) != 0)error("exp not an integer\n"); + arg2 = pop(); + EMPTYR(arg1); + if(sfbeg(arg1) == 0 && sbackc(arg1)<0){ + neg++; + chsign(arg1); + } + if(length(arg1)>=3){ + error("exp too big\n"); + } + savk = sunputc(arg2); + p = dcexp(arg2,arg1); + release(arg2); + rewind(arg1); + c = sgetc(arg1); + if(sfeof(arg1) == 0) + c = sgetc(arg1)*100 + c; + d = c*savk; + release(arg1); + if(neg == 0){ + if(k>=savk)n = k; + else n = savk; + if(n<d){ + q = removc(p,d-n); + sputc(q,n); + pushp(q); + } + else { + sputc(p,d); + pushp(p); + } + } + else { + sputc(p,d); + pushp(p); + } + if(neg == 0)continue; + p = pop(); + q = salloc(2); + sputc(q,1); + sputc(q,0); + pushp(q); + pushp(p); + goto casediv; + case 'z': + p = salloc(2); + n = stkptr - stkbeg; + if(n >= 100){ + sputc(p,n/100); + n %= 100; + } + sputc(p,n); + sputc(p,0); + pushp(p); + continue; + case 'Z': + p = pop(); + EMPTY; + n = (length(p)-1)<<1; + fsfile(p); + sbackc(p); + if(sfbeg(p) == 0){ + if((c = sbackc(p))<0){ + n -= 2; + if(sfbeg(p) == 1)n += 1; + else { + if((c = sbackc(p)) == 0)n += 1; + else if(c > 90)n -= 1; + } + } + else if(c < 10) n -= 1; + } + release(p); + q = salloc(1); + if(n >= 100){ + sputc(q,n%100); + n /= 100; + } + sputc(q,n); + sputc(q,0); + pushp(q); + continue; + case 'i': + p = 
pop(); + EMPTY; + p = scalint(p); + release(inbas); + inbas = p; + continue; + case 'I': + p = copy(inbas,length(inbas)+1); + sputc(p,0); + pushp(p); + continue; + case 'o': + p = pop(); + EMPTY; + p = scalint(p); + sign = 0; + n = length(p); + q = copy(p,n); + fsfile(q); + l = c = sbackc(q); + if(n != 1){ + if(c<0){ + sign = 1; + chsign(q); + n = length(q); + fsfile(q); + l = c = sbackc(q); + } + if(n != 1){ + while(sfbeg(q) == 0)l = l*100+sbackc(q); + } + } + if (l > BC_BASE_MAX) + error("output base is too large\n"); + logo = log_2(l); + obase = l; + release(basptr); + if(sign == 1)obase = (long)-l; + basptr = p; + outdit = (int (*)(struct blk *, int, int))bigot; + if(n == 1 && sign == 0){ + if(c <= 16){ + outdit = (int (*)(struct blk *, int, int))hexot; + fw = 1; + fw1 = 0; + ll = 68; + release(q); + continue; + } + } + n = 0; + if(sign == 1)n++; + p = salloc(1); + sputc(p,-1); + t = add(p,q); + n += length(t)*2; + fsfile(t); + if((c = sbackc(t))>9)n++; + release(t); + release(q); + release(p); + fw = n; + fw1 = n-1; + ll = 68; + if(fw>=ll)continue; + ll = (68/fw)*fw; + continue; + case 'O': + p = copy(basptr,length(basptr)+1); + sputc(p,0); + pushp(p); + continue; + case '[': + n = 0; + p = salloc(0); + while(1){ + if((c = readc()) == ']'){ + if(n == 0)break; + n--; + } + sputc(p,c); + if(c == '[')n++; + } + pushp(p); + continue; + case 'k': + p = pop(); + EMPTY; + p = scalint(p); + if(length(p)>1){ + error("scale too big\n"); + } + rewind(p); + k = sfeof(p)?0:sgetc(p); + release(scalptr); + scalptr = p; + continue; + case 'K': + p = copy(scalptr,length(scalptr)+1); + sputc(p,0); + pushp(p); + continue; + case 'X': + p = pop(); + EMPTY; + fsfile(p); + n = sbackc(p); + release(p); + p = salloc(2); + sputc(p,n); + sputc(p,0); + pushp(p); + continue; + case 'Q': + p = pop(); + EMPTY; + if(length(p)>2){ + error("Q?\n"); + } + rewind(p); + if((c = sgetc(p))<0){ + error("neg Q\n"); + } + release(p); + while(c-- > 0){ + if(readptr == &readstk[0]){ + 
error("readstk?\n"); + } + if(*readptr != 0)release(*readptr); + readptr--; + } + continue; + case 'q': + if(readptr <= &readstk[1])exit(0); + if(*readptr != 0)release(*readptr); + readptr--; + if(*readptr != 0)release(*readptr); + readptr--; + continue; + case 'f': + if(stkptr == &stack[0])printf("empty stack\n"); + else { + for(ptr = stkptr; ptr > &stack[0];){ + print(*ptr--); + } + } + continue; + case 'p': + if(stkptr == &stack[0])printf("empty stack\n"); + else{ + print(*stkptr); + } + continue; + case 'P': + p = pop(); + EMPTY; + sputc(p,0); + printf("%s",p->beg); + release(p); + continue; + case 'd': + if(stkptr == &stack[0]){ + printf("empty stack\n"); + continue; + } + q = *stkptr; + n = length(q); + p = copy(*stkptr,n); + pushp(p); + continue; + case 'c': + while(stkerr == 0){ + p = pop(); + if(stkerr == 0)release(p); + } + continue; + case 'S': + if(stkptr == &stack[0]){ + error("save: args\n"); + } + c = readc() & 0377; + sptr = stable[c]; + sp = stable[c] = sfree; + sfree = sfree->next; + if(sfree == 0)goto sempty; + sp->next = sptr; + p = pop(); + EMPTY; + if(c >= ARRAYST){ + q = copy(p,length(p)); + for(n = 0;n < PTRSZ;n++)sputc(q,0); + release(p); + p = q; + } + sp->val = p; + continue; +sempty: + error("symbol table overflow\n"); + case 's': + if(stkptr == &stack[0]){ + error("save:args\n"); + } + c = readc() & 0377; + sptr = stable[c]; + if(sptr != 0){ + p = sptr->val; + if(c >= ARRAYST){ + rewind(p); + while(sfeof(p) == 0)release(dcgetwd(p)); + } + release(p); + } + else{ + sptr = stable[c] = sfree; + sfree = sfree->next; + if(sfree == 0)goto sempty; + sptr->next = 0; + } + p = pop(); + sptr->val = p; + continue; + case 'l': + load(); + continue; + case 'L': + c = readc() & 0377; + sptr = stable[c]; + if(sptr == 0){ + error("L?\n"); + } + stable[c] = sptr->next; + sptr->next = sfree; + sfree = sptr; + p = sptr->val; + if(c >= ARRAYST){ + rewind(p); + while(sfeof(p) == 0){ + q = dcgetwd(p); + if(q != 0)release(q); + } + } + pushp(p); + continue; + 
case ':': + p = pop(); + EMPTY; + q = scalint(p); + fsfile(q); + c = 0; + if((sfbeg(q) == 0) && ((c = sbackc(q))<0)){ + error("neg index\n"); + } + if(length(q)>2){ + error("index too big\n"); + } + if(sfbeg(q) == 0)c = c*100+sbackc(q); + if(c >= BC_DIM_MAX){ + error("index too big\n"); + } + release(q); + n = readc() & 0377; + sptr = stable[n]; + if(sptr == 0){ + sptr = stable[n] = sfree; + sfree = sfree->next; + if(sfree == 0)goto sempty; + sptr->next = 0; + p = salloc((c+PTRSZ)*PTRSZ); + zero(p); + } + else{ + p = sptr->val; + if(length(p)-PTRSZ < c*PTRSZ){ + q = copy(p,(c+PTRSZ)*PTRSZ); + release(p); + p = q; + } + } + seekc(p,c*PTRSZ); + q = lookwd(p); + if (q!=NULL) release(q); + s = pop(); + EMPTY; + salterwd((struct wblk *)p,s); + sptr->val = p; + continue; + case ';': + p = pop(); + EMPTY; + q = scalint(p); + fsfile(q); + c = 0; + if((sfbeg(q) == 0) && ((c = sbackc(q))<0)){ + error("neg index\n"); + } + if(length(q)>2){ + error("index too big\n"); + } + if(sfbeg(q) == 0)c = c*100+sbackc(q); + if(c >= BC_DIM_MAX){ + error("index too big\n"); + } + release(q); + n = readc() & 0377; + sptr = stable[n]; + if(sptr != 0){ + p = sptr->val; + if(length(p)-PTRSZ >= c*PTRSZ){ + seekc(p,c*PTRSZ); + s = dcgetwd(p); + if(s != 0){ + q = copy(s,length(s)); + pushp(q); + continue; + } + } + } + q = salloc(1); + sputc(q, 0); + pushp(q); + continue; + case 'x': +execute: + p = pop(); + EMPTY; + if((readptr != &readstk[0]) && (*readptr != 0)){ + if((*readptr)->rd == (*readptr)->wt) + release(*readptr); + else{ + if(readptr++ == &readstk[RDSKSZ]){ + error("nesting depth\n"); + } + } + } + else readptr++; + *readptr = p; + if(p != 0)rewind(p); + else{ + if((c = readc()) != '\n')unreadc(c); + } + continue; + case '?': + if(++readptr == &readstk[RDSKSZ]){ + error("nesting depth\n"); + } + *readptr = 0; + fsave = curfile; + curfile = stdin; + while((c = readc()) == '!')command(); + p = salloc(0); + sputc(p,c); + while((c = readc()) != '\n'){ + sputc(p,c); + if(c == 
'\\')sputc(p,readc()); + } + curfile = fsave; + *readptr = p; + continue; + case '!': + if(command() == 1)goto execute; + continue; + case '<': + case '>': + case '=': + if(cond(c) == 1)goto execute; + continue; + default: + printf("%o is unimplemented\n",c); + } + } +} + +struct blk * +div(struct blk *ddivd,struct blk *ddivr) +{ + int divsign,remsign,offset,divcarry = 0; + int carry, dig = 0,magic,d = 0,dd; + long c,td,cc; + struct blk *ps; + register struct blk *p,*divd,*divr; + + rem = 0; + p = salloc(0); + if(length(ddivr) == 0){ + pushp(ddivr); + printf("divide by 0\n"); + return NULL; + } + divsign = remsign = 0; + divr = ddivr; + fsfile(divr); + if(sbackc(divr) == -1){ + divr = copy(ddivr,length(ddivr)); + chsign(divr); + divsign = ~divsign; + } + divd = copy(ddivd,length(ddivd)); + fsfile(divd); + if(sfbeg(divd) == 0 && sbackc(divd) == -1){ + chsign(divd); + divsign = ~divsign; + remsign = ~remsign; + } + offset = length(divd) - length(divr); + if(offset < 0)goto ddone; + seekc(p,offset+1); + sputc(divd,0); + magic = 0; + fsfile(divr); + c = sbackc(divr); + if(c<10)magic++; + c = c*100 + (sfbeg(divr)?0:sbackc(divr)); + if(magic>0){ + c = (c*100 +(sfbeg(divr)?0:sbackc(divr)))*2; + c /= 25; + } + while(offset >= 0){ + fsfile(divd); + td = sbackc(divd)*100; + dd = sfbeg(divd)?0:sbackc(divd); + td = (td+dd)*100; + dd = sfbeg(divd)?0:sbackc(divd); + td = td+dd; + cc = c; + if(offset == 0)td += 1; + else cc += 1; + if(magic != 0)td = td<<3; + dig = td/cc; + rewind(divr); + rewind(divxyz); + carry = 0; + while(sfeof(divr) == 0){ + d = sgetc(divr)*dig+carry; + carry = d / 100; + salterc(divxyz,d%100); + } + salterc(divxyz,carry); + rewind(divxyz); + seekc(divd,offset); + carry = 0; + while(sfeof(divd) == 0){ + d = slookc(divd); + d = d-(sfeof(divxyz)?0:sgetc(divxyz))-carry; + carry = 0; + if(d < 0){ + d += 100; + carry = 1; + } + salterc(divd,d); + } + divcarry = carry; + sbackc(p); + salterc(p,dig); + sbackc(p); + if(--offset >= 0){ + if(d > 0){ + sbackc(divd); + 
dd=sbackc(divd); + salterc(divd,dd+100); + } + divd->wt--; + } + } + if(divcarry != 0){ + salterc(p,dig-1); + salterc(divd,-1); + ps = add(divr,divd); + release(divd); + divd = ps; + } + + rewind(p); + divcarry = 0; + while(sfeof(p) == 0){ + d = slookc(p)+divcarry; + divcarry = 0; + if(d >= 100){ + d -= 100; + divcarry = 1; + } + salterc(p,d); + } + if(divcarry != 0)salterc(p,divcarry); + fsfile(p); + while(sfbeg(p) == 0){ + if(sbackc(p) == 0)truncate(p); + else break; + } + if(divsign < 0)chsign(p); + fsfile(divd); + while(sfbeg(divd) == 0){ + if(sbackc(divd) == 0)truncate(divd); + else break; + } +ddone: + if(remsign<0)chsign(divd); + if(divr != ddivr)release(divr); + rem = divd; + return(p); +} + +int +dscale(void){ + register struct blk *dd,*dr; + register struct blk *r; + int c; + + dr = pop(); + EMPTYS; + dd = pop(); + EMPTYSR(dr); + fsfile(dd); + skd = sunputc(dd); + fsfile(dr); + skr = sunputc(dr); + if(sfbeg(dr) == 1 || (sfbeg(dr) == 0 && sbackc(dr) == 0)){ + sputc(dr,skr); + pushp(dr); + errorrt("divide by 0\n"); + } + c = k-skd+skr; + if(c < 0)r = removr(dd,-c); + else { + r = add0(dd,c); + irem = 0; + } + arg1 = r; + arg2 = dr; + savk = k; + return(0); +} + +struct blk * +removr(struct blk *p,int n) +{ + int nn; + register struct blk *q,*s,*r; + + rewind(p); + nn = (n+1)/2; + q = salloc(nn); + while(n>1){ + sputc(q,sgetc(p)); + n -= 2; + } + r = salloc(2); + while(sfeof(p) == 0)sputc(r,sgetc(p)); + release(p); + if(n == 1){ + s = dcdiv(r,tenptr); + release(r); + rewind(rem); + if(sfeof(rem) == 0)sputc(q,sgetc(rem)); + release(rem); + irem = q; + return(s); + } + irem = q; + return(r); +} + +struct blk * +sqrt(struct blk *p) +{ + struct blk *t; + struct blk *r,*q,*s; + int c,n,nn; + + n = length(p); + fsfile(p); + c = sbackc(p); + if((n&1) != 1)c = c*100+(sfbeg(p)?0:sbackc(p)); + n = (n+1)>>1; + r = salloc(n); + zero(r); + seekc(r,n); + nn=1; + while((c -= nn)>=0)nn+=2; + c=(nn+1)>>1; + fsfile(r); + sbackc(r); + if(c>=100){ + c -= 100; + salterc(r,c); + 
sputc(r,1); + } + else salterc(r,c); + while(1){ + q = dcdiv(p,r); + s = add(q,r); + release(q); + release(rem); + q = dcdiv(s,sqtemp); + release(s); + release(rem); + s = copy(r,length(r)); + chsign(s); + t = add(s,q); + release(s); + fsfile(t); + nn = sfbeg(t)?0:sbackc(t); + if(nn>=0)break; + release(r); + release(t); + r = q; + } + release(t); + release(q); + release(p); + return(r); +} + +struct blk * +exp(struct blk *base,struct blk *ex) +{ + register struct blk *r,*e,*p; + struct blk *e1,*t,*cp; + int temp,c,n; + r = salloc(1); + sputc(r,1); + p = copy(base,length(base)); + e = copy(ex,length(ex)); + fsfile(e); + if(sfbeg(e) != 0)goto edone; + temp=0; + c = sbackc(e); + if(c<0){ + temp++; + chsign(e); + } + while(length(e) != 0){ + e1=dcdiv(e,sqtemp); + release(e); + e = e1; + n = length(rem); + release(rem); + if(n != 0){ + e1=mult(p,r); + release(r); + r = e1; + } + t = copy(p,length(p)); + cp = mult(p,t); + release(p); + release(t); + p = cp; + } + if(temp != 0){ + if((c = length(base)) == 0){ + goto edone; + } + if(c>1)create(r); + else{ + rewind(base); + if((c = sgetc(base))<=1){ + create(r); + sputc(r,c); + } + else create(r); + } + } +edone: + release(p); + release(e); + return(r); +} + +void +init(int argc,char **argv) +{ + register struct sym *sp; + + if (sigset(SIGINT, SIG_IGN) != SIG_IGN) + sigset(SIGINT,onintr); + setbuf(stdout,(char *)NULL); + svargc = --argc; + svargv = argv; + while(svargc>0 && svargv[1][0] == '-'){ + switch(svargv[1][1]){ + default: + dbg=1; + } + svargc--; + svargv++; + } + ifile=1; + if(svargc<=0)curfile = stdin; + else if((curfile = fopen(svargv[1],"r")) == NULL){ + printf("can't open file %s\n",svargv[1]); + exit(1); + } + scalptr = salloc(1); + sputc(scalptr,0); + basptr = salloc(1); + sputc(basptr,10); + obase=10; + log_10=log_2(10L); + ll=68; + fw=1; + fw1=0; + tenptr = salloc(1); + sputc(tenptr,10); + obase=10; + inbas = salloc(1); + sputc(inbas,10); + sqtemp = salloc(1); + sputc(sqtemp,2); + chptr = salloc(0); + 
strptr = salloc(0); + divxyz = salloc(0); + stkbeg = stkptr = &stack[0]; + stkend = &stack[STKSZ]; + stkerr = 0; + readptr = &readstk[0]; + k=0; + sp = sptr = &symlst[0]; + while(sptr < &symlst[TBLSZ]){ + sptr->next = ++sp; + sptr++; + } + sptr->next=0; + sfree = &symlst[0]; + return; +} + +void +onintr(int signum){ + + sigset(SIGINT,onintr); + while(readptr != &readstk[0]){ + if(*readptr != 0){release(*readptr);} + readptr--; + } + curfile = stdin; + commnds(); +} + +void +pushp(struct blk *p) +{ + if(stkptr == stkend){ + printf("out of stack space\n"); + return; + } + stkerr=0; + *++stkptr = p; + return; +} + +struct blk * +pop(void){ + if(stkptr == stack){ + stkerr=1; + return(0); + } + return(*stkptr--); +} + +struct blk * +readin(void){ + register struct blk *p,*q; + int dp,dpct; + register int c; + + dp = dpct=0; + p = salloc(0); + while(1){ + c = readc(); + switch(c){ + case '.': + if(dp != 0){ + unreadc(c); + break; + } + dp++; + continue; + case '\\': + readc(); + continue; + default: + if(c >= 'A' && c <= 'F')c = c - 'A' + 10; + else if(c >= '0' && c <= '9')c -= '0'; + else goto gotnum; + if(dp != 0){ + if(dpct >= 99)continue; + dpct++; + } + create(chptr); + if(c != 0)sputc(chptr,c); + q = mult(p,inbas); + release(p); + p = add(chptr,q); + release(q); + } + } +gotnum: + unreadc(c); + if(dp == 0){ + sputc(p,0); + return(p); + } + else{ + q = scale(p,dpct); + return(q); + } +} + +struct blk * +add0(struct blk *p,int ct) +{ + /* returns pointer to struct with ct 0's & p */ + register struct blk *q,*t; + + q = salloc(length(p)+(ct+1)/2); + while(ct>1){ + sputc(q,0); + ct -= 2; + } + rewind(p); + while(sfeof(p) == 0){ + sputc(q,sgetc(p)); + } + release(p); + if(ct == 1){ + t = mult(tenptr,q); + release(q); + return(t); + } + return(q); +} + +struct blk * +mult(struct blk *p,struct blk *q) +{ + register struct blk *mp,*mq,*mr; + int sign,offset,carry; + int cq,cp,mt,mcr; + + offset = sign = 0; + fsfile(p); + mp = p; + if(sfbeg(p) == 0){ + if(sbackc(p)<0){ + mp 
= copy(p,length(p)); + chsign(mp); + sign = ~sign; + } + } + fsfile(q); + mq = q; + if(sfbeg(q) == 0){ + if(sbackc(q)<0){ + mq = copy(q,length(q)); + chsign(mq); + sign = ~sign; + } + } + mr = salloc(length(mp)+length(mq)); + zero(mr); + rewind(mq); + while(sfeof(mq) == 0){ + cq = sgetc(mq); + rewind(mp); + rewind(mr); + mr->rd += offset; + carry=0; + while(sfeof(mp) == 0){ + cp = sgetc(mp); + mcr = sfeof(mr)?0:slookc(mr); + mt = cp*cq + carry + mcr; + carry = mt/100; + salterc(mr,mt%100); + } + offset++; + if(carry != 0){ + mcr = sfeof(mr)?0:slookc(mr); + salterc(mr,mcr+carry); + } + } + if(sign < 0){ + chsign(mr); + } + if(mp != p)release(mp); + if(mq != q)release(mq); + return(mr); +} + +void +chsign(struct blk *p) +{ + register int carry; + register char ct; + + carry=0; + rewind(p); + while(sfeof(p) == 0){ + ct=100-slookc(p)-carry; + carry=1; + if(ct>=100){ + ct -= 100; + carry=0; + } + salterc(p,ct); + } + if(carry != 0){ + sputc(p,-1); + fsfile(p); + sbackc(p); + ct = sbackc(p); + if(ct == 99){ + truncate(p); + sputc(p,-1); + } + } + else{ + fsfile(p); + ct = sbackc(p); + if(ct == 0)truncate(p); + } + return; +} + +int +readc(void){ +loop: + if((readptr != &readstk[0]) && (*readptr != 0)){ + if(sfeof(*readptr) == 0)return(lastchar = sgetc(*readptr)); + release(*readptr); + readptr--; + goto loop; + } + lastchar = getc(curfile); + if(lastchar != EOF)return(lastchar); + if(readptr != &readptr[0]){ + readptr--; + if(*readptr == 0)curfile = stdin; + goto loop; + } + if(curfile != stdin){ + fclose(curfile); + curfile = stdin; + goto loop; + } + exit(0); +} + +void +unreadc(char c) +{ + + if((readptr != &readstk[0]) && (*readptr != 0)){ + sungetc(*readptr,c); + } + else ungetc(c,curfile); + return; +} + +void +binop(char c) +{ + register struct blk *r = NULL; + + switch(c){ + case '+': + r = add(arg1,arg2); + break; + case '*': + r = mult(arg1,arg2); + break; + case '/': + r = dcdiv(arg1,arg2); + break; + } + release(arg1); + release(arg2); + sputc(r,savk); + 
pushp(r); + return; +} + +void +print(struct blk *hptr) +{ + int sc; + register struct blk *p,*q,*dec; + int dig,dout,ct; + + rewind(hptr); + while(sfeof(hptr) == 0){ + if(sgetc(hptr)>99){ + rewind(hptr); + while(sfeof(hptr) == 0){ + printf("%c",sgetc(hptr)); + } + printf("\n"); + return; + } + } + fsfile(hptr); + sc = sbackc(hptr); + if(sfbeg(hptr) != 0){ + printf("0\n"); + return; + } + count = ll; + p = copy(hptr,length(hptr)); + sunputc(p); + fsfile(p); + if(sbackc(p)<0){ + chsign(p); + OUTC('-'); + } + if((obase == 0) || (obase == -1)){ + oneot(p,sc,'d'); + return; + } + if(obase == 1){ + oneot(p,sc,'1'); + return; + } + if(obase == 10){ + tenot(p,sc); + return; + } + create(strptr); + dig = log_10*sc; + dout = ((dig/10) + dig) /logo; + dec = getdec(p,sc); + p = removc(p,sc); + while(length(p) != 0){ + q = dcdiv(p,basptr); + release(p); + p = q; + (*outdit)(rem,0,1); + } + release(p); + fsfile(strptr); + while(sfbeg(strptr) == 0)OUTC(sbackc(strptr)); + if(sc == 0){ + release(dec); + printf("\n"); + return; + } + create(strptr); + OUTC('.'); + ct=0; + do{ + q = mult(basptr,dec); + release(dec); + dec = getdec(q,sc); + p = removc(q,sc); + (*outdit)(p,1,ct+1<dout); + }while(++ct < dout); + release(dec); + rewind(strptr); + while(sfeof(strptr) == 0)OUTC(sgetc(strptr)); + printf("\n"); + return; +} + +struct blk * +getdec(struct blk *p,int sc) +{ + int cc; + register struct blk *q,*t,*s; + + rewind(p); + if(length(p)*2 < sc){ + q = copy(p,length(p)); + return(q); + } + q = salloc(length(p)); + while(sc >= 1){ + sputc(q,sgetc(p)); + sc -= 2; + } + if(sc != 0){ + t = mult(q,tenptr); + s = salloc(cc = length(q)); + release(q); + rewind(t); + while(cc-- > 0)sputc(s,sgetc(t)); + sputc(s,0); + release(t); + t = dcdiv(s,tenptr); + release(s); + release(rem); + return(t); + } + return(q); +} + +void +tenot(struct blk *p,int sc) +{ + register int c,f; + char b[3]; + + fsfile(p); + f=0; + while((sfbeg(p) == 0) && ((p->rd-p->beg-1)*2 >= sc)){ + c = sbackc(p); + if((c<10) && 
(f == 1))snprintf(b, sizeof b, "0%d",c); + else snprintf(b, sizeof b, "%d",c); + f=1; + TEST2(b); + } + if(sc == 0){ + printf("\n"); + release(p); + return; + } + if((p->rd-p->beg)*2 > sc){ + c = sbackc(p); + snprintf(b, sizeof b, "%d.",c/10); + TEST2(b); + OUTC(c%10 +'0'); + sc--; + } + else { + OUTC('.'); + } + if(sc > (p->rd-p->beg)*2){ + while(sc>(p->rd-p->beg)*2){ + OUTC('0'); + sc--; + } + } + while(sc > 1){ + c = sbackc(p); + if(c<10)snprintf(b, sizeof b, "0%d",c); + else snprintf(b, sizeof b, "%d",c); + sc -= 2; + TEST2(b); + } + if(sc == 1){ + OUTC(sbackc(p)/10 +'0'); + } + printf("\n"); + release(p); + return; +} + +void +oneot(struct blk *p,int sc,char ch) +{ + register struct blk *q; + + q = removc(p,sc); + create(strptr); + sputc(strptr,-1); + while(length(q)>0){ + p = add(strptr,q); + release(q); + q = p; + OUTC(ch); + } + release(q); + printf("\n"); + return; +} + +void +hexot(struct blk *p,int flg,int unused) +{ + register int c; + rewind(p); + if(sfeof(p) != 0){ + sputc(strptr,'0'); + release(p); + return; + } + c = sgetc(p); + release(p); + if(c >= 16){ + printf("hex digit > 16"); + return; + } + sputc(strptr,c<10?c+'0':c-10+'A'); + return; +} + +void +bigot(struct blk *p,int flg,int putspc) +{ + register struct blk *t,*q; + register int l = 0; + int neg; + + if(flg == 1)t = salloc(0); + else{ + t = strptr; + l = length(strptr)+fw-1; + } + neg=0; + if(length(p) != 0){ + fsfile(p); + if(sbackc(p)<0){ + neg=1; + chsign(p); + } + while(length(p) != 0){ + q = dcdiv(p,tenptr); + release(p); + p = q; + rewind(rem); + sputc(t,sfeof(rem)?'0':sgetc(rem)+'0'); + release(rem); + } + } + release(p); + if(flg == 1){ + l = fw1-length(t); + if(neg != 0){ + l--; + sputc(strptr,'-'); + } + fsfile(t); + while(l-- > 0)sputc(strptr,'0'); + while(sfbeg(t) == 0)sputc(strptr,sbackc(t)); + release(t); + } + else{ + l -= length(strptr); + while(l-- > 0)sputc(strptr,'0'); + if(neg != 0){ + sunputc(strptr); + sputc(strptr,'-'); + } + } + if (putspc) + sputc(strptr,' '); + 
return; +} + +struct blk * +add(struct blk *a1,struct blk *a2) +{ + register struct blk *p; + register int carry,n; + int size; + int c = 0,n1,n2; + + size = length(a1)>length(a2)?length(a1):length(a2); + p = salloc(size); + rewind(a1); + rewind(a2); + carry=0; + while(--size >= 0){ + n1 = sfeof(a1)?0:sgetc(a1); + n2 = sfeof(a2)?0:sgetc(a2); + n = n1 + n2 + carry; + if(n>=100){ + carry=1; + n -= 100; + } + else if(n<0){ + carry = -1; + n += 100; + } + else carry = 0; + sputc(p,n); + } + if(carry != 0)sputc(p,carry); + fsfile(p); + if(sfbeg(p) == 0){ + while(sfbeg(p) == 0 && (c = sbackc(p)) == 0); + if(c != 0)salterc(p,c); + truncate(p); + } + fsfile(p); + if(sfbeg(p) == 0 && sbackc(p) == -1){ + while((c = sbackc(p)) == 99){ + if(c == EOF)break; + } + sgetc(p); + salterc(p,-1); + truncate(p); + } + return(p); +} + +int +eqk(void){ + register struct blk *p,*q; + register int skp; + int skq; + + p = pop(); + EMPTYS; + q = pop(); + EMPTYSR(p); + skp = sunputc(p); + skq = sunputc(q); + if(skp == skq){ + arg1=p; + arg2=q; + savk = skp; + return(0); + } + else if(skp < skq){ + savk = skq; + p = add0(p,skq-skp); + } + else { + savk = skp; + q = add0(q,skp-skq); + } + arg1=p; + arg2=q; + return(0); +} + +struct blk * +removc(struct blk *p,int n) +{ + register struct blk *q,*r; + + rewind(p); + while(n>1){ + sgetc(p); + n -= 2; + } + q = salloc(2); + while(sfeof(p) == 0)sputc(q,sgetc(p)); + if(n == 1){ + r = dcdiv(q,tenptr); + release(q); + release(rem); + q = r; + } + release(p); + return(q); +} + +struct blk * +scalint(struct blk *p) +{ + register int n; + n = sunputc(p); + p = removc(p,n); + return(p); +} + +struct blk * +scale(struct blk *p,int n) +{ + register struct blk *q,*s,*t; + + t = add0(p,n); + q = salloc(1); + sputc(q,n); + s = dcexp(inbas,q); + release(q); + q = dcdiv(t,s); + release(t); + release(s); + release(rem); + sputc(q,n); + return(q); +} + +int +subt(void){ + arg1=pop(); + EMPTYS; + savk = sunputc(arg1); + chsign(arg1); + sputc(arg1,savk); + 
pushp(arg1); + if(eqk() != 0)return(1); + binop('+'); + return(0); +} + +int +command(void){ + int c; + static char *line; + static int linesize; + char *sl; + register void (*savint)(int); + register int pid,rpid; + int retcode; + + switch(c = readc()){ + case '<': + return(cond(NL)); + case '>': + return(cond(NG)); + case '=': + return(cond(NE)); + default: + if (line == 0) + line = srealloc(0, linesize = 10); + sl = line; + *sl++ = c; + while((c = readc()) != '\n') { + if (sl >= &line[linesize-2]) { + int diff = sl - line; + line = srealloc(line, linesize += 10); + sl = &line[diff]; + } + *sl++ = c; + } + *sl = 0; + if((pid = fork()) == 0){ + execl(SHELL,"sh","-c",line,NULL); + exit(0100); + } + savint = sigset(SIGINT, SIG_IGN); + while((rpid = wait(&retcode)) != pid && rpid != -1); + sigset(SIGINT,savint); + printf("!\n"); + return(0); + } +} + +int +cond(char c) +{ + register struct blk *p; + register int cc; + + if(subt() != 0)return(1); + p = pop(); + sunputc(p); + if(length(p) == 0){ + release(p); + if(c == '<' || c == '>' || c == NE){ + readc(); + return(0); + } + load(); + return(1); + } + else { + if(c == '='){ + release(p); + readc(); + return(0); + } + } + if(c == NE){ + release(p); + load(); + return(1); + } + fsfile(p); + cc = sbackc(p); + release(p); + if((cc<0 && (c == '<' || c == NG)) || + (cc >0) && (c == '>' || c == NL)){ + readc(); + return(0); + } + load(); + return(1); +} + +void +load(void){ + register int c; + register struct blk *p,*q; + struct blk *t,*s; + c = readc() & 0377; + sptr = stable[c]; + if(sptr != 0){ + p = sptr->val; + if(c >= ARRAYST){ + q = salloc(length(p)); + rewind(p); + while(sfeof(p) == 0){ + s = dcgetwd(p); + if(s == 0){putwd(q, (struct blk *)NULL);} + else{ + t = copy(s,length(s)); + putwd(q,t); + } + } + pushp(q); + } + else{ + q = copy(p,length(p)); + pushp(q); + } + } + else{ + q = salloc(1); + sputc(q,0); + pushp(q); + } + return; +} + +int +log_2(long n) +{ + register int i; + + if(n == 0)return(0); + i=31; + 
if(n<0)return(i); + while((n= n<<1) >0)i--; + return(--i); +} + +struct blk * +salloc(int size) +{ + register struct blk *hdr; + register char *ptr; + all++; + nbytes += size; + ptr = malloc((unsigned)(size?size:1)); + if(ptr == 0){ + garbage("salloc"); + if((ptr = malloc((unsigned)(size?size:1))) == 0) + ospace("salloc"); + } + if((hdr = hfree) == 0)hdr = morehd(); + hfree = (struct blk *)hdr->rd; + hdr->rd = hdr->wt = hdr->beg = ptr; + hdr->last = ptr+size; + return(hdr); +} + +struct blk * +morehd(void){ + register struct blk *h,*kk; + headmor++; + nbytes += HEADSZ; + hfree = h = (struct blk *)malloc(HEADSZ); + if(hfree == 0){ + garbage("morehd"); + if((hfree = h = (struct blk *)malloc(HEADSZ)) == 0) + ospace("headers"); + } + kk = h; + while(h<hfree+(HEADSZ/BLK))(h++)->rd = (char *)++kk; + (--h)->rd=0; + return(hfree); +} + +/* +sunputc(struct blk *hptr) +{ + hptr->wt--; + hptr->rd = hptr->wt; + return(*hptr->wt); +} +*/ + +struct blk * +copy(struct blk *hptr,int size) +{ + register struct blk *hdr; + register unsigned sz; + register char *ptr; + + all++; + nbytes += size; + sz = length(hptr); + ptr = nalloc(hptr->beg, (unsigned)size); + if(ptr == 0){ + garbage("copy"); + if((ptr = nalloc(hptr->beg, (unsigned)size)) == NULL){ + printf("copy size %d\n",size); + ospace("copy"); + } + } + if((hdr = hfree) == 0)hdr = morehd(); + hfree = (struct blk *)hdr->rd; + hdr->rd = hdr->beg = ptr; + hdr->last = ptr+size; + hdr->wt = ptr+sz; + ptr = hdr->wt; + while(ptr<hdr->last)*ptr++ = '\0'; + return(hdr); +} + +void +sdump(char *s1,struct blk *hptr) +{ + char *p; + printf("%s %lo rd %lo wt %lo beg %lo last %lo\n", s1, + (long)(intptr_t)hptr, + (long)(intptr_t)hptr->rd, + (long)(intptr_t)hptr->wt, + (long)(intptr_t)hptr->beg, + (long)(intptr_t)hptr->last); + p = hptr->beg; + while(p < hptr->wt)printf("%d ",*p++); + printf("\n"); +} + +void +seekc(struct blk *hptr,int n) +{ + register char *nn,*p; + + nn = hptr->beg+n; + if(nn > hptr->last){ + nbytes += nn - hptr->last; + 
/*free(hptr->beg);*/ + p = realloc(hptr->beg, (unsigned)n); + if(p == 0){ + hptr->beg = realloc(hptr->beg, (unsigned)(hptr->last-hptr->beg)); + garbage("seekc"); + if((p = realloc(hptr->beg, (unsigned)n)) == 0) + ospace("seekc"); + } + hptr->beg = p; + hptr->wt = hptr->last = hptr->rd = p+n; + return; + } + hptr->rd = nn; + if(nn>hptr->wt)hptr->wt = nn; + return; +} + +void +salterwd(struct wblk *hptr,struct blk *n) +{ + if(hptr->rdw == hptr->lastw)more((struct blk *)hptr); + *hptr->rdw++ = n; + if(hptr->rdw > hptr->wtw)hptr->wtw = hptr->rdw; + return; +} + +void +more(struct blk *hptr) +{ + register unsigned size; + register char *p; + + if((size=(hptr->last-hptr->beg)*2) == 0)size=1; + nbytes += size/2; + /*free(hptr->beg);*/ + p = realloc(hptr->beg, (unsigned)size); + if(p == 0){ + hptr->beg = realloc(hptr->beg, (unsigned)(hptr->last-hptr->beg)); + garbage("more"); + if((p = realloc(hptr->beg,size)) == 0) + ospace("more"); + } + hptr->rd = hptr->rd-hptr->beg+p; + hptr->wt = hptr->wt-hptr->beg+p; + hptr->beg = p; + hptr->last = p+size; + return; +} + +void +ospace(char *s) +{ + printf("out of space: %s\n",s); + printf("all %ld rel %ld headmor %ld\n",all,rel,headmor); + printf("nbytes %ld\n",nbytes); + sdump("stk",*stkptr); + abort(); +} + +void +garbage(char *s) +{ + int i; + struct blk *p, *q; + struct sym *tmps; + int ct; + +/* printf("got to garbage %s\n",s); */ + for(i=0;i<TBLSZ;i++){ + tmps = stable[i]; + if(tmps != 0){ + if(i < ARRAYST){ + do { + p = tmps->val; + if(((intptr_t)p->beg & 01) != 0){ + printf("string %o\n",i); + sdump("odd beg",p); + } + redef(p); + tmps = tmps->next; + } while(tmps != 0); + continue; + } + else { + do { + p = tmps->val; + rewind(p); + ct = 0; + while((q = dcgetwd(p)) != NULL){ + ct++; + if(q != 0){ + if(((intptr_t)q->beg & 01) != 0){ + printf("array %o elt %d odd\n",i-ARRAYST,ct); +printf("tmps %lo p %lo\n",(long)(intptr_t)tmps,(long)(intptr_t)p); + sdump("elt",q); + } + redef(q); + } + } + tmps = tmps->next; + } while(tmps != 
0); + } + } + } +} + +void +redef(struct blk *p) +{ + register int offset; + register char *newp; + + if ((intptr_t)p->beg&01) { + printf("odd ptr %lo hdr %lo\n",(long)(intptr_t)p->beg, + (long)(intptr_t)p); + ospace("redef-bad"); + } + /*free(p->beg);*/ + newp = realloc(p->beg, (unsigned)(p->last-p->beg)); + if(newp == NULL)ospace("redef"); + offset = newp - p->beg; + p->beg = newp; + p->rd += offset; + p->wt += offset; + p->last += offset; +} + +void +release(register struct blk *p) +{ + rel++; + nbytes -= p->last - p->beg; + p->rd = (char *)hfree; + hfree = p; + free(p->beg); +} + +struct blk * +dcgetwd(struct blk *p) +{ + register struct wblk *wp; + + wp = (struct wblk *)p; + if (wp->rdw == wp->wtw) + return(NULL); + return(*wp->rdw++); +} + +void +putwd(struct blk *p, struct blk *c) +{ + register struct wblk *wp; + + wp = (struct wblk *)p; + if (wp->wtw == wp->lastw) + more(p); + *wp->wtw++ = c; +} + +struct blk * +lookwd(struct blk *p) +{ + register struct wblk *wp; + + wp = (struct wblk *)p; + if (wp->rdw == wp->wtw) + return(NULL); + return(*wp->rdw); +} + +char * +nalloc(register char *p,unsigned nbytes) +{ + register char *q, *r; + q = r = malloc(nbytes ? nbytes : 1); + if(q==0) + return(0); + while(nbytes--) + *q++ = *p++; + return(r); +} + +void * +srealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == 0) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} diff --git a/dc/dc.h b/dc/dc.h @@ -0,0 +1,203 @@ +/* from Unix 7th Edition /usr/src/cmd/dc/dc.h */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Sccsid @(#)dc.h 1.9 (gritter) 2/4/05> */ + +#include <stdlib.h> +#include <signal.h> + +#define FATAL 0 +#define NFATAL 1 +#define BLK sizeof(struct blk) +#define PTRSZ sizeof(int *) +#define HEADSZ 1024 +#define STKSZ 100 +#define RDSKSZ 100 +#define TBLSZ 256 +#define ARRAYST 0241 +#define NL 1 +#define NG 2 +#define NE 3 +#define length(p) ((p)->wt-(p)->beg) +#define rewind(p) (p)->rd=(p)->beg +#define create(p) (p)->rd = (p)->wt = (p)->beg +#define fsfile(p) (p)->rd = (p)->wt +#define truncate(p) (p)->wt = (p)->rd +#define sfeof(p) (((p)->rd>=(p)->wt)?1:0) +#define sfbeg(p) (((p)->rd==(p)->beg)?1:0) +#define sungetc(p,c) *(--(p)->rd)=c +#ifdef interdata +#define NEGBYTE 0200 +#define MASK (-1 & ~0377) +#define sgetc(p) ( ((p)->rd==(p)->wt) ? EOF :( ((*(p)->rd & NEGBYTE) != 0) ? ( *(p)->rd++ | MASK): *(p)->rd++ )) +#define slookc(p) ( ((p)->rd==(p)->wt) ? EOF :( ((*(p)->rd & NEGBYTE) != 0) ? (*(p)->rd | MASK) : *(p)->rd )) +#define sbackc(p) ( ((p)->rd==(p)->beg) ? EOF :( ((*(--(p)->rd) & NEGBYTE) != 0) ? 
(*(p)->rd | MASK): *(p)->rd )) +#endif +#ifndef interdata +#define sgetc(p) (((p)->rd==(p)->wt)?EOF:*(p)->rd++) +#define slookc(p) (((p)->rd==(p)->wt)?EOF:*(p)->rd) +#define sbackc(p) (((p)->rd==(p)->beg)?EOF:*(--(p)->rd)) +#endif +#define sputc(p,c) {if((p)->wt==(p)->last)more(p); *(p)->wt++ = c; } +#define salterc(p,c) {if((p)->rd==(p)->last)more(p); *(p)->rd++ = c; if((p)->rd>(p)->wt)(p)->wt=(p)->rd;} +#define sunputc(p) (*( (p)->rd = --(p)->wt)) +#define zero(p) for(pp=(p)->beg;pp<(p)->last;)*pp++='\0' +#define OUTC(x) {int _c = (x); if (_c) {printf("%c",_c); if(--count == 0){printf("\\\n"); count=ll;} } } +#define TEST2(b) { OUTC(b[0] & 0377); OUTC(b[1] & 0377); } +#define EMPTY if(stkerr != 0){printf("stack empty\n"); continue; } +#define EMPTYR(x) if(stkerr!=0){pushp(x);printf("stack empty\n");continue;} +#define EMPTYS if(stkerr != 0){printf("stack empty\n"); return(1);} +#define EMPTYSR(x) if(stkerr !=0){printf("stack empty\n");pushp(x);return(1);} +#define error(p) {printf(p); continue; } +#define errorrt(p) {printf(p); return(1); } +struct blk { + char *rd; + char *wt; + char *beg; + char *last; +}; +struct blk *hfree; +struct blk *arg1, *arg2; +int svargc; +char savk; +char **svargv; +int dbg; +int ifile; +FILE *curfile; +struct blk *scalptr, *basptr, *tenptr, *inbas; +struct blk *sqtemp, *chptr, *strptr, *divxyz; +struct blk *stack[STKSZ]; +struct blk **stkptr,**stkbeg; +struct blk **stkend; +int stkerr; +int lastchar; +struct blk *readstk[RDSKSZ]; +struct blk **readptr; +struct blk *rem; +int k; +struct blk *irem; +int skd,skr; +int neg; +struct sym { + struct sym *next; + struct blk *val; +} symlst[TBLSZ]; +struct sym *stable[TBLSZ]; +struct sym *sptr,*sfree; +struct wblk { + struct blk **rdw; + struct blk **wtw; + struct blk **begw; + struct blk **lastw; +}; +FILE *fsave; +long rel; +long nbytes; +long all; +long headmor; +long obase; +int fw,fw1,ll; +int (*outdit)(struct blk *, int, int); +int logo; +int log_10; +int count; +char *pp; +char *dummy; 
+ +#define div(a, b) dcdiv(a, b) +#define sqrt(a) dcsqrt(a) +#define exp(a, b) dcexp(a, b) +#define getwd(a) dcgetwd(a) +extern void commnds(void); +extern struct blk *div(struct blk *, struct blk *); +extern int dscale(void); +extern struct blk *removr(struct blk *, int); +extern struct blk *sqrt(struct blk *); +extern struct blk *exp(struct blk *, struct blk *); +extern void init(int, char *[]); +extern void onintr(int); +extern void pushp(struct blk *); +extern struct blk *pop(void); +extern struct blk *readin(void); +extern struct blk *add0(struct blk *, int); +extern struct blk *mult(struct blk *, struct blk *); +extern void chsign(struct blk *); +extern int readc(void); +extern void unreadc(char); +extern void binop(char); +extern void print(struct blk *); +extern struct blk *getdec(struct blk *, int); +extern void tenot(struct blk *, int); +extern void oneot(struct blk *, int, char); +extern void hexot(struct blk *, int, int); +extern void bigot(struct blk *, int, int); +extern struct blk *add(struct blk *, struct blk *); +extern int eqk(void); +extern struct blk *removc(struct blk *, int); +extern struct blk *scalint(struct blk *); +extern struct blk *scale(struct blk *, int); +extern int subt(void); +extern int command(void); +extern int cond(char); +extern void load(void); +extern int log_2(long); +extern struct blk *salloc(int); +extern struct blk *morehd(void); +extern struct blk *copy(struct blk *, int); +extern void sdump(char *, struct blk *); +extern void seekc(struct blk *, int); +extern void salterwd(struct wblk *, struct blk *); +extern void more(struct blk *); +extern void ospace(char *); +extern void garbage(char *); +extern void redef(struct blk *); +extern void release(register struct blk *); +extern struct blk *getwd(struct blk *); +extern void putwd(struct blk *, struct blk *); +extern struct blk *lookwd(struct blk *); +extern char *nalloc(register char *, unsigned); +extern void *srealloc(void *, size_t); + +#if defined (__GLIBC__) && 
defined (_IO_getc_unlocked) +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif + +#ifndef BC_BASE_MAX +#define BC_BASE_MAX 99 +#endif +#ifndef BC_DIM_MAX +#define BC_DIM_MAX 2048 +#endif diff --git a/dc/mkfile b/dc/mkfile @@ -0,0 +1,8 @@ +BIN = dc +OBJ = dc.o +LOCAL_CFLAGS = -DSHELL=\"$SHELL\" +INSTALL_BIN = dc +INSTALL_MAN1 = dc.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/dc/version.c b/dc/version.c @@ -0,0 +1,13 @@ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)dc.sl 2.12 (gritter) 12/25/06"; +/* SLIST */ +/* +dc.c: Sccsid @(#)dc.c 1.21 (gritter) 12/25/06> +dc.h: Sccsid @(#)dc.h 1.9 (gritter) 2/4/05> +*/ diff --git a/dd/dd.1 b/dd/dd.1 @@ -0,0 +1,293 @@ +.\" +.\" Sccsid @(#)dd.1 1.7 (gritter) 1/14/05 +.\" +.\" Parts taken from dd(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. 
nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH DD 1 "1/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +dd \- convert and copy a file +.SH SYNOPSIS +.B dd +[option=value] ... +.SH DESCRIPTION +.I Dd +copies the specified input file +to the specified output with +possible conversions. +The standard input and output are used by default. +The input and output block size may be +specified to take advantage of raw physical I/O. 
+.PP +.br +.ns +.TP 15 +.I option +.I values +.br +.ns +.TP +if= +input file name; standard input is default +.br +.ns +.TP +of= +output file name; standard output is default +.br +.ns +.TP +.RI ibs= n +input block size +.I n +bytes (default 512) +.br +.ns +.TP +.RI obs= n +output block size (default 512) +.br +.ns +.TP +.RI bs= n +set both input and output block size, +superseding +.I ibs +and +.I obs; +also, if no conversion is specified, +it is particularly efficient since no copy need be done +.br +.ns +.TP +.RI cbs= n +conversion buffer size +.br +.ns +.TP +.RI skip= n +skip +.IR n "" +input records before starting copy +.br +.ns +.TP +.RI iseek= n +seek +.IR n "" +input records before starting copy +.br +.ns +.TP +.RI files= n +copy +.I n +files from (tape) input +.br +.ns +.TP +.RI seek= n +seek +.I n +records from beginning of output file before copying +.br +.ns +.TP +.RI oseek= n +same as seek +.br +.ns +.TP +count=\fIn\fR +copy only +.IR n "" +input records +.br +.ns +.TP +conv=ascii +.ds h \h'\w'conv='u' +convert EBCDIC to ASCII +.br +.ns +.IP \*hebcdic +convert ASCII to EBCDIC +.br +.ns +.IP \*hibm +slightly different map of ASCII to EBCDIC +.br +.ns +.IP \*hblock +convert newline-terminated input lines to blocks +.br +.ns +.IP \*hunblock +convert blocked input to lines +.br +.ns +.IP \*hlcase +map alphabetics to lower case +.br +.ns +.IP \*hucase +map alphabetics to upper case +.br +.ns +.IP \*hswab +swap every pair of bytes +.br +.ns +.IP \*hnoerror +do not stop processing on an error +.br +.ns +.IP \*hnotrunc +do not truncate the output file +.br +.ns +.IP \*hsync +pad every input record to +.I ibs +.br +.ns +.IP "\*h... , ..." +several comma-separated conversions +.PP +.fi +Where sizes are specified, +a number of bytes is expected. +A number may end with +.B "k, b" +or +.B w +to specify multiplication by +1024, 512, or 2 respectively; +a pair of numbers may be separated by +.B x +to indicate a product. 
+.PP +.I Cbs +is used only if +.IR ascii , +.IR unblock, +.IR ebcdic , +.IR ibm , +or +.IR block +conversion is specified. +In the first two cases, +.I cbs +bytes are placed into the conversion buffer, converted to +ASCII, and trailing blanks trimmed and new-line added +before sending the line to the output. +In the latter three cases, +ASCII characters (bytes) are read into the +conversion buffer, converted to EBCDIC, and blanks added +to make up an +output record of size +.IR cbs . +.PP +Two additional values for the `conv' +option, `conv=idirect' and `conv=odirect', +are available as extensions. +They enable direct i/o on input or output, respectively. +See the description of the `O_DIRECT' flag in +.IR open (2) +for more information. +`conv=odirect' must be used with care +as it requires padding for correct operation; +a write that is not a multiple of an acceptable buffer size will fail. +This is particularly of concern for the last block written. +Using `conv=odirect' thus usually requires `conv=sync' +and cannot be used if such padding destroys the file integrity. +.PP +After completion, +.I dd +reports the number of whole and partial input and output +blocks. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +for `conv=lcase' and `conv=ucase'. +.SH EXAMPLES +To read an EBCDIC tape blocked ten 80-byte +EBCDIC card images per record into the ASCII file +.IR x : +.IP "" +dd if=/dev/rmt0 of=x ibs=800 cbs=80 conv=ascii,lcase +.PP +Note the use of raw magtape. +.I Dd +is especially suited to I/O on the raw +physical devices because it allows reading +and writing in arbitrary record sizes. 
+.PP +To skip over a file before copying from magnetic tape do +.IP "" +(dd of=/dev/null; dd of=x) </dev/rmt0 +.SH "SEE ALSO" +cp(1), +tr(1), +locale(7) +.SH DIAGNOSTICS +f+p records in(out): numbers of full and partial records read(written) +.SH NOTES +The ASCII/EBCDIC conversion tables are +taken +.\" This was valid for v7/BSD conversion tables. +.\"from the 256 character standard in +.\"the CACM Nov, 1968. +.\" *** +.\" This is what various AT&T sources and mem(3) of libast say for the +.\" current tables. The tables themselves are given in the POSIX.2 rationale. +from a proposed BTL standard April 16, 1979. +The `ibm' conversion, while less blessed as a standard, +corresponds better to certain IBM print train conventions. +There is no universal solution. +.PP +When reading from pipes, FIFOs, character devices (e.\|g. terminals), +or network sockets, +partial input records can occur at any time +even before the end of the data stream is reached. +For the `count' option, +these are handled exactly like full records. +Using +.I dd +to retrieve exactly +.IR count * ibs +bytes from such files does thus not generally work unless `ibs=1'. diff --git a/dd/dd.c b/dd/dd.c @@ -0,0 +1,1035 @@ +/* + * dd - convert and copy + * + * Gunnar Ritter, Freiburg i. Br., Germany, January 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. 
+ * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)dd.sl 1.30 (gritter) 1/22/06"; + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <malloc.h> +#include <errno.h> +#include <libgen.h> +#include <ctype.h> +#include <locale.h> +#include <signal.h> +#include "sigset.h" +#include <wchar.h> +#include <wctype.h> +#include <limits.h> + +#include <sys/ioctl.h> + +#if defined (__linux__) || defined (__sun) || defined (__FreeBSD__) || \ + defined (__hpux) || defined (_AIX) || defined (__NetBSD__) || \ + defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__) +#include <sys/mtio.h> +#else /* SVR4.2MP */ +#include <sys/scsi.h> +#include <sys/st01.h> +#endif /* SVR4.2MP */ + +#include "atoll.h" +#include "memalign.h" +#include "mbtowi.h" + +/* + * For 'conv=ascii'. 
+ */ +static const unsigned char c_ascii[] = { +0000,0001,0002,0003,0234,0011,0206,0177,0227,0215,0216,0013,0014,0015,0016,0017, +0020,0021,0022,0023,0235,0205,0010,0207,0030,0031,0222,0217,0034,0035,0036,0037, +0200,0201,0202,0203,0204,0012,0027,0033,0210,0211,0212,0213,0214,0005,0006,0007, +0220,0221,0026,0223,0224,0225,0226,0004,0230,0231,0232,0233,0024,0025,0236,0032, +0040,0240,0241,0242,0243,0244,0245,0246,0247,0250,0325,0056,0074,0050,0053,0174, +0046,0251,0252,0253,0254,0255,0256,0257,0260,0261,0041,0044,0052,0051,0073,0176, +0055,0057,0262,0263,0264,0265,0266,0267,0270,0271,0313,0054,0045,0137,0076,0077, +0272,0273,0274,0275,0276,0277,0300,0301,0302,0140,0072,0043,0100,0047,0075,0042, +0303,0141,0142,0143,0144,0145,0146,0147,0150,0151,0304,0305,0306,0307,0310,0311, +0312,0152,0153,0154,0155,0156,0157,0160,0161,0162,0136,0314,0315,0316,0317,0320, +0321,0345,0163,0164,0165,0166,0167,0170,0171,0172,0322,0323,0324,0133,0326,0327, +0330,0331,0332,0333,0334,0335,0336,0337,0340,0341,0342,0343,0344,0135,0346,0347, +0173,0101,0102,0103,0104,0105,0106,0107,0110,0111,0350,0351,0352,0353,0354,0355, +0175,0112,0113,0114,0115,0116,0117,0120,0121,0122,0356,0357,0360,0361,0362,0363, +0134,0237,0123,0124,0125,0126,0127,0130,0131,0132,0364,0365,0366,0367,0370,0371, +0060,0061,0062,0063,0064,0065,0066,0067,0070,0071,0372,0373,0374,0375,0376,0377 +}; + +/* + * For 'conv=ibm'. 
+ */ +static const unsigned char c_ibm[] = { +0000,0001,0002,0003,0067,0055,0056,0057,0026,0005,0045,0013,0014,0015,0016,0017, +0020,0021,0022,0023,0074,0075,0062,0046,0030,0031,0077,0047,0034,0035,0036,0037, +0100,0132,0177,0173,0133,0154,0120,0175,0115,0135,0134,0116,0153,0140,0113,0141, +0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0172,0136,0114,0176,0156,0157, +0174,0301,0302,0303,0304,0305,0306,0307,0310,0311,0321,0322,0323,0324,0325,0326, +0327,0330,0331,0342,0343,0344,0345,0346,0347,0350,0351,0255,0340,0275,0137,0155, +0171,0201,0202,0203,0204,0205,0206,0207,0210,0211,0221,0222,0223,0224,0225,0226, +0227,0230,0231,0242,0243,0244,0245,0246,0247,0250,0251,0300,0117,0320,0241,0007, +0040,0041,0042,0043,0044,0025,0006,0027,0050,0051,0052,0053,0054,0011,0012,0033, +0060,0061,0032,0063,0064,0065,0066,0010,0070,0071,0072,0073,0004,0024,0076,0341, +0101,0102,0103,0104,0105,0106,0107,0110,0111,0121,0122,0123,0124,0125,0126,0127, +0130,0131,0142,0143,0144,0145,0146,0147,0150,0151,0160,0161,0162,0163,0164,0165, +0166,0167,0170,0200,0212,0213,0214,0215,0216,0217,0220,0232,0233,0234,0235,0236, +0237,0240,0252,0253,0254,0255,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267, +0270,0271,0272,0273,0274,0275,0276,0277,0312,0313,0314,0315,0316,0317,0332,0333, +0334,0335,0336,0337,0352,0353,0354,0355,0356,0357,0372,0373,0374,0375,0376,0377 +}; + +/* + * For 'conv=ebcdic'. 
+ */ +static const unsigned char c_ebcdic[] = { +0000,0001,0002,0003,0067,0055,0056,0057,0026,0005,0045,0013,0014,0015,0016,0017, +0020,0021,0022,0023,0074,0075,0062,0046,0030,0031,0077,0047,0034,0035,0036,0037, +0100,0132,0177,0173,0133,0154,0120,0175,0115,0135,0134,0116,0153,0140,0113,0141, +0360,0361,0362,0363,0364,0365,0366,0367,0370,0371,0172,0136,0114,0176,0156,0157, +0174,0301,0302,0303,0304,0305,0306,0307,0310,0311,0321,0322,0323,0324,0325,0326, +0327,0330,0331,0342,0343,0344,0345,0346,0347,0350,0351,0255,0340,0275,0232,0155, +0171,0201,0202,0203,0204,0205,0206,0207,0210,0211,0221,0222,0223,0224,0225,0226, +0227,0230,0231,0242,0243,0244,0245,0246,0247,0250,0251,0300,0117,0320,0137,0007, +0040,0041,0042,0043,0044,0025,0006,0027,0050,0051,0052,0053,0054,0011,0012,0033, +0060,0061,0032,0063,0064,0065,0066,0010,0070,0071,0072,0073,0004,0024,0076,0341, +0101,0102,0103,0104,0105,0106,0107,0110,0111,0121,0122,0123,0124,0125,0126,0127, +0130,0131,0142,0143,0144,0145,0146,0147,0150,0151,0160,0161,0162,0163,0164,0165, +0166,0167,0170,0200,0212,0213,0214,0215,0216,0217,0220,0152,0233,0234,0235,0236, +0237,0240,0252,0253,0254,0112,0256,0257,0260,0261,0262,0263,0264,0265,0266,0267, +0270,0271,0272,0273,0274,0241,0276,0277,0312,0313,0314,0315,0316,0317,0332,0333, +0334,0335,0336,0337,0352,0353,0354,0355,0356,0357,0372,0373,0374,0375,0376,0377 +}; + +static char *progname; /* argv[0] to main() */ + +typedef long long d_type; + +static char *iblok; /* input buffer */ +static char *oblok; /* output buffer */ +static char *cblok; /* conversion buffer */ + +static char mblok[MB_LEN_MAX+1]; /* tow{upper|lower} buffer */ +static char *mbp; /* points to remaining chars in mblok */ +static int mbrest; /* number of remaining chars in mblok */ + +static const char *iffile; /* input file name */ +static int iffd; /* input file descriptor */ +static const char *offile; /* output file name */ +static int offd; /* output file descriptor */ +static struct stat istat; /* stat of input */ 
+static struct stat ostat; /* stat of output */ +static d_type ibs = 512; /* input block size */ +static d_type obs = 512; /* output block size */ +static d_type bs; /* size for both buffers */ +static d_type oflow; /* remaining bytes in output buffer */ +static d_type cbs; /* conversion block size */ +static d_type cflow; /* remaining bytes in conv. buffer */ +static int ctrunc; /* truncate current data (conv=block) */ +static d_type skip; /* skip these blocks on input */ +static d_type count = -1; /* no more than count blocks of input */ +static int files = 1; /* read EOF this many times */ +static d_type iseek; /* seek these blocks on input */ +static d_type oseek; /* seek these blocks on output */ +static int mb_cur_max; /* MB_CUR_MAX acceleration */ + +static d_type iwhole; /* statistics */ +static d_type ipartial; +static d_type owhole; +static d_type opartial; +static d_type truncated; + +static enum charconv { + CHAR_NONE = 0, + CHAR_ASCII = 1, + CHAR_EBCDIC = 2, + CHAR_IBM = 3 +} chars = CHAR_NONE; + +static enum conversion { + CONV_NONE = 0, + CONV_BLOCK = 01, + CONV_UNBLOCK = 02, + CONV_LCASE = 04, + CONV_UCASE = 010, + CONV_SWAB = 020, + CONV_NOERROR = 040, + CONV_NOTRUNC = 0100, + CONV_IDIRECT = 0200, + CONV_ODIRECT = 0400, + CONV_DIRECT = 0600, + CONV_SYNC = 01000 +} convs = CONV_NONE; + +static struct { + const char *c_name; + enum conversion c_conv; + enum charconv c_char; +} convtab[] = { + { "ascii", CONV_UNBLOCK, CHAR_ASCII }, + { "ebcdic", CONV_BLOCK, CHAR_EBCDIC }, + { "ibm", CONV_BLOCK, CHAR_IBM }, + { "block", CONV_BLOCK, CHAR_NONE }, + { "unblock", CONV_UNBLOCK, CHAR_NONE }, + { "lcase", CONV_LCASE, CHAR_NONE }, + { "ucase", CONV_UCASE, CHAR_NONE }, + { "swab", CONV_SWAB, CHAR_NONE }, + { "noerror", CONV_NOERROR, CHAR_NONE }, + { "notrunc", CONV_NOTRUNC, CHAR_NONE }, +#ifdef O_DIRECT + { "idirect", CONV_IDIRECT, CHAR_NONE }, + { "odirect", CONV_ODIRECT, CHAR_NONE }, +#endif /* O_DIRECT */ + { "sync", CONV_SYNC, CHAR_NONE }, + { NULL, 
CONV_NONE, CHAR_NONE } +}; + +static void * +bmalloc(size_t nbytes) +{ + static long pagesize; + void *vp; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if ((vp = memalign(pagesize, nbytes)) == NULL) { + fprintf(stderr, "%s: not enough memory\n", progname); + fprintf(stderr, "Please use a smaller buffer size\n"); + exit(077); + } + return vp; +} + +/************************** ARGUMENT SCANNING ***************************/ +static void +badarg(const char *arg) +{ + fprintf(stderr, "%s: bad arg: \"%s\"\n", progname, arg); + exit(2); +} + +static void +badnumeric(const char *arg) +{ + fprintf(stderr, "%s: bad numeric arg: \"%s\"\n", progname, arg); + exit(2); +} + +static void +nozeroblok(void) +{ + fprintf(stderr, "%s: buffer sizes cannot be zero\n", progname); + exit(2); +} + +/* + * Get the value of a numeric argument. + */ +static d_type +expr(const char *ap) +{ + d_type val; + char *x; + int c; + + if (*ap == '-' || *ap == '+') + badnumeric(ap); + val = strtoull(ap, &x, 10); + while ((c = *x++) != '\0') { + switch (c) { + case 'k': + val *= 1024; + break; + case 'b': + val *= 512; + break; + case 'w': + val *= 2; + break; + case 'x': + case '*': + return val * expr(x); + default: + badnumeric(ap); + } + } + return val; +} + +static void +setin(const char *ap) +{ + iffile = ap; +} + +static void +setof(const char *ap) +{ + offile = ap; +} + +static void +setibs(const char *ap) +{ + ibs = expr(ap); + if (ibs == 0) + nozeroblok(); +} + +static void +setobs(const char *ap) +{ + obs = expr(ap); + if (obs == 0) + nozeroblok(); +} + +static void +setbs(const char *ap) +{ + bs = expr(ap); +} + +static void +setcbs(const char *ap) +{ + cbs = expr(ap); +} + +static void +setskip(const char *ap) +{ + skip = expr(ap); +} + +static void +setcount(const char *ap) +{ + count = expr(ap); +} + +static void +setconv(const char *ap) +{ + const char *cp, *cq; + int i; + + for (;;) { + while (*ap == ',') + ap++; + if (*ap == '\0') + break; 
+ for (i = 0; convtab[i].c_name; i++) { + for (cp = convtab[i].c_name, cq = ap; + *cp && (*cp == *cq); + cp++, cq++); + if (*cp == '\0' && (*cq == ',' || *cq == '\0')) { + convs |= convtab[i].c_conv; + if (convtab[i].c_char != CHAR_NONE) + chars = convtab[i].c_char; + ap = cq; + goto next; + } + } + badarg(ap); + next:; + } +} + +static void +setfiles(const char *ap) +{ + files = expr(ap); +} + +static void +setiseek(const char *ap) +{ + iseek = expr(ap); +} + +static void +setoseek(const char *ap) +{ + oseek = expr(ap); +} + +static struct { + const char *a_name; + void (*a_func)(const char *); +} argtab[] = { + { "if=", setin }, + { "of=", setof }, + { "ibs=", setibs }, + { "obs=", setobs }, + { "bs=", setbs }, + { "cbs=", setcbs }, + { "skip=", setskip }, + { "seek=", setoseek }, + { "count=", setcount }, + { "conv=", setconv }, + { "files=", setfiles }, + { "iseek=", setiseek }, + { "oseek=", setoseek }, + { NULL, NULL } +}; + +static const char * +thisarg(const char *sp, const char *ap) +{ + do { + if (*sp != *ap) + return NULL; + if (*sp == '=') + return &sp[1]; + } while (*sp++ && *ap++); + return NULL; +} + +/******************************* EXECUTION ********************************/ +static void +stats(void) +{ + fprintf(stderr, "%llu+%llu records in\n", + (unsigned long long)iwhole, + (unsigned long long)ipartial); + fprintf(stderr, "%llu+%llu records out\n", + (unsigned long long)owhole, + (unsigned long long)opartial); + if (truncated) { + fprintf(stderr, "%llu truncated record%s\n", + (unsigned long long)truncated, + truncated > 1 ? 
"s" : ""); + } +} + +static void charconv(char *data, size_t size); +static void bflush(void); +static void cflush(void); +static void uflush(void); + +static void +quit(int status) +{ + if (mbp) + charconv(NULL, 0); + cflush(); + uflush(); + bflush(); + stats(); + exit(status); +} + +static void +onint(int sig) +{ + stats(); + exit(sig | 0200); +} + +static int +ontape(void) +{ + static int yes = -1; + + if (yes == -1) { +#if defined (__linux__) || defined (__FreeBSD__) || defined (__hpux) || \ + defined (_AIX) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) + struct mtget mg; + yes = (istat.st_mode&S_IFMT) == S_IFCHR && + ioctl(iffd, MTIOCGET, &mg) == 0; +#elif defined (__sun) + struct mtdrivetype_request mr; + struct mtdrivetype md; + mr.size = sizeof md; + mr.mtdtp = &md; + yes = (istat.st_mode&S_IFMT) == S_IFCHR && + ioctl(iffd, MTIOCGETDRIVETYPE, &mr) == 0; +#else /* SVR4.2MP */ + struct blklen bl; + yes = (istat.st_mode&S_IFMT) == S_IFCHR && + ioctl(iffd, T_RDBLKLEN, &bl) == 0; +#endif /* SVR4.2MP */ + } + return yes; +} + +static void +seekconv(d_type count) +{ + ssize_t sz; + off_t offs; + + if (lseek(offd, 0, SEEK_CUR) != (off_t)-1) { + do { + if ((offs = lseek(offd, obs, SEEK_CUR)) == (off_t)-1) { + err: fprintf(stderr, "%s: output seek error: %s\n", + progname, strerror(errno)); + exit(3); + } + } while (--count); + if ((convs & CONV_NOTRUNC) == 0 && + (ostat.st_mode&S_IFMT) == S_IFREG) + ftruncate(offd, offs); + return; + } + while (count) { + if ((sz = read(offd, oblok, obs)) == 0) + break; + if (sz < 0) + goto err; + count--; + } + if (count) { + memset(oblok, 0, obs); + do { + if ((sz = write(offd, oblok, obs)) < 0) + goto err; + } while (--count); + } +} + +static void +skipconv(int canseek, d_type count) +{ + ssize_t rd = 0; + + if (canseek && lseek(iffd, 0, SEEK_CUR) == (off_t)-1) + canseek = 0; + while (count--) { + if (canseek) { + if (lseek(iffd, ibs, SEEK_CUR) != (off_t)-1) + rd = ibs; + 
else if (errno == EINVAL) + rd = 0; + else { + fprintf(stderr, "%s: input seek error: %s\n", + progname, strerror(errno)); + exit(3); + } + } else { + if ((rd = read(iffd, iblok, ibs)) < 0) { + fprintf(stderr, + "%s: read error during skip: %s\n", + progname, strerror(errno)); + exit(3); + } + } + if (rd == 0 && files-- <= 1) { + fprintf(stderr, "%s: cannot skip past end-of-file\n", + progname); + exit(3); + } + } +} + +static void +prepare(void) +{ + int flags; + + if (bs) + ibs = obs = bs; + iblok = bmalloc(ibs); + if (!(bs && chars == CHAR_NONE && + (convs|CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT) + == (CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT))) + oblok = bmalloc(obs); + if (cbs > 0) { + if ((convs & (CONV_BLOCK|CONV_UNBLOCK)) == 0) { + fprintf(stderr, + "%s: cbs must be zero if no block conversion requested\n", + progname); + exit(2); + } + cblok = bmalloc(cbs + 1); + } else + convs &= ~(CONV_BLOCK|CONV_UNBLOCK); + if ((iffd = iffile ? open(iffile, O_RDONLY) : dup(0)) < 0) { + fprintf(stderr, "%s: cannot open %s: %s\n", progname, + iffile ? iffile : "", strerror(errno)); + exit(1); + } + fstat(iffd, &istat); +#ifdef O_DIRECT + if (convs & CONV_IDIRECT) { + int flags; + flags = fcntl(iffd, F_GETFL); + fcntl(iffd, F_SETFL, flags | O_DIRECT); + } +#endif /* O_DIRECT */ + if (skip) + skipconv(0, skip); + else if (iseek) + skipconv(1, iseek); + flags = O_RDWR | O_CREAT; + if ((convs & CONV_NOTRUNC) == 0 && oseek == 0) + flags |= O_TRUNC; + if ((offd = offile ? open(offile, flags, 0666) : dup(1)) < 0) { + fprintf(stderr, "%s: cannot %s %s: %s\n", + progname, + flags & O_TRUNC ? "create" : "open", + offile ? 
offile : "", strerror(errno)); + exit(1); + } + fstat(offd, &ostat); +#ifdef O_DIRECT + if (convs & CONV_ODIRECT) { + int flags; + flags = fcntl(offd, F_GETFL); + fcntl(offd, F_SETFL, flags | O_DIRECT); + } +#endif /* O_DIRECT */ + if (oseek) + seekconv(oseek); +} + +static void +swabconv(char *data, size_t size) +{ + char c; + + while (size > 1) { + c = data[0]; + data[0] = data[1]; + data[1] = c; + size -= 2; + data += 2; + } +} + +static void +ascconv(char *data, size_t size) +{ + while (size--) { + *data = c_ascii[*data & 0377]; + data++; + } +} + +static ssize_t +swrite(const char *data, size_t size) +{ + ssize_t wt; + + for (;;) { + if ((wt = write(offd, data, size)) <= 0) { + if (errno == EINTR) + continue; + fprintf(stderr, "%s: write error: %s\n", + progname, strerror(errno)); + oflow = 0; + offd = -1; + quit(1); + } + break; + } + return wt; +} + +/* + * Write without output buffering (if bs= was specified). + */ +static void +dwrite(const char *data, size_t size) +{ + ssize_t wrt; + + do { + wrt = swrite(data, size); + if (wrt == obs) + owhole++; + else + opartial++; + data += wrt; + size -= wrt; + } while (size > 0); +} + +/* + * Write to output buffer. On short write, remaining data is kept within + * the buffer and written next time again. Might a warning be useful in + * this case? + */ +static void +bwrite(const char *data, size_t size) +{ + ssize_t wrt; + size_t di; + + while (oflow + size > obs) { + di = obs - oflow; + size -= di; + if (oflow) { + memcpy(&oblok[oflow], data, di); + wrt = swrite(oblok, obs); + } else + wrt = swrite(data, obs); + if (wrt != obs) { + memcpy(oblok, &(oflow ? 
oblok : data)[wrt], obs - wrt); + opartial++; + } else + owhole++; + oflow = obs - wrt; + data += di; + } + if (size == obs) { + if ((wrt = swrite(data, obs)) == obs) + owhole++; + else + opartial++; + size -= wrt; + data += wrt; + } + if (size) { + memcpy(&oblok[oflow], data, size); + oflow += size; + } +} + +static void +bflush(void) +{ + ssize_t wrt; + + if (offd >= 0) { + while (oflow) { + if ((wrt = swrite(oblok, oflow)) != oflow) + memcpy(oblok, &oblok[wrt], obs - wrt); + oflow -= wrt; + opartial++; + } + if (close(offd) < 0) { + fprintf(stderr, "%s: write error: %s\n", + progname, strerror(errno)); + offd = -1; + quit(1); + } + offd = -1; + } +} + +/* + * Handle conversions to EBCDIC. + */ +static void +ewrite(char *data, size_t size) +{ + char *dt = data; + size_t sz = size; + if (chars == CHAR_EBCDIC) { + while (sz--) { + *dt = c_ebcdic[*dt & 0377]; + dt++; + } + } else if (chars == CHAR_IBM) { + while (sz--) { + *dt = c_ibm[*dt & 0377]; + dt++; + } + } + bwrite(data, size); +} + +/* + * Handle 'conv=block'. + */ +static void +cflush(void) +{ + if (convs & CONV_BLOCK && cflow) { + while (cflow < cbs) + cblok[cflow++] = ' '; + ewrite(cblok, cbs); + cflow = 0; + } +} + +static void +cwrite(const char *data, size_t size) +{ + while (size) { + if (ctrunc == 0) { + cblok[cflow] = *data++; + if (cblok[cflow] == '\n') { + if (cflow == 0) + cblok[cflow++] = ' '; + cflush(); + } else if (++cflow == cbs) { + cflush(); + ctrunc = 1; + } + } else { + if (*data++ == '\n') + ctrunc = 0; + else if (ctrunc == 1) { + truncated++; + ctrunc = 2; + } + } + size--; + } +} + +/* + * Handle 'conv=unblock'. 
+ */ +static void +uflush(void) +{ + char *cp; + + if (cflow) { + for (cp = &cblok[cflow-1]; cp >= cblok && *cp == ' '; cp--); + cp[1] = '\n'; + bwrite(cblok, cp - cblok + 2); + cflow = 0; + } +} + +static void +uwrite(const char *data, size_t size) +{ + while (size) { + while (cflow < cbs) { + cblok[cflow++] = *data++; + if (--size == 0) + return; + } + uflush(); + } +} + +static void +blokconv(char *data, size_t size) +{ + switch (chars) { + case CHAR_EBCDIC: + case CHAR_IBM: + if ((convs & (CONV_BLOCK|CONV_UNBLOCK)) == 0) { + ewrite(data, size); + break; + } + /*FALLTHRU*/ + default: + if (convs & CONV_BLOCK) + cwrite(data, size); + else if (convs & CONV_UNBLOCK) + uwrite(data, size); + else + bwrite(data, size); + break; + } +} + +static void +charconv(char *data, size_t size) +{ + if (convs & (CONV_LCASE|CONV_UCASE)) { + if (mb_cur_max > 1) { + /* + * Multibyte case conversion is somewhat ugly + * with dd as there is no guarantee that a + * character fits in an input block. We need + * another intermediate therefore to store + * incomplete multibyte sequences. + */ + int i, n, len; + wint_t wc; + int flush = size == 0; + + while (size > 0 || (flush && mbrest)) { + i = 0; + if (mbrest && mbp && mbp > mblok) { + do + mblok[i] = mbp[i]; + while (i++, --mbrest); + } else if (mbp == mblok) { + i = mbrest; + mbrest = 0; + } + if (i == 0 && size) { + mblok[i++] = *data++; + size--; + } + if (mblok[0] & 0200) { + while (i < mb_cur_max && size) { + mblok[i++] = *data++; + size--; + } + if (!flush && i < mb_cur_max) { + mbp = mblok; + mbrest = i; + return; + } + if ((n = mbtowi(&wc, mblok, i)) < 0) { + len = 1; + wc = WEOF; + } else if (n == 0) + len = 1; + else + len = n; + } else { + wc = mblok[0]; + len = n = 1; + } + if (i > 0) { + mbrest = i - len; + mbp = &mblok[len]; + } else { + mbrest = 0; + mbp = NULL; + } + if (wc != WEOF) { + char new[MB_LEN_MAX + 1]; + + if (convs & CONV_LCASE) + wc = wc & ~(wchar_t)0177 ? 
+ towlower(wc) : + tolower(wc); + if (convs & CONV_UCASE) + wc = wc & ~(wchar_t)0177 ? + towupper(wc) : + toupper(wc); + if ((n = wctomb(new, wc)) > 0) + blokconv(new, n); + else + goto inv; + } else + inv: blokconv(mblok, len); + } + return; + } else { + char *dp = data; + size_t sz = size; + + while (sz--) { + if (convs & CONV_LCASE) + *dp = tolower(*dp & 0377); + if (convs & CONV_UCASE) + *dp = toupper(*dp & 0377); + dp++; + } + } + } + blokconv(data, size); +} + +static void +dd(void) +{ + ssize_t rd; + + while (count == -1 || count > 0) { + if ((rd = read(iffd, iblok, ibs)) < ibs) { + if (rd < 0) { + fprintf(stderr, "%s: read error: %s\n", + progname, strerror(errno)); + if (convs & CONV_NOERROR) { + stats(); + if (!ontape()) + lseek(iffd, ibs, SEEK_CUR); + if (convs & CONV_SYNC) + rd = 0; + else + continue; + } else + quit(1); + } else if (rd == 0) { + if (files-- <= 1) + break; + continue; + } else if (rd > 0) + ipartial++; + if (convs & CONV_SYNC) { + int c; + + c = convs&(CONV_BLOCK|CONV_UNBLOCK) ? 
' ' : 0; + memset(&iblok[rd], c, ibs - rd); + rd = ibs; + } + } else + iwhole++; + if (count > 0) + count--; + if (bs && chars == CHAR_NONE && + (convs|CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT) + == (CONV_SYNC|CONV_NOERROR|CONV_NOTRUNC|CONV_DIRECT)) + dwrite(iblok, rd); + else { + if (convs & CONV_SWAB) + swabconv(iblok, rd); + if (chars == CHAR_ASCII) + ascconv(iblok, rd); + charconv(iblok, rd); + } + } +} + +int +main(int argc, char **argv) +{ + const char *cp; + int o, i; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '-' && + argv[1][2] == '\0') + o = 2; + else + o = 1; + while (o < argc) { + for (i = 0; argtab[i].a_name; i++) { + if ((cp = thisarg(argv[o], argtab[i].a_name)) != 0) { + argtab[i].a_func(cp); + break; + } + } + if (argtab[i].a_name == NULL) + badarg(argv[o]); + o++; + } + if ((sigset(SIGINT, SIG_IGN)) != SIG_IGN) + sigset(SIGINT, onint); + prepare(); + dd(); + quit(0); + /*NOTREACHED*/ + return 0; +} diff --git a/dd/mkfile b/dd/mkfile @@ -0,0 +1,7 @@ +BIN = dd +OBJ = dd.o +INSTALL_BIN = dd +INSTALL_MAN1 = dd.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/diff/diff.1 b/diff/diff.1 @@ -0,0 +1,493 @@ +.\" +.\" Copyright (c) 1980 Regents of the University of California. +.\" All rights reserved. The Berkeley software License Agreement +.\" specifies the terms and conditions for redistribution. +.\" +.\" from 4.3BSD diff.1 6.4 (Berkeley) 5/19/86 +.\" +.\" This code contains changes by +.\" Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. +.\" +.\" Conditions 1, 2, and 4 and the no-warranty notice below apply +.\" to these changes. +.\" +.\" +.\" Copyright (c) 1980, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowedgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. 
+.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.\" +.TH DIFF 1 "6/28/05" "Heirloom Toolchest" "User Commands" +.SH NAME +diff \- differential file comparator +.SH SYNOPSIS +.HP +.nh +.ad l +\fBdiff\fR +[\fB\-abBiptw\fR] +[\fB\-cefhnu\fR] +[\fB\-C\ \fInumber\fR] +[\fB\-U\ \fInumber\fR] +\fIfile1\fR \fIfile2\fR +.HP +.nh +.ad l +\fBdiff\fR +[\fB\-abBiptw\fR] +[\fB\-D\ \fIstring\fR] +\fIfile1\fR \fIfile2\fR +.HP +.nh +.ad l +\fBdiff\fR +[\fB\-abBiNptw12\fR] +[\fB\-cefhnu\fR] +[\fB\-C\ \fInumber\fR] +[\fB\-U\ \fInumber\fR] +[\fB\-lrs\fR] +[\fB\-S\ \fIname\fR] +[\fB\-x\ \fIpattern\fR] +[\fB\-X\ \fIname\fR] +\fIdirectory1\fR \fIdirectory2\fR +.br +.hy 1 +.SH DESCRIPTION +.I Diff +tells what lines must be changed in two files to bring them +into agreement. +If +.I file1 +.RI ( file2 ) +is `\-', the standard input is used. +If +.I file1 +.RI ( file2 ) +is a directory, then a file in that directory +whose file-name is the same as the file-name of +.I file2 +.RI ( file1 ) +is used. +The normal output contains lines of these forms: +.IP "" 5 +.I n1 +a +.I n3,n4 +.br +.I n1,n2 +d +.I n3 +.br +.I n1,n2 +c +.I n3,n4 +.PP +These lines resemble +.I ed +commands to convert +.I file1 +into +.IR file2 . +The numbers after the letters pertain to +.IR file2 . +In fact, by exchanging `a' for `d' and reading backward +one may ascertain equally how to convert +.I file2 +into +.IR file1 . +As in +.I ed, +identical pairs where +.I n1 += +.I n2 +or +.I n3 += +.I n4 +are abbreviated as a single number. +.PP +Following each of these lines come all the lines that are +affected in the first file flagged by `<', +then all the lines that are affected in the second file +flagged by `>'. +.TP 10 +.B \-a +causes a list of differences to be output +for all files, +even for those found to have binary content. +This option is an extension. +.TP 10 +.B \-b +causes trailing whitespace characters +to be ignored, and other +strings of whitespace to compare equal. +.TP 10 +.B \-i +ignores the case of letters. E.g., ``A'' will compare equal to ``a''. 
+.TP 10 +.B \-p +causes the name of the surrounding C function, +or, more exactly, +of the first previous unchanged line +beginning with a letter, the dollar sign, or the underscore, +to be output with each set of changes. +Implies +.I \-c +unless +.I \-u +is also present. +This option is an extension. +.TP 10 +.B \-t +will expand tabs in output lines. Normal, +.B \-c +or +.B \-u +output adds character(s) to the front of each line which may screw up +the indentation of the original source lines and make the output listing +difficult to interpret. This option will preserve the original source's +indentation. +.TP 10 +.B \-w +is similar to +.B \-b +but causes whitespace characters +to be totally ignored. +E.g., ``if\ (\ a\ ==\ b\ )'' will compare equal to ``if(a==b)''. +.TP 10 +.B \-B +causes changes that consist entirely of empty lines added or deleted +to be ignored. +This option is an extension. +.PP +The following options are mutually exclusive: +.TP 10 +.B \-c +produces a diff with three lines of context. +With +.B \-c +the output format is modified slightly: +the output begins with identification of the files involved and +their creation dates, and then each change is separated +by a line with a dozen *'s. +The lines removed from +.I file1 +are marked with `\(mi '; those added to +.I file2 +are marked `+ '. Lines which are changed from one +file to the other are marked in both files with `! '. +.\".sp +.\"Changes which lie within <context> lines of each other are grouped +.\"together on output. (This is a change from the previous ``diff -c'' +.\"but the resulting output is usually much easier to interpret.) +.TP 10 +\fB\-C\ \fInumber\fR +Same as +.B \-c +but uses +.I number +of lines of context. 
+.TP 10 +\fB\-D\ \fIstring\fR +causes +.I diff +to create a merged version of +.I file1 +and +.I file2 +on the standard output, with C preprocessor controls included so that +a compilation of the result without defining \fIstring\fR is equivalent +to compiling +.I file1, +while defining +.I string +will yield +.I file2. +.TP 10 +.B \-e +produces a script of +.I "a, c" +and +.I d +commands for the editor +.I ed, +which will recreate +.I file2 +from +.IR file1 . +In connection with +.BR \-e , +the following shell program may help maintain +multiple versions of a file. +Only an ancestral file ($1) and a chain of +version-to-version +.I ed +scripts ($2,$3,...) made by +.I diff +need be on hand. +A `latest version' appears on +the standard output. +.IP +\ \ \ \ \ \ \ \ (shift; cat $*; echo \'1,$p\') \(bv ed \- $1 +.IP +Extra commands are added to the output when comparing directories with +.B \-e, +so that the result is a +.IR sh (1) +script for converting text files which are common to the two directories +from their state in +.I dir1 +to their state in +.I dir2. +.TP 10 +.B \-f +produces a script similar to that of +.B \-e, +not useful with +.I ed, +and in the opposite order. +.TP 10 +.B \-h +does a fast, half-hearted job. +It works only when changed stretches are short +and well separated, +but does work on files of unlimited length. +.TP 10 +.B \-n +produces a script similar to that of +.B \-e, +but in the opposite order and with a count of changed lines on each +insert or delete command. +.\"This is the form used by +.\".IR rcsdiff (1). +.TP 10 +.B \-u +produces a unified diff with three lines of context. +The output begins with identification of the files involved +and their creation dates, +followed by the changes +separated by `@@ \-range +range @@'. +Lines removed from +.I file1 +are marked with `\(mi', +those added to +.I file2 +are marked `+'. +This option is an extension. 
+.TP 10 +\fB\-U\ \fInumber\fR +Same as +.B \-u +but uses +.I number +of lines of context. +This option is an extension. +.PP +If both arguments are directories, +.I diff +sorts the contents of the directories by name, and then runs the +regular file +.I diff +algorithm on text files which are different. +Binary files which differ, +common subdirectories, and files which appear in only one directory +are listed. +.PP +Options when comparing directories are: +.TP 10 +.B \-l +long output format; each text file +.I diff +is piped through +.IR pr (1) +to paginate it, +other differences are remembered and summarized +after all text file differences are reported. +.TP 10 +.B \-N +causes the text of files +that exist in one directory only +to be output +as if compared to an empty file modified at 1/1/70. +This option is an extension. +.TP 10 +.B \-1 +is similar to +.IR \-N , +but causes just the text of files that exist in +.I directory1 +only to be output. +Files that exist only in +.I directory2 +are listed. +This option is an extension. +.TP 10 +.B \-2 +is similar to +.IR \-N , +but causes just the text of files that exist in +.I directory2 +only to be output. +Files that exist only in +.I directory1 +are listed. +This option is an extension. +.TP 10 +.B \-r +causes application of +.I diff +recursively to common subdirectories encountered. +.TP 10 +.B \-s +causes +.I diff +to report files which are the same, which are otherwise not mentioned. +.TP 10 +.BI \-S " name" +starts a directory +.I diff +in the middle beginning with file +.IR name . +.TP 10 +.BI \-x " pattern" +excludes all file names that match +.I pattern +(as described in +.IR glob (7)) +from comparison. +If +.I pattern +matches a directory, +files below that directory are also excluded. +This option is an extension. +.TP 10 +.BI \-X " name" +excludes all file names contained in +.IR name . +This option is an extension. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). 
+.TP +.B LC_CTYPE +Sets the mapping of bytes to characters, +character case translation +and the set of whitespace characters. +.TP +.B SYSV3 +If this variable is set, +invalid options are ignored instead of being rejected, +and the text of some diagnostic messages is changed. +.SH FILES +.ta \w'/usr/5lib/diffh 'u +/tmp/d????? +.br +/usr/5lib/diffh for \fB\-h\fR +.br +diff for directory diffs +.br +pr +.SH "SEE ALSO" +bdiff(1), +cmp(1), +cc(1), +comm(1), +ed(1), +diff3(1), +patch(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 for no differences, 1 for some, 2 for trouble. +.SH NOTES +Editing scripts produced under the +.BR \-e " or" +.BR \-f " option are naive about" +creating lines consisting of a single `\fB.\fR'. +.PP +When comparing directories with the +\fB\-b, \-w\fP, or \fB\-i\fP +options specified, +.I diff +first compares the files ala +.I cmp, +and then decides to run the +.I diff +algorithm if they are not equal. +This may cause a small amount of spurious output if the files +then turn out to be identical because the only differences are +insignificant blank string or case differences. +.PP +When +.I diff +output is used with +.IR ed (1) +or +.IR patch (1) +for file synchronization, +it is recommended that it is run in the +.I C +or another single-byte LC_CTYPE locale +since character-to-byte conversion +might otherwise hide some changes. diff --git a/diff/diff.c b/diff/diff.c @@ -0,0 +1,473 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Sccsid @(#)diff.c 1.24 (gritter) 3/27/05> */ +/* from 4.3BSD diff.c 4.6 4/3/86 */ + +#include "diff.h" +#include <unistd.h> +#include <locale.h> +#include <iblok.h> +/* + * diff - driver and subroutines + */ + +const char diff[] = "diff"; +const char diffh[] = DIFFH; +const char pr[] = "pr"; +const char *progname; +const char *argv0; + +static void usage(void); +static void xadd(const char *); +static void Xadd(const char *); + +int +main(int argc, char **argv) +{ + int i, invalid = 0; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (getenv("SYSV3") != NULL) + sysv3 = 1; + ifdef1 = "FILE1"; ifdef2 = "FILE2"; + status = 2; + argv0 = argv[0]; + diffargv = argv; + while ((i = getopt(argc, argv, ":D:efnbBwitcC:uU:hS:rslNx:a12pX:")) + != EOF) { + switch (i) { +#ifdef notdef + case 'I': + opt = D_IFDEF; + wantelses = 0; + break; + case 'E': + opt = D_IFDEF; + wantelses = 1; + break; + case '1': + opt = D_IFDEF; + ifdef1 = optarg; + break; +#endif + case 'D': + /* -Dfoo = -E -1 -2foo */ + wantelses = 1; + ifdef1 = ""; + /* fall through */ +#ifdef notdef + case '2': +#endif + opt = D_IFDEF; + ifdef2 = optarg; + break; + case 'e': + opt = D_EDIT; + break; + case 'f': + opt = D_REVERSE; + break; + case 'n': + opt = D_NREVERSE; + break; + case 'b': + bflag = 1; + break; + case 'B': + Bflag = 1; + break; + case 'w': + wflag = 1; + break; + case 'i': + iflag = 1; + break; + case 't': + tflag = 1; + break; + case 'c': + opt = D_CONTEXT; + context = 3; + break; + case 'C': { + char *x; + + opt = D_CONTEXT; + context = strtol(optarg, &x, 10); + if (*x != '\0' || *optarg == '+' || *optarg == '-') { + fprintf(stderr, "%s: use -C num\n", progname); + done(); + } + break; + } + case 'u': + opt = D_UNIFIED; + context = 3; + break; + case 'U': { + char *x; + + opt = D_UNIFIED; + context = strtol(optarg, &x, 10); + if (*x != '\0' || *optarg == '+' || *optarg == '-') { + fprintf(stderr, "%s: use -U num\n", progname); + done(); + } + 
break; + } + case 'h': + hflag++; + break; + case 'S': + start = optarg; + break; + case 'r': + rflag++; + break; + case 's': + sflag++; + break; + case 'l': + lflag++; + break; + case 'N': + Nflag |= 3; + break; + case '1': + Nflag |= 1; + break; + case '2': + Nflag |= 2; + break; + case 'x': + xadd(optarg); + break; + case 'a': + aflag++; + break; + case 'p': + pflag++; + break; + case 'X': + Xadd(optarg); + break; + default: + if (invalid == 0 && !sysv3) + invalid = optopt; + } + } + argv += optind, argc -= optind; + if (argc != 2) { + fprintf(stderr, sysv3 ? "%s: arg count\n" : + "%s: two filename arguments required\n", + progname); + done(); + } + file1 = argv[0]; + file2 = argv[1]; + if (invalid) { + fprintf(stderr, "%s: invalid option -%c\n", progname, invalid); + usage(); + } + if (pflag) { + if (opt == D_UNIFIED || opt == D_CONTEXT) + /*EMPTY*/; + else if (opt == 0) { + opt = D_CONTEXT; + context = 3; + } else { + fprintf(stderr, + "%s: -p doesn't support -e, -f, -n, or -I\n", + progname); + done(); + } + } + if (hflag && opt) { + fprintf(stderr, + "%s: -h doesn't support -e, -f, -n, -c, -u, or -I\n", + progname); + done(); + } + diffany(argv); + /*NOTREACHED*/ + return 0; +} + +void +diffany(char **argv) +{ + if (!strcmp(file1, "-")) + stb1.st_mode = S_IFREG; + else if (stat(file1, &stb1) < 0) { + if (sysv3) + stb1.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + } + if (!strcmp(file2, "-")) + stb2.st_mode = S_IFREG; + else if (stat(file2, &stb2) < 0) { + if (sysv3) + stb2.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + done(); + } + } + if ((stb1.st_mode & S_IFMT) == S_IFDIR && + (stb2.st_mode & S_IFMT) == S_IFDIR) { + diffdir(argv); + } else + diffreg(); + done(); +} + +static void +usage(void) +{ + fprintf(stderr, "\ +usage: %s [ -bcefhilnrstw -C num -D string -S name ] file1 file2\n", + progname); + done(); +} + +int +min(int a,int b) 
+{ + + return (a < b ? a : b); +} + +int +max(int a,int b) +{ + + return (a > b ? a : b); +} + +void +done(void) +{ + if (tempfile1) { + unlink(tempfile1); + tempfile1 = NULL; + } + if (tempfile2) { + unlink(tempfile2); + tempfile2 = NULL; + } + if (recdepth == 0) + exit(status); + else + longjmp(recenv, 1); +} + +static void noroom(void); + +void * +dalloc(size_t n) +{ + struct stackblk *sp; + + if ((sp = malloc(n + sizeof *sp)) != NULL) { + sp->s_prev = NULL; + sp->s_next = curstack; + if (curstack) + curstack->s_prev = sp; + curstack = sp; + return (char *)sp + sizeof *sp; + } else + return NULL; +} + +void * +talloc(size_t n) +{ + register void *p; + + if ((p = dalloc(n)) == NULL) + noroom(); + return p; +} + +void * +ralloc(void *p,size_t n) +{ + struct stackblk *sp, *sq; + + if (p == NULL) + return talloc(n); + sp = (struct stackblk *)((char *)p - sizeof *sp); + if ((sq = realloc(sp, n + sizeof *sp)) == NULL) + noroom(); + if (sq->s_prev) + sq->s_prev->s_next = sq; + if (sq->s_next) + sq->s_next->s_prev = sq; + if (curstack == sp) + curstack = sq; + return (char *)sq + sizeof *sq; +} + +void +tfree(void *p) +{ + struct stackblk *sp; + + if (p == NULL) + return; + sp = (struct stackblk *)((char *)p - sizeof *sp); + if (sp->s_prev) + sp->s_prev->s_next = sp->s_next; + if (sp->s_next) + sp->s_next->s_prev = sp->s_prev; + if (sp == curstack) + curstack = sp->s_next; + free(sp); +} + +void +purgestack(void) +{ + struct stackblk *sp = curstack, *sq = NULL; + + do { + free(sq); + sq = sp; + if (sp) + sp = sp->s_next; + } while (sq); +} + +static void +noroom(void) +{ + oomsg(": files too big, try -h\n"); + status = 2; + done(); +} + +static void +xadd(const char *cp) +{ + struct xclusion *xp; + + xp = talloc(sizeof *xp); + xp->x_pat = cp; + xp->x_nxt = xflag; + xflag = xp; +} + +static void +Xadd(const char *name) +{ + struct iblok *ip; + char *line = NULL; + size_t size = 0, len; + + if (name[0] == '-' && name[1] == '\0') + ip = ib_alloc(0, 0); + else + ip = 
ib_open(name, 0); + if (ip == NULL) { + fprintf(stderr, "%s: -X %s: %s\n", progname, name, + strerror(errno)); + done(); + } + while ((len = ib_getlin(ip, &line, &size, realloc)) != 0) { + if (line[len-1] == '\n') + line[--len] = '\0'; + xadd(line); + line = NULL; + size = 0; + } + free(line); + if (ip->ib_fd) + ib_close(ip); + else + ib_free(ip); +} + +void +oomsg(const char *s) +{ + write(2, progname, strlen(progname)); + write(2, s, strlen(s)); +} diff --git a/diff/diff.h b/diff/diff.h @@ -0,0 +1,211 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)diff.h 1.15 (gritter) 3/26/05> */ +/* from 4.3BSD diff.h 4.7 85/08/16 */ + +/* + * diff - common declarations + */ + +#include <stdio.h> +#include <ctype.h> +#include <sys/param.h> +#include <sys/stat.h> +#include <dirent.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <libgen.h> +#include <errno.h> +#include <setjmp.h> + +#if defined (__GLIBC__) +#if defined (_IO_getc_unlocked) +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +#if defined (_IO_putc_unlocked) +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif +#endif + +/* + * Output format options + */ +int opt; + +#define D_NORMAL 0 /* Normal output */ +#define D_EDIT -1 /* Editor script out */ +#define D_REVERSE 1 /* Reverse editor script */ +#define D_CONTEXT 2 /* Diff with context */ +#define D_IFDEF 3 /* Diff with merged #ifdef's */ +#define D_NREVERSE 4 /* Reverse ed script with numbered + lines and no trailing . 
*/ +#define D_UNIFIED 5 /* Unified diff */ + +int aflag; /* diff binary files */ +int tflag; /* expand tabs on output */ +int pflag; /* show surrounding C function */ + +/* + * Algorithm related options + */ +int hflag; /* -h, use halfhearted DIFFH */ +int bflag; /* ignore blanks in comparisons */ +int wflag; /* totally ignore blanks in comparisons */ +int iflag; /* ignore case in comparisons */ +int Bflag; /* ignore changes that consist of blank lines */ + +/* + * Options on hierarchical diffs. + */ +int lflag; /* long output format with header */ +int rflag; /* recursively trace directories */ +int sflag; /* announce files which are same */ +int Nflag; /* write text of nonexistant files */ +const char *start; /* do file only if name >= this */ + +struct xclusion { + struct xclusion *x_nxt; + const char *x_pat; +} *xflag; /* patterns to exclude from comparison */ + +/* + * Variables for -I D_IFDEF option. + */ +int wantelses; /* -E */ +char *ifdef1; /* String for -1 */ +char *ifdef2; /* String for -2 */ +char *endifname; /* What we will print on next #endif */ +int inifdef; + +/* + * Variables for -c context option. + */ +int context; /* lines of context to be printed */ + +/* + * State for exit status. + */ +int status; +int anychange; +char *tempfile1; /* used when comparing against std input */ +char *tempfile2; /* used when comparing against std input */ + +/* + * Variables for diffdir. + */ +char **diffargv; /* option list to pass to recursive diffs */ +int recdepth; /* recursion depth */ +jmp_buf recenv; /* jump stack on error */ + +struct stackblk { + struct stackblk *s_prev; + struct stackblk *s_next; +} *curstack; + +/* + * Input file names. + * With diffdir, file1 and file2 are allocated BUFSIZ space, + * and padded with a '/', and then efile0 and efile1 point after + * the '/'. 
+ */ +char *file1, *file2, *efile1, *efile2; +struct stat stb1, stb2; + +extern const char diffh[], diff[], pr[]; +extern const char *argv0; +extern const char *progname; +int mb_cur_max; +extern int sysv3; + +/* diff.c */ +void diffany(char **); +int min(int, int); +int max(int, int); +void done(void); +void *dalloc(size_t); +void *talloc(size_t); +void *ralloc(void *, size_t); +void tfree(void *); +void purgestack(void); +void oomsg(const char *); +/* diffdir.c */ +void diffdir(char **); +int ascii(int); +/* diffreg.c */ +void diffreg(void); diff --git a/diff/diffdir.c b/diff/diffdir.c @@ -0,0 +1,993 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)diffdir.c 1.30 (gritter) 1/22/06> */ +/* from 4.3BSD diffdir.c 4.9 (Berkeley) 8/28/84 */ + +#include "diff.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <signal.h> +#include "sigset.h" +#include "pathconf.h" + +#ifdef __GLIBC__ /* old glibcs don't know _XOPEN_SOURCE=600L yet */ +#ifndef S_IFSOCK +#ifdef __S_IFSOCK +#define S_IFSOCK __S_IFSOCK +#endif /* __S_IFSOCK */ +#endif /* !S_IFSOCK */ +#endif /* __GLIBC__ */ + +/* + * diff - directory comparison + */ +#define d_flags d_ino + +#define ONLY 1 /* Only in this directory */ +#define SAME 2 /* Both places and same */ +#define DIFFER 4 /* Both places and different */ +#define DIRECT 8 /* Directory */ + +struct dir { + unsigned long long d_ino; + char *d_entry; +}; + +static int header; +static char *title, *etitle; +static size_t titlesize; +static char procself[40]; + +static void setfile(char **, char **, const char *); +static void scanpr(register struct dir *, int, const char *, const char *, + const char *, const char *, const char *); +static void only(struct dir *, int); +static struct dir *setupdir(const char *); +static int entcmp(const struct dir *, const struct dir 
*); +static void compare(struct dir *, char **); +static void calldiff(const char *, char **); +static int useless(register const char *); +static const char *mtof(mode_t mode); +static void putN(const char *, const char *, const char *, int); +static void putNreg(const char *, const char *, time_t, int); +static void putNnorm(FILE *, const char *, const char *, + FILE *, long long, int); +static void putNedit(FILE *, const char *, const char *, + FILE *, long long, int, int); +static void putNcntx(FILE *, const char *, const char *, + time_t, FILE *, long long, int); +static void putNunif(FILE *, const char *, const char *, + time_t, FILE *, long long, int); +static void putNhead(FILE *, const char *, const char *, time_t, int, + const char *, const char *); +static void putNdata(FILE *, FILE *, int, int); +static void putNdir(const char *, const char *, int); +static long long linec(const char *, FILE *); +static char *mkpath(const char *, const char *); +static void mktitle(void); +static int xclude(const char *); + +void +diffdir(char **argv) +{ + register struct dir *d1, *d2; + struct dir *dir1, *dir2; + register int i, n; + int cmp; + + if (opt == D_IFDEF) { + fprintf(stderr, "%s: can't specify -I with directories\n", + progname); + done(); + } + status = 0; + if (opt == D_EDIT && (sflag || lflag)) + fprintf(stderr, + "%s: warning: shouldn't give -s or -l with -e\n", + progname); + for (n = 6, i = 1; diffargv[i+2]; i++) + n += strlen(diffargv[i]) + 1; + if (n > titlesize) + title = ralloc(title, titlesize = n); + title[0] = 0; + strcpy(title, "diff "); + for (i = 1; diffargv[i+2]; i++) { + if (!strcmp(diffargv[i], "-")) + continue; /* was -S, dont look silly */ + strcat(title, diffargv[i]); + strcat(title, " "); + } + for (etitle = title; *etitle; etitle++) + ; + /* + * This works around a bug present in (at least) Solaris 8 and + * 9: If exec() is called with /proc/self/object/a.out, the + * process hangs. 
It is possible, though, to use the executable + * of another process. So the parent diff is used instead of the + * forked child. + */ + i = getpid(); + snprintf(procself, sizeof procself, +#if defined (__linux__) + "/proc/%d/exe", +#elif defined (__FreeBSD__) || defined (__DragonFly__) || defined (__APPLE__) + "/proc/%d/file", +#else /* !__linux__, !__FreeBSD__, !__APPLE__ */ + "/proc/%d/object/a.out", +#endif /* !__linux__, !__FreeBSD__, !__APPLE__ */ + i); + setfile(&file1, &efile1, file1); + setfile(&file2, &efile2, file2); + argv[0] = file1; + argv[1] = file2; + dir1 = setupdir(file1); + dir2 = setupdir(file2); + d1 = dir1; d2 = dir2; + while (d1->d_entry != 0 || d2->d_entry != 0) { + if (d1->d_entry && useless(d1->d_entry)) { + d1++; + continue; + } + if (d2->d_entry && useless(d2->d_entry)) { + d2++; + continue; + } + if (d1->d_entry == 0) + cmp = 1; + else if (d2->d_entry == 0) + cmp = -1; + else + cmp = strcmp(d1->d_entry, d2->d_entry); + if (cmp < 0) { + if (lflag && !(Nflag&1)) + d1->d_flags |= ONLY; + else if (Nflag&1 || opt == D_NORMAL || + opt == D_CONTEXT || opt == D_UNIFIED) + only(d1, 1); + d1++; + } else if (cmp == 0) { + compare(d1, argv); + d1++; + d2++; + } else { + if (lflag && !(Nflag&2)) + d2->d_flags |= ONLY; + else if (Nflag&2 || opt == D_NORMAL || + opt == D_CONTEXT || opt == D_UNIFIED) + only(d2, 2); + d2++; + } + } + if (lflag) { + scanpr(dir1, ONLY, "Only in %.*s", file1, efile1, 0, 0); + scanpr(dir2, ONLY, "Only in %.*s", file2, efile2, 0, 0); + scanpr(dir1, SAME, "Common identical files in %.*s and %.*s", + file1, efile1, file2, efile2); + scanpr(dir1, DIFFER, "Binary files which differ in %.*s and %.*s", + file1, efile1, file2, efile2); + scanpr(dir1, DIRECT, "Common subdirectories of %.*s and %.*s", + file1, efile1, file2, efile2); + } + if (rflag) { + if (header && lflag) + printf("\f"); + for (d1 = dir1; d1->d_entry; d1++) { + if ((d1->d_flags & DIRECT) == 0) + continue; + strcpy(efile1, d1->d_entry); + strcpy(efile2, 
d1->d_entry); + calldiff(0, argv); + } + } +} + +static void +setfile(char **fpp, char **epp, const char *file) +{ + register char *cp; + int n; + + if ((n = pathconf(file, _PC_PATH_MAX)) < 1024) + n = 1024; + *fpp = dalloc(strlen(file) + 2 + n); + if (*fpp == 0) { + oomsg(": ran out of memory\n"); + exit(1); + } + strcpy(*fpp, file); + for (cp = *fpp; *cp; cp++) + continue; + *cp++ = '/'; + *cp = '\0'; + *epp = cp; +} + +static void +scanpr(register struct dir *dp, int test, const char *title, + const char *file1, const char *efile1, + const char *file2, const char *efile2) +{ + int titled = 0; + + for (; dp->d_entry; dp++) { + if ((dp->d_flags & test) == 0) + continue; + if (titled == 0) { + if (header == 0) + header = 1; + else + printf("\n"); + printf(title, + efile1 - file1 - 1, file1, + efile2 - file2 - 1, file2); + printf(":\n"); + titled = 1; + } + printf("\t%s\n", dp->d_entry); + } +} + +static void +only(struct dir *dp, int which) +{ + char *file = which == 1 ? file1 : file2; + char *other = which == 1 ? file2 : file1; + char *efile = which == 1 ? efile1 : efile2; + char *eother = which == 1 ? 
efile2 : efile1; + + if (Nflag&which) { + char c = file[efile - file - 1]; + char d = other[eother - other - 1]; + file[efile - file - 1] = '\0'; + other[eother - other - 1] = '\0'; + putN(file, other, dp->d_entry, which); + file[efile - file - 1] = c; + other[eother - other - 1] = d; + } else + printf("Only in %.*s: %s\n", (int)(efile - file - 1), file, + dp->d_entry); + status = 1; +} + +static struct dir * +setupdir(const char *cp) +{ + register struct dir *dp = 0, *ep; + register struct dirent *rp; + register int nitems; + DIR *dirp; + + dirp = opendir(cp); + if (dirp == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, cp, strerror(errno)); + done(); + } + nitems = 0; + dp = dalloc(sizeof (struct dir)); + if (dp == 0) { + oomsg(": ran out of memory\n"); + status = 2; + done(); + } + while (rp = readdir(dirp)) { + if (xflag && xclude(rp->d_name)) + continue; + ep = &dp[nitems++]; + ep->d_entry = 0; + ep->d_flags = 0; + ep->d_entry = dalloc(strlen(rp->d_name) + 1); + if (ep->d_entry == 0) { + oomsg(": out of memory\n"); + status = 2; + done(); + } + strcpy(ep->d_entry, rp->d_name); + dp = ralloc(dp, (nitems + 1) * sizeof (struct dir)); + } + dp[nitems].d_entry = 0; /* delimiter */ + closedir(dirp); + qsort(dp, nitems, sizeof (struct dir), + (int (*)(const void *, const void *))entcmp); + return (dp); +} + +static int +entcmp(const struct dir *d1, const struct dir *d2) +{ + return (strcmp(d1->d_entry, d2->d_entry)); +} + +static void +compare(struct dir *dp, char **argv) +{ + register int i, j; + int f1 = -1, f2 = -1; + mode_t fmt1, fmt2; + struct stat stb1, stb2; + char buf1[BUFSIZ], buf2[BUFSIZ]; + + strcpy(efile1, dp->d_entry); + strcpy(efile2, dp->d_entry); + if (stat(file1, &stb1) < 0 || (fmt1 = stb1.st_mode&S_IFMT) == S_IFREG && + (f1 = open(file1, O_RDONLY)) < 0) { + perror(file1); + status = 2; + return; + } + if (stat(file2, &stb2) < 0 || (fmt2 = stb2.st_mode&S_IFMT) == S_IFREG && + (f2 = open(file2, O_RDONLY)) < 0) { + perror(file2); + close(f1); + 
status = 2; + return; + } + if (fmt1 != S_IFREG || fmt2 != S_IFREG) { + if (fmt1 == fmt2) { + switch (fmt1) { + case S_IFDIR: + dp->d_flags = DIRECT; + if (lflag || opt == D_EDIT) + goto closem; + if (opt != D_UNIFIED) + printf("Common subdirectories: " + "%s and %s\n", + file1, file2); + goto closem; + case S_IFBLK: + case S_IFCHR: + if (stb1.st_rdev == stb2.st_rdev) + goto same; + printf("Special files %s and %s differ\n", + file1, file2); + break; + case S_IFIFO: + if (stb1.st_dev == stb2.st_dev && + stb1.st_ino == stb2.st_ino) + goto same; + printf("Named pipes %s and %s differ\n", + file1, file2); + break; + default: + printf("Don't know how to compare " + "%ss %s and %s\n", + mtof(fmt1), file1, file2); + } + } else + printf("File %s is a %s while file %s is a %s\n", + file1, mtof(fmt1), file2, mtof(fmt2)); + if (lflag) + dp->d_flags |= DIFFER; + status = 1; + goto closem; + } + if (stb1.st_size != stb2.st_size) + goto notsame; + if (stb1.st_dev == stb2.st_dev && stb1.st_ino == stb2.st_ino) + goto same; + for (;;) { + i = read(f1, buf1, BUFSIZ); + j = read(f2, buf2, BUFSIZ); + if (i < 0 || j < 0 || i != j) + goto notsame; + if (i == 0 && j == 0) + goto same; + for (j = 0; j < i; j++) + if (buf1[j] != buf2[j]) + goto notsame; + } +same: + if (sflag == 0) + goto closem; + if (lflag) + dp->d_flags = SAME; + else + printf("Files %s and %s are identical\n", file1, file2); + goto closem; +notsame: + if (!aflag && (!ascii(f1) || !ascii(f2))) { + if (lflag) + dp->d_flags |= DIFFER; + else if (opt == D_NORMAL || opt == D_CONTEXT || + opt == D_UNIFIED) + printf("Binary files %s and %s differ\n", + file1, file2); + status = 1; + goto closem; + } + close(f1); close(f2); + anychange = 1; + if (lflag) + calldiff(title, argv); + else { + if (opt == D_EDIT) { + printf("ed - %s << '-*-END-*-'\n", dp->d_entry); + calldiff(0, argv); + } else { + printf("%s%s %s\n", title, file1, file2); + calldiff(0, argv); + } + if (opt == D_EDIT) + printf("w\nq\n-*-END-*-\n"); + } + return; 
/*
 * Run diffany() on argv inside the current process.
 *
 * The file-scope state listed below is copied to locals, reset to
 * empty values with file1/file2 taken from argv, and copied back once
 * diffany() is finished, so the caller continues with its own state.
 * recdepth is incremented for the duration of the call.
 */
static void
stackdiff(char **argv)
{
	int oanychange;
	char *ofile1, *ofile2, *oefile1, *oefile2;
	struct stat ostb1, ostb2;
	struct stackblk *ocurstack;
	char *oargv[2];
	int oheader;
	char *otitle, *oetitle;
	size_t otitlesize;
	jmp_buf orecenv;

	/*
	 * NOTE(review): taking the address looks like a way to keep oargv
	 * in memory across the setjmp() below -- confirm.
	 */
	(void)&oargv;
	recdepth++;
	/* save the caller's state */
	oanychange = anychange;
	ofile1 = file1;
	ofile2 = file2;
	oefile1 = efile1;
	oefile2 = efile2;
	ostb1 = stb1;
	ostb2 = stb2;
	ocurstack = curstack;
	oargv[0] = argv[0];
	oargv[1] = argv[1];
	oheader = header;
	otitle = title;
	oetitle = etitle;
	otitlesize = titlesize;
	memcpy(orecenv, recenv, sizeof orecenv);

	/* start the nested comparison from a clean state */
	anychange = 0;
	file1 = argv[0];
	file2 = argv[1];
	efile1 = NULL;
	efile2 = NULL;
	curstack = NULL;
	header = 0;
	title = NULL;
	etitle = NULL;
	titlesize = 0;

	/*
	 * diffany() runs on the direct return of setjmp(); a later jump
	 * to recenv resumes here with a nonzero value and skips it.
	 */
	if (setjmp(recenv) == 0)
		diffany(argv);
	/* presumably releases what was recorded on curstack -- verify */
	purgestack();

	/* restore the caller's state, including its jump target */
	anychange = oanychange;
	file1 = ofile1;
	file2 = ofile2;
	efile1 = oefile1;
	efile2 = oefile2;
	stb1 = ostb1;
	stb2 = ostb2;
	curstack = ocurstack;
	argv[0] = oargv[0];
	argv[1] = oargv[1];
	header = oheader;
	title = otitle;
	etitle = oetitle;
	titlesize = otitlesize;
	memcpy(recenv, orecenv, sizeof recenv);
	recdepth--;
}
/*
 * Inspect the first BUFSIZ bytes of descriptor f.
 *
 * The descriptor is positioned at offset 0 before reading. Returns 0
 * if a NUL byte is found among the bytes read and 1 otherwise; nothing
 * is examined when the read yields no data or fails, so the result is
 * 1 in that case as well.
 */
int
ascii(int f)
{
	char block[BUFSIZ];
	int got, k;

	lseek(f, 0, 0);
	got = read(f, block, BUFSIZ);
	for (k = 0; k < got; k++) {
		if (block[k] == '\0')
			return (0);
	}
	return (1);
}
/*
 * Write a whole file as a single change in the normal output format.
 *
 * op     stream receiving the output
 * fp     stream the file contents are read from
 * lines  number of lines in the file
 * which  1: the lines are reported as deleted ("1,Nd0", prefix '<');
 *        otherwise they are reported as added ("0a1,N", prefix '>')
 *
 * fn and on are accepted for symmetry with the other putN* writers and
 * are not used here.
 */
static void
putNnorm(FILE *op, const char *fn, const char *on,
		FILE *fp, long long lines, int which)
{
	const int removed = (which == 1);

	if (removed)
		fprintf(op, "1,%lldd0\n", lines);
	else
		fprintf(op, "0a1,%lld\n", lines);
	putNdata(op, fp, removed ? '<' : '>', ' ');
}
/*
 * Write a whole file as a single hunk in the unified output format.
 *
 * The two header lines come from putNhead() with the markers "---" and
 * "+++". The hunk line is "@@ -1,N +0,0 @@" when which is 1 and
 * "@@ -0,0 +1,N @@" otherwise, N being lines. The file contents read
 * from fp follow, each line prefixed with '-' or '+' respectively.
 */
static void
putNunif(FILE *op, const char *fn, const char *on, time_t mtime,
		FILE *fp, long long lines, int which)
{
	int sign;

	putNhead(op, fn, on, mtime, which, "---", "+++");
	fprintf(op, "@@ ");
	if (which == 1) {
		fprintf(op, "-1,%lld +0,0", lines);
		sign = '-';
	} else {
		fprintf(op, "-0,0 +1,%lld", lines);
		sign = '+';
	}
	fprintf(op, " @@\n");
	putNdata(op, fp, sign, 0);
}
on : fn; + fprintf(op, "%s %s\t%s", p1, f1, ctime(&t1)); + fprintf(op, "%s %s\t%s", p2, f2, ctime(&t2)); +} + +static void +putNdata(FILE *op, FILE *fp, int pfx, int sec) +{ + int c, lastc = '\n', col = 0; + + while ((c = getc(fp)) != EOF) { + if (lastc == '\n') { + col = 0; + if (pfx) + putc(pfx, op); + if (sec) + putc(sec, op); + } + if (c == '\t' && tflag) { + do + putc(' ', op); + while (++col & 7); + } else { + putc(c, op); + col++; + } + lastc = c; + } + if (lastc != '\n') { + if (aflag) + fprintf(op, "\n\\ No newline at end of file\n"); + else + putc('\n', op); + } +} + +static void +putNdir(const char *fn, const char *on, int which) +{ + DIR *Dp; + struct dirent *dp; + + if ((Dp = opendir(fn)) == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, fn, strerror(errno)); + status = 2; + return; + } + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + dp->d_name[1] == '.' && + dp->d_name[2] == '\0')) + continue; + if (xflag && xclude(dp->d_name)) + continue; + putN(fn, on, dp->d_name, which); + } + closedir(Dp); +} + +static long long +linec(const char *fn, FILE *fp) +{ + int c, lastc = '\n'; + long long cnt = 0; + + while ((c = getc(fp)) != EOF) { + if (c == '\n') + cnt++; + lastc = c; + } + if (lastc != '\n') { + if (!aflag) + fprintf(stderr, + "Warning: missing newline at end of file %s\n", + fn); + cnt++; + } + return cnt; +} + +static char * +mkpath(const char *dir, const char *file) +{ + char *path, *pp; + const char *cp; + + pp = path = talloc(strlen(dir) + strlen(file) + 2); + for (cp = dir; *cp; cp++) + *pp++ = *cp; + if (pp > path && pp[-1] != '/') + *pp++ = '/'; + for (cp = file; *cp; cp++) + *pp++ = *cp; + *pp = '\0'; + return path; +} + +static void +mktitle(void) +{ + int n; + + n = strlen(file1) + strlen(file2) + 2; + if (etitle - title + n < titlesize) { + titlesize = n; + n = etitle - title; + title = ralloc(title, titlesize); + etitle = &title[n]; + } + sprintf(etitle, "%s %s", file1, file2); +} + 
+static int +xclude(const char *fn) +{ + extern int gmatch(const char *, const char *); + struct xclusion *xp; + + for (xp = xflag; xp; xp = xp->x_nxt) + if (gmatch(fn, xp->x_pat)) + return 1; + return 0; +} diff --git a/diff/diffh.c b/diff/diffh.c @@ -0,0 +1,410 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)diffh.sl 1.11 (gritter) 5/29/05"; + +/* from 4.3BSD diffh.c 4.4 11/27/85> */ + +#include <stdio.h> +#include <ctype.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <locale.h> +#include <wchar.h> +#include <wctype.h> + +#include <iblok.h> +#include <mbtowi.h> + +#define C 3 +#define RANGE 30 +#define INF 16384 + +#define next(wc, s, n) (*(s) & 0200 ? ((n) = mbtowi(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc=WEOF, 1) : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)) + +static char *text[2][RANGE]; +static size_t size[2][RANGE]; +static long long lineno[2] = {1, 1}; /*no. 
of 1st stored line in each file*/ +static int ntext[2]; /*number of stored lines in each*/ +static long long n0,n1; /*scan pointer in each*/ +static int bflag; +static int mb_cur_max; +static int debug = 0; +static struct iblok *file[2]; +static int eof[2]; + +static char *getl(int, long long); +static void clrl(int, long long); +static void movstr(int, int, int); +static int easysynch(void); +static int output(int, int); +static void change(long long, int, long long, int, const char *); +static void range(long long, int); +static int cmp(const char *, const char *); +static struct iblok *dopen(const char *, const char *); +static void progerr(const char *); +static void error(const char *, const char *); +static int hardsynch(void); +static void *lrealloc(void *, size_t); + + /* return pointer to line n of file f*/ +static char * +getl(int f, long long n) +{ + register int delta, nt; + size_t len; + + delta = n - lineno[f]; + nt = ntext[f]; + if(delta<0) + progerr("1"); + if(delta<nt) + return(text[f][delta]); + if(delta>nt) + progerr("2"); + if(nt>=RANGE) + progerr("3"); + if(eof[f]) + return(NULL); + len = ib_getlin(file[f], &text[f][nt], &size[f][nt], lrealloc); + if (len != 0) { + ntext[f]++; + return(text[f][nt]); + } else { + eof[f]++; + return NULL; + } +} + + /*remove thru line n of file f from storage*/ +static void +clrl(int f,long long n) +{ + register long long i,j; + j = n-lineno[f]+1; + for(i=0;i+j<ntext[f];i++) + movstr(f, i+j, i); + lineno[f] = n+1; + ntext[f] -= j; +} + +static void +movstr(register int f, register int i, register int j) +{ + free(text[f][j]); + text[f][j] = text[f][i]; + size[f][j] = size[f][i]; + text[f][i] = 0; + size[f][i] = 0; +} + +int +main(int argc,char **argv) +{ + char *s0,*s1; + register int c, status = 0; + + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + while((c=getopt(argc,argv,":b")) != EOF) { + switch (c) { + case 'b': + bflag++; + break; + } + } + if(argc-optind!=2) + error("must have 2 file 
arguments",""); + file[0] = dopen(argv[optind],argv[optind+1]); + file[1] = dopen(argv[optind+1],argv[optind]); + for(;;) { + s0 = getl(0,++n0); + s1 = getl(1,++n1); + if(s0==NULL||s1==NULL) + break; + if(cmp(s0,s1)!=0) { + if(!easysynch()&&!hardsynch()) + progerr("5"); + status = 1; + } else { + clrl(0,n0); + clrl(1,n1); + } + } + if(s0==NULL&&s1==NULL) + exit(status); + if(s0==NULL) + output(-1,INF); + if(s1==NULL) + output(INF,-1); + return (1); +} + + /* synch on C successive matches*/ +static int +easysynch(void) +{ + int i,j; + register int k,m; + char *s0,*s1; + for(i=j=1;i<RANGE&&j<RANGE;i++,j++) { + s0 = getl(0,n0+i); + if(s0==NULL) + return(output(INF,INF)); + for(k=C-1;k<j;k++) { + for(m=0;m<C;m++) + if(cmp(getl(0,n0+i-m), + getl(1,n1+k-m))!=0) + goto cont1; + return(output(i-C,k-C)); +cont1: ; + } + s1 = getl(1,n1+j); + if(s1==NULL) + return(output(INF,INF)); + for(k=C-1;k<=i;k++) { + for(m=0;m<C;m++) + if(cmp(getl(0,n0+k-m), + getl(1,n1+j-m))!=0) + goto cont2; + return(output(k-C,j-C)); +cont2: ; + } + } + return(0); +} + +static int +output(int a,int b) +{ + register int i; + char *s; + if(a<0) + change(n0-1,0,n1,b,"a"); + else if(b<0) + change(n0,a,n1-1,0,"d"); + else + change(n0,a,n1,b,"c"); + for(i=0;i<=a;i++) { + s = getl(0,n0+i); + if(s==NULL) + break; + printf("< %s",s); + clrl(0,n0+i); + } + n0 += i-1; + if(a>=0&&b>=0) + printf("---\n"); + for(i=0;i<=b;i++) { + s = getl(1,n1+i); + if(s==NULL) + break; + printf("> %s",s); + clrl(1,n1+i); + } + n1 += i-1; + return(1); +} + +static void +change(long long a,int b,long long c,int d,const char *s) +{ + range(a,b); + printf("%s",s); + range(c,d); + printf("\n"); +} + +static void +range(long long a,int b) +{ + if(b==INF) + printf("%lld,$",a); + else if(b==0) + printf("%lld",a); + else + printf("%lld,%lld",a,a+b); +} + +static int +cmp(const char *s,const char *t) +{ + if(debug) + printf("%s:%s\n",s,t); + for(;;){ + if(bflag) { + if (mb_cur_max > 1) { + wint_t wc, wd; + int n, m; + + if (next(wc, s, 
n), next(wd, t, m), + iswspace(wc) && iswspace(wd)) { + while (s += n, next(wc, s, n), + iswspace(wc)); + while (t += m, next(wd, t, m), + iswspace(wd)); + } + } else { + if (isspace(*s)&&isspace(*t)) { + while(isspace(*++s)) ; + while(isspace(*++t)) ; + } + } + } + if(*s!=*t||*s==0) + break; + s++; + t++; + } + return((*s&0377)-(*t&0377)); +} + +static struct iblok * +dopen(const char *f1,const char *f2) +{ + struct iblok *ip; + char *b=0,*bptr; + const char *eptr; + struct stat statbuf; + if(cmp(f1,"-")==0) + if(cmp(f2,"-")==0) + error("can't do - -",""); + else + return(ib_alloc(0, 0)); + if(stat(f1,&statbuf)==-1) + error("can't access ",f1); + if((statbuf.st_mode&S_IFMT)==S_IFDIR) { + b = lrealloc(0, strlen(f1) + strlen(f2) + 2); + for(bptr=b;*bptr= *f1++;bptr++) ; + *bptr++ = '/'; + for(eptr=f2;*eptr;eptr++) + if(*eptr=='/'&&eptr[1]!=0&&eptr[1]!='/') + f2 = eptr+1; + while(*bptr++= *f2++) ; + f1 = b; + } + ip = ib_open(f1,0); + if(ip==NULL) + error("can't open",f1); + if (b) + free(b); + return(ip); +} + +static void +progerr(const char *s) +{ + error("program error ",s); +} + +static void +error(const char *s,const char *t) +{ + fprintf(stderr,"diffh: %s%s\n",s,t); + exit(2); +} + + /*stub for resychronization beyond limits of text buf*/ +static int +hardsynch(void) +{ + change(n0,INF,n1,INF,"c"); + printf("---change record omitted\n"); + error("can't resynchronize",""); + return(0); +} + +static void * +lrealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == NULL) { + write(2, "diffh: line too long\n", 21); + _exit(1); + } + return np; +} diff --git a/diff/diffreg.c b/diff/diffreg.c @@ -0,0 +1,1629 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +/* Sccsid @(#)diffreg.c 1.30 (gritter) 3/15/07> */ +/* from 4.3BSD diffreg.c 4.16 3/29/86 */ + +#include "diff.h" +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <signal.h> +#include "sigset.h" +#include <wchar.h> +#include <wctype.h> +#include <inttypes.h> +#include "mbtowi.h" +/* + * diff - compare two files. + */ + +/* + * Uses an algorithm due to Harold Stone, which finds + * a pair of longest identical subsequences in the two + * files. + * + * The major goal is to generate the match vector J. + * J[i] is the index of the line in file1 corresponding + * to line i file0. J[i] = 0 if there is no + * such line in file1. + * + * Lines are hashed so as to work in core. All potential + * matches are located by sorting the lines of each file + * on the hash (called ``value''). In particular, this + * collects the equivalence classes in file1 together. + * Subroutine equiv replaces the value of each line in + * file0 by the index of the first element of its + * matching equivalence in (the reordered) file1. + * To save space equiv squeezes file1 into a single + * array member in which the equivalence classes + * are simply concatenated, except that their first + * members are flagged by changing sign. + * + * Next the indices that point into member are unsorted into + * array class according to the original order of file0. + * + * The cleverness lies in routine stone. This marches + * through the lines of file0, developing a vector klist + * of "k-candidates". At step i a k-candidate is a matched + * pair of lines x,y (x in file0 y in file1) such that + * there is a common subsequence of length k + * between the first i lines of file0 and the first y + * lines of file1, but there is no such subsequence for + * any smaller y. x is the earliest possible mate to y + * that occurs in such a subsequence. 
+ * + * Whenever any of the members of the equivalence class of + * lines in file1 matable to a line in file0 has serial number + * less than the y of some k-candidate, that k-candidate + * with the smallest such y is replaced. The new + * k-candidate is chained (via pred) to the current + * k-1 candidate so that the actual subsequence can + * be recovered. When a member has serial number greater + * that the y of all k-candidates, the klist is extended. + * At the end, the longest subsequence is pulled out + * and placed in the array J by unravel + * + * With J in hand, the matches there recorded are + * check'ed against reality to assure that no spurious + * matches have crept in due to hashing. If they have, + * they are broken, and "jackpot" is recorded--a harmless + * matter except that a true match for a spuriously + * mated line may now be unnecessarily reported as a change. + * + * Much of the complexity of the program comes simply + * from trying to minimize core utilization and + * maximize the range of doable problems by dynamically + * allocating what is needed and reusing what is not. + * The core requirements for problems larger than somewhat + * are (in words) 2*length(file0) + length(file1) + + * 3*(number of k-candidates installed), typically about + * 6n words for files of length n. 
+ */ + +#define prints(s) fputs(s,stdout) + +static FILE *input[2]; +static char mbuf[2][MB_LEN_MAX+1]; +static char *mcur[2]; +static char *mend[2]; +static int incompl[2]; + +struct cand { + long x; + long y; + long pred; +} cand; +static struct line { + long serial; + long value; +} *file[2], line; +static long len[2]; +static struct line *sfile[2];/*shortened by pruning common prefix and suffix*/ +static long slen[2]; +static long pref, suff; /* length of prefix and suffix */ +static long *class; /* will be overlaid on file[0] */ +static long *member; /* will be overlaid on file[1] */ +static long *klist; /* will be overlaid on file[0] after class */ +static struct cand *clist; /* merely a free storage pot for candidates */ +static long clen = 0; +static long *J; /* will be overlaid on class */ +static off_t *ixold; /* will be overlaid on klist */ +static off_t *ixnew; /* will be overlaid on file[1] */ +static int (*chrtran)(int);/* translation for case-folding */ +static long pstart; /* start of last search for -p */ +static long plast; /* match of last search for -p */ +static long *saveJ; /* saved J for -p */ + +/* chrtran points to one of 3 translation functions: + * cup2low if folding upper to lower case + * clow2low if not folding case + * wup2low if folding upper to lower case and MB_CUR_MAX > 1 + */ +static int +clow2low(int c) +{ + return c; +} + +static int +cup2low(int c) +{ + return tolower(c); +} + +static int +wup2low(int c) +{ + return c & ~(wchar_t)0177 ? 
towlower(c) : tolower(c); +} + +static char *copytemp(char **, const char *); +#undef splice +#define splice xxsplice +static char *splice(const char *, char *); +static void prepare(int); +static void prune(void); +static void equiv(struct line *, long, struct line *, long, long *); +static long stone(long *, long, long *, long *); +static long newcand(long, long, long); +static long search(long *, long, long); +static void unravel(long); +static void check_sb(void); +static void check_mb(void); +static void sort(struct line *, long); +static void unsort(struct line *, long, long *); +static long skipline(int); +static long wskipline(int); +static void output(void); +static void change(long, long, long, long); +static void range(long, long, const char *); +static void fetch(off_t *, long, long, FILE *, const char *, int); +static int readhash(int); +static int asciifile(FILE *); +static void dump_context_vec(void); +static void sdone(int); +static char *wcget(int, wint_t *, int *); +static void missnl(int); +static void pdump(long); + +#define notseekable(m) (((m)&S_IFMT) != S_IFREG && ((m)&S_IFMT) != S_IFBLK) + +void +diffreg(void) +{ + register long i, j; + char buf1[BUFSIZ], buf2[BUFSIZ]; + + if (hflag) { + diffargv[0] = "diffh"; + execvp(diffh, diffargv); + if (sysv3) + fprintf(stderr, "%s: cannot find diffh\n", progname); + else + fprintf(stderr, "%s: %s: %s\n", progname, diffh, + strerror(errno)); + done(); + } + chrtran = (iflag? mb_cur_max>1 ? 
wup2low : cup2low : clow2low); + if ((stb1.st_mode & S_IFMT) == S_IFDIR) { + file1 = splice(file1, file2); + if (stat(file1, &stb1) < 0) { + if (sysv3) + stb1.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + } + } else if ((stb2.st_mode & S_IFMT) == S_IFDIR) { + file2 = splice(file2, file1); + if (stat(file2, &stb2) < 0) { + if (sysv3) + stb2.st_mode = S_IFREG; + else { + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + done(); + } + } + } + if (!strcmp(file1, "-") || (notseekable(stb1.st_mode) && + strcmp(file1, "/dev/null"))) { + if (!strcmp(file2, "-")) { + fprintf(stderr, "%s: can't specify - -\n", progname); + done(); + } + file1 = copytemp(&tempfile1, file1); + if (stat(file1, &stb1) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + } else if (!strcmp(file2, "-") || (notseekable(stb2.st_mode) && + strcmp(file2, "/dev/null"))) { + file2 = copytemp(&tempfile2, file2); + if (stat(file2, &stb2) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + done(); + } + } + if ((input[0] = fopen(file1, "r")) == NULL) { + if (sysv3) + fprintf(stderr, "%s: cannot open %s\n", + progname, file1); + else + fprintf(stderr, "%s: %s: %s\n", progname, file1, + strerror(errno)); + done(); + } + mcur[0] = mend[0] = NULL; + if ((input[1] = fopen(file2, "r")) == NULL) { + if (sysv3) + fprintf(stderr, "%s: cannot open %s\n", + progname, file2); + else + fprintf(stderr, "%s: %s: %s\n", progname, file2, + strerror(errno)); + fclose(input[0]); + done(); + } + mcur[1] = mend[1] = NULL; + if (stb1.st_size != stb2.st_size) + goto notsame; + for (;;) { + i = fread(buf1, 1, BUFSIZ, input[0]); + j = fread(buf2, 1, BUFSIZ, input[1]); + if (i < 0 || j < 0 || i != j) + goto notsame; + if (i == 0 && j == 0) { + fclose(input[0]); + fclose(input[1]); + status = 0; /* files don't differ */ + goto same; + } + for (j = 0; j < i; j++) + if (buf1[j] 
!= buf2[j]) + goto notsame; + } +notsame: + /* + * Files certainly differ at this point; set status accordingly + */ + status = 1; + if (!aflag && (!asciifile(input[0]) || !asciifile(input[1]))) { + printf("Binary files %s and %s differ\n", file1, file2); + fclose(input[0]); + fclose(input[1]); + done(); + } + prepare(0); + prepare(1); + fclose(input[0]); + fclose(input[1]); + prune(); + sort(sfile[0],slen[0]); + sort(sfile[1],slen[1]); + + member = (long *)file[1]; + equiv(sfile[0], slen[0], sfile[1], slen[1], member); + member = ralloc(member,(slen[1]+2)*sizeof(*member)); + + class = (long *)file[0]; + unsort(sfile[0], slen[0], class); + class = ralloc(class,(slen[0]+2)*sizeof(*class)); + + klist = talloc((slen[0]+2)*sizeof(*klist)); + clist = talloc(sizeof(cand)); + i = stone(class, slen[0], member, klist); + tfree(member); + tfree(class); + + J = talloc((len[0]+2)*sizeof(*J)); + unravel(klist[i]); + tfree(clist); + tfree(klist); + + ixold = talloc((len[0]+2)*sizeof(*ixold)); + ixnew = talloc((len[1]+2)*sizeof(*ixnew)); + if (mb_cur_max > 1) + check_mb(); + else + check_sb(); + pstart = plast = 0; + output(); + status = anychange; +same: + if (opt == D_CONTEXT && anychange == 0) + printf("No differences encountered\n"); + done(); +} + +static char * +copytemp(char **tf, const char *fn) +{ + const char templ[] = "/tmp/dXXXXXX"; + char buf[BUFSIZ]; + register int i, f, sfd; + + if (*tf) { + unlink(*tf); + strcpy(*tf, templ); + } else { + sigset(SIGHUP,sdone); + sigset(SIGINT,sdone); + sigset(SIGPIPE,sdone); + sigset(SIGTERM,sdone); + *tf = strdup(templ); + } + f = mkstemp(*tf); + if (f < 0) { + fprintf(stderr, "%s: cannot create %s\n", progname, *tf); + done(); + } + if (strcmp(fn, "-")) { + if ((sfd = open(fn, O_RDONLY)) < 0) { + fprintf(stderr, "%s: %s: %s\n", progname, fn, + strerror(errno)); + done(); + } + } else + sfd = 0; + while ((i = read(sfd,buf,BUFSIZ)) > 0) + if (write(f,buf,i) != i) { + fprintf(stderr, "%s: write failed %s\n", progname, *tf); + 
done(); + } + close(f); + if (sfd > 0) + close(sfd); + return (*tf); +} + +static char * +splice(const char *dir, char *file) +{ + const char *tail; + char *buf; + + if (!strcmp(file, "-")) { + fprintf(stderr, + "%s: can't specify - with other arg directory\n", + progname); + done(); + } + tail = basename(file); + buf = talloc(strlen(dir) + strlen(tail) + 2); + sprintf(buf, "%s/%s", dir, tail); + return (buf); +} + +static void +prepare(int i) +{ + register struct line *p; + register long j; + register long h; + + fseeko(input[i], 0, SEEK_SET); + mcur[i] = mend[i] = NULL; + p = talloc(3*sizeof(line)); + for(j=0; h=readhash(i);) { + p = ralloc(p,(++j+3)*sizeof(line)); + p[j].value = h; + } + len[i] = j; + file[i] = p; +} + +static void +prune(void) +{ + register long i; + register int j; + for(pref=0;pref<len[0]&&pref<len[1]&& + file[0][pref+1].value==file[1][pref+1].value; + pref++ ) ; + for(suff=0;suff<len[0]-pref&&suff<len[1]-pref&& + file[0][len[0]-suff].value==file[1][len[1]-suff].value; + suff++) ; + for(j=0;j<2;j++) { + sfile[j] = file[j]+pref; + slen[j] = len[j]-pref-suff; + for(i=0;i<=slen[j];i++) + sfile[j][i].serial = i; + } +} + +static void +equiv(struct line *a,long n,struct line *b,long m,long *c) +{ + register long i, j; + i = j = 1; + while(i<=n && j<=m) { + if(a[i].value <b[j].value) + a[i++].value = 0; + else if(a[i].value == b[j].value) + a[i++].value = j; + else + j++; + } + while(i <= n) + a[i++].value = 0; + b[m+1].value = 0; + j = 0; + while(++j <= m) { + c[j] = -b[j].serial; + while(b[j+1].value == b[j].value) { + j++; + c[j] = b[j].serial; + } + } + c[j] = -1; +} + +static long +stone(long *a,long n,long *b,register long *c) +{ + register long i, k,y; + long j, l; + long oldc, tc; + long oldl; + k = 0; + c[0] = newcand(0,0,0); + for(i=1; i<=n; i++) { + j = a[i]; + if(j==0) + continue; + y = -b[j]; + oldl = 0; + oldc = c[0]; + do { + if(y <= clist[oldc].y) + continue; + l = search(c, k, y); + if(l!=oldl+1) + oldc = c[l-1]; + if(l<=k) { + 
if(clist[c[l]].y <= y) + continue; + tc = c[l]; + c[l] = newcand(i,y,oldc); + oldc = tc; + oldl = l; + } else { + c[l] = newcand(i,y,oldc); + k++; + break; + } + } while((y=b[++j]) > 0); + } + return(k); +} + +static long +newcand(long x,long y,long pred) +{ + register struct cand *q; + clist = ralloc(clist,++clen*sizeof(cand)); + q = clist + clen -1; + q->x = x; + q->y = y; + q->pred = pred; + return(clen-1); +} + +static long +search(long *c, long k, long y) +{ + register long i, j, l; + long t; + if(clist[c[k]].y<y) /*quick look for typical case*/ + return(k+1); + i = 0; + j = k+1; + while (1) { + l = i + j; + if ((l >>= 1) <= i) + break; + t = clist[c[l]].y; + if(t > y) + j = l; + else if(t < y) + i = l; + else + return(l); + } + return(l+1); +} + +static void +unravel(long p) +{ + register long i; + register struct cand *q; + for(i=0; i<=len[0]; i++) + J[i] = i<=pref ? i: + i>len[0]-suff ? i+len[1]-len[0]: + 0; + for(q=clist+p;q->y!=0;q=clist+q->pred) + J[q->x+pref] = q->y+pref; +} + +/* check does double duty: +1. ferret out any fortuitous correspondences due +to confounding by hashing (which result in "jackpot") +2. 
collect random access indexes to the two files */ + +static void +check_sb(void) +{ + register long i, j; + long jackpot; + off_t ctold, ctnew; + register int c,d; + + if ((input[0] = fopen(file1,"r")) == NULL) { + perror(file1); + done(); + } + if ((input[1] = fopen(file2,"r")) == NULL) { + perror(file2); + fclose(input[0]); + done(); + } + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for(i=1;i<=len[0];i++) { + if(J[i]==0) { + ixold[i] = ctold += skipline(0); + continue; + } + while(j<J[i]) { + ixnew[j] = ctnew += skipline(1); + j++; + } + if(bflag || wflag || iflag) { + for(;;) { + c = getc(input[0]); + d = getc(input[1]); + ctold++; + ctnew++; + if(bflag && isspace(c) && isspace(d)) { + do { + if(c=='\n') + break; + ctold++; + } while(isspace(c=getc(input[0]))); + do { + if(d=='\n') + break; + ctnew++; + } while(isspace(d=getc(input[1]))); + } else if ( wflag ) { + while( isspace(c) && c!='\n' ) { + c=getc(input[0]); + ctold++; + } + while( isspace(d) && d!='\n' ) { + d=getc(input[1]); + ctnew++; + } + } + if(chrtran(c) != chrtran(d)) { + jackpot++; + J[i] = 0; + if(c!='\n') + ctold += skipline(0); + if(d!='\n') + ctnew += skipline(1); + break; + } + if(c=='\n' || c==EOF) + break; + } + } else { + for(;;) { + ctold++; + ctnew++; + if((c=getc(input[0])) != (d=getc(input[1]))) { + /* jackpot++; */ + J[i] = 0; + if(c!='\n') + ctold += skipline(0); + if(d!='\n') + ctnew += skipline(1); + break; + } + if(c=='\n' || c==EOF) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for(;j<=len[1];j++) { + ixnew[j] = ctnew += skipline(1); + } + fclose(input[0]); + fclose(input[1]); +/* + if(jackpot) + fprintf(stderr, "jackpot\n"); +*/ +} + +static void +check_mb(void) +{ + register long i, j; + long jackpot; + off_t ctold, ctnew; + wint_t wc, wd; + int nc, nd; + char *cc, *cd; + + if ((input[0] = fopen(file1,"r")) == NULL) { + perror(file1); + done(); + } + mcur[0] = mend[0] = NULL; + if ((input[1] = fopen(file2,"r")) == NULL) { + 
perror(file2); + fclose(input[0]); + done(); + } + mcur[1] = mend[1] = NULL; + j = 1; + ixold[0] = ixnew[0] = 0; + jackpot = 0; + ctold = ctnew = 0; + for(i=1;i<=len[0];i++) { + if(J[i]==0) { + ixold[i] = ctold += wskipline(0); + continue; + } + while(j<J[i]) { + ixnew[j] = ctnew += wskipline(1); + j++; + } + if(bflag || wflag || iflag) { + for(;;) { + cc = wcget(0, &wc, &nc); + cd = wcget(1, &wd, &nd); + if(bflag && iswspace(wc) && iswspace(wd)) { + do { + if(wc=='\n') + break; + ctold += nc; + } while(cc = wcget(0, &wc, &nc), + iswspace(wc)); + do { + if(wd=='\n') + break; + ctnew += nd; + } while(cd = wcget(1, &wd, &nd), + iswspace(wd)); + ctold += nc; + ctnew += nd; + } else if ( wflag ) { + ctold += nc; + ctnew += nd; + while( iswspace(wc) && wc!='\n' && cc) { + cc = wcget(0, &wc, &nc); + ctold += nc; + } + while( iswspace(wd) && wd!='\n' && cd) { + cd = wcget(1, &wd, &nd); + ctnew += nd; + } + } else { + ctold += nc; + ctnew += nd; + } + if(chrtran(wc) != chrtran(wd) || + wc == WEOF && wd == WEOF && + (cc == NULL && cd && *cd || + cc && *cc && cd == NULL || + cc && cd && *cc != *cd)) { + jackpot++; + J[i] = 0; + if(wc!='\n') + ctold += wskipline(0); + if(wd!='\n') + ctnew += wskipline(1); + break; + } + if(wc=='\n' || cc == NULL) + break; + } + } else { + for(;;) { + cc = wcget(0, &wc, &nc); + cd = wcget(1, &wd, &nd); + ctold += nc; + ctnew += nd; + if (wc != wd || wc == WEOF && wd == WEOF && + (cc == NULL && cd && *cd || + cc && *cc && cd == NULL || + cc && cd && *cc != *cd)) { + /* jackpot++; */ + J[i] = 0; + if(wc!='\n') + ctold += wskipline(0); + if(wd!='\n') + ctnew += wskipline(1); + break; + } + if(wc=='\n' || cc == NULL) + break; + } + } + ixold[i] = ctold; + ixnew[j] = ctnew; + j++; + } + for(;j<=len[1];j++) { + ixnew[j] = ctnew += wskipline(1); + } + fclose(input[0]); + fclose(input[1]); +/* + if(jackpot) + fprintf(stderr, "jackpot\n"); +*/ +} + +static void +sort(struct line *a,long n) /*shellsort CACM #201*/ +{ + struct line w; + register long j,m 
= 0; + struct line *ai; + register struct line *aim; + long k; + + if (n == 0) + return; + for(j=1;j<=n;j*= 2) + m = 2*j - 1; + for(m/=2;m!=0;m/=2) { + k = n-m; + for(j=1;j<=k;j++) { + for(ai = &a[j]; ai > a; ai -= m) { + aim = &ai[m]; + if(aim < ai) + break; /*wraparound*/ + if(aim->value > ai[0].value || + aim->value == ai[0].value && + aim->serial > ai[0].serial) + break; + w.value = ai[0].value; + ai[0].value = aim->value; + aim->value = w.value; + w.serial = ai[0].serial; + ai[0].serial = aim->serial; + aim->serial = w.serial; + } + } + } +} + +static void +unsort(struct line *f, long l, long *b) +{ + register long *a; + register long i; + a = talloc((l+1)*sizeof(*a)); + for(i=1;i<=l;i++) + a[f[i].serial] = f[i].value; + for(i=1;i<=l;i++) + b[i] = a[i]; + tfree(a); +} + +static long +skipline(int f) +{ + register long i; + register int c; + + for(i=1;(c=getc(input[f]))!='\n';i++) + if (c == EOF) + return(i); + return(i); +} + +static long +wskipline(int f) +{ + long i; + int n; + wint_t wc; + char *cp; + + for (i = 1; cp = wcget(f, &wc, &n), wc != '\n'; i += n) + if (cp == NULL) + return (i); + return (i); +} + +static void +output(void) +{ + long m; + register long i0, i1, j1; + long j0; + if ((input[0] = fopen(file1,"r")) == NULL) { + perror(file1); + done(); + } + if ((input[1] = fopen(file2,"r")) == NULL) { + perror(file2); + fclose(input[0]); + done(); + } + m = len[0]; + J[0] = 0; + J[m+1] = len[1]+1; + if (pflag) { + saveJ = talloc((len[0]+2)*sizeof(*saveJ)); + memcpy(saveJ, J, (len[0]+2)*sizeof(*saveJ)); + } + if(opt!=D_EDIT) for(i0=1;i0<=m;i0=i1+1) { + while(i0<=m&&J[i0]==J[i0-1]+1) i0++; + j0 = J[i0-1]+1; + i1 = i0-1; + while(i1<m&&J[i1+1]==0) i1++; + j1 = J[i1+1]-1; + J[i1] = j1; + change(i0,i1,j0,j1); + } else for(i0=m;i0>=1;i0=i1-1) { + while(i0>=1&&J[i0]==J[i0+1]-1&&J[i0]!=0) i0--; + j0 = J[i0+1]-1; + i1 = i0+1; + while(i1>1&&J[i1-1]==0) i1--; + j1 = J[i1-1]+1; + J[i1] = j1; + change(i1,i0,j1,j0); + } + if(m==0) + change(1,0,1,len[1]); + if 
(opt==D_IFDEF) { + for (;;) { +#define c i0 + c = getc(input[0]); + if (c == EOF) + goto end; + putchar(c); + } +#undef c + } + if (anychange && (opt == D_CONTEXT || opt == D_UNIFIED)) + dump_context_vec(); +end: fclose(input[0]); + fclose(input[1]); +} + +static int +allblank(off_t *f, long a, long b, FILE *lb) +{ + long i; + + if (a > b) + return 1; + for (i = a; i <= b; i++) + if (f[i]-f[i-1] != 1) + return 0; + return 1; +} + +/* + * The following struct is used to record change information when + * doing a "context" diff. (see routine "change" to understand the + * highly mnemonic field names) + */ +struct context_vec { + long a; /* start line in old file */ + long b; /* end line in old file */ + long c; /* start line in new file */ + long d; /* end line in new file */ +}; + +static struct context_vec *context_vec_start, + *context_vec_end, + *context_vec_ptr; + +#define MAX_CONTEXT 129 + +/* indicate that there is a difference between lines a and b of the from file + to get to lines c to d of the to file. + If a is greater than b then there are no lines in the from file involved + and this means that there were lines appended (beginning at b). + If c is greater than d then there are lines missing from the to file. 
+*/ +static void +change(long a,long b,long c,long d) +{ + int ch; + struct stat stbuf; + + if (opt != D_IFDEF && a>b && c>d) + return; + if (Bflag && allblank(ixold,a,b,input[0]) && + allblank(ixnew,c,d,input[1])) + return; + if (anychange == 0) { + anychange = 1; + if(opt == D_CONTEXT || opt == D_UNIFIED) { + if (opt == D_CONTEXT) { + printf("*** %s\t", file1); + stat(file1, &stbuf); + printf("%s--- %s\t", + ctime(&stbuf.st_mtime), file2); + stat(file2, &stbuf); + printf("%s", ctime(&stbuf.st_mtime)); + } else { /* opt == D_UNIFIED */ + printf("--- %s\t", file1); + stat(file1, &stbuf); + printf("%s+++ %s\t", + ctime(&stbuf.st_mtime), file2); + stat(file2, &stbuf); + printf("%s", ctime(&stbuf.st_mtime)); + } + + context_vec_start = talloc(MAX_CONTEXT * + sizeof(*context_vec_start)); + context_vec_end = context_vec_start + MAX_CONTEXT; + context_vec_ptr = context_vec_start - 1; + } + } + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + if(opt == D_CONTEXT || opt == D_UNIFIED) { + /* + * if this new change is within 'context' lines of + * the previous change, just add it to the change + * record. If the record is full or if this + * change is more than 'context' lines from the previous + * change, dump the record, reset it & add the new change. 
+ */ + if ( context_vec_ptr >= context_vec_end-1 || + ( context_vec_ptr >= context_vec_start && + a > (context_vec_ptr->b + 2*context) && + c > (context_vec_ptr->d + 2*context) ) ) + dump_context_vec(); + + context_vec_ptr++; + context_vec_ptr->a = a; + context_vec_ptr->b = b; + context_vec_ptr->c = c; + context_vec_ptr->d = d; + return; + } + switch (opt) { + + case D_NORMAL: + case D_EDIT: + range(a,b,","); + putchar(a>b?'a':c>d?'d':'c'); + if(opt==D_NORMAL) + range(c,d,","); + putchar('\n'); + break; + case D_REVERSE: + putchar(a>b?'a':c>d?'d':'c'); + range(a,b," "); + putchar('\n'); + break; + case D_NREVERSE: + if (a>b) + printf("a%ld %ld\n",b,d-c+1); + else { + printf("d%ld %ld\n",a,b-a+1); + if (!(c>d)) + /* add changed lines */ + printf("a%ld %ld\n",b, d-c+1); + } + break; + } + if(opt == D_NORMAL || opt == D_IFDEF) { + fetch(ixold,a,b,input[0],"< ", 1); + if(a<=b&&c<=d && opt == D_NORMAL) + prints("---\n"); + } + fetch(ixnew,c,d,input[1],opt==D_NORMAL?"> ":"", 0); + if ((opt ==D_EDIT || opt == D_REVERSE) && c<=d) + prints(".\n"); + if (inifdef) { + fprintf(stdout, "#endif %s\n", endifname); + inifdef = 0; + } +} + +static void +range(long a,long b,const char *separator) +{ + printf("%ld", a>b?b:a); + if(a<b || opt==D_UNIFIED) { + printf("%s%ld", separator, opt==D_UNIFIED ? b-a+1 : b); + } +} + +static void +fetch(off_t *f,long a,long b,FILE *lb,const char *s,int oldfile) +{ + register long i, j; + register int c; + register long col; + register long nc; + int oneflag = (*ifdef1!='\0') != (*ifdef2!='\0'); + + /* + * When doing #ifdef's, copy down to current line + * if this is the first file, so that stuff makes it to output. + */ + if (opt == D_IFDEF && oldfile){ + off_t curpos = ftello(lb); + /* print through if append (a>b), else to (nb: 0 vs 1 orig) */ + nc = f[a>b? 
b : a-1 ] - curpos; + for (i = 0; i < nc; i++) { + c = getc(lb); + if (c == EOF) + break; + putchar(c); + } + } + if (a > b) + return; + if (opt == D_IFDEF) { + if (inifdef) + fprintf(stdout, "#else %s%s\n", oneflag && oldfile==1 ? "!" : "", ifdef2); + else { + if (oneflag) { + /* There was only one ifdef given */ + endifname = ifdef2; + if (oldfile) + fprintf(stdout, "#ifndef %s\n", endifname); + else + fprintf(stdout, "#ifdef %s\n", endifname); + } + else { + endifname = oldfile ? ifdef1 : ifdef2; + fprintf(stdout, "#ifdef %s\n", endifname); + } + } + inifdef = 1+oldfile; + } + + for(i=a;i<=b;i++) { + fseeko(lb,f[i-1],SEEK_SET); + nc = f[i]-f[i-1]; + if (opt != D_IFDEF) + prints(s); + col = 0; + for(j=0;j<nc;j++) { + c = getc(lb); + if (c == '\t' && tflag) + do + putchar(' '); + while (++col & 7); + else if (c == EOF) { + if (aflag) + printf("\n\\ No newline at " + "end of file\n"); + else + putchar('\n'); + break; + } else { + putchar(c); + col++; + } + } + } + + if (inifdef && !wantelses) { + fprintf(stdout, "#endif %s\n", endifname); + inifdef = 0; + } +} + +#define POW2 /* define only if HALFLONG is 2**n */ +#define HALFLONG 16 +#define low(x) (x&((1L<<HALFLONG)-1)) +#define high(x) (x>>HALFLONG) + +/* + * hashing has the effect of + * arranging line in 7-bit bytes and then + * summing 1-s complement in 16-bit hunks + */ +static int +readhash(register int f) +{ + register int32_t sum; + register unsigned shift; + register int t; + register int space; + int content; + wint_t wt; + int n; + char *cp; + + sum = 1; + space = 0; + content = 0; + if(!bflag && !wflag) { + if(iflag) { + if (mb_cur_max > 1) { + for (shift = 0; cp = wcget(f, &wt, &n), + wt != '\n'; shift += 7) { + if (cp == NULL) { + if (content) { + missnl(f); + break; + } + return (0); + } + content = 1; + sum += (int32_t)chrtran(wt) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + } + } else { + for(shift=0;(t=getc(input[f]))!='\n';shift+=7) { + if(t==EOF) { + if (content) 
{ + missnl(f); + break; + } + return(0); + } + content = 1; + sum += (int32_t)chrtran(t) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + } + } + } else { + for(shift=0;(t=getc(input[f]))!='\n';shift+=7) { + if(t==EOF) { + if (content) { + missnl(f); + break; + } + return(0); + } + content = 1; + sum += (int32_t)t << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + } + } + } else { + if (mb_cur_max > 1) { + for(shift=0;;) { + if ((cp = wcget(f, &wt, &n)) == NULL) { + if (content) { + missnl(f); + break; + } + return(0); + } + content = 1; + switch (wt) { + default: + if (iswspace(wt)) { + space++; + continue; + } + if(space && !wflag) { + shift += 7; + space = 0; + } + sum += (int32_t)chrtran(wt) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + shift += 7; + continue; + case '\n': + break; + } + break; + } + } else { + for(shift=0;;) { + switch(t=getc(input[f])) { + case EOF: + if (content) { + missnl(f); + break; + } + return(0); + default: + content = 1; + if (isspace(t)) { + space++; + continue; + } + if(space && !wflag) { + shift += 7; + space = 0; + } + sum += (int32_t)chrtran(t) << (shift +#ifdef POW2 + &= HALFLONG - 1); +#else + %= HALFLONG); +#endif + shift += 7; + continue; + case '\n': + break; + } + break; + } + } + } + sum = low(sum) + high(sum); + return((int16_t)low(sum) + (int16_t)high(sum)); +} + +static int +asciifile(FILE *f) +{ + char buf[BUFSIZ]; + register int cnt; + register char *cp; + + fseeko(f, 0, SEEK_SET); + cnt = fread(buf, 1, BUFSIZ, f); + cp = buf; + while (--cnt >= 0) + if (*cp++ == '\0') + return (0); + return (1); +} + + +/* dump accumulated "context" diff changes */ +static void +dump_context_vec(void) +{ + register long a, b = 0, c, d = 0; + register char ch; + register struct context_vec *cvp = context_vec_start; + register long lowa, upb, lowc, upd; + register int do_output; + + if ( cvp > context_vec_ptr ) + return; + + lowa = max(1, cvp->a - 
context); + upb = min(len[0], context_vec_ptr->b + context); + lowc = max(1, cvp->c - context); + upd = min(len[1], context_vec_ptr->d + context); + + if (opt == D_UNIFIED) { + printf("@@ -"); + range(lowa, upb, ","); + printf(" +"); + range(lowc, upd, ","); + printf(" @@"); + if (pflag) + pdump(lowa-1); + printf("\n"); + + while (cvp <= context_vec_ptr) { + a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + switch (ch) { + case 'a': + fetch(ixold,lowa,b,input[0]," ", 0); + fetch(ixnew,c,d,input[1],"+",0); + break; + case 'c': + fetch(ixold,lowa,a-1,input[0]," ", 0); + fetch(ixold,a,b,input[0],"-",0); + fetch(ixnew,c,d,input[1],"+",0); + break; + case 'd': + fetch(ixold,lowa,a-1,input[0]," ", 0); + fetch(ixold,a,b,input[0],"-",0); + break; + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b+1, upb, input[0], " ", 0); + } + + if (opt == D_CONTEXT) { + printf("***************"); + if (pflag) + pdump(lowa-1); + printf("\n*** "); + range(lowa,upb,","); + printf(" ****\n"); + + /* + * output changes to the "old" file. The first loop suppresses + * output if there were no changes to the "old" file (we'll see + * the "old" lines as context in the "new" list). + */ + do_output = 0; + for ( ; cvp <= context_vec_ptr; cvp++) + if (cvp->a <= cvp->b) { + cvp = context_vec_start; + do_output++; + break; + } + + if ( do_output ) { + while (cvp <= context_vec_ptr) { + a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'a') + fetch(ixold,lowa,b,input[0]," ", 0); + else { + fetch(ixold,lowa,a-1,input[0]," ", 0); + fetch(ixold,a,b,input[0], + ch == 'c' ? "! 
" : "- ", + 0); + } + lowa = b + 1; + cvp++; + } + fetch(ixold, b+1, upb, input[0], " ", 0); + } + + /* output changes to the "new" file */ + printf("--- "); + range(lowc,upd,","); + printf(" ----\n"); + + do_output = 0; + for (cvp = context_vec_start; cvp <= context_vec_ptr; cvp++) + if (cvp->c <= cvp->d) { + cvp = context_vec_start; + do_output++; + break; + } + + if (do_output) { + while (cvp <= context_vec_ptr) { + a = cvp->a; b = cvp->b; c = cvp->c; d = cvp->d; + + if (a <= b && c <= d) + ch = 'c'; + else + ch = (a <= b) ? 'd' : 'a'; + + if (ch == 'd') + fetch(ixnew,lowc,d,input[1]," ", 0); + else { + fetch(ixnew,lowc,c-1,input[1]," ", 0); + fetch(ixnew,c,d,input[1], + ch == 'c' ? "! " : "+ ", + 0); + } + lowc = d + 1; + cvp++; + } + fetch(ixnew, d+1, upd, input[1], " ", 0); + } + } + + context_vec_ptr = context_vec_start - 1; +} + +/*ARGSUSED*/ +static void +sdone(int signo) +{ + done(); +} + +static char * +wcget(int f, wint_t *wc, int *len) +{ + size_t rest; + int c, i, n; + + i = 0; + rest = mend[f] - mcur[f]; + if (rest && mcur[f] > mbuf[f]) { + do + mbuf[f][i] = mcur[f][i]; + while (i++, --rest); + } else if (incompl[f]) { + incompl[f] = 0; + *wc = WEOF; + mend[f] = mcur[f] = NULL; + return NULL; + } + if (i == 0) { + c = getc(input[f]); + if (c == EOF) { + *wc = WEOF; + mend[f] = mcur[f] = NULL; + return NULL; + } + mbuf[f][i++] = c; + } + if (mbuf[f][0] & 0200) { + while (mbuf[f][i-1] != '\n' && i < mb_cur_max && + incompl[f] == 0) { + c = getc(input[f]); + if (c != EOF) + mbuf[f][i++] = c; + else + incompl[f] = 1; + } + n = mbtowi(wc, mbuf[f], i); + if (n < 0) { + *len = 1; + *wc = WEOF; + } else if (n == 0) { + *len = 1; + *wc = '\0'; + } else + *len = n; + } else { + *wc = mbuf[f][0]; + *len = n = 1; + } + mcur[f] = &mbuf[f][*len]; + mend[f] = &mcur[f][i - *len]; + return mbuf[f]; +} + +static void +missnl(int f) +{ + if (aflag == 0) + fprintf(stderr, "Warning: missing newline at end of file %s\n", + f == 0 ? 
file1 : file2); +} + +/* + * Find and dump the name of the C function with the -p option. The + * search begins at line sa. + */ +static void +pdump(long sa) +{ +#define psize 40 + static char lbuf[psize*MB_LEN_MAX+1]; + char mbuf[MB_LEN_MAX+1]; + int c, i, j; + wchar_t wc; + long a = sa; + + while (a-- > pstart) { + if (saveJ[a+1] == 0) + continue; + fseeko(input[0], ixold[a], SEEK_SET); + i = 0; + do { + if ((c=getc(input[0])) == EOF || c == '\n') + break; + mbuf[i] = c; + } while (++i<mb_cur_max); + if (mb_cur_max>1) { + mbuf[i] = 0; + if (((c=mbuf[0])&0200)==0) + wc = mbuf[0]; + else if (mbtowc(&wc, mbuf, i) < 0) + continue; + } + if ((mb_cur_max>1 && mbuf[0]&0200 ? iswalpha(wc):isalpha(c)) || + c == '$' || c == '_') { + plast = a+1; + for (j = 0; j < i; j++) + lbuf[j] = mbuf[j]; + while (i < sizeof lbuf - 1) { + if ((c=getc(input[0])) == EOF || c == '\n') + break; + lbuf[i++] = c; + } + for (j=0;j<i&&j<psize;) { + if (mb_cur_max==1 || (lbuf[j]&0200) == 0) + j++; + else { + c = mbtowc(NULL, &lbuf[j], i-j); + j += c>0 ? 
c:1; + } + } + lbuf[j] = 0; + break; + } + } + pstart = sa; + if (plast) { + putchar(' '); + for (i = 0; lbuf[i]; i++) + putchar(lbuf[i] & 0377); + } +} diff --git a/diff/diffver.c b/diff/diffver.c @@ -0,0 +1,15 @@ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)diff.sl 1.51 (gritter) 3/15/07"; +/* SLIST */ +/* +diff.c: Sccsid @(#)diff.c 1.24 (gritter) 3/27/05> +diff.h: Sccsid @(#)diff.h 1.15 (gritter) 3/26/05> +diffdir.c: Sccsid @(#)diffdir.c 1.30 (gritter) 1/22/06> +diffreg.c: Sccsid @(#)diffreg.c 1.30 (gritter) 3/15/07> +*/ diff --git a/diff/mkfile b/diff/mkfile @@ -0,0 +1,14 @@ +BIN = diff diffh +diff_OBJ = diff.o diffdir.o diffreg.o diffver.o +diffh_OBJ = diffh.o +INSTALL_BIN = diff +INSTALL_LIB = diffh +INSTALL_MAN1 = diff.1 +LOCAL_CFLAGS = -DDIFFH=\"$LIBDIR/diffh\" +DEPS = libcommon + +<$mkbuild/mk.default + +diff.o: diff.h +diffdir.o: diff.h +diffreg.o: diff.h diff --git a/ed/depsinc.mk b/ed/depsinc.mk @@ -0,0 +1 @@ +ED = $ed_DEPDIR/ed diff --git a/ed/ed.1 b/ed/ed.1 @@ -0,0 +1,1033 @@ +'\" t +.\" Sccsid @(#)ed.1 1.48 (gritter) 6/22/05 +.\" Parts taken from ed(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH ED 1 "6/22/05" "Heirloom Toolchest" "User Commands" +.if t .ds q \(aa +.if n .ds q ' +.SH NAME +ed \- text editor +.SH SYNOPSIS +\fBed\fR [\fB\-\fR\ |\ \fB\-s\fR] [\fB\-p\fI\ prompt\fR] [\fIname\fR] +.SH DESCRIPTION +.I Ed +is the standard text editor. +.PP +If a +.I name +argument is given, +.I ed +simulates an +.I e +command (see below)\| on the named file; that is to say, +the file is read into +.IR ed 's +buffer so that it can be edited. +The optional +.B \- +or +.B \-s +suppresses the printing +of character counts by +.IR e , +.IR r , +and +.I w +commands, +and of the `!' after completion of a shell command. +.PP +With the +.B \-p +option, +the given +.I prompt +string is printed before each command is read. 
+.PP +.I Ed +operates on a copy of any file it is editing; changes made +in the copy have no effect on the file until a +.IR w "" +(write)\| +command is given. +The copy of the text being edited resides +in a temporary file called the +.IR buffer . +.PP +The editor supports format specifications as defined in +.IR fspec (5). +If the terminal is configured to expand tabulators +(as enabled with +.I stty tab3 +or +.IR "stty \-tabs"), +and the first line of the file being edited +contains a format specification, +the +.I t +and +.I s +are interpreted, +that is, tabulators are expanded and lines are truncated +when printing to the terminal. For example, +.RS +<:t\-f s72:> +.sp +.RE +selects FORTRAN format and truncates lines at 72 characters. +No expansion or truncation is performed by +.I ed +when input is typed to the terminal. +.PP +Commands to +.I ed +have a simple and regular structure: zero or +more +.I addresses +followed by a single character +.I command, +possibly +followed by parameters to the command. +These addresses specify one or more lines in the buffer. +Missing addresses are supplied by default. +.PP +In general, only one command may appear on a line. +Certain commands allow the +addition of text to the buffer. +While +.I ed +is accepting text, it is said +to be in +.I "input mode." +In this mode, no commands are recognized; +all input is merely collected. +Input mode is left by typing a period `\fB.\fR' alone at the +beginning of a line. +.PP +.I Ed +supports a limited form of +.I "regular expression" +notation. +A regular expression specifies +a set of strings of characters. +A member of this set of strings is said to be +.I matched +by the regular expression. +In the following specification for regular expressions +the word `character' means any character but newline. +.B /usr/5bin/ed +uses simple regular expressions, +whereas +.BR /usr/5bin/s42/ed , +.BR /usr/5bin/posix/ed , +and +.B /usr/5bin/posix2001/ed +use basic regular expressions. 
+.SS "Simple Regular Expressions" +.IP 1. +Any character except a special character +matches itself. +Special characters are +the regular expression delimiter plus +.RB \e\|[\| . +and sometimes ^\|*\|$. +.IP 2. +A +.B .\& +matches any character. +.IP 3. +A \fB\e\fR followed by any character except a digit +or (\|) {\|} <\|> matches that character. +.IP 4. +A nonempty string +.I s +bracketed +\fB[\fI\|s\|\fB]\fR +(or +\fB[^\fIs\|\fB]\fR) +forms a \fIbracket expression\fR that +matches any character in (or not in) +.I s. +In +.I s, +\e has no special meaning, and ] may only appear as +the first letter. +A substring +\fIa\fB\-\fIb\fR, +with +.I a +and +.I b +in ascending ASCII order, stands for the inclusive +range of ASCII characters. +.IP 5. +A regular expression of form 1-4 followed by \fB*\fR matches a sequence of +0 or more matches of the regular expression. +.IP 6. +A regular expression of form 1-4 +followed by \fB\e{\fIm\fB,\fIn\fB\e}\fR +forms an \fIinterval expression\fR that +matches a sequence of \fIm\fR through \fIn\fR matches, inclusive, +of the regular expression. +The values of \fIm\fR and \fIn\fR must be non-negative +and smaller than 256. +The form \fB\e{\fIm\fB\e}\fR matches exactly \fIm\fR occurrences, +\fB\e{\fIm\fB,\e}\fR matches at least \fIm\fR occurrences. +.IP 7. +The sequence \fB\e<\fR forces the match +to occur only at the beginning of a ``variable'' or ``word''; +that is, either at the beginning of a line, +or just before a letter, digit or underline +and after a character not one of these. +.IP 8. +The sequence \fB\e>\fR matches the end +of a ``variable'' or ``word'', +i.\|e. either the end of the line +or before a character which is neither a letter, +nor a digit, nor the underline character. +.IP 9. +A regular expression, +.I x, +of form 1-11, parenthesized +\fB\e(\fI\|x\|\fB\e)\fR +is called a \fIsubexpression\fR and +matches what +.I x +matches. +.IP 10. 
+A \fB\e\fR followed by a digit +.I n +forms a \fIbackreference\fR and +matches a copy of the string that the +parenthesized regular expression beginning with the +.IR n th +\e( matched. +.IP 11. +A regular expression of form 1-11, +.I x, +followed by a regular expression of form 1-10, +.I y +matches a match for +.I x +followed by a match for +.I y, +with the +.I x +match being as long as possible while still permitting a +.I y +match. +.IP 12. +A regular expression of form 1-11 preceded by \fB^\fR +(or followed by \fB$\fR), is constrained to matches that +begin at the left (or end at the right) end of a line +(\fIanchoring\fR). +.IP 13. +A regular expression of form 1-12 picks out the +longest among the leftmost matches in a line. +.IP 14. +An empty regular expression stands for a copy of the +last regular expression encountered. +.SS "Basic Regular Expressions" +Basic Regular Expressions add the following features +to Simple Regular Expressions: +.IP 15. +The special character \fB*\fR, as described in 5., +and the interval expressions described in 6. +can also be applied to subexpressions +as described in 9. +For POSIX.1-2001 utilities such as +.BR /usr/5bin/posix2001/ed , +subexpressions do not match the empty string +if there is a possible longer match. +.IP 16. +In bracket expressions as described in 4., +the following character sequences are considered special: +.IP +Character class expressions of the form +\fB[:\fIclass\fB:]\fR. +In the C LC_CTYPE locale, +the classes +.sp +.TS +l l l l. +[:alnum:] [:cntrl:] [:lower:] [:space:] +[:alpha:] [:digit:] [:print:] [:upper:] +[:blank:] [:graph:] [:punct:] [:xdigit:] +.TE +.sp +are recognized; +further locale-specific classes may be available. +A character class expression matches any character +that belongs to the given class in the current LC_CTYPE locale. +.IP +Collating symbol expressions of the form +\fB[.\fIc\fB.]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. 
+A collating symbol expression +matches the specified collating symbol. +.IP +Equivalence class expressions of the form +\fB[=\fIc\fB=]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. +An equivalence class expression +matches any character that has the same collating weight +as \fIc\fR. +.PP +Regular expressions are used in addresses to specify +lines and in one command +(see +.I s +below)\| +to specify a portion of a line which is to be replaced. +If it is desired to use one of +the regular expression metacharacters as an ordinary +character, that character may be preceded by `\e'. +This also applies to the character bounding the regular +expression (often `/')\| and to `\e' itself. +.PP +To understand addressing in +.I ed +it is necessary to know that at any time there is a +.I "current line." +Generally speaking, the current line is +the last line affected by a command; however, +the exact effect on the current line +is discussed under the description of +the command. +Addresses are constructed as follows. +.TP +1. +The character `\fB.\fR' addresses the current line. +.TP +2. +The character `\fB$\fR' addresses the last line of the buffer. +.TP +3. +A decimal number +.I n +addresses the +.IR n -th +line of the buffer. +.TP +4. +`\fB\(fm\fIx\fR' addresses the line marked with the name +.IR x , +which must be a lower-case letter. +Lines are marked with the +.I k +command described below. +.TP +5. +A regular expression enclosed in slashes `\fB/\fR' addresses +the line found by searching forward from the current line +and stopping at the first line containing a +string that matches the regular expression. +If necessary the search wraps around to the beginning of the +buffer. +.TP +6. +A regular expression enclosed in queries `\fB?\fR' addresses +the line found by searching backward from the current line +and stopping at the first line containing +a string that matches the regular expression. 
+If necessary +the search wraps around to the end of the buffer. +.TP +7. +An address followed by a plus sign `\fB+\fR' +or a minus sign `\fB\-\fR' followed by a decimal number +specifies that address plus +(resp. minus)\| the indicated number of lines. +The plus sign may be omitted. +.TP +8. +If an address begins with `\fB+\fR' or `\fB\-\fR' +the addition or subtraction is taken with respect to the current line; +e.g. `\-5' is understood to mean `\fB.\fR\-5'. +.TP +9. +If an address ends with `\fB+\fR' or `\fB\-\fR', +then 1 is added (resp. subtracted). +As a consequence of this rule and rule 8, +the address `\-' refers to the line before the current line. +Moreover, +trailing +`+' and `\-' characters +have cumulative effect, so `\-\-' refers to the current +line less 2. +.TP +10. +To maintain compatibility with earlier versions of the editor, +the character `\fB^\fR' in addresses is +equivalent to `\-'. +.PP +Commands may require zero, one, or two addresses. +Commands which require no addresses regard the presence +of an address as an error. +Commands which accept one or two addresses +assume default addresses when insufficient are given. +If more addresses are given than such a command requires, +the last one or two (depending on what is accepted)\| are used. +.PP +Addresses are separated from each other typically by a comma +`\fB,\fR'. +They may also be separated by a semicolon +`\fB;\fR'. +In this case the current line `\fB.\fR' is set to +the previous address before the next address is interpreted. +This feature can be used to determine the starting +line for forward and backward searches (`/', `?')\|. +The second address of any two-address sequence +must correspond to a line following the line corresponding to the first address. +.PP +Omission of the first address causes +the first line to be used with `,', +or the current line with `;', respectively; +if the second address is also omitted, +the last line of the buffer is used. 
+Thus a single `,' specifies the entire contents of the buffer, +and a single `;' specifies the contents +ranging from the current line to the last one. +.PP +In the following list of +.I ed +commands, the default addresses +are shown in parentheses. +The parentheses are not part of +the address, but are used to show that the given addresses are +the default. +.PP +As mentioned, it is generally illegal for more than one +command to appear on a line. +However, most commands may be suffixed by `p', `l', or `n', +in which case +the current line is either +printed, listed, or numbered respectively +in the way discussed below. +.TP 5 +\fR(\|\fI.\|\fR)\fB\|a\fR +.br +.ns +.TP 5 +<text> +.br +.ns +.TP 5 +.B . +.br +The append command reads the given text +and appends it after the addressed line. +`\fB.\fR' is left +on the last line input, if there +were any, otherwise at the addressed line. +Address `0' is legal for this command; text is placed +at the beginning of the buffer. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBc\fR +.br +.ns +.TP 5 +<text> +.br +.ns +.TP 5 +.B . +.br +The change +command deletes the addressed lines, then accepts input +text which replaces these lines. +`\fB.\fR' is left at the last line input; if there were none, +it is left at the line preceding the deleted lines. +For +.BR /usr/5bin/posix2001/ed , +a `0' as the first address is identical to `1'. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBd\fR +The delete command deletes the addressed lines from the buffer. +The line originally after the last line deleted becomes the current line; +if the lines deleted were originally at the end, +the new last line becomes the current line. +.TP 5 +\fBe\ \fIfilename\fR +The edit +command causes the entire contents of the buffer to be deleted, +and then the named file to be read in. +`\fB.\fR' is set to the last line of the buffer. +The number of characters read is typed. 
+`\fIfilename\fR' is remembered for possible use as a default file name +in a subsequent +.I r +or +.I w +command. +If `\fIfilename\fR' is missing, the remembered name is used. +A `\fIfilename\fR' starting with a `\fB!\fR' +causes the output of the shell command following this character +to be read in. +.TP 5 +\fBE\ \fIfilename\fR +This command is the same as +.IR e , +except that no diagnostic results when no +.I w +has been given since the last buffer alteration. +.TP 5 +\fBf\ \fIfilename\fR +The filename command prints the currently remembered file name. +If `\fIfilename\fR' is given, +the currently remembered file name is changed to `\fIfilename\fR'. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBg/\fIregular expression\fB/\fIcommand list\fR +In the global +command, the first step is to mark every line which matches +the given \fIregular expression\fR. +Then for every such line, the +given \fIcommand list\fR is executed +with `\fB.\fR' initially set to that line. +A single command or the first of multiple commands +appears on the same line with the global command. +All lines of a multi-line list except the last line must be ended with `\e'. +.I A, +.I i, +and +.I c +commands and associated input are permitted; +the `\fB.\fR' terminating input mode may be omitted if it would be on the +last line of the command list. +The commands +.I g +and +.I v +are not permitted in the command list. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBG/\fIregular expression\fB/\fR +The interactive global command +first marks every line matching the given \fIregular expression\fR. +Then each line is printed +and a command is read and executed for this line. +A single newline character causes the line to remain unchanged, +an isolated `\fB&\fR' repeats the command given for the previous line. +The command can be terminated by an interrupt signal. +.TP 5 +.B h +This command prints a verbose description for the +last error encountered. 
+.TP +.B H +This command acts like the +.I h +command, +but also causes verbose descriptions to be printed +on all following error conditions. +Another +.I H +turns verbose mode off. +.TP 5 +\fR(\|\fI.\|\fR)\|\fBi\fR +.br +.ns +.TP 5 +<text> +.br +.ns +.TP 5 +.B . +.br +This command inserts the given text before the addressed line. +`\fB.\fR' is left at the last line input, or, if there were none, +at the line before the addressed line. +This command differs from the +.I a +command only in the placement of the +text. +For +.BR /usr/5bin/posix2001/ed , +a `0' as the first address is identical to `1'. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.+1\fR)\|\fBj\fR +This command joins the addressed lines into a single line; +intermediate newlines simply disappear. +`\fB.\fR' is left at the resulting line. +.TP 5 +\fR(\fI.\fR)\|\fBk\fIx\fR +The mark command marks the addressed line with +name +.IR x , +which must be a lower-case letter. +The address form `\(fm\fIx\fR' then addresses this line. +.ne 2.5 +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBl\fR +The list command +prints the addressed lines in an unambiguous way: +.B /usr/5bin/ed +prints +non-graphic control characters in three-digit octal; +.BR /usr/5bin/s42/ed , +.BR /usr/5bin/posix/ed , +and +.B /usr/5bin/posix2001/ed +print control characters as C-style escape sequences +or in three-digit octal. +Long lines are folded. +The +.I l +command may be placed on the same line after any non-i/o +command. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBm\fIa\fR +The move command repositions the addressed lines after the line +addressed by +.IR a . +The last of the moved lines becomes the current line. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBn\fR +This command prints lines preceded by their line numbers. +It otherwise acts like the +.I p +command described below. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBp\fR +The print command prints the addressed lines. +`\fB.\fR' +is left at the last line printed. 
+The +.I p +command +may +be placed on the same line after any non-i/o command. +.TP +.B P +This command causes a prompt to be printed +before following commands are read. +The default prompt is a `*' character, +but can be set with the +.I \-p +command line option (which also enables the prompt). +Another +.I P +disables the prompt. +.TP 5 +.B q +The quit command causes +.I ed +to exit. +No automatic write +of a file is done. +.TP 5 +.B Q +This command is the same as +.I q, +except that no diagnostic results when no +.I w +has been given since the last buffer alteration. +.TP 5 +\fR(\fI$\fR)\|\fBr\ \fIfilename\fR +The read command +reads in the given file after the addressed line. +If no file name is given, +the remembered file name, if any, is used +(see +.I e +and +.I f +commands)\|. +The file name is remembered if there was no +remembered file name already. +Address `0' is legal for +.I r +and causes the +file to be read at the beginning of the buffer. +If the read is successful, the number of characters +read is typed. +`\fB.\fR' is left at the last line read in from the file. +A `filename' starting with a `\fB!\fR' +causes the output of the shell command following this character +to be read in. +.TP 5 +\fR(\|\fI.\fB\|,\|\fI.\fR\|)\|\fBs/\fIregular expression\fB/\fIreplacement\fB/\fR or, +.br +.ns +.TP 5 +\fR(\|\fI.\fB\|,\|\fI.\fR\|)\|\fBs/\fIregular expression\fB/\fIreplacement\fB/g\fR or, +.br +.ns +.TP 5 +\fR(\|\fI.\fB\|,\|\fI.\fR\|)\|\fBs/\fIregular expression\fB/\fIreplacement\fB/\fInumber\fR +The substitute command searches each addressed +line for an occurrence of the specified regular expression. +On each line in which a match is found, +all matched strings are replaced by the replacement specified, +if the global replacement indicator +.RB ` g ' +appears after the command. 
+If the global indicator does not appear, only the first occurrence +of the matched string is replaced; +if the \fInumber\fR indicator is given, +the numbered occurrence is replaced. +It is an error for the substitution to fail on all addressed lines. +Any character other than space or new-line +may be used instead of `/' to delimit the regular expression +and the replacement. +`\fB.\fR' is left at the last line substituted. +.IP +An ampersand +.RB ` & ' +appearing in the replacement +is replaced by the string matching the regular expression. +The special meaning of `&' in this context may be +suppressed by preceding it by +.RB ` \e '. +The characters `\|\fB\e\fIn\fR' +where +.I n +is a digit, +are replaced by the text matched by the +.IR n -th +regular subexpression +enclosed between `\e(' and `\e)'. +When +nested, parenthesized subexpressions +are present, +.I n +is determined by counting occurrences of `\e(' starting from the left. +.IP +A substitution string consisting of a single +.RB ` % ' +causes the string given on the previous substitution to be re-used. +.IP +Lines may be split by substituting new-line characters into them. +The new-line in the +replacement string +must be escaped by preceding it by +.RB ` \e '. +.TP 5 +\fR(\|\fI.\|\fB,\|\fI.\|\fR)\|\fBt\|\fIa\fR +This command acts just like the +.I m +command, except that a copy of the addressed lines is placed +after address +.I a +(which may be 0). +`\fB.\fR' is left on the last line of the copy. +.TP 5 +.B u +The undo command restores +the contents of the buffer +before the last command was executed. +If the undo command is given twice, +the current state is restored. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBv/\fIregular expression\fB/\fIcommand list\fR +This command is the same as the global command +.I g +except that the command list is executed +with `\fB.\fR' initially set to every line +.I except +those +matching the regular expression. 
+.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBV/\fIregular expression\fB/\fR +This command is the same as the interactive global command +.I G +except that the commands are read +with `\fB.\fR' initially set to every line +.I except +those +matching the regular expression. +.TP 5 +\fR(\fI1\fB,\fI$\fR)\|\fBw\ \fIfilename\fR +.br +The write command writes the addressed lines onto +the given file. +If the file does not exist, +it is created mode 666 (readable and writable by everyone)\|. +The file name is remembered if there was no +remembered file name already. +If no file name is given, +the remembered file name, if any, is used +(see +.I e +and +.I f +commands)\|. +`\fB.\fR' is unchanged. +If the command is successful, the number of characters written is +printed. +A `filename' starting with a `\fB!\fR' +causes the string following this character +to be executed as a shell command +with the addressed lines as standard input. +.TP +\fR(\fI1\fB,\fI$\fR)\fBW\ \fIfilename\fR +This command is the same as +.I w, +except that the addressed lines are appended to the file. +.TP 5 +\fR(\fI$\fR)\|\fB=\fR +The line number of the addressed line is typed. +`\fB.\fR' is unchanged by this command. +.TP 5 +\fB!\fR<shell command> +The remainder of the line after the `!' is sent +to +.IR sh (1) +to be interpreted as a command. +.RB ` . ' +is unchanged. +If the command starts with a +.RB ` ! ', +the previous command is inserted. +A +.RB ` % ' +causes the current file name to be inserted. +.TP 5 +\fR(\|\fI.+1\fR)\|<newline> +An address alone on a line causes the addressed line to be printed. +A blank line alone is equivalent to `.+1p'; it is useful +for stepping through text. +.PP +The following commands are extensions: +.TP 5 +\fR(\|\fI.\|\fR)\fB\|b\fR[\fIcount\fR] +Prints a screenful of lines, +starting at the addressed one, +and browses forward in the buffer by this amount. 
+With the optional +.I count +argument, the screen size for this and following +.I b +commands is set to the given number of lines. +.TP 5 +.B help +Causes a summary of +.I ed +commands along with short descriptions +to be printed on the terminal. +.TP 5 +.B N +Makes the +.I p +command behave like the +.I n +command and vice-versa. +If given a second time, +the original semantics are restored. +.TP 5 +\fR(\|\fI.\|\fR)\fB\|o\fR[\fIcount\fR] +Prints a screenful of lines centered around the addressed one. +The current line is not changed. +With the optional +.I count +argument, the number of lines printed above and below +for this and following +.I o +commands is set to the given number. +.TP 5 +.B z +Performs the same actions as a +.I w +command followed by a +.I q +command. +.PP +If an interrupt signal is sent, +.I ed +prints a `?' and returns to its command level. +.PP +An input line that consists exactly of the two characters `\e.' +causes a period `.' to be inserted with the +.IR a , +.IR c , +and +.IR i +commands +in +.B /usr/5bin/ed +and +.BR /usr/5bin/s42/ed . +.PP +Some size limitations: +The maximum number of bytes in the buffer +corresponds to the address size; +on machines with 32-bit addressing, +it is 2\ G bytes, +with 64-bit addressing, +it is 9\ E bytes. +The limit on the number of lines depends on the amount of core: +each line takes 2 words. +.PP +If a line contains a NUL character, +regular expressions cannot match beyond this character. +A substitute command deletes a NUL +and all following characters on the line. +NUL characters in command input are discarded. +If an input file does not end with a newline, +.I ed +prints a message and appends one. +.PP +Omission of the `/' character +following the regular expression or the replacement string +to the global and substitute commands +causes the affected lines to be printed. 
+Thus the following commands have the same effect: +.RS +g/pattern g/pattern/p +.br +s/pattern/repl s/pattern/repl/p +.br +s/pattern/ s/pattern//p +.RE +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both simple and basic regular expressions, +the availability and composition of character classes +in basic regular expressions, +and the set of printable characters for the +.I l +command. +.TP +.B TMPDIR +Determines the location of the temporary file +if it contains the name of an accessible directory. +.SH FILES +/var/tmp/e* +.br +/tmp/e* +.br +ed.hup: work is saved here if terminal hangs up +.SH "SEE ALSO" +B. W. Kernighan, +.I +A Tutorial Introduction to the ED Text Editor +.br +B. W. Kernighan, +.I Advanced editing on UNIX +.br +bfs(1), +grep(1), +sed(1), +sh(1) +.SH DIAGNOSTICS +`?name' for inaccessible file; +`?' for +errors in commands, +possibly followed by a verbose description +(see the description for the +.I h +and +.I H +commands above). +.PP +To protect against throwing away valuable work, +a +.I q +or +.I e +command is considered to be in error, unless a +.I w +has occurred since the last buffer change. +A second +.I q +or +.I e +will be obeyed regardless. +.SH NOTES +A +.I !\& +command cannot be subject to a +.I g +command. +.PP +The LC_COLLATE variable has currently no effect. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. 
+.PP +For portable programs, restrict textual data +to the US-ASCII character set, +set the LC_CTYPE and LC_COLLATE variables to `C' or `POSIX', +and use the constructs in the second column +instead of the character class expressions as follows: +.RS +.sp +.TS +l l. +[[:alnum:]] [0\-9A\-Za\-z] +[[:alpha:]] [A\-Za\-z] +[[:blank:]] [\fI<tab><space>\fR] +[[:cntrl:]] [^\fI<space>\fR\-~] +[[:digit:]] [0\-9] +[[:graph:]] [!\-~] +[[:lower:]] [a\-z] +[[:print:]] [\fI<space>\fR\-~] +[[:punct:]] [!\-/:\-@[\-`{\-~] +[[:space:]] [\fI<tab><vt><ff><cr><space>\fR] +[[:upper:]] [A\-Z] +[[:xdigit:]] [0\-9a\-fA\-F] +.TE +.sp +.RE +.IR <tab> , +.IR <space> , +.IR <vt> , +.IR <ff> , +and +.I <cr> +indicate inclusion of +a literal tabulator, space, vertical tabulator, formfeed, +or carriage return character, respectively. +Do not put the +.IR <vt> , +.IR <ff> , +and +.I <cr> +characters into the range expression for the +.I space +class unless you actually want to match these characters. diff --git a/ed/ed.c b/ed/ed.c @@ -0,0 +1,2822 @@ +/* + * Editor + */ + +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, July 2003. + */ +/* from Unix 32V /usr/src/cmd/ed.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. 
+ * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SU3) +static const char sccsid[] USED = "@(#)ed_su3.sl 1.99 (gritter) 7/27/06"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)ed_sus.sl 1.99 (gritter) 7/27/06"; +#elif defined (S42) +static const char sccsid[] USED = "@(#)ed_s42.sl 1.99 (gritter) 7/27/06"; +#else /* !SU3, !SUS, !S42 */ +static const char sccsid[] USED = "@(#)ed.sl 1.99 (gritter) 7/27/06"; +#endif /* !SU3, !SUS, !S42 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <stdlib.h> +#include <signal.h> +#include "sigset.h" +#include <termios.h> +#include <setjmp.h> +#include <libgen.h> +#include <inttypes.h> +#include <locale.h> +#include <wchar.h> +#include <ctype.h> +#include <wctype.h> +#include <limits.h> +#include <termios.h> +static int FNSIZE; +static int LBSIZE; +static int RHSIZE; +#define ESIZE 2048 +static int GBSIZE; +#undef EOF +#define EOF -1 +#define puts(s) xxputs(s) +#define getline(t, n) xxgetline(t, n) + +#if (LONG_MAX > 017777777777L) +#define MAXCNT 0777777777777777777777L /* 2^63-1 */ +#else +#define MAXCNT 017777777777L /* 2^31-1 */ +#endif +#define BLKMSK (MAXCNT>>8) /* was 0377 */ + +#define READ 0 +#define WRITE 1 +#define EXIST 2 + +struct tabulator { + struct tabulator *t_nxt; /* next list element */ + const char *t_str; /* tabulator string */ + int t_tab; /* tab stop position */ + int t_rep; /* repetitive tab count */ +}; + +static int peekc; +static int lastc; +static char *savedfile; +static char *file; +static struct stat fstbuf; +static char *linebuf; +static char *rhsbuf; +static char expbuf[ESIZE + 4]; +static long *zero; +static long *undzero; +static long *dot; +static long *unddot; +static long *dol; +static 
long *unddol; +static long *addr1; +static long *addr2; +static char *genbuf; +static long count; +static char *linebp; +static int ninbuf; +static int io; +static int ioeof; +static int pflag; +static char *wrtemp; +static uid_t myuid; +static void (*oldhup)(int); +static void (*oldquit)(int); +static void (*oldpipe)(int); +static int vflag = 1; +static int listf; +static int numbf; +static char *globp; +static int tfile = -1; +static long tline; +static char tfname[64]; +static char ibuff[512]; +static int iblock = -1; +static char obuff[512]; +static int oblock = -1; +static int ichanged; +static int nleft; +static long *names; +static long *undnames; +static int anymarks; +static int subnewa; +static int fchange; +static int wrapp; +static unsigned nlall = 128; +static const char *progname; +static const char *prompt = "*"; +static int Pflag; +static int prhelp; +static const char *prvmsg; +static int lastsig; +static int pipid = -1; +static int readop; +static int status; +static int mb_cur_max; +static int needsub; +static int insub; +static struct tabulator *tabstops; +static int maxlength; +static int rspec; +static int Nflag; +static int bcount = 22; +static int ocount = 11; + +static jmp_buf savej; + +static void usage(char, int); +static void commands(void); +static long *address(void); +static void setdot(void); +static void setall(void); +static void setnoaddr(void); +static void nonzero(void); +static void newline(void); +static void filename(int); +static void exfile(void); +static void onintr(int); +static void onhup(int); +static void onpipe(int); +static void error(const char *); +static void error2(const char *, const char *); +static void errput(const char *, const char *); +static int getchr(void); +static int gettty(void); +static long getnum(void); +static int getfile(void); +static void putfile(void); +static int append(int (*)(void), long *); +static void callunix(void); +static char *readcmd(void); +static void quit(int); +static void 
delete(void); +static void rdelete(long *, long *); +static void gdelete(void); +static char *getline(long, int); +static int putline(void); +static char *getblock(long, long); +static void blkio(long, char *, int); +static void init(void); +static void global(int, int); +static void globrd(char **, int); +static void join(void); +static void substitute(int); +static int compsub(void); +static int getsub(void); +static int dosub(int); +static int place(int, const char *, const char *); +static void move(int); +static void reverse(long *, long *); +static int getcopy(void); +static int execute(int, long *, int); +static void cmplerr(int); +static void doprnt(long *, long *); +static void putd(long); +static void puts(const char *); +static void nlputs(const char *); +static void list(const char *); +static int lstchr(int); +static void putstr(const char *); +static void putchr(int); +static void checkpoint(void); +static void undo(void); +static int maketf(int); +static int creatf(const char *); +static int sopen(const char *, int); +static void sclose(int); +static void fspec(const char *); +static const char *ftok(const char **); +static struct tabulator *tabstring(const char *); +static void freetabs(void); +static void expand(const char *); +static void growlb(const char *); +static void growrhs(const char *); +static void growfn(const char *); +static void help(void); + +#define INIT +#define GETC() getchr() +#define UNGETC(c) (peekc = c) +#define PEEKC() (peekc = getchr()) +#define RETURN(c) return c +#define ERROR(c) cmplerr(c) +static wint_t GETWC(char *); + +#if defined (SUS) || defined (S42) || defined (SU3) + +#include <regex.h> + +#define NBRA 9 + +static char *braslist[NBRA]; +static char *braelist[NBRA]; +static char *loc1, *loc2, *locs; +static int nbra; +static int circf; +static int nodelim; + +static char *compile(char *, char *, const char *, int); +static int step(const char *, const char *); + +#else /* !SUS, !S42, !SU3 */ + +#include <regexp.h> 
+ +#endif /* !SUS, !S42, !SU3 */ + +int +main(int argc, char **argv) +{ + register int i; + void (*oldintr)(int); + + progname = basename(argv[0]); +#if defined (SUS) || defined (S42) || defined (SU3) + setlocale(LC_COLLATE, ""); +#endif + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + myuid = getuid(); + oldquit = sigset(SIGQUIT, SIG_IGN); + oldhup = sigset(SIGHUP, SIG_IGN); + oldintr = sigset(SIGINT, SIG_IGN); + if (sigset(SIGTERM, SIG_IGN) != SIG_IGN) + sigset(SIGTERM, quit); + oldpipe = sigset(SIGPIPE, onpipe); + argv++; + while (argc > 1 && **argv=='-') { + if ((*argv)[1] == '\0') { + vflag = 0; + goto next; + } else if ((*argv)[1] == '-' && (*argv)[2] == '\0') { + argv++; + argc--; + break; + } + letter: switch((*argv)[1]) { + + case 's': + vflag = 0; + break; + + case 'q': + sigset(SIGQUIT, SIG_DFL); + vflag = 1; + break; + + case 'p': + if ((*argv)[2]) + prompt = &(*argv)[2]; + else if (argv[1]) { + prompt = argv[1]; + argv++; + argc--; + } else + usage((*argv)[1], 1); + Pflag = 1; + goto next; + + default: + usage((*argv)[1], 0); + } + if ((*argv)[2]) { + (*argv)++; + goto letter; + } + next: argv++; + argc--; + } + + growfn("no space"); + if (argc>1) { + i = -1; + do + if (++i >= FNSIZE) + growfn("maximum of characters in " + "file names reached"); + while (savedfile[i] = (*argv)[i]); + globp = "e"; + } + names = malloc(26*sizeof *names); + undnames = malloc(26*sizeof *undnames); + zero = malloc(nlall*sizeof *zero); + if ((undzero = malloc(nlall*sizeof *undzero)) == NULL) + puts("no memory for undo"); + growlb("no space"); + growrhs("no space"); + init(); + if (oldintr != SIG_IGN) + sigset(SIGINT, onintr); + if (oldhup != SIG_IGN) + sigset(SIGHUP, onhup); + setjmp(savej); + if (lastsig) { + sigrelse(lastsig); + lastsig = 0; + } + commands(); + quit(0); + /*NOTREACHED*/ + return 0; +} + +static void +usage(char c, int misarg) +{ + if (c) { + write(2, progname, strlen(progname)); + if (misarg) + write(2, ": option requires an argument -- ", 33); + 
else + write(2, ": illegal option -- ", 20); + write(2, &c, 1); + write(2, "\n", 1); + } + write(2, "usage: ", 7); + write(2, progname, strlen(progname)); + write(2, " [- | -s] [-p string] [file]\n", 29); + exit(2); +} + +static void +commands(void) +{ + register long *a1; + register int c; + int n; + + for (;;) { + if (pflag) { + pflag = 0; + addr1 = addr2 = dot; + goto print; + } + if (Pflag && globp == NULL) + write(1, prompt, strlen(prompt)); + addr1 = 0; + addr2 = 0; + switch (c = getchr()) { + case ',': + case ';': + addr2 = c == ',' ? zero+1 : dot; + if (((peekc = getchr()) < '0' || peekc > '9') && + peekc != ' ' && peekc != '\t' && + peekc != '+' && peekc != '-' && + peekc != '^' && peekc != '?' && + peekc != '/' && peekc != '$' && + peekc != '.' && peekc != '\'') { + addr1 = addr2; + a1 = dol; + goto loop; + } + break; + default: + peekc = c; + } + do { + addr1 = addr2; + if ((a1 = address())==0) { + c = getchr(); + break; + } + loop: addr2 = a1; + if ((c=getchr()) == ';') { + c = ','; + dot = a1; + } + } while (c==','); + if (addr1==0) + addr1 = addr2; + switch(c) { + + case 'a': + setdot(); + newline(); + checkpoint(); + append(gettty, addr2); + continue; + + case 'c': +#if defined (SU3) + if (addr1 == zero && addr1+1 <= dol) { + if (addr1 == addr2) + addr2++; + addr1++; + } +#endif /* SU3 */ + delete(); + append(gettty, addr1-1); +#if defined (SUS) || defined (SU3) + if (dot == addr1-1 && addr1 <= dol) + dot = addr1; +#endif /* SUS || SU3 */ + continue; + + case 'd': + delete(); + continue; + + case 'E': + fchange = 0; + c = 'e'; + case 'e': + setnoaddr(); + if (vflag && fchange) { + fchange = 0; + error("warning: expecting `w'"); + } + filename(c); + init(); + addr2 = zero; + goto caseread; + + case 'f': + setnoaddr(); + filename(c); + puts(savedfile); + continue; + + case 'g': + global(1, 0); + continue; + + case 'G': + global(1, 1); + continue; + + case 'H': + prhelp = !prhelp; + /*FALLTHRU*/ + + case 'h': + if ((peekc = getchr()) == 'e') { + peekc = 
0; + if (getchr() != 'l' || getchr() != 'p' || + getchr() != '\n') + error("illegal suffix"); + setnoaddr(); + help(); + continue; + } + newline(); + setnoaddr(); + if (prvmsg) + puts(prvmsg); + continue; + + case 'i': + setdot(); +#if defined (SU3) + if (addr1 == zero) { + if (addr1 == addr2) + addr2++; + addr1++; + if (dol != zero) + nonzero(); + } else +#endif /* SU3 */ + nonzero(); + newline(); + checkpoint(); + append(gettty, addr2-1); + if (dot == addr2-1) + dot++; + continue; + + + case 'j': + if (addr2==0) { + addr1 = dot; + addr2 = dot+1; + } + setdot(); + newline(); + nonzero(); + checkpoint(); + if (addr1 != addr2) + join(); + continue; + + case 'k': + if ((c = getchr()) < 'a' || c > 'z') + error("mark not lower case"); + newline(); + setdot(); + nonzero(); + names[c-'a'] = *addr2 & ~01; + anymarks |= 01; + continue; + + case 'm': + move(0); + continue; + + case '\n': + if (addr2==0) + addr2 = dot+1; + addr1 = addr2; + goto print; + + case 'n': + numbf = 1; + newline(); + goto print; + + case 'N': + newline(); + setnoaddr(); + Nflag = !Nflag; + continue; + + case 'b': + case 'o': + n = getnum(); + newline(); + setdot(); + nonzero(); + if (n >= 0) { + if (c == 'b') + bcount = n; + else + ocount = n; + } + if (c == 'b') { + a1 = addr2+bcount > dol ? dol : addr2 + bcount; + doprnt(addr1, a1); + dot = a1; + } else { + a1 = addr2+ocount > dol ? 
dol : addr2 + ocount; + doprnt(addr2-ocount<zero+1?zero+1:addr2-ocount, a1); + dot = addr2; + } + continue; + + case 'l': + listf++; + case 'p': + newline(); + print: + setdot(); + nonzero(); + doprnt(addr1, addr2); + dot = addr2; + continue; + + case 'P': + setnoaddr(); + newline(); + Pflag = !Pflag; + continue; + + case 'Q': + fchange = 0; + case 'q': + setnoaddr(); + newline(); + quit(0); + + case 'r': + filename(c); + caseread: + if ((io = sopen(file, READ)) < 0) { + lastc = '\n'; + error2("cannot open input file", file); + } + ioeof = 0; + setall(); + ninbuf = 0; + if (c == 'r') + checkpoint(); + n = zero != dol; + rspec = (c == 'e' || !n) && file[0] != '!'; + append(getfile, addr2); + rspec = 0; + exfile(); + fchange = n; + continue; + + case 's': + setdot(); + nonzero(); + substitute(globp!=0); + continue; + + case 't': + move(1); + continue; + + case 'u': + setdot(); + newline(); + if (unddot == NULL) + error("nothing to undo"); + undo(); + continue; + + case 'v': + global(0, 0); + continue; + + case 'V': + global(0, 1); + continue; + + case 'W': + wrapp++; + case 'w': + write: + setall(); + if (zero != dol) + nonzero(); + filename(c); + if(!wrapp || + ((io = open(file,O_WRONLY|O_APPEND)) == -1) || + ((lseek(io, 0, SEEK_END)) == -1)) { + struct stat st; + if (lstat(file, &st) == 0 && + (st.st_mode&S_IFMT) == S_IFREG && + st.st_nlink == 1 && + (myuid==0 || myuid==st.st_uid)) { + char *cp, *tp; + int nio; + if ((io = sopen(file, EXIST)) < 0) + error("cannot create output file"); + if ((wrtemp = malloc(strlen(file)+8)) == NULL) + error("out of memory"); + for (cp = file, tp = wrtemp; *cp; cp++) + *tp++ = *cp; + while (tp > wrtemp && tp[-1] != '/') + tp--; + for (cp = "\7XXXXXX"; *cp; cp++) + *tp++ = *cp; + *tp = '\0'; + if ((nio = mkstemp(wrtemp)) < 0) { + free(wrtemp); + wrtemp = NULL; + ftruncate(io, 0); + } else { + close(io); + io = nio; + } + } else { + if ((io = sopen(file, WRITE)) < 0) + error("cannot create output file"); + } + } + if (zero != dol) { + 
ioeof = 0; + wrapp = 0; + putfile(); + } + exfile(); + if (addr1==zero+1 && addr2==dol || addr1==addr2 && dol==zero) + fchange = 0; + if (c == 'z') + quit(0); + continue; + + case 'z': + if ((peekc=getchr()) != '\n') + error("illegal suffix"); + setnoaddr(); + goto write; + + case '=': + setall(); + newline(); + putd((addr2-zero)&MAXCNT); + putchr('\n'); + continue; + + case '!': + callunix(); + continue; + + case EOF: + return; + + } + error("unknown command"); + } +} + +static long * +address(void) +{ + register long *a1; + register int minus, c; + int n, relerr; + + minus = 0; + a1 = 0; + for (;;) { + c = getchr(); + if ('0'<=c && c<='9') { + n = 0; + do { + n *= 10; + n += c - '0'; + } while ((c = getchr())>='0' && c<='9'); + peekc = c; + if (a1==0) + a1 = zero; + if (minus<0) + n = -n; + a1 += n; + minus = 0; + continue; + } + relerr = 0; + if (a1 || minus) + relerr++; + switch(c) { + case ' ': + case '\t': + continue; + + case '+': + minus++; + if (a1==0) + a1 = dot; + continue; + + case '-': + case '^': + minus--; + if (a1==0) + a1 = dot; + continue; + + case '?': + case '/': + compile(NULL, expbuf, &expbuf[ESIZE], c); + a1 = dot; + for (;;) { + if (c=='/') { + a1++; + if (a1 > dol) + a1 = zero; + } else { + a1--; + if (a1 < zero) + a1 = dol; + } + if (execute(0, a1, 0)) + break; + if (a1==dot) + error("search string not found"); + } + break; + + case '$': + a1 = dol; + break; + + case '.': + a1 = dot; + break; + + case '\'': + if ((c = getchr()) < 'a' || c > 'z') + error("mark not lower case"); + for (a1=zero; a1<=dol; a1++) + if (names[c-'a'] == (*a1 & ~01)) + break; + break; + + default: + peekc = c; + if (a1==0) + return(0); + a1 += minus; + if (a1<zero || a1>dol) + error("line out of range"); + return(a1); + } + if (relerr) + error("bad number"); + } +} + +static void +setdot(void) +{ + if (addr2 == 0) + addr1 = addr2 = dot; + if (addr1 > addr2) + error("bad range"); +} + +static void +setall(void) +{ + if (addr2==0) { + addr1 = zero+1; + addr2 = dol; + 
if (dol==zero) + addr1 = zero; + } + setdot(); +} + +static void +setnoaddr(void) +{ + if (addr2) + error("Illegal address count"); +} + +static void +nonzero(void) +{ + if (addr1<=zero || addr2>dol) + error("line out of range"); +} + +static void +newline(void) +{ + register int c; + + if ((c = getchr()) == '\n') + return; + if (c=='p' || c=='l' || c=='n') { + pflag++; + if (c=='l') + listf++; + else if (c=='n') + numbf = 1; + if (getchr() == '\n') + return; + } + error("illegal suffix"); +} + +static void +filename(int comm) +{ + register char *p1, *p2; + register int c, i; + + count = 0; + c = getchr(); + if (c=='\n' || c==EOF) { + p1 = savedfile; + if (*p1==0 && comm!='f') + error("illegal or missing filename"); + p2 = file; + while (*p2++ = *p1++) + ; + return; + } + if (c!=' ') + error("no space after command"); + while ((c = getchr()) == ' ') + ; + if (c=='\n') + error("illegal or missing filename"); + i = 0; + do { + if (i >= FNSIZE) + growfn("maximum of characters in file names reached"); + file[i++] = c; + if (c==' ' && file[0] != '!' 
|| c==EOF) + error("illegal or missing filename"); + } while ((c = getchr()) != '\n'); + file[i++] = 0; + if ((savedfile[0]==0 || comm=='e' || comm=='f') && file[0] != '!') { + p1 = savedfile; + p2 = file; + while (*p1++ = *p2++) + ; + } +} + +static void +exfile(void) +{ + sclose(io); + io = -1; + if (wrtemp) { + extern int rename(const char *, const char *); + if (rename(wrtemp, file) < 0) + error("cannot create output file"); + if (myuid == 0) + chown(file, fstbuf.st_uid, fstbuf.st_gid); + chmod(file, fstbuf.st_mode & 07777); + free(wrtemp); + wrtemp = NULL; + } + if (vflag) { + putd(count); + putchr('\n'); + } +} + +static void +onintr(int signo) +{ + lastsig = signo; + putchr('\n'); + lastc = '\n'; + if (readop) { + puts("\007read may be incomplete - beware!\007"); + fchange = 0; + } + error("interrupt"); +} + +static void +onhup(int signo) +{ + if (dol > zero && fchange) { + addr1 = zero+1; + addr2 = dol; + io = creat("ed.hup", 0666); + if (io < 0) { + char *home = getenv("HOME"); + if (home) { + char *fn = malloc(strlen(home) + 10); + if (fn) { + strcpy(fn, home); + strcat(fn, "/ed.hup"); + io = creat(fn, 0666); + } + } + } + if (io >= 0) + putfile(); + } + fchange = 0; + status = 0200 | signo; + quit(0); +} + +static void +onpipe(int signo) +{ + lastsig = signo; + error("write or open on pipe failed"); +} + +static void +error(const char *s) +{ + error2(s, NULL); +} + +static void +error2(const char *s, const char *fn) +{ + register int c; + + wrapp = 0; + listf = 0; + numbf = 0; + errput(s, fn); + count = 0; + if (lseek(0, 0, SEEK_END) > 0) + status = 2; + pflag = 0; + if (globp) + lastc = '\n'; + globp = 0; + peekc = lastc; + if(lastc) + while ((c = getchr()) != '\n' && c != EOF) + ; + if (io > 0) { + sclose(io); + io = -1; + } + if (wrtemp) { + unlink(wrtemp); + free(wrtemp); + wrtemp = NULL; + } + longjmp(savej, 1); +} + +static void +errput(const char *s, const char *fn) +{ + prvmsg = s; + if (fn) { + putchr('?'); + puts(fn); + } else + puts("?"); + if 
(prhelp) + puts(s); +} +
+/*
+ * Return the next input character and remember it in lastc.
+ *
+ * Sources are consulted in this order:
+ *   1. a character pushed back in peekc (peekc is cleared),
+ *   2. the in-memory command string globp; when its terminating NUL is
+ *      reached globp is reset to 0 and EOF is returned,
+ *   3. one byte read from file descriptor 0; EOF on end of file or error.
+ */
+static int
+getchr(void)
+{
+	char c;
+	if (lastc=peekc) {
+		peekc = 0;
+		return(lastc);
+	}
+	if (globp) {
+		if ((lastc = *globp++) != 0)
+			return(lastc);
+		globp = 0;
+		return(EOF);
+	}
+	if (read(0, &c, 1) <= 0)
+		return(lastc = EOF);
+	/* NOTE(review): c is a plain char; where char is signed a 0xFF input
+	 * byte would compare equal to EOF here -- confirm this is intended. */
+	lastc = c;
+	return(lastc);
+}
+
+/*
+ * Read one line of text through getchr() into linebuf, NUL-terminated and
+ * without the newline.  NUL input bytes are dropped.
+ *
+ * Returns 0 when a line was stored, EOF when input ended or when the line
+ * consists of a single "." (the end-of-text marker).  Unless SUS or SU3 is
+ * defined, a line consisting of "\." is stored as a literal ".".
+ */
+static int
+gettty(void)
+{
+	register int c, i;
+	register char *gf;
+
+	i = 0;
+	gf = globp;
+	while ((c = getchr()) != '\n') {
+		if (c==EOF) {
+			/* reading from a command string: push EOF back so
+			 * the next getchr() reports it again */
+			if (gf)
+				peekc = c;
+			return(c);
+		}
+		if (c == 0)
+			continue;
+		if (i >= LBSIZE)
+			growlb("line too long");
+		linebuf[i++] = c;
+	}
+	if (i >= LBSIZE-2)
+		growlb("line too long");
+	linebuf[i++] = 0;
+	if (linebuf[0]=='.' && linebuf[1]==0)
+		return(EOF);
+#if !defined (SUS) && !defined (SU3)
+	if (linebuf[0]=='\\' && linebuf[1]=='.' && linebuf[2]==0)
+		linebuf[0]='.', linebuf[1]=0;
+#endif
+	return(0);
+}
+
+/*
+ * Read an optional unsigned decimal count from the input.
+ *
+ * Digits are consumed through getchr(); the first character that is not
+ * accepted is left in peekc for the caller.  Returns the value of the
+ * digits, or -1 when no digit was present.
+ *
+ * At most sizeof scount - 1 digits are accepted so that the terminating
+ * NUL always fits in scount.  The previous bound (i < sizeof scount)
+ * allowed the terminator to be written one element past the array.
+ */
+static long
+getnum(void)
+{
+	char scount[20];
+	int i;
+
+	i = 0;
+	while ((peekc=getchr()) >= '0' && peekc <= '9' &&
+			i < (int)sizeof scount - 1) {
+		scount[i++] = peekc;
+		peekc = 0;
+	}
+	scount[i] = '\0';
+	return i ? atol(scount) : -1;
+}
+
+static int +getfile(void) +{ + register int c, i, j; + static int nextj; + + i = 0; + j = nextj; + do { + if (--ninbuf < 0) { + if (ioeof || (ninbuf=read(io, genbuf, LBSIZE)-1) < 0) { + if (ioeof == 0 && ninbuf < -1) { + puts("input error"); + status = 1; + } + if (i > 0) { + puts("'\\n' appended"); + c = '\n'; + ioeof = 1; + goto wrc; + } + return(EOF); + } + j = 0; + } + c = genbuf[j++]&0377; + wrc: if (i >= LBSIZE) { + lastc = '\n'; + growlb("line too long"); + } + linebuf[i++] = c ? 
c : '\n'; + count++; + } while (c != '\n'); + linebuf[--i] = 0; + nextj = j; + if (rspec && dot == zero) + fspec(linebuf); + if (maxlength && i > maxlength) { + putstr("line too long: lno = "); + putd((dot - zero+1)&MAXCNT); + putchr('\n'); + } + return(0); +} + +static void +putfile(void) +{ + long *a1; + int n; + register char *fp, *lp; + register int nib; + + nib = 512; + fp = genbuf; + a1 = addr1; + do { + lp = getline(*a1++, 0); + if (maxlength) { + for (n = 0; lp[n]; n++); + if (n > maxlength) { + putstr("line too long: lno = "); + putd((a1-1 - zero)&MAXCNT); + putchr('\n'); + } + } + for (;;) { + if (--nib < 0) { + n = fp-genbuf; + if(write(io, genbuf, n) != n) + error("write error"); + nib = 511; + fp = genbuf; + } + count++; + if ((*fp++ = *lp++) == 0) { + fp[-1] = '\n'; + break; + } else if (fp[-1] == '\n') + fp[-1] = '\0'; + } + } while (a1 <= addr2); + n = fp-genbuf; + if(write(io, genbuf, n) != n) + error("write error"); +} + +static int +append(int (*f)(void), long *a) +{ + register long *a1, *a2, *rdot; + int nline, tl; + + nline = 0; + dot = a; + while ((*f)() == 0) { + if ((dol-zero)+1 >= nlall) { + long *ozero = zero; + nlall += 512; + if ((zero = realloc(zero, nlall*sizeof *zero))==NULL) { + lastc = '\n'; + zero = ozero; + error("out of memory for append"); + } + dot += zero - ozero; + dol += zero - ozero; + addr1 += zero - ozero; + addr2 += zero - ozero; + if (unddot) { + unddot += zero - ozero; + unddol += zero - ozero; + } + if (undzero) { + ozero = undzero; + if ((undzero = realloc(undzero, + nlall*sizeof *undzero)) == 0) { + puts("no memory for undo"); + free(ozero); + } + } + } + tl = putline(); + nline++; + a1 = ++dol; + a2 = a1+1; + rdot = ++dot; + while (a1 > rdot) + *--a2 = *--a1; + *rdot = tl; + } + return(nline); +} + +static void +callunix(void) +{ + char *line; + void (*savint)(int); + pid_t pid, rpid; + int retcode; + + setnoaddr(); + line = readcmd(); + if ((pid = fork()) == 0) { + sigset(SIGHUP, oldhup); + sigset(SIGQUIT, 
oldquit); + sigset(SIGPIPE, oldpipe); + execl(SHELL, "sh", "-c", line, NULL); + _exit(0100); + } else if (pid < 0) + error("fork failed - try again"); + savint = sigset(SIGINT, SIG_IGN); + while ((rpid = wait(&retcode)) != pid && rpid != -1) + ; + sigset(SIGINT, savint); + if (vflag) + puts("!"); +} + +#define cmadd(c) ((i>=cmsize ? \ + ((line=realloc(line,cmsize+=128)) == 0 ? \ + (error("line too long"),0) : 0, 0) \ + : 0), line[i++]=(c)) + +static char * +readcmd(void) +{ + static char *line, *prev; + static int cmsize, pvsize; + char *pp; + int c, mod = 0, i; + + i = 0; + if ((c = getchr()) == '!') { + for (pp = prev; *pp; pp++) + line[i++] = *pp; + mod = 1; + c = getchr(); + } + while (c != '\n' && c != EOF) { + if (c == '\\') { + c = getchr(); + if (c != '%') + cmadd('\\'); + cmadd(c); + } else if (c == '%') { + for (pp = savedfile; *pp; pp++) + cmadd(*pp); + mod = 1; + } else + cmadd(c); + c = getchr(); + } + cmadd('\0'); + if (pvsize < cmsize && (prev = realloc(prev, pvsize=cmsize)) == 0) + error("line too long"); + strcpy(prev, line); + if (mod) + nlputs(line); + return line; +} + +static void +quit(int signo) +{ + lastsig = signo; + if (vflag && fchange) { + fchange = 0; + error("warning: expecting `w'"); + } + if (wrtemp) + unlink(wrtemp); + unlink(tfname); + exit(status); +} + +static void +delete(void) +{ + setdot(); + newline(); + nonzero(); + checkpoint(); + rdelete(addr1, addr2); +} + +static void +rdelete(long *ad1, long *ad2) +{ + register long *a1, *a2, *a3; + + a1 = ad1; + a2 = ad2+1; + a3 = dol; + dol -= a2 - a1; + do { + *a1++ = *a2++; + } while (a2 <= a3); + a1 = ad1; + if (a1 > dol) + a1 = dol; + dot = a1; + fchange = 1; +} + +static void +gdelete(void) +{ + register long *a1, *a2, *a3; + + a3 = dol; + for (a1=zero+1; (*a1&01)==0; a1++) + if (a1>=a3) + return; + for (a2=a1+1; a2<=a3;) { + if (*a2&01) { + a2++; + dot = a1; + } else + *a1++ = *a2++; + } + dol = a1-1; + if (dot>dol) + dot = dol; + fchange = 1; +} + +static char * +getline(long 
tl, int nulterm) +{ + register char *bp, *lp; + register long nl; + + lp = linebuf; + bp = getblock(tl, READ); + nl = nleft; + tl &= ~0377; + while (*lp++ = *bp++) { + if (lp[-1] == '\n' && nulterm) { + lp[-1] = '\0'; + break; + } + if (--nl == 0) { + bp = getblock(tl+=0400, READ); + nl = nleft; + } + } + return(linebuf); +} + +static int +putline(void) +{ + register char *bp, *lp; + register long nl; + long tl; + + fchange = 1; + lp = linebuf; + tl = tline; + bp = getblock(tl, WRITE); + nl = nleft; + tl &= ~0377; + while (*bp = *lp++) { + if (*bp++ == '\n' && insub) { + *--bp = 0; + linebp = lp; + break; + } + if (--nl == 0) { + bp = getblock(tl+=0400, WRITE); + nl = nleft; + } + } + nl = tline; + tline += (((lp-linebuf)+03)>>1)&(MAXCNT-1); + return(nl); +} + +static char * +getblock(long atl, long iof) +{ + register long bno, off; + + bno = (atl>>8)&BLKMSK; + off = (atl<<1)&0774; + if (bno >= BLKMSK) { + lastc = '\n'; + error("temp file too big"); + } + nleft = 512 - off; + if (bno==iblock) { + ichanged |= iof; + return(ibuff+off); + } + if (bno==oblock) + return(obuff+off); + if (iof==READ) { + if (ichanged) + blkio(iblock, ibuff, 1); + ichanged = 0; + iblock = bno; + blkio(bno, ibuff, 0); + return(ibuff+off); + } + if (oblock>=0) + blkio(oblock, obuff, 1); + oblock = bno; + return(obuff+off); +} + +static void +blkio(long b, char *buf, int wr) +{ + lseek(tfile, b<<9, SEEK_SET); + if ((wr ? 
write(tfile, buf, 512) : read (tfile, buf, 512)) != 512) { + status = 1; + error("I/O error on temp file"); + } +} + +static void +init(void) +{ + register long *markp; + + tline = 2; + for (markp = names; markp < &names[26]; markp++) + *markp = 0; + for (markp = undnames; markp < &undnames[26]; markp++) + *markp = 0; + subnewa = 0; + anymarks = 0; + iblock = -1; + oblock = -1; + ichanged = 0; + tfile = maketf(tfile); + dot = dol = zero; + unddot = NULL; +} + +static void +global(int k, int ia) +{ + register int c; + register long *a1; + static char *globuf; + char mb[MB_LEN_MAX+1]; + int spflag = 0; + + if (globp) + error("multiple globals not allowed"); + setall(); + nonzero(); + if ((c=GETWC(mb))=='\n') + error("incomplete global expression"); + compile(NULL, expbuf, &expbuf[ESIZE], c); + if (!ia) { + globrd(&globuf, EOF); + if (globuf[0] == '\n') + globuf[0] = 'p', globuf[1] = '\n', globuf[2] = '\0'; + } else { + newline(); + spflag = pflag; + pflag = 0; + } + checkpoint(); + for (a1=zero; a1<=dol; a1++) { + *a1 &= ~01; + if (a1>=addr1 && a1<=addr2 && execute(0, a1, 0)==k) + *a1 |= 01; + } + /* + * Special case: g/.../d (avoid n^2 algorithm) + */ + if (!ia && globuf[0]=='d' && globuf[1]=='\n' && globuf[2]=='\0') { + gdelete(); + return; + } + for (a1=zero; a1<=dol; a1++) { + if (*a1 & 01) { + *a1 &= ~01; + dot = a1; + if (ia) { + puts(getline(*a1, 0)); + if ((c = getchr()) == EOF) + error("command expected"); + if (c == 'a' || c == 'c' || c == 'i') + error("a, i, or c not allowed in G"); + else if (c == '&') { + if ((c = getchr()) != '\n') + error("end of line expected"); + if (globuf == 0 || *globuf == 0) + error("no remembered command"); + } else if (c == '\n') { + a1 = zero; + continue; + } else + globrd(&globuf, c); + } + globp = globuf; + commands(); + globp = NULL; + a1 = zero; + } + } + if (ia) + pflag = spflag; +} + +static void +globrd(char **globuf, register int c) +{ + register int i; + + if (*globuf == 0 && (*globuf = malloc(GBSIZE=256)) == 0) + 
error("global too long"); + i = 0; + if (c != EOF) + (*globuf)[i++] = c; + while ((c = getchr()) != '\n') { + if (c==EOF) + error("incomplete global expression"); + if (c=='\\') { + c = getchr(); + if (c!='\n') + (*globuf)[i++] = '\\'; + } + (*globuf)[i++] = c; + if (i>=GBSIZE-4 && (*globuf=realloc(*globuf,GBSIZE+=256)) == 0) + error("global too long"); + } + (*globuf)[i++] = '\n'; + (*globuf)[i++] = 0; +} + +static void +join(void) +{ + register int i, j; + register long *a1; + + j = 0; + for (a1=addr1; a1<=addr2; a1++) { + i = getline(*a1, 0) - linebuf; + while (genbuf[j] = linebuf[i++]) + if (j++ >= LBSIZE-2) + growlb("line too long"); + } + i = 0; + j = 0; + while (linebuf[i++] = genbuf[j++]) + ; + *addr1 = putline(); + if (addr1<addr2) + rdelete(addr1+1, addr2); + dot = addr1; +} + +static void +substitute(int inglob) +{ + register long *markp; + register long *a1; + intptr_t nl; + int gsubf; + + checkpoint(); + gsubf = compsub(); + insub = 1; + for (a1 = addr1; a1 <= addr2; a1++) { + long *ozero; + if (execute(0, a1, 1)==0) + continue; + inglob |= dosub(gsubf < 2); + if (gsubf) { + int i = 1; + + while (*loc2) { + if (execute(1, NULL, 1)==0) + break; + inglob |= dosub(gsubf == -1 || ++i == gsubf); + } + } + subnewa = putline(); + *a1 &= ~01; + if (anymarks) { + for (markp = names; markp < &names[26]; markp++) + if (*markp == *a1) + *markp = subnewa; + } + *a1 = subnewa; + ozero = zero; + nl = append(getsub, a1); + nl += zero-ozero; + a1 += nl; + addr2 += nl; + } + insub = 0; + if (inglob==0) + error("no match"); +} + +static int +compsub(void) +{ + register int seof, c, i; + static char *oldrhs; + static int orhssz; + char mb[MB_LEN_MAX+1]; + + if ((seof = GETWC(mb)) == '\n' || seof == ' ') + error("illegal or missing delimiter"); + nodelim = 0; + compile(NULL, expbuf, &expbuf[ESIZE], seof); + i = 0; + for (;;) { + c = GETWC(mb); + if (c=='\\') { + if (i >= RHSIZE-2) + growrhs("replacement string too long"); + rhsbuf[i++] = c; + c = GETWC(mb); + } else if 
(c=='\n') { + if (globp && *globp) { + if (i >= RHSIZE-2) + growrhs("replacement string too long"); + rhsbuf[i++] = '\\'; + } + else if (nodelim) + error("illegal or missing delimiter"); + else { + peekc = c; + pflag++; + break; + } + } else if (c==seof) + break; + for (c = 0; c==0 || mb[c]; c++) { + if (i >= RHSIZE-2) + growrhs("replacement string too long"); + rhsbuf[i++] = mb[c]; + } + } + rhsbuf[i++] = 0; + if (rhsbuf[0] == '%' && rhsbuf[1] == 0) { + if (orhssz == 0) + error("no remembered replacement string"); + strcpy(rhsbuf, oldrhs); + } else { + if (orhssz < RHSIZE && + (oldrhs = realloc(oldrhs, orhssz=RHSIZE)) == 0) + error("replacement string too long"); + strcpy(oldrhs, rhsbuf); + } + if ((peekc = getchr()) == 'g') { + peekc = 0; + newline(); + return(-1); + } else if (peekc >= '0' && peekc <= '9') { + c = getnum(); + if (c < 1 || c > LBSIZE) + error("invalid count"); + newline(); + return c; + } + newline(); + return(0); +} + +static int +getsub(void) +{ + register char *p1, *p2; + + p1 = linebuf; + if ((p2 = linebp) == 0) + return(EOF); + while (*p1++ = *p2++) + ; + linebp = 0; + return(0); +} + +static int +dosub(int really) +{ + register char *lp, *sp; + register int i, j, k; + int c; + + if (!really) + goto copy; + i = 0; + j = 0; + k = 0; + while (&linebuf[i] < loc1) + genbuf[j++] = linebuf[i++]; + while (c = rhsbuf[k++]&0377) { + if (c=='&') { + j = place(j, loc1, loc2); + continue; + } else if (c == '\\') { + c = rhsbuf[k++]&0377; + if (c >='1' && c < nbra+'1') { + j = place(j, braslist[c-'1'], braelist[c-'1']); + continue; + } + } + if (j >= LBSIZE) + growlb("line too long"); + genbuf[j++] = c; + } + i = loc2 - linebuf; + loc2 = j + linebuf; +#if defined (SUS) || defined (SU3) || defined (S42) + if (loc1 == &linebuf[i]) { + int n; + wchar_t wc; + if (mb_cur_max > 1 && (n = mbtowc(&wc, loc2, mb_cur_max)) > 0) + loc2 += n; + else + loc2++; + } +#endif /* SUS || SU3 || S42 */ + while (genbuf[j++] = linebuf[i++]) + if (j >= LBSIZE) + growlb("line 
too long"); + if (really) { + lp = linebuf; + sp = genbuf; + } else { + copy: sp = linebuf; + lp = genbuf; + } + while (*lp++ = *sp++) + ; + return really; +} + +static int +place(register int j, register const char *l1, register const char *l2) +{ + + while (l1 < l2) { + genbuf[j++] = *l1++; + if (j >= LBSIZE) + growlb("line too long"); + } + return(j); +} + +static void +move(int cflag) +{ + register long *adt, *ad1, *ad2; + + setdot(); + nonzero(); + if ((adt = address())==0) + error("illegal move destination"); + newline(); + checkpoint(); + if (cflag) { + long *ozero; + intptr_t delta; + ad1 = dol; + ozero = zero; + append(getcopy, ad1++); + ad2 = dol; + delta = zero - ozero; + ad1 += delta; + adt += delta; + } else { + ad2 = addr2; + for (ad1 = addr1; ad1 <= ad2;) + *ad1++ &= ~01; + ad1 = addr1; + } + ad2++; + if (adt<ad1) { + dot = adt + (ad2-ad1); + if ((++adt)==ad1) + return; + reverse(adt, ad1); + reverse(ad1, ad2); + reverse(adt, ad2); + } else if (adt >= ad2) { + dot = adt++; + reverse(ad1, ad2); + reverse(ad2, adt); + reverse(ad1, adt); + } else + error("illegal move destination"); + fchange = 1; +} + +static void +reverse(register long *a1, register long *a2) +{ + register int t; + + for (;;) { + t = *--a2; + if (a2 <= a1) + return; + *a2 = *a1; + *a1++ = t; + } +} + +static int +getcopy(void) +{ + if (addr1 > addr2) + return(EOF); + getline(*addr1++, 0); + return(0); +} + +static int +execute(int gf, long *addr, int subst) +{ + register char *p1, *p2, c; + + for (c=0; c<NBRA; c++) { + braslist[c&0377] = 0; + braelist[c&0377] = 0; + } + if (gf) { + if (circf) + return(0); + p1 = linebuf; + p2 = genbuf; + while (*p1++ = *p2++) + ; + locs = p1 = loc2; + } else { + if (addr==zero) + return(0); + p1 = getline(*addr, 1); + locs = 0; + } + needsub = subst; + return step(p1, expbuf); +} + +static void +cmplerr(int c) +{ + const char *msg; + +#if !defined (SUS) && !defined (S42) && !defined (SU3) + expbuf[0] = 0; +#endif + switch (c) { + case 11: + msg = 
"Range endpoint too large"; + break; + case 16: + msg = "bad number"; + break; + case 25: + msg = "`\\digit' out of range"; + break; + case 36: + msg = "illegal or missing delimiter"; + break; + case 41: + msg = "no remembered search string"; + break; + case 42: + msg = "'\\( \\)' imbalance"; + break; + case 43: + msg = "Too many `\\(' s"; + break; + case 44: + msg = "more than 2 numbers given"; + break; + case 45: + msg = "'\\}' expected"; + break; + case 46: + msg = "first number exceeds second"; + break; + case 49: + msg = "'[ ]' imbalance"; + break; + case 50: + msg = "regular expression overflow"; + break; + case 67: + msg = "illegal byte sequence"; + break; + default: + msg = "regular expression error"; + break; + } + error(msg); +} + +static void +doprnt(long *bot, long *top) +{ + long *a1; + + a1 = bot; + do { + if (numbf ^ Nflag) { + putd(a1-zero); + putchr('\t'); + } + nlputs(getline(*a1++, 0)); + } while (a1 <= top); + pflag = 0; + listf = 0; + numbf = 0; +} + +static void +putd(long c) +{ + register int r; + + r = c%10; + c /= 10; + if (c) + putd(c); + putchr(r + '0'); +} + +static void +nlputs(register const char *sp) +{ + if (listf) + list(sp); + else if (tabstops) + expand(sp); + else + puts(sp); +} + +static void +puts(register const char *sp) +{ + while (*sp) { + if (*sp != '\n') + putchr(*sp++ & 0377); + else + sp++, putchr('\0'); + } + putchr('\n'); +} + +static void +list(const char *lp) +{ + int col, n; + wchar_t c; + + col = numbf ^ Nflag ? 8 : 0; + while (*lp) { + if (mb_cur_max > 1 && *lp&0200) + n = mbtowc(&c, lp, mb_cur_max); + else { + n = 1; + c = *lp&0377; + } + if (col+1 >= 72) { + col = 0; + putchr('\\'); + putchr('\n'); + } + if (n<0 || +#if defined (SUS) || defined (S42) || defined (SU3) + c == '\\' || +#endif /* SUS || S42 || SU3 */ + !(mb_cur_max>1 ? 
iswprint(c) : isprint(c))) { + if (n<0) + n = 1; + while (n--) + col += lstchr(*lp++&0377); + } else if (mb_cur_max>1) { + col += wcwidth(c); + while (n--) + putchr(*lp++&0377); + } else { + putchr(*lp++&0377); + col++; + } + } +#if defined (SUS) || defined (S42) || defined (SU3) + putchr('$'); +#endif + putchr('\n'); +} + +static int +lstchr(int c) +{ + int cad = 1, d; + +#if !defined (SUS) && !defined (S42) && !defined (SU3) + if (c=='\t') { + c = '>'; + goto esc; + } + if (c=='\b') { + c = '<'; + esc: + putchr('-'); + putchr('\b'); + putchr(c); + } else if (c == '\n') { + putchr('\\'); + putchr('0'); + putchr('0'); + putchr('0'); + cad = 4; +#else /* !SUS, !S42, !SU3 */ + if (c == '\n') + c = '\0'; + if (c == '\\') { + putchr('\\'); + putchr('\\'); + cad = 2; + } else if (c == '\a') { + putchr('\\'); + putchr('a'); + cad = 2; + } else if (c == '\b') { + putchr('\\'); + putchr('b'); + cad = 2; + } else if (c == '\f') { + putchr('\\'); + putchr('f'); + cad = 2; + } else if (c == '\r') { + putchr('\\'); + putchr('r'); + cad = 2; + } else if (c == '\t') { + putchr('\\'); + putchr('t'); + cad = 2; + } else if (c == '\v') { + putchr('\\'); + putchr('v'); + cad = 2; +#endif /* !SUS, !S42, !SU3 */ + } else { + putchr('\\'); + putchr(((c&~077)>>6)+'0'); + c &= 077; + d = c & 07; + putchr(c > d ? 
((c-d)>>3)+'0' : '0'); + putchr(d+'0'); + cad = 4; + } + return cad; +} + +static void +putstr(const char *s) +{ + while (*s) + putchr(*s++); +} + +static char line[70]; +static char *linp = line; + +static void +putchr(int ac) +{ + register char *lp; + register int c; + + lp = linp; + c = ac; + *lp++ = c; + if(c == '\n' || lp >= &line[64]) { + linp = line; + write(1, line, lp-line); + return; + } + linp = lp; +} + +static void +checkpoint(void) +{ + long *a1, *a2; + + if (undzero && globp == NULL) { + for (a1 = zero+1, a2 = undzero+1; a1 <= dol; a1++, a2++) + *a2 = *a1; + unddot = &undzero[dot-zero]; + unddol = &undzero[dol-zero]; + for (a1 = names, a2 = undnames; a1 < &names[26]; a1++, a2++) + *a2 = *a1; + } +} + +#define swap(a, b) (t = a, a = b, b = t) + +static void +undo(void) +{ + long *t; + + if (undzero == NULL) + error("no undo information saved"); + swap(zero, undzero); + swap(dot, unddot); + swap(dol, unddol); + swap(names, undnames); +} + +static int +maketf(int fd) +{ + char *tmpdir; + + if (fd == -1) { + if ((tmpdir = getenv("TMPDIR")) == NULL || + (fd = creatf(tmpdir)) < 0) + if ((fd = creatf("/var/tmp")) < 0 && + (fd = creatf("/tmp")) < 0) + error("cannot create temporary file"); + } else + ftruncate(fd, 0); /* blkio() will seek to 0 anyway */ + return fd; +} + +static int +creatf(const char *tmpdir) +{ + if (strlen(tmpdir) >= sizeof tfname - 9) + return -1; + strcpy(tfname, tmpdir); + strcat(tfname, "/eXXXXXX"); + return mkstemp(tfname); +} + +static int +sopen(const char *fn, int rdwr) +{ + int pf[2], fd = -1; + + if (fn[0] == '!') { + fn++; + if (pipe(pf) < 0) + error("write or open on pipe failed"); + switch (pipid = fork()) { + case 0: + if (rdwr == READ) + dup2(pf[1], 1); + else + dup2(pf[0], 0); + close(pf[0]); + close(pf[1]); + sigset(SIGHUP, oldhup); + sigset(SIGQUIT, oldquit); + sigset(SIGPIPE, oldpipe); + execl(SHELL, "sh", "-c", fn, NULL); + _exit(0100); + default: + close(pf[rdwr == READ ? 1 : 0]); + fd = pf[rdwr == READ ? 
0 : 1]; + break; + case -1: + error("fork failed - try again"); + } + } else if (rdwr == READ) + fd = open(fn, O_RDONLY); + else if (rdwr == EXIST) + fd = open(fn, O_WRONLY); + else /*if (rdwr == WRITE)*/ + fd = creat(fn, 0666); + if (fd >= 0 && rdwr == READ) + readop = 1; + if (fd >= 0) + fstat(fd, &fstbuf); + return fd; +} + +static void +sclose(int fd) +{ + int status; + + close(fd); + if (pipid >= 0) { + while (wait(&status) != pipid); + pipid = -1; + } + readop = 0; +} + +static void +fspec(const char *lp) +{ + struct termios ts; + const char *cp; + + freetabs(); + maxlength = 0; + if (tcgetattr(1, &ts) < 0 +#ifdef TAB3 + || (ts.c_oflag&TAB3) == 0 +#endif + ) + return; + while (lp[0]) { + if (lp[0] == '<' && lp[1] == ':') + break; + lp++; + } + if (lp[0]) { + lp += 2; + while ((cp = ftok(&lp)) != NULL) { + switch (*cp) { + case 't': + freetabs(); + if ((tabstops = tabstring(&cp[1])) == NULL) + goto err; + break; + case 's': + maxlength = atoi(&cp[1]); + break; + case 'm': + case 'd': + case 'e': + break; + case ':': + if (cp[1] == '>') { + if (tabstops == NULL) + if ((tabstops = tabstring("0")) + == NULL) + goto err; + return; + } + /*FALLTHRU*/ + default: + err: freetabs(); + maxlength = 0; + errput("PWB spec problem", NULL); + return; + } + } + } +} + +static const char * +ftok(const char **lp) +{ + const char *cp; + + while (**lp && **lp != ':' && (**lp == ' ' || **lp == '\t')) + (*lp)++; + cp = *lp; + while (**lp && **lp != ':' && **lp != ' ' && **lp != '\t') + (*lp)++; + return cp; +} + +static struct tabulator * +repetitive(int repetition) +{ + struct tabulator *tp, *tabspec; + int col, i; + + if ((tp = tabspec = calloc(1, sizeof *tp)) == NULL) + return NULL; + tp->t_rep = repetition; + if (repetition > 0) { + for (col = 1+repetition, i = 0; i < 22; col += repetition) { + if ((tp->t_nxt = calloc(1, sizeof *tp)) == NULL) + return NULL; + tp = tp->t_nxt; + tp->t_tab = col; + } + } + return tabspec; +} + +#define blank(c) ((c) == ' ' || (c) == '\t') + 
+static struct tabulator * +tablist(const char *s) +{ + struct tabulator *tp, *tabspec; + char *x; + int prev = 0, val; + + if ((tp = tabspec = calloc(1, sizeof *tp)) == NULL) + return NULL; + for (;;) { + while (*s == ',') + s++; + if (*s == '\0' || blank(*s) || *s == ':') + break; + val = strtol(s, &x, 10); + if (*s == '+') + val += prev; + prev = val; + if (*s == '-' || (*x != ',' && !blank(*x) && *x != ':' && + *x != '\0')) + return NULL; + s = x; + if ((tp->t_nxt = calloc(1, sizeof *tp)) == NULL) + return NULL; + tp = tp->t_nxt; + tp->t_tab = val; + } + return tabspec; +} + +static struct tabulator * +tabstring(const char *s) +{ + const struct { + const char *c_nam; + const char *c_str; + } canned[] = { + { "a", "1,10,16,36,72" }, + { "a2", "1,10,16,40,72" }, + { "c", "1,8,12,16,20,55" }, + { "c2", "1,6,10,14,49" }, + { "c3", "1,6,10,14,18,22,26,30,34,38,42,46,50,54,58,62,67" }, + { "f", "1,7,11,15,19,23" }, + { "p", "1,5,9,13,17,21,25,29,33,37,41,45,49,53,57,61" }, + { "s", "1,10,55" }, + { "u", "1,12,20,44" }, + { 0, 0 } + }; + + int i, j; + + if (s[0] == '-') { + if (s[1] >= '0' && s[1] <= '9' && ((i = atoi(&s[1])) != 0)) + return repetitive(i); + for (i = 0; canned[i].c_nam; i++) { + for (j = 0; canned[i].c_nam[j]; j++) + if (s[j+1] != canned[i].c_nam[j]) + break; + if ((s[j+1]=='\0' || s[j+1]==':' || blank(s[j+1])) && + canned[i].c_nam[j] == '\0') + return tablist(canned[i].c_str); + } + return NULL; + } else + return tablist(s); +} + +static void +freetabs(void) +{ + struct tabulator *tp; + + tp = tabstops; + while (tp) { + tabstops = tp->t_nxt; + free(tp); + tp = tabstops; + } +} + +static void +expand(const char *s) +{ + struct tabulator *tp = tabstops; + int col = 0, n = 1, m, tabcnt = 0, nspc; + wchar_t wc; + + while (*s) { + nspc = 0; + switch (*s) { + case '\n': + putchr('\0'); + s++; + continue; + case '\t': + if (tp) { + if (tp->t_rep) { + if (col % tp->t_rep == 0) { + nspc++; + col++; + } + while (col % tp->t_rep) { + nspc++; + col++; + } + 
break; + } + while (tp && (col>tp->t_tab || tp->t_tab == 0)) + tp = tp->t_nxt; + if (tp && col == tp->t_tab) { + nspc++; + col++; + tp = tp->t_nxt; + } + if (tp) { + while (col < tp->t_tab) { + nspc++; + col++; + } + tp = tp->t_nxt; + break; + } + } + tabcnt = 1; + nspc++; + break; + default: + if (mb_cur_max>1 && (n=mbtowc(&wc, s, mb_cur_max))>0) { + if ((m = wcwidth(wc)) > 0) + col += m; + } else { + col++; + n = 1; + } + } + if (maxlength && col > maxlength) { + putstr("\ntoo long"); + break; + } + if (nspc) { + while (nspc--) + putchr(' '); + s++; + } else + while (n--) + putchr(*s++); + } + if (tabcnt) + putstr("\ntab count"); + putchr('\n'); +} + +static wint_t +GETWC(char *mb) +{ + int c, n; + + n = 1; + mb[0] = c = GETC(); + mb[1] = '\0'; + if (mb_cur_max > 1 && c&0200 && c != EOF) { + int m; + wchar_t wc; + + while ((m = mbtowc(&wc, mb, mb_cur_max)) < 0 && n<mb_cur_max) { + mb[n++] = c = GETC(); + mb[n] = '\0'; + if (c == '\n' || c == EOF) + break; + } + if (m != n) + ERROR(67); + return wc; + } else + return c; +} + +static void +growlb(const char *msg) +{ + char *olb = linebuf; + int i; + + LBSIZE += 512; + if ((linebuf = realloc(linebuf, LBSIZE)) == NULL || + (genbuf = realloc(genbuf, LBSIZE)) == NULL) + error(msg); + if (linebuf != olb) { + loc1 += linebuf - olb; + loc2 += linebuf - olb; + for (i = 0; i < NBRA; i++) { + if (braslist[i]) + braslist[i] += linebuf - olb; + if (braelist[i]) + braelist[i] += linebuf - olb; + } + } +} + +static void +growrhs(const char *msg) +{ + RHSIZE += 256; + if ((rhsbuf = realloc(rhsbuf, RHSIZE)) == NULL) + error(msg); +} + +static void +growfn(const char *msg) +{ + FNSIZE += 64; + if ((savedfile = realloc(savedfile, FNSIZE)) == NULL || + (file = realloc(file, FNSIZE)) == NULL) + error(msg); + if (FNSIZE == 64) + file[0] = savedfile[0] = 0; +} + +#if defined (SUS) || defined (S42) || defined (SU3) +union ptrstore { + void *vp; + char bp[sizeof (void *)]; +}; + +static void * +fetchptr(const char *bp) +{ + union ptrstore 
u; + int i; + + for (i = 0; i < sizeof (void *); i++) + u.bp[i] = bp[i]; + return u.vp; +} + +static void +storeptr(void *vp, char *bp) +{ + union ptrstore u; + int i; + + u.vp = vp; + for (i = 0; i < sizeof (void *); i++) + bp[i] = u.bp[i]; +} + +#define add(c) ((i>=LBSIZE ? (growlb("regular expression overflow"),0) : 0), \ + genbuf[i++] = (c)) + +#define copy(s) { \ + int m; \ + for (m = 0; m==0 || s[m]; m++) \ + add(s[m]); \ +} + +static char * +compile(char *unused, char *ep, const char *endbuf, int seof) +{ + INIT + int c, d, i; + regex_t *rp; + char *op; + char mb[MB_LEN_MAX+1]; + + op = ep; + ep += 2; + if ((rp = fetchptr(ep)) == NULL) { + if ((rp = calloc(1, sizeof *rp)) == NULL) + ERROR(50); + storeptr(rp, ep); + } + ep += sizeof (void *); + i = 0; + nbra = 0; + do { + if ((c = GETWC(mb)) == seof) + add('\0'); + else if (c == '\\') { + copy(mb); + c = GETWC(mb); + if (c == '(') + nbra++; + goto normchar; + } else if (c == '[') { + add(c); + d = EOF; + do { + c = GETWC(mb); + if (c == EOF || c == '\n') + ERROR(49); + copy(mb); + if (d=='[' && (c==':' || c=='.' 
|| c=='=')) { + d = c; + do { + c = GETWC(mb); + if (c == EOF || c == '\n') + ERROR(49); + copy(mb); + } while (c != d || PEEKC() != ']'); + c = GETWC(mb); + copy(mb); + c = EOF; + } + d = c; + } while (c != ']'); + } else { + if (c == EOF || c == '\n') { + if (c == '\n') + UNGETC(c); + mb[0] = c = '\0'; + } + if (c == '\0') + nodelim = 1; + normchar: copy(mb); + } + } while (genbuf[i-1] != '\0'); + if (genbuf[0]) { + int reflags = 0; + +#ifdef REG_ANGLES + reflags |= REG_ANGLES; +#endif +#if defined (SU3) && defined (REG_AVOIDNULL) + reflags |= REG_AVOIDNULL; +#endif + if (op[0]) + regfree(rp); + op[0] = 0; + switch (regcomp(rp, genbuf, reflags)) { + case 0: + break; + case REG_ESUBREG: + ERROR(25); + /*NOTREACHED*/ + case REG_EBRACK: + ERROR(49); + /*NOTREACHED*/ + case REG_EPAREN: + ERROR(42); + /*NOTREACHED*/ + case REG_BADBR: + case REG_EBRACE: + ERROR(45); + /*NOTREACHED*/ + case REG_ERANGE: + ERROR(11); + /*NOTREACHED*/ + case REG_ESPACE: + ERROR(50); + /*NOTREACHED*/ + default: + ERROR(-1); + } + op[0] = 1; + circf = op[1] = genbuf[0] == '^'; + } else if (op[0]) { + circf = op[1]; + } else + ERROR(41); + return ep + sizeof (void *); +} + +static int +step(const char *lp, const char *ep) +{ + regex_t *rp; + regmatch_t bralist[NBRA+1]; + int eflag = 0; + int res; + int i; + + rp = fetchptr(&ep[2]); + if (ep[0] == 0) + return 0; + if (locs) + eflag |= REG_NOTBOL; + if ((res = regexec(rp, lp, needsub? 
NBRA+1 : 0, bralist, eflag)) == 0 && + needsub) { + loc1 = (char *)lp + bralist[0].rm_so; + loc2 = (char *)lp + bralist[0].rm_eo; + for (i = 1; i <= NBRA; i++) { + if (bralist[i].rm_so != -1) { + braslist[i-1] = (char *)lp + bralist[i].rm_so; + braelist[i-1] = (char *)lp + bralist[i].rm_eo; + } else + braslist[i-1] = braelist[i-1] = NULL; + } + } + return res == 0; +} +#endif /* SUS || S42 || SU3 */ + +static void +help(void) +{ + const char *desc[] = { + "(.)a append up to .", + "(.)b[n] browse n lines", + "(.,.)c change up to .", + "(.,.)d delete lines", + "e [file] edit file", + "E [file] force edit", + "f [file] print or set file", + "(1,$)g/RE/cmd global cmd", + "(1,$)G/RE/ interactive global", + "h print last error", + "H toggle error messages", + "help print this screen", + "(.)i insert up to .", + "(.,.+1)j join lines", + "(.)kx mark line with x", + "(.,.)l list lines", + "(.,.)ma move lines to a", + "(.,.)n number lines", + "N revert n and p", + "(.)o[n] show n lines of context", + "(.,.)p print lines", + "P toggle prompt", + "q quit", + "Q force quit", + "($)r read file", + "(.,.)s/RE/repl/ search and replace", + "(.,.)s/RE/rp/g replace all occurrences", + "(.,.)s/RE/rp/n replace n-th occurrence", + "(.,.)ta transfer lines to a", + "u undo last change", + "(1,$)v/RE/cmd reverse global", + "(1,$)V/RE/ reverse i/a global", + "(1,$)w [file] write file", + "(1,$)W [file] append to file", + "z write buffer and quit", + "($)= print line number", + "!command execute shell command", + "(.+1)<newline> print one line", + "/RE find RE forwards", + "?RE find RE backwards", + "1 first line", + ". 
current line", + "$ last line", + ", 1,$", + "; .,$", + NULL + }; + char line[100]; + int c, half, i, k; + + half = (sizeof desc / sizeof *desc) / 2; + for (i = 0; i < half && desc[i]; i++) { + c = 0; + for (k = 0; desc[i][k]; k++) + line[c++] = desc[i][k]; + if (desc[i+half]) { + while (c < 40) + line[c++] = ' '; + for (k = 0; desc[i+half][k]; k++) + line[c++] = desc[i+half][k]; + } + line[c] = 0; + puts(line); + } +} diff --git a/ed/mkfile b/ed/mkfile @@ -0,0 +1,8 @@ +BIN = ed +OBJ = ed.o +LOCAL_CFLAGS = -DSU3 -DSHELL=\"$SHELL\" +INSTALL_BIN = ed +INSTALL_MAN1 = ed.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/expr/expr.1 b/expr/expr.1 @@ -0,0 +1,211 @@ +.\" +.\" Sccsid @(#)expr.1 1.16 (gritter) 2/3/05 +.\" Parts taken from expr(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH EXPR 1 "2/3/05" "Heirloom Toolchest" "User Commands" +.SH NAME +expr \- evaluate arguments as an expression +.SH SYNOPSIS +.B expr +.I arguments +\&.\|.\|. +.SH DESCRIPTION +The arguments are taken as an expression. +After evaluation, the result is written on the standard output. +Each token of the expression is a separate argument. +.PP +The operators and keywords are listed below. +The list is in order of increasing precedence, +with equal precedence operators grouped. +.TP +.I expr | expr +yields the first +.I expr +if it is neither null nor `0', otherwise +yields the second +.I expr. +.TP +.I expr & expr +yields the first +.I expr +if neither +.I expr +is null or `0', otherwise yields `0'. +.TP +.I expr relop expr +where +.I relop is one of +< <= = != >= >, +yields `1' if the indicated comparison is true, `0' if false. +The comparison is numeric if both +.I expr +are integers, otherwise lexicographic. +.TP +.IR expr " + " expr +.br +.IR expr " - " expr +.br +addition or subtraction of the arguments. +.TP +.IR expr " * " expr +.br +.IR expr " / " expr +.br +.IR expr " % " expr +.br +multiplication, division, or remainder of the arguments. 
+.TP +.IR expr " : " expr +The matching operator compares the string first argument +with the regular expression second argument. +Regular expression syntax is the same as that of +.IR ed (1); +.B /usr/5bin/expr +uses simple regular expressions, +.BR /usr/5bin/posix/expr , +.BR /usr/5bin/posix2001/expr , +and +.B /usr/5bin/s42/expr +use basic regular expressions. +The +\fB\\(\|.\|.\|.\|\\)\fP +pattern symbols can be used to select a portion of the +first argument. +Otherwise, +the matching operator yields the number of characters matched +(`0' on failure). +.TP +.RI match " expr expr" +Same as +.IR expr " : " expr . +.TP +.RI ( " expr " ) +parentheses for grouping. +.TP +.I string +Yields itself +unless it is part of a larger expression. +With +.BR /usr/5bin/posix/expr +and +.BR /usr/5bin/posix2001/expr , +all +.I strings +that form valid decimal numbers +are converted to the canonical form. +.PP +The following operators are supported only by +.B /usr/5bin/s42/expr +or if the +.B SYSV3 +environment variable is set: +.TP +.RI length " string" +Returns the number of characters in +.IR string . +.TP +.RI substr " string index count" +Returns a string that consists of +.I count +characters +beginning at position +.I index +of +.I string +(starting at 1). +.TP +.RI index " string set" +Returns the index in +.I string +(starting at 1) of the first occurrence +of one of the characters in +.IR set , +or 0 if no character is found. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See locale(7). +.TP +.B LC_COLLATE +Sets the collation sequence for string comparison, +and for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters in regular expressions, +for the match, length, substr, and index operators, +and the availability and composition of character classes +in basic regular expressions. +.TP +.B SYSV3 +Enables some additional operators as described above. 
+.SH EXAMPLES +.PP +To add 1 to the Shell variable +.IR a : +.IP +a=\`expr $a + 1\` +.PP +To find the filename part (least significant part) +of the pathname stored in variable +.I a, +which may or may not contain `/': +.IP +expr "$a" : \'.*/\e(\^.*\e)\' \'\^|\' "$a" +.LP +Note the quoted Shell metacharacters. +.\" Historic example, not to be deleted and useful just because of its errors +Also note that this example generates wrong results +if the result of the substitution is `0' +or if `$a' equals one of the +.I expr +operators. +Be sure that your code avoids such problems +and use +.IR basename (1) +to actually cut out filename parts. +.SH "SEE ALSO" +ed(1), sh(1), test(1) +.SH DIAGNOSTICS +.I Expr +returns the following exit codes: +.PP + 0 if the expression is neither null nor `0', +.br + 1 if the expression +is null or `0', +.br + 2 for invalid expressions. +.SH NOTES +Integers are treated as 64-bit, 2's complement numbers. diff --git a/expr/expr.y b/expr/expr.y @@ -0,0 +1,546 @@ +/* from Unix 7th Edition /usr/src/cmd/expr.y */ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. 
+ * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +%{ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (S42) +static const char sccsid[] USED = "@(#)expr_s42.sl 1.28 (gritter) 5/29/05"; +static int sus = 0; +#elif defined (SU3) +static const char sccsid[] USED = "@(#)expr_su3.sl 1.28 (gritter) 5/29/05"; +static int sus = 3; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)expr_sus.sl 1.28 (gritter) 5/29/05"; +static int sus = 1; +#else +static const char sccsid[] USED = "@(#)expr.sl 1.28 (gritter) 5/29/05"; +static int sus = 0; +#endif + +/* expression command */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> +#include <wchar.h> +#include <unistd.h> +#include <limits.h> +#include <ctype.h> +#include <inttypes.h> + +#include "atoll.h" + +#define EQL(x,y) !strcmp(x,y) + +#define NUMSZ 25 + +static char **Av; +static int Ac; +static int Argi; +static int mb_cur_max; +static char *progname; +extern int sysv3; + +static char *Mstring[1]; + +int yylex(void); +static char *_rel(int op, register char *r1, register char *r2); +static char *_arith(int op, char *r1, char *r2); +static char *_conj(int op, char *r1, char *r2); +static char *match(char *s, char *p); +static int ematch(char *s, register char *p); +static void errxx(int c); +static int yyerror(const char *s); +static int numeric(const char *s); +static int chars(const char *s, const char *end); +static void *srealloc(void *, size_t); +static void *smalloc(size_t); +static char *numpr(int64_t val); + +static char *substr(char *, const char *, const char *); +static char *length(const char *); +static char *eindex(const char *, const char *); + +#if defined (SUS) || defined (SU3) || defined (S42) +#include <regex.h> +static int nbra; +#else /* !SUS, !SU3, !S42 */ +#include <regexpr.h> +#endif /* !SUS, !SU3, !S42 */ +%} + +/* Yacc productions 
for "expr" command: */ + +%union { + char *val; +} + +%token <val> OR AND ADD SUBT MULT DIV REM EQ GT GEQ LT LEQ NEQ +%token <val> A_STRING SUBSTR LENGTH INDEX NOARG MATCH + +%type <val> expr + +/* operators listed below in increasing precedence: */ +%left OR +%left AND +%left EQ LT GT GEQ LEQ NEQ +%left ADD SUBT +%left MULT DIV REM +%left MCH +%left MATCH +%left SUBSTR +%left LENGTH INDEX +%% + +/* a single `expression' is evaluated and printed: */ + +expression: expr NOARG { + if (sus && numeric($1)) { + int64_t n; + n = atoll($1); + printf("%lld\n", n); + exit(n == 0); + } else + puts($1); + exit((!strcmp($1,"0")||!strcmp($1,"\0"))? 1: 0); + } + ; + + +expr: '(' expr ')' { $$ = $2; } + | expr OR expr { $$ = _conj(OR, $1, $3); } + | expr AND expr { $$ = _conj(AND, $1, $3); } + | expr EQ expr { $$ = _rel(EQ, $1, $3); } + | expr GT expr { $$ = _rel(GT, $1, $3); } + | expr GEQ expr { $$ = _rel(GEQ, $1, $3); } + | expr LT expr { $$ = _rel(LT, $1, $3); } + | expr LEQ expr { $$ = _rel(LEQ, $1, $3); } + | expr NEQ expr { $$ = _rel(NEQ, $1, $3); } + | expr ADD expr { $$ = _arith(ADD, $1, $3); } + | expr SUBT expr { $$ = _arith(SUBT, $1, $3); } + | expr MULT expr { $$ = _arith(MULT, $1, $3); } + | expr DIV expr { $$ = _arith(DIV, $1, $3); } + | expr REM expr { $$ = _arith(REM, $1, $3); } + | expr MCH expr { $$ = match($1, $3); } + | MATCH expr expr { $$ = match($2, $3); } + | SUBSTR expr expr expr { $$ = substr($2, $3, $4); } + | LENGTH expr { $$ = length($2); } + | INDEX expr expr { $$ = eindex($2, $3); } + | A_STRING + ; +%% + +int +main(int argc, char **argv) +{ + extern int yyparse(void); + + Ac = argc; + Argi = 1; + Av = argv; + progname = basename(argv[0]); + if (getenv("SYSV3") != NULL) + sysv3 = 1; + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (Av[1] && Av[1][0] == '-' && Av[1][1] == '-' && Av[1][2] == '\0') + Argi++; + yyparse(); + /*NOTREACHED*/ + return 0; +} + +static const char *operators[] = { + "|", "&", "+", "-", 
"*", "/", "%", ":", + "=", "==", "<", "<=", ">", ">=", "!=", + "match", "substr", "length", "index", + "\0" +}; + +static int op[] = { + OR, AND, ADD, SUBT, MULT, DIV, REM, MCH, + EQ, EQ, LT, LEQ, GT, GEQ, NEQ, + MATCH, SUBSTR, LENGTH, INDEX +}; + +int +yylex(void) +{ + register char *p; + register int i; + + if(Argi >= Ac) return NOARG; + + p = Av[Argi++]; + + if((*p == '(' || *p == ')') && p[1] == '\0') + return (int)*p; + for(i = 0; *operators[i]; ++i) + if(EQL(operators[i], p)) + return op[i]; + + yylval.val = p; + return A_STRING; +} + +static char * +_rel(int op, register char *r1, register char *r2) +{ + register int64_t i; + + if (numeric(r1) && numeric(r2)) + i = atoll(r1) - atoll(r2); + else + i = strcoll(r1, r2); + switch(op) { + case EQ: i = i==0; break; + case GT: i = i>0; break; + case GEQ: i = i>=0; break; + case LT: i = i<0; break; + case LEQ: i = i<=0; break; + case NEQ: i = i!=0; break; + } + return i? "1": "0"; +} + +static char * +_arith(int op, char *r1, char *r2) +{ + int64_t i1, i2; + register char *rv; + + if (!numeric(r1) || !numeric(r2)) + yyerror("non-numeric argument"); + i1 = atoll(r1); + i2 = atoll(r2); + + switch(op) { + case ADD: i1 = i1 + i2; break; + case SUBT: i1 = i1 - i2; break; + case MULT: i1 = i1 * i2; break; + case DIV: + if (i2 == 0) yyerror("division by zero"); + i1 = i1 / i2; break; + case REM: i1 = i1 % i2; break; + } + rv = numpr(i1); + return rv; +} + +static char * +_conj(int op, char *r1, char *r2) +{ + register char *rv = NULL; + + switch(op) { + + case OR: + if(EQL(r1, "0") + || EQL(r1, "")) + if(EQL(r2, "0") + || EQL(r2, "")) + rv = "0"; + else + rv = r2; + else + rv = r1; + break; + case AND: + if(EQL(r1, "0") + || EQL(r1, "")) + rv = "0"; + else if(EQL(r2, "0") + || EQL(r2, "")) + rv = "0"; + else + rv = r1; + break; + } + return rv; +} + +static char * +match(char *s, char *p) +{ + register char *rv; + int gotcha; + + gotcha = ematch(s, p); + if(nbra) { + if (gotcha) { + rv = smalloc(strlen(Mstring[0])+1); + 
strcpy(rv, Mstring[0]); + } else + rv = ""; + } else + rv = numpr(gotcha); + return rv; +} + +#if defined (SUS) || defined (SU3) || defined (S42) +static int +ematch(char *s, register char *p) +{ + regex_t re; + register int num; + regmatch_t bralist[2]; + int reflags = 0, val; + +#ifdef REG_ANGLES + reflags |= REG_ANGLES; +#endif +#if defined (SU3) && defined (REG_AVOIDNULL) + reflags |= REG_AVOIDNULL; +#endif + if ((num = regcomp(&re, p, reflags)) != 0) + errxx(0); + nbra = re.re_nsub; + if (regexec(&re, s, 2, bralist, 0) == 0 && bralist[0].rm_so == 0) { + if (re.re_nsub >= 1) { + num = bralist[1].rm_eo - bralist[1].rm_so; + Mstring[0] = srealloc(Mstring[0], num + 1); + strncpy(Mstring[0], s + bralist[1].rm_so, num); + Mstring[0][num] = '\0'; + } + val = chars(s, &s[bralist[0].rm_eo]); + } else + val = 0; + regfree(&re); + return val; +} +#else /* !SUS, !SU3, !S42 */ +static int +ematch(char *s, register char *p) +{ + char *expbuf; + register int num, val; + + if ((expbuf = compile(p, NULL, NULL)) == NULL) + errxx(regerrno); + if(nbra > 1) + yyerror("Too many '\\('s"); + if(advance(s, expbuf)) { + if(nbra == 1) { + p = braslist[0]; + num = braelist[0] ? braelist[0] - p : 0; + Mstring[0] = srealloc(Mstring[0], num + 1); + strncpy(Mstring[0], p, num); + Mstring[0][num] = '\0'; + } + val = chars(s, loc2); + } else + val = 0; + free(expbuf); + return(val); +} +#endif /* !SUS, !SU3, !S42 */ + +/*ARGSUSED*/ +static void +errxx(int c) +{ + yyerror("RE error"); +} + +static int +yyerror(const char *s) +{ + fprintf(stderr, "%s: %s\n", progname, s); + exit(2); +} + +static int +numeric(const char *s) +{ + if (*s == '-') + s++; + if (!isdigit(*s & 0377)) + return 0; + do + s++; + while (isdigit(*s & 0377)); + return (*s == '\0'); +} + +static int +chars(const char *s, const char *end) +{ + int count = 0, n; + wchar_t wc; + + if (mb_cur_max > 1) { + while (s < end) { + if ((n = mbtowc(&wc, s, MB_LEN_MAX)) >= 0) + count++; + s += n > 0 ? 
n : 1; + } + } else + count = end - s; + return count; +} + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) { + write(2, "no memory\n", 10); + exit(077); + } + return p; +} + +static void * +smalloc(size_t nbytes) +{ + return srealloc(NULL, nbytes); +} + +static char * +numpr(int64_t val) +{ + char *rv; + int ret; + + rv = smalloc(NUMSZ); + ret = snprintf(rv, NUMSZ, "%lld", (long long)val); + if (ret < 0 || ret >= NUMSZ) { + rv = srealloc(rv, ret + 1); + ret = snprintf(rv, ret, "%lld", (long long)val); + if (ret < 0) + yyerror("illegal number"); + } + return rv; +} + +#define next(wc, s, n) (mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowc(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? illseq() : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)) + +static int +illseq(void) +{ + yyerror("illegal byte sequence"); + /*NOTREACHED*/ + return 0; +} + +static char * +substr(char *v, const char *s, const char *w) +{ + long si, wi; + char *res; + wchar_t wc; + int n; + +#ifndef S42 + if (sysv3 == 0) + yyerror("syntax error"); +#endif + si = atoll(s); + wi = atoll(w); + while (--si) + if (*v) { + next(wc, v, n); + v += n; + } + res = v; + while (wi--) + if (*v) { + next(wc, v, n); + v += n; + } + *v = '\0'; + return res; +} + +static char * +length(const char *s) +{ + long i = 0; + char *rv; + wchar_t wc; + int n; + +#ifndef S42 + if (sysv3 == 0) + yyerror("syntax error"); +#endif + while (*s) { + next(wc, s, n); + s += n; + ++i; + } + rv = numpr(i); + return rv; +} + +static char * +eindex(const char *s, const char *t) +{ + long i, j, x; + char *rv; + wchar_t ws, wt; + int ns, nt; + +#ifndef S42 + if (sysv3 == 0) + yyerror("syntax error"); +#endif + for (i = 0, x = 0; s[i]; x++, i += ns) { + next(ws, &s[i], ns); + for (j = 0; t[j]; j += nt) { + next(wt, &t[j], nt); + if (ws == wt) { + rv = numpr(++x); + return rv; + } + } + } + return "0"; +} diff --git a/expr/mkfile b/expr/mkfile @@ -0,0 +1,10 @@ +BIN = expr 
+OBJ = expr.o +LOCAL_CFLAGS = -DSU3 +INSTALL_BIN = expr +INSTALL_MAN1 = expr.1 +CLEAN_FILES = expr.c +DEPS = yacc libcommon + +<$mkbuild/mk.default + diff --git a/find/find.1 b/find/find.1 @@ -0,0 +1,558 @@ +'\" t +.\" Sccsid @(#)find.1 1.44 (gritter) 8/14/05 +.\" Parts taken from find(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.TH FIND 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +find \- find files +.SH SYNOPSIS +.B find +.I pathname-list expression +.SH DESCRIPTION +.I Find +recursively descends +the directory hierarchy for +each pathname in the +.I pathname-list +(i.\|e., one or more pathnames) +seeking files that match a boolean +.I expression +written in the primaries given below. +In the descriptions, the argument +.I n +is used as a decimal integer +where +.I +n +means more than +.I n, +.I \-n +means less than +.I n +and +.I n +means exactly +.IR n . +.TP 10n +.BR \-name " filename" +True if the +.I filename +argument matches the current file name. +Normal +Shell +argument syntax +as described in +.IR glob (7) +may be used if escaped (watch out for +`[', `?' and `*'). +The internationalization constructs +`[[:class:]]', `[[=e=]]', and `[[.cs.]]' +are understood with +.BR /usr/5bin/s42/find , +.BR /usr/5bin/posix/find , +and +.BR /usr/5bin/posix2001/find , +but not with +.BR /usr/5bin/find . +.TP +.BR \-perm " mode" +True if the file permission flags +exactly +match the +octal number +or symbolic +.I mode +(see +.IR chmod (1)). +If +.I mode +is prefixed by a minus sign, +the flags are compared: +.IR (flags&mode)==mode . +.TP +.BR \-type " c" +True if the type of the file +is +.I c, +where +.I c +is +.sp +.TS +lfB l. 
+b block special file; +c character special file; +d directory; +D Solaris door; +f plain file; +l symbolic link; +n HP-UX network special file; +p named pipe; +s socket. +.TE +.TP +.B \-follow +Always true; +causes find to follow symbolic links. +The `\fB\-type\fR l' condition never occurs in this case. +.TP +.BR \-links " n" +True if the file has +.I n +links. +.TP +.BR \-user " uname" +True if the file belongs to the user +.I uname +(login name or numeric user ID). +.TP +.BR \-group " gname" +True if the file belongs to group +.I gname +(group name or numeric group ID). +.TP +.BR \-size " n[" c ] +True if the file is +.I n +blocks long (512 bytes per block), +or, with +.BR c , +.I n +bytes long. +.TP +.BR \-inum " n" +True if the file has inode number +.I n. +.TP +.BR \-atime " n" +True if the file has been accessed in +.I n +days. +.TP +.BR \-mtime " n" +True if the file has been modified in +.I n +days. +.TP +.BR \-ctime " n" +True if the file inode has been changed in +.I n +days. +.TP +.BR \-exec " command ... " ; +True if the executed command returns +a zero value as exit status. +The end of the command must be punctuated by an (escaped) +semicolon. +A command argument `{}' is replaced by the +current pathname. +.TP +.BR \-exec " command ... " "{} +" +Always true. +The +.B {} +argument is replaced by a set of aggregated pathnames. +Each pathname is passed to the command as a single argument. +Every time a limit of arguments is reached +by the pathnames found so far, +the command is executed, +and aggregating starts using a new set +beginning with the next pathname. +If any invocation of command +returns a non-zero exit status, +find will return a non-zero exit status +when its processing is done. +.TP +.BR \-ok " command ... " ; +Like +.B \-exec +except that the generated command is written on +the standard output, then the standard input is read +and the command executed only upon response +.BR y . 
+.TP +.B \-print +Always true; +causes the current pathname to be printed. +If no expression is given, +.B \-print +is used by default +(as a change introduced by POSIX.2). +.TP +.BR \-newer " file" +True if +the current file has been modified more recently than the argument +.I file. +.TP +.BR \-anewer " file" +True if +the current file has been accessed more recently than the argument +.I file +was modified. +This primary is an extension. +.TP +.BR \-cnewer " file" +True if a status change +has occurred on the current file +more recently than the argument +.I file +was modified. +This primary is an extension. +.TP +.B \-depth +Always true; +causes the contents of each directory +to be examined before the directory itself. +.TP +.BR \-fstype " type" +True if the current file +resides on a file system of the given type. +.TP +.B \-local +True if the file is on a local file system. +All file system types except for +.I nfs +and +.I smbfs +are currently considered local. +.TP +.B \-mount +Always true; +restricts the search to directories +that have the same device id +as the currently examined path operand. +.TP +.B \-xdev +The same as +.BR \-mount . +This primary has been introduced by POSIX. +.TP +.B \-nouser +True if the file is owned by a user +that has no login name. +.TP +.B \-nogroup +True if the file is owned by a group +that lacks a group name. +.TP +.B \-prune +Always true. +Causes the search for the current path +to be stopped once the primary is evaluated. +When combined with +.BR \-depth , +.B \-prune +has no effect. +.TP +.BR \-cpio " device" +Always true. +Writes the file on the named device +in binary cpio format (5120-byte records). +Implies +.BR \-depth . +.TP +.BR \-ncpio " device" +Always true. +Writes the file on the named device +in SVR4 ASCII cpio format (5120-byte records). +Implies +.BR \-depth . 
+.PP +The primaries may be combined using the following operators +(in order of decreasing precedence): +.TP 4 +1) +A parenthesized group of primaries and operators +(parentheses are special to the Shell and must be escaped). +.TP 4 +2) +The negation of a primary +(`!' is the unary +.I not +operator). +.TP 4 +3) +Concatenation of primaries +(the +.I and +operation +is implied by the juxtaposition of two primaries +or by an explicit +.B \-a +operator). +.TP 4 +4) +Alternation of primaries +.RB "(`" \-o "' is the" +.I or +operator). +.PP +Options have been introduced by POSIX.1-2001 +in addition to the expression operators. +They must precede the +.I pathname-list +on the command line +and have no effect on boolean expression processing. +.TP +.B \-H +Follow symbolic links given on the command line, +but do not follow symbolic links encountered during directory traversal. +.TP +.B \-L +Follow all symbolic links found, +like the +.I \-follow +primary. +.PP +With the +.I \-follow +primary or the +.I \-L +option, hierarchy loops caused by symbolic links are detected, +but only +.B /usr/5bin/posix2001/find +prints an error message. +The offending link is not followed, +and operation continues with the next directory entry found. +.SH EXAMPLES +To remove all files named +`a.out' or `*.o' that have not been accessed for a week: +.IP "" .2i +find / \\( \-name a.out \-o \-name \'*.o\' \\) +\-atime +7 \-exec rm {} \\; +.PP +The rm command is executed once for each file. +The form +.IP "" .2i +find / \\( \-name a.out \-o \-name \'*.o\' \\) +\-atime +7 \-exec rm {} + +.PP +is faster since the rm command is executed with a set of pathnames. +.PP +To find all files below the directory `documents' +that contain the regular expression `string': +.IP "" .2i +find documents \-type f \-exec grep string {} + +.PP +To find all files in the directory `home', +not descending into its subdirectories: +.IP "" .2i +find home ! 
\-name home \-prune +.PP +To check whether the file `diary' +has been updated within the last two days; +the name of the file is printed if true, +and is not printed otherwise: +.IP "" .2i +find diary \-prune \-mtime \-2 +.SH FILES +/etc/passwd +.br +/etc/group +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols in patterns with +.BR /usr/5bin/s42/find , +.BR /usr/5bin/posix/find , +and +.BR /usr/5bin/posix2001/find . +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +and character class expressions +in patterns. +.TP +.B SYSV3 +Causes the text of some diagnostic messages to be changed; +causes +.I \-ncpio +to create traditional ASCII cpio format archives. +.SH "SEE ALSO" +chmod(1), +cpio(1), +pax(1), +sh(1), +xargs(1), +stat(2), +glob(7), +locale(7) +.SH NOTES +Undesired effects can result if file names printed by +.I find +contain newline characters, +as shown by the following command sequence: +.RS +.sp +.nf +$ mkdir \-p \'dummy +> /etc\' +$ touch \'dummy +> /etc/passwd\' +$ find . \-print +\&. +\&./dummy +.sp +\&./dummy +/etc +\&./dummy +/etc/passwd +$\ +.fi +.sp +.RE +Shell scripts or utilities unaware of this problem +will operate on +.I /etc/passwd +(or other arbitrary file names) +when reading such output; +a malicious user might create such files +to read or overwrite privileged information. +To circumvent this problem, +one of the following proposals should be taken +unless the file hierarchy traversed by the +.I find +command is definitively known not to contain such file names: +.IP \(en 2 +If the output is read by the +.I xargs +utility to gain faster execution by aggregating command arguments as in +.in +2 +.sp +find . \-print | xargs \fIcommand\fR +.sp +.in -2 +a safe and equally fast substitute is the +.in +2 +.sp +find . 
\-exec \fIcommand\fR {} + +.sp +.in -2 +operand of +.IR find ; +it is not portably accepted by +.I find +implementations, though. +.IP \(en 2 +A universal solution for submitting file names to the +.I xargs +utility is given in the +.I NOTES +section of +.IR xargs (1). +.IP \(en 2 +The method employed by this script can be generalized as follows: +If the script or utility reading the output of +.I find +provides the necessary parsing capabilities, +special path prefixes can be given to the +.I find +command, such as +.in +2 +.sp +find /.//. \-print +.sp +.in -2 +for absolute path names or +.in +2 +.sp +find .//. \-print +.sp +.in -2 +for relative path names. +Since adjacent slash characters never appear +in relative file names found during directory traversal, +they can be taken as delimiters; +a line starts a new path name +only if the delimiter appears. +.IP \(en 2 +The +.I \-name +operand can be used to exclude all path names +that contain newline characters, as in +.in +2 +.sp +.nf +$ find . \-name \'* +> *\' \-prune \-o ! \-name \'* +> *\' \-print +.sp +.fi +.in -2 +Note that certain other implementations of +.I find +require a leading period in the pattern +to match file names with a leading period; +it may be necessary to exclude such patterns as well. +.IP \(en 2 +The +.I \-depth +operand cannot be combined with the +.I \-prune +operand used in the previous example. +When the directory name must be printed +after file names below that directory, +as with the +.IR cpio +command, +file names that leave the specified path hierarchy +should be filtered out: +.in +2 +.sp +find . \-depth | egrep \'^\e./\' | cpio \-oc \-O /dev/rmt/c0s0 +.sp +.in -2 +(note the escaped regular expression meta-character). +.IP \(en 2 +The +.I \-cpio +and +.I \-ncpio +operands will automatically exclude file names +that contain newline characters +with this +.I find +implementation. 
+.PP +The +.I \-print0 +operand supported by some other implementations +is considered a very limited work-around +since it does not allow the output to be processed +by utilities unaware of NUL characters; +it has therefore not been included here. diff --git a/find/find.c b/find/find.c @@ -0,0 +1,1554 @@ +/* find COMPILE: cc -o find -s -O -i find.c -lS */ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, September 2003. + */ +/* from Unix 7th Edition /usr/src/cmd/find.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SU3) +static const char sccsid[] USED = "@(#)find_su3.sl 1.45 (gritter) 5/8/06"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)find_sus.sl 1.45 (gritter) 5/8/06"; +#else +static const char sccsid[] USED = "@(#)find.sl 1.45 (gritter) 5/8/06"; +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <unistd.h> +#include <pwd.h> +#include <time.h> +#include <grp.h> +#include <stdarg.h> +#include <libgen.h> +#include <errno.h> +#include <locale.h> +#include <signal.h> +#if defined (SUS) || defined (SU3) +#include <fnmatch.h> +#endif +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +#include <mntent.h> +#endif +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <sys/param.h> +#include <sys/mount.h> +#endif +#ifdef _AIX +#include <sys/sysmacros.h> +#endif +#ifndef major +#include <sys/mkdev.h> +#endif +#if __NetBSD_Version__>= 300000000 +#include <sys/statvfs.h> +#define statfs statvfs +#endif +#include "getdir.h" +#include "atoll.h" +#define A_DAY 86400L /* a day full of seconds */ +#define EQ(x, y) (strcmp(x, y)==0) + +#ifndef 
MNTTYPE_IGNORE +#define MNTTYPE_IGNORE "" +#endif + +#ifndef S_IFDOOR +#define S_IFDOOR 0xD000 +#endif + +#ifndef S_IFNWK +#define S_IFNWK 0x9000 +#endif + +#undef ctime +#define ctime find_ctime + +static char *Pathname; + +struct aggregate { /* for exec ... {} + */ + long a_cnt; /* count of arguments */ + long a_cur; /* current position in aggregate */ + long a_csz; /* aggregate current length */ + long a_msz; /* aggregate maximum length */ + char **a_vec; /* arguments */ + char *a_spc; /* aggregate space */ + long a_maxarg; /* maximum arguments in e_vec */ +}; + +struct anode { + int (*F)(struct anode *); + union anode_l { + struct anode *L; + char *pat; + time_t t; + uid_t u; + gid_t g; + ino_t i; + nlink_t link; + off_t sz; + mode_t per; + int com; + FILE *fp; + char *fstype; + } l; + union anode_r { + struct anode *R; + int s; + pid_t pid; + struct aggregate *a; + } r; +}; +static char *Fname; +static time_t Now; +static int Argc, + Ai, + Pi; +static char **Argv; +/* cpio stuff */ +static int Cpio; + +static struct stat Statb; + +/* + * Keep track of all visited directories, to avoid loops caused by + * symbolic links and to free storage and close files after fork(). + */ +static struct visit { + struct getdb *v_db; /* getdb struct for this level */ + ino_t v_ino; /* inode number */ + int v_fd; /* file descriptor */ + dev_t v_dev; /* device id */ +} *visited; +static int vismax; /* number of members in visited */ + +/* + * For -fstype, keep track of all filesystem types known to the system. If + * we had st_fstype in struct stat as SVR4 does, this would be far more + * reliable. 
+ */ +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +static struct fstype { + dev_t fsdev; /* device id of filesystem */ + char *fstype; /* filesystem type */ +} *fstypes, *fscur; +#endif /* __linux__ || _AIX || __hpux */ + +static int Home = -1; +static int wanthome; +static mode_t um; /* user's umask */ +static const char *progname; +static int status; /* exit status */ +static int depth; /* -depth flag */ +static int Print = 1; /* implicit -print */ +static int Prune; /* -prune at this point */ +static int Mount; /* -mount, -xdev */ +static int Execplus; /* have a -exec command {} + node */ +static int HLflag; /* -H or -L option given */ +static char *Statfs; /* result of statfs() on FreeBSD */ +static int incomplete; /* encountered an incomplete statement */ +extern int sysv3; + +static int (*statfn)(const char *, struct stat *) = lstat; + +static struct anode *expr(void); +static struct anode *e1(void); +static struct anode *e2(void); +static struct anode *e3(void); +static struct anode *mk(struct anode *); +static void oper(const char **); +static char *nxtarg(int); +static int and(struct anode *); +static int or(struct anode *); +static int not(struct anode *); +static int glob(struct anode *); +static int print(struct anode *); +static int prune(struct anode *); +static int null(struct anode *); +static int mtime(struct anode *); +static int atime(struct anode *); +static int ctime(struct anode *); +static int user(struct anode *); +static int ino(struct anode *); +static int group(struct anode *); +static int nogroup(struct anode *); +static int nouser(struct anode *); +static int links(struct anode *); +static int size(struct anode *); +static int sizec(struct anode *); +static int perm(struct anode *); +static int type(struct anode *); +static int exeq(struct anode *); +static int ok(struct anode *); +static int cpio(struct anode *); +static int newer(struct anode *); +static int cnewer(struct anode *); +static int anewer(struct anode *); 
+static int fstype(struct anode *); +static int local(struct anode *); +static int scomp(long long, long long, char); +static int doex(int, struct aggregate *); +static struct aggregate *mkagg(long); +static uid_t getunum(const char *); +static gid_t getgnum(const char *); +static const char *getuser(uid_t); +static const char *getgroup(gid_t); +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +static void getfscur(dev_t); +static void getfstypes(void); +#endif /* __linux__ || _AIX || __hpux */ +static int descend(char *, struct anode *, int); +static int descend1(char *, struct anode *, int); +static int descend2(char *, struct anode *, int); +static void setpath(char *, const char *, int); +static void pr(const char *, ...); +static void er(const char *, ...); +static void usage(void); +static void *srealloc(void *, size_t); +static void mkcpio(struct anode *, const char *, int); +static void trailer(struct anode *, int); +static void mknewer(struct anode *, const char *, int (*)(struct anode *)); +static mode_t newmode(const char *ms, const mode_t pm); + +int +main(int argc, char **argv) +{ + struct anode *exlist; + struct anode nlist = { null, { 0 }, { 0 } }; + int paths; + register char *sp = 0; + int i, j; + + time(&Now); + umask(um = umask(0)); + progname = basename(argv[0]); + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + if (getenv("SYSV3") != NULL) + sysv3 = 1; + for (i = 1; i < argc; i++) { + if (argv[i][0] != '-' || argv[i][1] == '\0') + break; + if (argv[i][1] == '-') { + i++; + break; + } + for (j = 1; argv[i][j]; j++) + if (argv[i][j] != 'H' && argv[i][j] != 'L') + goto brk; + for (j = 1; argv[i][j]; j++) + HLflag = argv[i][j]; + } +brk: if (HLflag == 'L') + statfn = stat; + argc -= i - 1; + argv += i - 1; + Argc = argc; Argv = argv; + if(argc<2) { + pr("insufficient number of arguments"); + usage(); + } + for(Ai = paths = 1; Ai < argc; ++Ai, ++paths) + if(*Argv[Ai] == '-' || EQ(Argv[Ai], "(") || EQ(Argv[Ai], "!")) + break; + 
if(paths == 1) /* no path-list */ + usage(); + if(Ai<argc) { + if(!(exlist = expr())) /* parse and compile the arguments */ + er("find: parsing error"); + if(Ai<argc) { + pr("bad option %s", argv[Ai]); + usage(); + } + } else + exlist = &nlist; + if (paths > 2) + wanthome = 1; + if (wanthome && (Home = open(".", O_RDONLY)) < 0) + er("bad starting directory"); + for(Pi = 1; Pi < paths; ++Pi) { + if (Pi > 1 && Home >= 0 && fchdir(Home) < 0) + er("bad starting directory"); + setpath(Pathname, Argv[Pi], 0); + Fname = sp = Pathname; + do + if (sp[0] == '/') + Fname = &sp[1]; + while (*sp++); + descend(Pathname, exlist, 0); /* to find files that match */ + } + if(Cpio || Execplus) + trailer(exlist, 1); + exit(status); +} + +/* compile time functions: priority is expr()<e1()<e2()<e3() */ + +/*ARGSUSED*/ +static struct anode *expr(void) { /* parse ALTERNATION (-o) */ + register struct anode * p1; + struct anode n = { 0, { 0 }, { 0 } }; + + p1 = e1() /* get left operand */ ; + if(EQ(nxtarg(0), "-o")) { + const char *ops[] = { "-o", "-a", 0 }; + oper(ops); + n.F = or, n.l.L = p1, n.r.R = expr(); + return(mk(&n)); + } + else if(Ai <= Argc) --Ai; + return(p1); +} +static struct anode *e1(void) { /* parse CONCATENATION (formerly -a) */ + register struct anode * p1; + register char *a; + struct anode n = { 0, { 0 }, { 0 } }; + + p1 = e2(); + a = nxtarg(0); + if(EQ(a, "-a")) { + const char *ops[] = { "-o", "-a", 0 }; + oper(ops); +And: + n.F = and, n.l.L = p1, n.r.R = e1(); + return(mk(&n)); + } else if(EQ(a, "(") || EQ(a, "!") || (*a=='-' && !EQ(a, "-o"))) { + --Ai; + goto And; + } else if(Ai <= Argc) --Ai; + return(p1); +} +static struct anode *e2(void) { /* parse NOT (!) 
*/ + struct anode n = { 0, { 0 }, { 0 } }; + if(EQ(nxtarg(0), "!")) { + const char *ops[] = { "-o", "-a", "!", 0 }; + oper(ops); + n.F = not, n.l.L = e3(); + return(mk(&n)); + } + else if(Ai <= Argc) --Ai; + return(e3()); +} +static struct anode *e3(void) { /* parse parens and predicates */ + struct anode *p1; + struct anode n = { 0, { 0 }, { 0 } }; + long i, k; + register char *a, *b, s, *p, *q; + + a = nxtarg(0); + if(EQ(a, "(")) { + const char *ops[] = { "-o", "-a", 0 }; + oper(ops); + p1 = expr(); + a = nxtarg(1); + if(!EQ(a, ")")) goto err; + return(p1); + } + else if(EQ(a, "-depth")) { + depth = 1; + n.F = null; + } else if(EQ(a, "-follow")) { + statfn = stat; + n.F = null; + } else if(EQ(a, "-mount") || EQ(a, "-xdev")) { + Mount = 1; + n.F = null; + } else if(EQ(a, "-print")) { + Print = 0; + n.F = print; + } else if(EQ(a, "-prune")) + n.F = prune; + else if(EQ(a, "-nogroup")) + n.F = nogroup; + else if(EQ(a, "-nouser")) + n.F = nouser; + else if(EQ(a, "-local")) { +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + getfstypes(); +#endif /* __linux__ || _AIX || __hpux */ + n.F = local; + Statfs = a; + } + if (n.F) + return mk(&n); + b = nxtarg(2); + s = *b; + /*if(s=='+') b++;*/ + if(EQ(a, "-name")) + n.F = glob, n.l.pat = b; + else if(EQ(a, "-mtime")) + n.F = mtime, n.l.t = atol(b), n.r.s = s; + else if(EQ(a, "-atime")) + n.F = atime, n.l.t = atol(b), n.r.s = s; + else if(EQ(a, "-ctime")) + n.F = ctime, n.l.t = atol(b), n.r.s = s; + else if(EQ(a, "-user")) + n.F = user, n.l.u = getunum(b), n.r.s = s; + else if(EQ(a, "-inum")) + n.F = ino, n.l.i = atoll(b), n.r.s = s; + else if(EQ(a, "-group")) + n.F = group, n.l.g = getgnum(b), n.r.s = s; + else if(EQ(a, "-size")) { + n.l.sz = atoll(b), n.r.s = s; + while (b[0] && b[1]) + b++; + if (b[0] == 'c') + n.F = sizec; + else + n.F = size; + } + else if(EQ(a, "-links")) + n.F = links, n.l.link = atol(b), n.r.s = s; + else if(EQ(a, "-perm")) { + while (*b == '-') + b++; + n.F = perm, n.l.per = newmode(b, 
0), n.r.s = s; +#if defined (SUS) || defined (SU3) + if (s == '-') + n.l.per &= 07777; +#endif + } + else if(EQ(a, "-type")) { + i = b[0] == '-' || b[0] == '+' ? b[1] : b[0]; + i = i=='d' ? S_IFDIR : + i=='b' ? S_IFBLK : + i=='c' ? S_IFCHR : + i=='D' ? S_IFDOOR : + i=='f' ? S_IFREG : + i=='l' ? S_IFLNK : + i=='n' ? S_IFNWK : + i=='p' ? S_IFIFO : + i=='s' ? S_IFSOCK : + 0; + n.F = type, n.l.per = i; + } + else if (EQ(a, "-exec")) { + Print = 0; + wanthome = 1; + i = Ai - 1; + q = ""; + k = 0; + while(!EQ(p = nxtarg(1), ";")) { + if (EQ(p, "+") && EQ(q, "{}")) { + n.r.a = mkagg(k); + break; + } + q = p; + k += strlen(p) + 1; + } + n.F = exeq, n.l.com = i; + } + else if (EQ(a, "-ok")) { + Print = 0; + wanthome = 1; + i = Ai - 1; + while(!EQ(p = nxtarg(1), ";")); + n.F = ok, n.l.com = i; + } + else if(EQ(a, "-cpio")) + mkcpio(&n, b, 0); + else if(EQ(a, "-ncpio")) + mkcpio(&n, b, 1); + else if(EQ(a, "-newer")) + mknewer(&n, b, newer); + else if(EQ(a, "-anewer")) + mknewer(&n, b, anewer); + else if(EQ(a, "-cnewer")) + mknewer(&n, b, cnewer); + else if(EQ(a, "-fstype")) { +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + getfstypes(); +#endif /* __linux__ || _AIX || __hpux */ + n.F = fstype, n.l.fstype = b; + Statfs = a; + } + if (n.F) { + if (incomplete) + nxtarg(1); + return mk(&n); + } +err: pr("bad option %s", a); + usage(); + /*NOTREACHED*/ + return 0; +} +static struct anode *mk(struct anode *p) +{ + struct anode *n; + + n = srealloc(NULL, sizeof *n); + *n = *p; + return(n); +} +static void oper(const char **ops) +{ + char *a; + + a = nxtarg(-1); + while (*ops) + if (EQ(a, *ops++)) + er("operand follows operand"); + Ai--; +} + +static char *nxtarg(int must) { /* get next arg from command line */ + static int strikes = 0; + + if(must==1 && Ai>=Argc || strikes==3) + er("incomplete statement"); + if(Ai>=Argc) { + if (must >= 0) + strikes++; + incomplete = 1; + Ai = Argc + 1; + return(""); + } + return(Argv[Ai++]); +} + +/* execution time functions */ 
+static int and(register struct anode *p) +{ + return(((*p->l.L->F)(p->l.L)) && ((*p->r.R->F)(p->r.R))?1:0); +} +static int or(register struct anode *p) +{ + return(((*p->l.L->F)(p->l.L)) || ((*p->r.R->F)(p->r.R))?1:0); +} +static int not(register struct anode *p) +{ + return( !((*p->l.L->F)(p->l.L))); +} +#if !defined (SUS) && !defined (SU3) +static int glob(register struct anode *p) +{ + extern int gmatch(const char *, const char *); + return(gmatch(Fname, p->l.pat)); +} +#else /* SUS, SU3 */ +static int glob(register struct anode *p) +{ + int val; +#ifdef __GLIBC__ + /* avoid glibc's broken [^...] */ + extern char **environ; + char **savenv = environ; + char *newenv[] = { "POSIXLY_CORRECT=", NULL }; + environ = newenv; +#endif /* __GLIBC__ */ + val = fnmatch(p->l.pat, Fname, FNM_PATHNAME) == 0; +#ifdef __GLIBC__ + environ = savenv; +#endif /* __GLIBC__ */ + return val; +} +#endif /* SUS, SU3 */ +/*ARGSUSED*/ +static int print(register struct anode *p) +{ + puts(Pathname); + return(1); +} +/*ARGSUSED*/ +static int prune(register struct anode *p) +{ + if (!depth) + Prune = 1; + return(1); +} +/*ARGSUSED*/ +static int null(register struct anode *p) +{ + return(1); +} +static int mtime(register struct anode *p) +{ + return(scomp((Now - Statb.st_mtime) / A_DAY, p->l.t, p->r.s)); +} +static int atime(register struct anode *p) +{ + return(scomp((Now - Statb.st_atime) / A_DAY, p->l.t, p->r.s)); +} +static int ctime(register struct anode *p) +{ + return(scomp((Now - Statb.st_ctime) / A_DAY, p->l.t, p->r.s)); +} +static int user(register struct anode *p) +{ + return(scomp(Statb.st_uid, p->l.u, p->r.s)); +} +static int ino(register struct anode *p) +{ + return(scomp(Statb.st_ino, p->l.u, p->r.s)); +} +static int group(register struct anode *p) +{ + return(p->l.u == Statb.st_gid); +} +static int nogroup(register struct anode *p) +{ + return(getgroup(Statb.st_gid) == NULL); +} +static int nouser(register struct anode *p) +{ + return(getuser(Statb.st_uid) == NULL); +} +static 
int links(register struct anode *p) +{ + return(scomp(Statb.st_nlink, p->l.link, p->r.s)); +} +static int size(register struct anode *p) +{ + return(scomp(Statb.st_size?(Statb.st_size+511)>>9:0, p->l.sz, p->r.s)); +} +static int sizec(register struct anode *p) +{ + return(scomp(Statb.st_size, p->l.sz, p->r.s)); +} +static int perm(register struct anode *p) +{ + register int i; + i = (p->r.s=='-') ? p->l.per : 07777; /* '-' means only arg bits */ + return((Statb.st_mode & i & 07777) == p->l.per); +} +static int type(register struct anode *p) +{ + return((Statb.st_mode&S_IFMT)==p->l.per); +} +static int exeq(register struct anode *p) +{ + if (p->r.a) { + if (Pathname) { + size_t sz = strlen(Pathname) + 1; + if (p->r.a->a_csz + sz <= p->r.a->a_msz && + p->r.a->a_cur < p->r.a->a_maxarg-1) { + strcpy(p->r.a->a_vec[p->r.a->a_cur++] = + &p->r.a->a_spc[p->r.a->a_csz], + Pathname); + p->r.a->a_csz += sz; + return 1; + } else { + if (p->r.a->a_cur == 0) { + p->r.a->a_vec[p->r.a->a_cur++] = + Pathname; + p->r.a->a_vec[p->r.a->a_cur] = NULL; + } + else { + p->r.a->a_vec[p->r.a->a_cur] = NULL; + fflush(stdout); + doex(p->l.com, p->r.a); + return exeq(p); + } + } + } else { + if (p->r.a->a_cur == 0) + return 1; + p->r.a->a_vec[p->r.a->a_cur] = NULL; + } + } + fflush(stdout); /* to flush possible `-print' */ + return(doex(p->l.com, p->r.a)); +} +static int ok(struct anode *p) +{ + char c; int yes; + yes = 0; + fflush(stdout); /* to flush possible `-print' */ + fprintf(stderr, "< %s ... %s >? 
", Argv[p->l.com], Pathname); + if (read(0, &c, 1) != 1) + exit(2); + yes = c == 'y'; + if (c != '\n') + while (read(0, &c, 1) == 1 && c != '\n'); + if(yes) return(doex(p->l.com, 0)); + return(0); +} + +static int cpio(struct anode *p) +{ + if (strchr(Pathname, '\n')) { + pr("file name \"%s\" contains a newline character; " + "file not archived", Pathname); + status |= 1; + } else + fprintf(p->l.fp, "%s\n", Pathname); + return(1); +} +static int newer(register struct anode *p) +{ + return Statb.st_mtime > p->l.t; +} +static int anewer(register struct anode *p) +{ + return Statb.st_atime > p->l.t; +} +static int cnewer(register struct anode *p) +{ + return Statb.st_ctime > p->l.t; +} +static int fstype(register struct anode *p) +{ +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + return(EQ(fscur->fstype, p->l.fstype)); +#elif defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) \ + || defined (__DragonFly__) || defined (__APPLE__) + return(EQ(Statfs, p->l.fstype)); +#else + return(EQ(Statb.st_fstype, p->l.fstype)); +#endif +} +static int local(register struct anode *p) +{ +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + return(strcmp(fscur->fstype, "nfs") && strcmp(fscur->fstype, "smbfs")); +#elif defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) \ + || defined (__DragonFly__) || defined (__APPLE__) + return(strcmp(Statfs, "nfs") != 0); +#else + return(strcmp(Statb.st_fstype, "nfs") != 0); +#endif +} + +/* support functions */ +/* funny signed compare */ +static int scomp(register long long a, register long long b, register char s) +{ + if(s == '+') + return(a > b); + if(s == '-') + return(a < (b * -1)); + return(a == b); +} + +static int +doex(int com, struct aggregate *a) +{ + register int np; + register char *na; + char **oargv; + int oargc; + static char **nargv; + static int narga; + static int ccode; + pid_t pid; + + ccode = np = 0; + oargv = Argv; + oargc = com; + while (na=oargv[oargc++]) 
{ + if (np >= narga-1) + nargv = srealloc(nargv, (narga+=20) * sizeof *nargv); + if(strcmp(na, ";")==0 && oargv == Argv) break; + if(strcmp(na, "{}")==0 && oargv == Argv) { + if (a) { + oargv = a->a_vec; + oargc = 0; + } else + nargv[np++] = Pathname; + } + else nargv[np++] = na; + } + if (a) { + a->a_cur = 0; + a->a_csz = 0; + } + if (np==0) return(9); + nargv[np] = 0; + if(pid = fork()) /*parent*/ while (wait(&ccode) != pid); + else { /*child*/ + if (fchdir(Home) < 0) { + pr("bad starting directory"); + _exit(1); + } + execvp(nargv[0], nargv); + _exit(1); + } + if (a && ccode) { + if (WIFSIGNALED(ccode)) + status |= WTERMSIG(ccode) | 0200; + else if (WIFEXITED(ccode)) + status |= WEXITSTATUS(ccode); + } + return(ccode && a==NULL ? 0:1); +} + +static struct aggregate *mkagg(long baselen) +{ + static size_t envsz; + extern char **environ; + register int i; + struct aggregate *a; + + a = srealloc(NULL, sizeof *a); + if (envsz == 0) + for (i = 0; environ[i]; i++) + envsz += strlen(environ[i]) + 1; + a->a_msz = sysconf(_SC_ARG_MAX) - baselen - envsz - 2048; + a->a_spc = srealloc(NULL, a->a_msz); + a->a_maxarg = 8192; + a->a_vec = srealloc(NULL, a->a_maxarg * sizeof *a->a_vec); + a->a_csz = 0; + a->a_cur = 0; + Execplus = 1; + return a; +} + +static uid_t getunum(const char *s) { /* find user name and return number */ + struct passwd *pwd; + char *x; + uid_t u; + + if ((pwd = getpwnam(s)) != NULL) + return pwd->pw_uid; + u = strtol(s, &x, 10); + if (*x == '\0') + return u; + er("cannot find %s name", s); + /*NOTREACHED*/ + return 0; +} + +static gid_t getgnum(const char *s) { /* find group name and return number */ + struct group *grp; + char *x; + gid_t g; + + if ((grp = getgrnam(s)) != NULL) + return grp->gr_gid; + g = strtol(s, &x, 10); + if (*x == '\0') + return g; + er("cannot find %s name", s); + /*NOTREACHED*/ + return 0; +} + +#define CACHESIZE 16 + +static const char *getuser(uid_t uid) +{ + static struct { + char *name; + uid_t uid; + } cache[CACHESIZE]; + 
static int last; + int i; + struct passwd *pwd; + const char *name; + + for (i = 0; i < CACHESIZE && cache[i].name; i++) + if (cache[i].uid == uid) + goto found; + if ((pwd = getpwuid(uid)) != NULL) + name = pwd->pw_name; + else + name = ""; + if (i >= CACHESIZE) { + if (last >= CACHESIZE) + last = 0; + i = last++; + } + if (cache[i].name) + free(cache[i].name); + cache[i].name = strdup(name); + cache[i].uid = uid; +found: return cache[i].name[0] ? cache[i].name : NULL; +} + +static const char *getgroup(gid_t gid) +{ + static struct { + char *name; + gid_t gid; + } cache[CACHESIZE]; + static int last; + int i; + struct group *grp; + const char *name; + + for (i = 0; i < CACHESIZE && cache[i].name; i++) + if (cache[i].gid == gid) + goto found; + if ((grp = getgrgid(gid)) != NULL) + name = grp->gr_name; + else + name = ""; + if (i >= CACHESIZE) { + if (last >= CACHESIZE) + last = 0; + i = last++; + } + if (cache[i].name) + free(cache[i].name); + cache[i].name = strdup(name); + cache[i].gid = gid; +found: return cache[i].name[0] ? 
cache[i].name : NULL; +} + +#if defined (__linux__) || defined (_AIX) || defined (__hpux) +static void getfscur(dev_t dev) +{ + int i; + + for (i = 0; fstypes[i].fstype; i++) + if (fstypes[i].fsdev == dev) { + fscur = &fstypes[i]; + return; + } + er("filesystem type for %s unknown", Pathname); +} + +static void getfstypes(void) +{ + struct stat st; + FILE *fp; + struct mntent *mp; +#ifdef __hpux + const char mtab[] = "/etc/mnttab"; +#else /* __linux__, _AIX */ + const char mtab[] = "/etc/mtab"; +#endif /* __linux__, _AIX */ + int i = 0; + + if (fstypes) + return; + if ((fp = setmntent(mtab, "r")) == NULL) + er("cannot open %s: %s", mtab, strerror(errno)); + while ((mp = getmntent(fp)) != NULL) { + if (EQ(mp->mnt_type, MNTTYPE_IGNORE)) + continue; + if (stat(mp->mnt_dir, &st) < 0) + continue; + fstypes = srealloc(fstypes, (i+1) * sizeof *fstypes); + fstypes[i].fsdev = st.st_dev; + fstypes[i].fstype = strdup(mp->mnt_type); + i++; + } + endmntent(fp); +} +#endif /* __linux__ || _AIX || __hpux */ + +/* + * First part of descend, called for any file found. 
+ */ +static int descend(char *fname, struct anode *exlist, int level) +{ + struct stat ost; + register char *c1; + int i; + int rv = 0; + + if(statfn(fname, &Statb)<0) { + if (statfn != lstat && lstat(fname, &Statb) == 0) + nof: c1 = "cannot follow symbolic link %s: %s"; + else if (sysv3) + c1 = "stat() failed: %s: %s"; + else if (errno == ENOENT || errno == ENOTDIR) + c1 = "cannot open %s: %s"; + else + c1 = "stat() error %s: %s"; + pr(c1, Pathname, strerror(errno)); + status = 18; + return(0); + } + if (level == 0 && HLflag == 'H' && (Statb.st_mode&S_IFMT) == S_IFLNK) { + struct stat nst; + if (stat(fname, &nst) == 0) + Statb = nst; + else if (errno == ELOOP) + goto nof; + } +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) + if (Statfs != NULL) { + static struct statfs sf; + if (statfs(fname, &sf) < 0) { + pr("statfs() error %s: %s", Pathname, strerror(errno)); + status = 18; + return(0); + } + Statfs = sf.f_fstypename; + } +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + if (Mount) { + static dev_t curdev; + if (level == 0) + curdev = Statb.st_dev; + else if (curdev != Statb.st_dev) + return(0); + } + Prune = 0; + if (!depth) { +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + if (fstypes) + getfscur(Statb.st_dev); +#endif /* __linux__ || _AIX || __hpux */ + if((*exlist->F)(exlist) && Print) + puts(Pathname); + } else + ost = Statb; + if(Prune || (Statb.st_mode&S_IFMT)!=S_IFDIR) + goto reg; + if (statfn != lstat) { + for (i = 0; i < level; i++) + if (Statb.st_dev == visited[i].v_dev && + Statb.st_ino == visited[i].v_ino) { +#ifdef SU3 + pr("Symbolic link loop at %s", Pathname); + status = 18; +#endif /* SU3 */ + goto reg; + } + } + if (level >= vismax) { + vismax += 20; + visited = srealloc(visited, sizeof *visited * vismax); + } + visited[level].v_dev = Statb.st_dev; + visited[level].v_ino = Statb.st_ino; + + rv = descend1(fname, 
exlist, level); + +reg: + if (depth) { + Statb = ost; +#if defined (__linux__) || defined (_AIX) || defined (__hpux) + if (fstypes) + getfscur(Statb.st_dev); +#endif /* __linux__ || _AIX || __hpux */ + if ((*exlist->F)(exlist) && Print) + puts(Pathname); + } + return(rv); +} + +/* + * Second part of descend, called for any directory found. + */ +static int descend1(char *fname, struct anode *exlist, int level) +{ + int dir = 0; /* open directory */ + register char *c1; + struct getdb *db; + register struct direc *dp; + int endofname; + int err; + int oflags = O_RDONLY; + +#ifdef O_DIRECTORY + oflags |= O_DIRECTORY; +#endif +#ifdef O_NOFOLLOW + if (statfn == lstat && (HLflag != 'H' || level > 0)) + oflags |= O_NOFOLLOW; +#endif + if ((dir = open(fname, oflags)) < 0 || + fcntl(dir, F_SETFD, FD_CLOEXEC) < 0 || + fchdir(dir) < 0) { + if (dir >= 0) + close(dir); + else if (errno == EMFILE && descend2(fname, exlist, level)) + /* + * A possible performance improvement would be to + * call descend2() in the directory above, since + * the current method involves one fork() call per + * subdirectory at this level. The condition occurs + * so rarely that it seems hardly worth optimization + * though. + */ + return 0; + pr("cannot open %s: %s", Pathname, strerror(errno)); + status = 18; + return 0; + } + if ((db = getdb_alloc(Pathname, dir)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + visited[level].v_db = db; + visited[level].v_fd = dir; + for(c1 = Pathname; *c1; ++c1); + if(*(c1-1) == '/') + --c1; + endofname = c1 - Pathname; + + while ((dp = getdir(db, &err)) != NULL) { + if((dp->d_name[0]=='.' && dp->d_name[1]=='\0') || + (dp->d_name[0]=='.' && + dp->d_name[1]=='.' 
&& dp->d_name[2]=='\0')) + continue; + setpath(&Pathname[endofname], dp->d_name, 1); + Fname = &Pathname[endofname+1]; + if(descend(Fname, exlist, level+1)) { + if (fchdir(dir) < 0) + er("bad directory tree"); + } + } + Pathname[endofname] = '\0'; + getdb_free(db); + if (err) { + pr("cannot read dir %s: %s", Pathname, strerror(errno)); + status = 18; + } + close(dir); + visited[level].v_fd = -1; + return 1; +} + +/* + * Third part of descend, called if the limit of open file descriptors + * is exceeded (EMFILE). + */ +static int descend2(char *fname, struct anode *exlist, int level) +{ + pid_t pid; + int i; + + if (Cpio || Execplus) + trailer(exlist, 0); + fflush(stdout); + switch (pid = fork()) { + case 0: + for (i = 0; i < level-1; i++) { + if (visited[i].v_fd >= 0) { + getdb_free(visited[i].v_db); + close(visited[i].v_fd); + visited[i].v_fd = -1; + } + } + status |= 0; + descend1(fname, exlist, level); + if (Cpio || Execplus) + trailer(exlist, 0); + exit(status); + /*NOTREACHED*/ + default: + while (waitpid(pid, &i, 0) != pid); + if (i && WIFSIGNALED(i)) { + struct rlimit rl; + + rl.rlim_cur = rl.rlim_max = 0; + setrlimit(RLIMIT_CORE, &rl); + raise(WTERMSIG(i)); + pause(); + } + if (i) + status |= WEXITSTATUS(i); + return 1; + case -1: + return 0; + } +} +static void setpath(char *eos, const char *fn, int slash) +{ + static char *pathend; + char *opath; + + for (;;) { + if (eos >= pathend) { + pathend += 14; + opath = Pathname; + Pathname = srealloc(Pathname, pathend - Pathname); + eos += Pathname - opath; + pathend += Pathname - opath; + } + if (slash) { + *eos++ = '/'; + slash = 0; + } else + if ((*eos++ = *fn++) == '\0') + break; + } +} + +static void pr(const char *s, ...) +{ + va_list ap; + + fprintf(stderr, "%s: ", progname); + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + fprintf(stderr, "\n"); +} + +static void er(const char *s, ...) 
+{ + va_list ap; + + fprintf(stderr, "%s: ", progname); + va_start(ap, s); + vfprintf(stderr, s, ap); + va_end(ap); + fprintf(stderr, "\n"); + exit(1); +} + +static void usage(void) +{ + er("path-list predicate-list"); +} + +static void *srealloc(void *op, size_t n) +{ + void *np; + + if ((np = realloc(op, n)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} + +static void mkcpio(struct anode *p, const char *b, int ascii) +{ + int fd, pd[2]; + char flags[20], *cp; + + p->F = cpio; + if (*b == '\0') + return; + depth = 1; + Print = 0; + Cpio = 1; + if (pipe(pd) < 0 || (p->l.fp = fdopen(pd[1], "w")) == NULL) + er("pipe() %s", strerror(errno)); + if ((fd = creat(b, 0666)) < 0) + er("cannot create %s", b); + switch (p->r.pid = fork()) { + case -1: + er("can't fork"); + /*NOTREACHED*/ + case 0: + dup2(pd[0], 0); + close(pd[0]); + close(pd[1]); + dup2(fd, 1); + close(fd); + cp = flags; + *cp++ = '-'; + *cp++ = 'o'; + *cp++ = 'B'; + if (ascii) + *cp++ = 'c'; + if (statfn == stat) + *cp++ = 'L'; + *cp = '\0'; + execlp("cpio", "cpio", flags, NULL); + pr("cannot exec cpio: %s", strerror(errno)); + _exit(0177); + /*NOTREACHED*/ + } + close(pd[0]); + close(fd); +} + +static void +trailer(register struct anode *p, int termcpio) +{ + char *Opath = Pathname; + Pathname = 0; + if (p->F == or || p->F == and) { + trailer(p->l.L, termcpio); + trailer(p->r.R, termcpio); + } else if (p->F == not) + trailer(p->l.L, termcpio); + else if (p->F == cpio) { + if (termcpio) { + int s; + + fclose(p->l.fp); + while (waitpid(p->r.pid, &s, 0) != p->r.pid); + if (s) { + if (WIFEXITED(s)) + status |= WEXITSTATUS(s); + else if (WIFSIGNALED(s)) + status |= WTERMSIG(s) | 0200; + } + } else + fflush(p->l.fp); + } else if (p->F == exeq && p->r.a) + exeq(p); + Pathname = Opath; +} + +static void +mknewer(struct anode *p, const char *b, int (*f)(struct anode *)) +{ + if (*b && stat(b, &Statb) < 0) + er("cannot access %s", b); + p->l.t = Statb.st_mtime; + p->F = f; +} + +/* + * 
Changes by Gunnar Ritter, Freiburg i. Br., Germany, September 2003. + */ +/* from Unix 7th Edition /usr/src/cmd/chmod.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#define USER 05700 /* user's bits */ +#define GROUP 02070 /* group's bits */ +#define OTHER 00007 /* other's bits */ +#define ALL 07777 /* all */ + +#define READ 00444 /* read permit */ +#define WRITE 00222 /* write permit */ +#define EXEC 00111 /* exec permit */ +#define SETID 06000 /* set[ug]id */ +#define STICKY 01000 /* sticky bit */ + +#ifndef S_ENFMT +#define S_ENFMT 02000 /* mandatory locking bit */ +#endif + +static mode_t absol(const char **); +static mode_t who(const char **, mode_t *); +static int what(const char **); +static mode_t where(const char **, mode_t, int *, int *, const mode_t); + +static mode_t +newmode(const char *ms, const mode_t pm) +{ + register mode_t o, m, b; + int lock, setsgid = 0, cleared = 0, copy = 0; + mode_t nm, om, mm; + const char *mo = ms; + + nm = om = pm; + m = absol(&ms); + if (!*ms) { + nm = m; + goto out; + } + if ((lock = (nm&S_IFMT) != S_IFDIR && (nm&(S_ENFMT|S_IXGRP)) == S_ENFMT) + == 01) + nm &= ~(mode_t)S_ENFMT; + do { + m = who(&ms, &mm); + while (o = what(&ms)) { + b = where(&ms, nm, &lock, &copy, pm); + switch (o) { + case '+': + nm |= b & m & ~mm; + if (b & S_ISGID) + setsgid = 1; + if (lock & 04) + lock |= 02; + break; + case '-': + nm &= ~(b & m & ~mm); + if (b & S_ISGID) + setsgid = 1; + if (lock & 04) + lock = 0; + break; + case '=': + nm &= ~m; + nm |= b & m & ~mm; + lock &= ~01; + if (lock & 04) + lock |= 02; + om = 0; + if (copy == 0) + cleared = 1; + break; + } + lock &= ~04; + } + } while (*ms++ == ','); + if (*--ms) + er("bad permissions: %s", mo); +out: if (pm & S_IFDIR) { + if ((pm & S_ISGID) && setsgid == 0) + nm |= S_ISGID; + else if ((nm & S_ISGID) && setsgid == 0) + nm &= ~(mode_t)S_ISGID; + } + return(nm); +} + +static mode_t +absol(const char **ms) +{ + register int c, i; + + i = 0; + while ((c = *(*ms)++) >= '0' && c <= '7') + i = (i << 3) + (c - '0'); + (*ms)--; + return(i); +} + +static mode_t +who(const char **ms, mode_t *mp) +{ + register int m; + + m = 0; + *mp = 0; + for (;;) 
switch (*(*ms)++) { + case 'u': + m |= USER; + continue; + case 'g': + m |= GROUP; + continue; + case 'o': + m |= OTHER; + continue; + case 'a': + m |= ALL; + continue; + default: + (*ms)--; + if (m == 0) { + m = ALL; + *mp = um; + } + return m; + } +} + +static int +what(const char **ms) +{ + switch (**ms) { + case '+': + case '-': + case '=': + return *(*ms)++; + } + return(0); +} + +static mode_t +where(const char **ms, mode_t om, int *lock, int *copy, const mode_t pm) +{ + register mode_t m; + + m = 0; + *copy = 0; + switch (**ms) { + case 'u': + m = (om & USER) >> 6; + goto dup; + case 'g': + m = (om & GROUP) >> 3; + goto dup; + case 'o': + m = (om & OTHER); + dup: + *copy = 1; + m &= (READ|WRITE|EXEC); + m |= (m << 3) | (m << 6); + ++(*ms); + return m; + } + for (;;) switch (*(*ms)++) { + case 'r': + m |= READ; + continue; + case 'w': + m |= WRITE; + continue; + case 'x': + m |= EXEC; + continue; + case 'X': + if ((pm&S_IFMT) == S_IFDIR || (pm & EXEC)) + m |= EXEC; + continue; + case 'l': + if ((pm&S_IFMT) != S_IFDIR) + *lock |= 04; + continue; + case 's': + m |= SETID; + continue; + case 't': + m |= STICKY; + continue; + default: + (*ms)--; + return m; + } +} diff --git a/find/mkfile b/find/mkfile @@ -0,0 +1,8 @@ +BIN = find +OBJ = find.o +LOCAL_CFLAGS = -DGETDIR -DSU3 +INSTALL_BIN = find +INSTALL_MAN1 = find.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/fmt/fmt.1 b/fmt/fmt.1 @@ -0,0 +1,115 @@ +.\" Copyright (c) 1980 Regents of the University of California. +.\" All rights reserved. The Berkeley software License Agreement +.\" specifies the terms and conditions for redistribution. +.\" +.\" Copyright (c) 1980, 1990, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" Copyright for changes (c) 2003 +.\" Gunnar Ritter. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" This product includes software developed by Gunnar Ritter +.\" and his contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS '\fIAS IS\fR' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.\" +.\" from 4.3BSD fmt.1 6.1 (Berkeley) 4/29/85 +.\" +.\" from FMT 1 "April 29, 1985" +.TH FMT 1 "5/6/03" "Heirloom Toolchest" "User Commands" +.SH NAME +fmt \- simple text formatter +.SH SYNOPSIS +\fBfmt\fR [\fB\-c\fR] [\fB\-s\fR] [\fB\-w\ \fIwidth\fR | \fB\-\fIwidth\fR] +[\fIfile\ ...\fR] +.SH DESCRIPTION +.I Fmt +is a simple text formatter which reads the concatenation of input +files (or standard input if none are given) and produces on +standard output a version of its input with lines as close to +72 characters long as possible. The spacing at the beginning +of the input lines is preserved in the output, as are blank lines +and interword spacing. +.PP +.I Fmt +is meant to format mail messages prior to sending, but may also be useful +for other simple tasks. +For instance, +within visual mode of the +.I ex +editor (e.g. +.IR vi ) +the command +.sp + !}fmt +.sp +will reformat a paragraph, +evening the lines. +.PP +The following options can be used +to alter the behavior of +.IR fmt : +.TP +.B \-c +Select crown margin mode +(for tagged paragraphs). +Paragraphs are separated by empty lines. +The indenting of the first and second line +of each paragraph is preserved, +and following lines are indented +like the second line. +.TP +.B \-s +Split lines, +but do not join lines +(i.\|e. preserve any newline character +found in the input file). +.TP +\fB\-w\fI\ width\fR +Set the length of generated output lines to +.I width +characters. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters, +the width of characters, +and the set of word-separating characters. +.SH "SEE ALSO" +nroff(1), +mailx(1), +vi(1) +.SH NOTES +The program was designed to be simple and fast \- for more complex +operations, the standard text processors are likely to be more appropriate. +.PP +The original version of this program was written by +Kurt Shoens. 
diff --git a/fmt/fmt.c b/fmt/fmt.c @@ -0,0 +1,678 @@ +/* + * This code contains changes by + * Gunnar Ritter, Freiburg i. Br., Germany, April 2003. All rights reserved. + * + * Conditions 1, 2, and 4 and the no-warranty notice below apply + * to these changes. + * + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* + * Copyright (c) 1980 Regents of the University of California. + * All rights reserved. The Berkeley software License Agreement + * specifies the terms and conditions for redistribution. + */ + +/* from 4.3BSD fmt.c 5.2 (Berkeley) 6/21/85 */ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)fmt.sl 1.9 (gritter) 5/29/05"; + +#include <stdio.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> +#include <ctype.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> + +#ifdef __GLIBC__ +#ifdef _IO_putc_unlocked +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif +#endif + +#include <iblok.h> +#include <asciitype.h> + +/* + * fmt -- format the concatenation of input files or standard input + * onto standard output. Designed for use with Mail ~| + * + * Syntax: fmt [ -width ] [ name ... 
] + * Author: Kurt Shoens (UCB) 12/7/78 + */ + +static int pfx; /* Current leading blank count */ +static long long lineno; /* Current input line */ +static int mark; /* we saw a head line */ +static long width = 72; /* Width that we will not exceed */ +static int cflag; /* crown margin mode */ +static int sflag; /* split only */ +static const char *progname; /* argv0 */ +static int mb_cur_max; + + +static const char *headnames[] = {"To", "Subject", "Cc", "Bcc", "bcc", 0}; + +static void setwidth(const char *); +static void usage(void); +static void fmt(struct iblok *); +static void prefix(const wchar_t *); +static void split(const wchar_t *); +static void setout(void); +static void pack(const wchar_t *); +static void oflush(void); +static void tabulate(wchar_t *); +static void leadin(void); +static int chkhead(const char *, const wchar_t *); +static int fromline(const wchar_t *); +static size_t colwidth(const wchar_t *); +static size_t colwidthn(const wchar_t *, const wchar_t *); +static void growibuf(void); +static void growobuf(void); + +/* + * Drive the whole formatter by managing input files. Also, + * cause initialization of the output stuff and flush it out + * at the end. 
+ */ + +int +main(int argc, char **argv) +{ + register struct iblok *fi; + register int errs = 0, i; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + setout(); + lineno = 1; + for (i = 1; i < argc && argv[i][0] == '-' && argv[i][1]; i++) { + if (argv[i][1] == '-' && argv[i][2] == '\0') { + i++; + break; + } + nopt: switch (argv[i][1]) { + case '\0': + continue; + case 'c': + cflag = 1; + break; + case 's': + sflag = 1; + break; + case 'w': + if (argv[i][2]) { + setwidth(&argv[i][2]); + continue; + } else if (i < argc) { + setwidth(argv[++i]); + continue; + } else + setwidth(NULL); + break; + case '0': + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + setwidth(&argv[i][1]); + continue; + default: + usage(); + exit(2); + } + argv[i]++; + goto nopt; + } + if (i < argc) { + while (i < argc) { + if ((fi = ib_open(argv[i], 0)) == NULL) { + perror(argv[i]); + errs |= 1; + } else + fmt(fi); + i++; + } + } else { + if ((fi = ib_alloc(0, 0)) == NULL) { + perror("stdin"); + errs |= 1; + } else + fmt(fi); + } + oflush(); + exit(errs); +} + +static void +setwidth(const char *s) +{ + char *x; + + if (s == NULL || (width = strtol(s, &x, 10), + width <= 0 || + *x != '\0' || *s == '+' || *s == '-')) { + usage(); + fprintf(stderr, " Non-numeric character found " + "in width specification\n"); + exit(2); + } +} + +static void +usage(void) +{ + fprintf(stderr, + "usage: %s [-c] [-s] [-w width | -width] [inputfile...]\n", + progname); +} + +static char * +getvalid(struct iblok *ip, wint_t *wp, int *mp) +{ + char *cp; + + do + cp = ib_getw(ip, wp, mp); + while (cp && *wp == WEOF); + return cp; +} + +#define get(mp, fi, c, m, b) (mp = mb_cur_max > 1 ? getvalid(fi, &c, &m) : \ + (b = c = ib_get(fi), m = 1, c != (wint_t)EOF ? 
&b : 0)) + +static int ibufsize; +static wchar_t *linebuf; +static wchar_t *canonb; + +/* + * Read up characters from the passed input file, forming lines, + * doing ^H processing, expanding tabs, stripping trailing blanks, + * and sending each line down for analysis. + */ +static void +fmt(struct iblok *fi) +{ + register int p, p2; + wint_t c; + register long col; + char *mp; + int m; + char b; + + get(mp, fi, c, m, b); + while (c != (wint_t)EOF) { + + /* + * Collect a line, doing ^H processing. + * Leave tabs for now. + */ + + p = 0; + while (c != '\n' && c != (wint_t)EOF) { + if (c == '\b') { + get(mp, fi, c, m, b); + continue; + } + if (!(mb_cur_max > 1 ? iswprint(c) : isprint(c)) && + c != '\t') { + get(mp, fi, c, m, b); + continue; + } + if (p >= ibufsize) + growibuf(); + linebuf[p++] = c; + get(mp, fi, c, m, b); + } + if (p >= ibufsize) + growibuf(); + linebuf[p] = '\0'; + + /* + * Toss anything remaining on the input line. + */ + + while (c != '\n' && c != (wint_t)EOF) + get(mp, fi, c, m, b); + + /* + * Expand tabs on the way to canonb. + */ + + col = 0; + p = p2 = 0; + while (c = linebuf[p++]) { + if (c != '\t') { + if (mb_cur_max > 1) + col += wcwidth(c); + else + col++; + if (p2 >= ibufsize) + growibuf(); + canonb[p2++] = c; + continue; + } + do { + if (p2 >= ibufsize) + growibuf(); + canonb[p2++] = ' '; + col++; + } while ((col & 07) != 0); + } + + /* + * Swipe trailing blanks from the line. + */ + + for (p2--; p2 >= 0 && canonb[p2] == ' '; p2--) + ; + if (p2 >= ibufsize-1) + growibuf(); + canonb[++p2] = '\0'; + prefix(canonb); + if (c != (wint_t)EOF) + get(mp, fi, c, m, b); + } +} + +/* + * Take a line devoid of tabs and other garbage and determine its + * blank prefix. If the indent changes, call for a linebreak. + * If the input line is blank, echo the blank line on the output. + * Finally, if the line minus the prefix is a mail header, try to keep + * it on a line by itself. 
+ */ + +static void +prefix(const wchar_t *line) +{ + register const wchar_t *cp; + register const char **hp; + register long np; + register int h; + static int nlpp; /* number of lines on current paragraph */ + + if (wcslen(line) == 0) { + nlpp = 0; + oflush(); + putchar('\n'); + mark = 0; + return; + } + for (cp = line; *cp == ' '; cp++) + ; + np = cp - line; + + /* + * The following horrible expression attempts to avoid linebreaks + * when the indent changes due to a paragraph. + */ + + if (!cflag && np != pfx && (np > pfx || abs(pfx-np) > 8)) + oflush(); + if (h = fromline(cp)) + oflush(), mark = 1; + else if (mark) { + for (hp = &headnames[0]; *hp != NULL; hp++) + if (chkhead(*hp, cp)) { + h = 1; + oflush(); + break; + } + } + if (!h && (h = (*cp == '.' || sflag))) + oflush(); + if (!cflag || nlpp < 2) + pfx = np; + split(cp); + if (h) + oflush(); + nlpp++; + lineno++; +} + +/* + * Split up the passed line into output "words" which are + * maximal strings of non-blanks with the blank separation + * attached at the end. Pass these words along to the output + * line packer. + */ + +static wchar_t *word; + +static void +split(const wchar_t *line) +{ + register const wchar_t *cp; + register wchar_t *cp2; + + cp = line; + while (*cp) { + cp2 = word; + + /* + * Collect a 'word,' allowing it to contain escaped + * white space. + */ + + while (*cp && *cp != ' ') { + if (*cp == '\\' && iswspace(cp[1])) + *cp2++ = *cp++; + *cp2++ = *cp++; + } + + /* + * Guarantee a space at end of line. + * Two spaces after end of sentence punctuation. + */ + + if (*cp == '\0') { + *cp2++ = ' '; + if (strchr(".:!?", cp[-1])) + *cp2++ = ' '; + } + while (*cp == ' ') + *cp2++ = *cp++; + *cp2 = '\0'; + pack(word); + } +} + +/* + * Output section. + * Build up line images from the words passed in. Prefix + * each line with correct number of blanks. The buffer "outbuf" + * contains the current partial line image, including prefixed blanks. 
+ * "outp" points to the next available space therein. When outp is NOSTR, + * there ain't nothing in there yet. At the bottom of this whole mess, + * leading tabs are reinserted. + */ + +static int obufsize; +static wchar_t *outbuf; /* Sandbagged output line image */ +static wchar_t *outp; /* Pointer in above */ + +/* + * Initialize the output section. + */ + +static void +setout(void) +{ + outp = NULL; +} + +/* + * Pack a word onto the output line. If this is the beginning of + * the line, push on the appropriately-sized string of blanks first. + * If the word won't fit on the current line, flush and begin a new + * line. If the word is too long to fit all by itself on a line, + * just give it its own and hope for the best. + */ + +static void +pack(const wchar_t *word) +{ + register const wchar_t *cp; + register long s, t; + + if (outp == NULL) + leadin(); + t = colwidth(word); + s = colwidthn(outbuf, outp); + if (t+s <= width) { + + /* + * In like flint! + */ + + for (cp = word; *cp; cp++) { + if (outp >= &outbuf[obufsize]) + growobuf(); + *outp++ = *cp; + } + return; + } + if (s > pfx) { + oflush(); + leadin(); + } + for (cp = word; *cp; cp++) { + if (outp >= &outbuf[obufsize]) + growobuf(); + *outp++ = *cp; + } +} + +/* + * If there is anything on the current output line, send it on + * its way. Set outp to NULL to indicate the absence of the current + * line prefix. + */ + +static void +oflush(void) +{ + if (outp == NULL) + return; + if (outp >= &outbuf[obufsize]) + growobuf(); + *outp = '\0'; + tabulate(outbuf); + outp = NULL; +} + +/* + * Take the passed line buffer, insert leading tabs where possible, and + * output on standard output (finally). + */ + +static void +tabulate(wchar_t *line) +{ + register wchar_t *cp; + register int b, t; + + /* + * Toss trailing blanks in the output line. + */ + + cp = line + wcslen(line) - 1; + while (cp >= line && *cp == ' ') + cp--; + *++cp = '\0'; + + /* + * Count the leading blank space and tabulate. 
+ */ + + for (cp = line; *cp == ' '; cp++) + ; + b = cp-line; + t = b >> 3; + b &= 07; + if (t > 0) + do + putchar('\t'); + while (--t); + if (b > 0) + do + putchar(' '); + while (--b); + while (*cp) { + if (mb_cur_max > 1 && *cp & ~(wchar_t)0177) { + char mb[MB_LEN_MAX]; + int i, n; + n = wctomb(mb, *cp); + for (i = 0; i < n; i++) + putchar(mb[i]); + } else + putchar(*cp); + cp++; + } + putchar('\n'); +} + +/* + * Initialize the output line with the appropriate number of + * leading blanks. + */ + +static void +leadin(void) +{ + register long b; + + if (outbuf == 0) + growobuf(); + for (b = 0; b < pfx; b++) { + if (b >= obufsize) + growobuf(); + outbuf[b] = ' '; + } + outp = &outbuf[b]; +} + +/* + * Is s2 the mail header field name s1? + */ + +static int +chkhead(register const char *s1, register const wchar_t *s2) +{ + + while (*s1 && *s1++ == *s2++); + if (*s1 != '\0') + return 0; + return 1; +} + +/* + * Sloppy recognition of Unix From_ lines (not according to the POSIX.2 + * mailx specification, but oriented on actual Unix tradition). 
We match + * the ERE + * ^From .* [A-Z][a-z][a-z] [A-Z][a-z][a-z] \ + * [0-9 ]?[0-9] [0-9][0-9]:[0-9][0-9]:[0-9][0-9] + * Returns 1 when the string at cp has this shape, 0 otherwise. + */ + +static int +fromline(const wchar_t *cp) +{ + if (cp[0] != 'F' || cp[1] != 'r' || cp[2] != 'o' || cp[3] != 'm' || + cp[4] != ' ') + return 0; + cp += 5; /* past "From " */ + while (*cp && *cp != ' ') /* skip the following blank-free field */ + cp++; + if (*cp++ != ' ') + return 0; + if (!upperchar(cp[0]) || !lowerchar(cp[1]) || !lowerchar(cp[2]) || + cp[3] != ' ' || + !upperchar(cp[4]) || !lowerchar(cp[5]) || !lowerchar(cp[6]) || + cp[7] != ' ') + return 0; + cp += 8; /* past the two three-letter names and their blanks */ + if (digitchar(*cp) || *cp == ' ') /* optional leading digit or pad blank */ + cp++; + if (!digitchar(cp[0]) || cp[1] != ' '|| + !digitchar(cp[2]) || !digitchar(cp[3]) || + cp[4] != ':' || + !digitchar(cp[5]) || !digitchar(cp[6]) || + cp[7] != ':' || + !digitchar(cp[8]) || !digitchar(cp[9])) + return 0; + return 1; +} +/* + * Width of the NUL-terminated wide string cp: the sum of wcwidth() + * over its characters when mb_cur_max > 1, otherwise its length in + * characters. + * NOTE(review): wcwidth() returns -1 for non-printable characters and + * that value is added unchanged -- confirm this is intended. + */ +static size_t +colwidth(const wchar_t *cp) +{ + size_t n = 0; + + if (mb_cur_max > 1) + while (*cp) + n += wcwidth(*cp++); + else + n = wcslen(cp); + return n; +} +/* + * Like colwidth(), but for the characters from bot up to (not + * including) top. + */ +static size_t +colwidthn(const wchar_t *bot, const wchar_t *top) +{ + size_t n = 0; + + if (mb_cur_max > 1) + while (bot < top) + n += wcwidth(*bot++); + else + n = top - bot; + return n; +} +/* + * Enlarge word, linebuf and canonb by 128 elements each; prints a + * message and exits with status 1 if any realloc() fails. + */ +static void +growibuf(void) +{ + ibufsize += 128; + if ((word = realloc(word, ibufsize * sizeof *word)) == 0 || + (linebuf = realloc(linebuf, ibufsize * sizeof *linebuf)) == 0 || + (canonb = realloc(canonb, ibufsize * sizeof *canonb)) == 0) { + fprintf(stderr, "%s: input line too long\n", progname); + exit(1); + } +} +/* + * Enlarge outbuf by 128 elements; prints a message and exits with + * status 1 if realloc() fails. When outp is set, its offset into + * outbuf is kept across the reallocation. + */ +static void +growobuf(void) +{ + int diff = 0; /* offset of outp within outbuf */ + + if (outp != NULL) + diff = outp - outbuf; + obufsize += 128; + if ((outbuf = realloc(outbuf, obufsize * sizeof *outbuf)) == 0) { + fprintf(stderr, "%s: output line too long\n", progname); + exit(1); + } + if (outp != NULL) + outp = &outbuf[diff]; +} diff --git a/fmt/mkfile b/fmt/mkfile @@ -0,0 +1,7 @@ +BIN = fmt +OBJ = fmt.o +INSTALL_BIN = fmt +INSTALL_MAN1 = fmt.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/grep/ac.c b/grep/ac.c @@
-0,0 +1,578 @@ +/* + * Aho-Corasick algorithm derived from Unix 32V /usr/src/cmd/fgrep.c, + * additionally incorporating the fix from the v7 addenda tape. + * + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, September 2002. + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)fgrep.sl 2.10 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <limits.h> +#include "grep.h" +#include "alloc.h" + +#include <mbtowi.h> + +#define MAXSIZ 256 /* states allocated per woverflo() call */ +#define QSIZE 128 /* queue slots added per qoverflo() call */ +/* + * One state of the matching automaton. + */ +struct words { + struct words *nst; /* state entered when inp matches */ + struct words *link; /* next alternative tried when inp does not match */ + struct words *fail; /* state tried when no alternative matches */ + int inp; /* input character this state accepts */ + char out; /* nonzero when reaching this state is a match */ +}; + +static struct words *w, *wcur; +static struct words *smax; +static struct words *q; + +static void ac_build(void); +static int ac_match(const char *, size_t); +static int ac_matchw(const char *, size_t); +static int ac_range(struct iblok *, char *); +static int ac_rangew(struct iblok *, char *); +static void cgotofn(void); +static void check(int, int); +static void woverflo(void); +static void qoverflo(struct words ***queue, int *qsize); +static void cfail(void); +static int a0_match(const char *, size_t); +static int a1_match(const char *, size_t); +/* + * Install ac_build as the builder and pick the matcher: ac_matchw when + * mbcode is set, ac_match otherwise. + */ +void +ac_select(void) +{ + build = ac_build; + match = mbcode ?
ac_matchw : ac_match; + matchflags &= ~MF_NULTERM; + matchflags |= MF_LOCONV; +} +/* + * Build the automaton with cgotofn() and cfail(). Two cases bypass it: + * if e0 carries E_NULL the matcher becomes a0_match (always 0), and if + * any expression has length 0 while xflag is unset it becomes a1_match + * (always 1). + */ +static void +ac_build(void) +{ + struct expr *e; + + if (e0->e_flg & E_NULL) { + match = a0_match; + return; + } + for (e = e0; e; e = e->e_nxt) { + if (e->e_len == 0 && !xflag) { + match = a1_match; + return; + } + } + cgotofn(); + cfail(); + if (!iflag) + range = mbcode ? ac_rangew : ac_range; +} +/* + * Run the automaton over the sz bytes at line, followed by one '\n'. + * Returns 1 on a match and 0 otherwise; with xflag set a match only + * counts if no failure transition was taken and the whole line was + * consumed. + */ +static int +ac_match(const char *line, size_t sz) +{ + register const char *p; + register int z; + register struct words *c; + int failed; + + p = line; + failed = 0; + c = w; + if (p == &line[sz]) + z = '\n'; + else + z = *p & 0377; + for (;;) { + nstate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + if (failed || p < &line[sz]) + return 0; + } + return 1; + } + if (++p >= &line[sz]) { + if (z == '\n') + return 0; + else + z = '\n'; + } else + z = *p & 0377; + } +} +/* + * Byte-wise scan of the buffer from ip->ib_cur up to last, one + * '\n'-terminated line at a time, calling outline() for each selected + * line. Returns 1 after the first selected line when qflag or lflag is + * set, and 0 once the position passes last. + */ +static int +ac_range(struct iblok *ip, char *last) +{ + register char *p; + register struct words *c; + int failed; + + p = ip->ib_cur; + lineno++; + failed = 0; + c = w; + for (;;) { + nstate: + if (c->inp == (*p & 0377)) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == (*p & 0377)) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + register char *ep = p; + while (*ep != '\n') + ep++; + if ((failed || ep > p) && vflag == 0) { + ip->ib_cur = &ep[1]; + goto nogood; + } + } + if (vflag == 0) { + succeed: outline(ip, last, p - ip->ib_cur); + if (qflag || lflag) + return 1; + } else { + ip->ib_cur = p; + while (*ip->ib_cur++ != '\n'); + }
+ nogood: if ((p = ip->ib_cur) > last) + return 0; + lineno++; + c = w; + failed = 0; + continue; + } + if (*p++ == '\n') { + if (vflag) { + p--; + goto succeed; + } + if ((ip->ib_cur = p) > last) + return 0; + lineno++; + c = w; + failed = 0; + } + } +} +/* + * Variant of ac_match() that decodes bytes with the high bit set + * through mbtowi(); a sequence mbtowi() rejects is consumed as one + * byte and fed to the automaton as WEOF. + */ +static int +ac_matchw(const char *line, size_t sz) +{ + register const char *p; + wint_t z; + register struct words *c; + int failed, n = 0; + + p = line; + failed = 0; + c = w; + if (p == &line[sz]) + z = '\n'; + else { + if (*p & 0200) { + if ((n = mbtowi(&z, p, &line[sz] - p)) < 0) { + n = 1; + z = WEOF; + } + } else { + z = *p; + n = 1; + } + } + for (;;) { + nstate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + if (failed || p < &line[sz]) + return 0; + } + return 1; + } + p += n; + if (p >= &line[sz]) { + if (z == '\n') + return 0; + else + z = '\n'; + } else { + if (*p & 0200) { + if ((n = mbtowi(&z, p, &line[sz] - p)) < 0) { + n = 1; + z = WEOF; + } + } else { + z = *p; + n = 1; + } + } + } +} +/* + * Variant of ac_range() that decodes bytes with the high bit set + * through mbtowi(), in the same way as ac_matchw(). + */ +static int +ac_rangew(struct iblok *ip, char *last) +{ + register char *p; + wint_t z; + register struct words *c; + int failed, n = 0; + + p = ip->ib_cur; + lineno++; + failed = 0; + c = w; + for (;;) { + nstate: + if (*p & 0200) { + if ((n = mbtowi(&z, p, last + 1 - p)) < 0) { + n = 1; + z = WEOF; + } + } else { + z = *p; + n = 1; + } + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto nstate; + } + else { + c = c->fail; + failed = 1; + if (c==0) { + c = w; + istate: + if (c->inp == z) { + c = c->nst; + } + else if (c->link != 0) { + c = c->link; + goto istate; + } + } + else goto nstate; + } + if (c->out) { + if (xflag) { + register char *ep = p; + while (*ep != '\n') + ep++; + if ((failed ||
ep > p) && vflag == 0) { + ip->ib_cur = &ep[1]; + goto nogood; + } + } + if (vflag == 0) { + succeed: outline(ip, last, p - ip->ib_cur); + if (qflag || lflag) + return 1; + } else { + ip->ib_cur = p; + while (*ip->ib_cur++ != '\n'); + } + nogood: if ((p = ip->ib_cur) > last) + return 0; + lineno++; + c = w; + failed = 0; + continue; + } + p += n; + if (p[-n] == '\n') { + if (vflag) { + p--; + goto succeed; + } + if ((ip->ib_cur = p) > last) + return 0; + lineno++; + c = w; + failed = 0; + } + } +} +/* + * Read the word list through nextch() and build the states: one chain + * of nst pointers per word, with link chaining the alternatives at each + * position. A word ends at '\n' or EOF; its last state gets out = 1. + * With xflag set the terminating '\n' is entered as a transition too. + */ +static void +cgotofn(void) +{ + register int c; + register struct words *s; + + woverflo(); + s = smax = w = wcur; +nword: for(;;) { + c = nextch(); + if (c==EOF) + return; + if (c == '\n') { + if (xflag) { + for(;;) { + if (s->inp == c) { + s = s->nst; + break; + } + if (s->inp == 0) goto nenter; + if (s->link == 0) { + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->link = smax; + s = smax; + goto nenter; + } + s = s->link; + } + } + s->out = 1; + s = w; + } else { + loop: if (s->inp == c) { + s = s->nst; + continue; + } + if (s->inp == 0) goto enter; + if (s->link == 0) { + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->link = smax; + s = smax; + goto enter; + } + s = s->link; + goto loop; + } + } + + enter: + do { + s->inp = c; + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->nst = smax; + s = smax; + } while ((c = nextch()) != '\n' && c!=EOF); + if (xflag) { + nenter: s->inp = '\n'; + if (++smax >= &wcur[MAXSIZ]) + woverflo(); + s->nst = smax; + } + smax->out = 1; + s = w; + if (c != EOF) + goto nword; +} +/* + * Exit with status 2 when val + incr, taken as unsigned, reaches + * INT_MAX. + * NOTE(review): the sum is formed in int before the cast -- confirm + * callers cannot make it overflow. + */ +static void +check(int val, int incr) +{ + if ((unsigned)(val + incr) >= INT_MAX) { + fprintf(stderr, "%s: wordlist too large\n", progname); + exit(2); + } +} +/* + * Allocate a fresh zero-filled array of MAXSIZ states and point both + * wcur and smax at it. + */ +static void +woverflo(void) +{ + wcur = smax = scalloc(MAXSIZ, sizeof *smax); +} +/* + * Grow *queue by QSIZE slots and update *qsize accordingly. + */ +static void +qoverflo(struct words ***queue, int *qsize) +{ + check(*qsize, QSIZE); + *queue = srealloc(*queue, (*qsize += QSIZE) * sizeof **queue); +} +/* + * Fill in the fail pointers of the states built by cgotofn(), working + * through them with a queue that starts with the successors of the + * root alternatives. A state whose fail target has out set gets out + * set as well. + */ +static void +cfail(void) +{ + struct words **queue = NULL; +
int front, rear; + int qsize = 0; + struct words *state; + int bstart; + register int c; /* int like words.inp: a char would truncate or sign-flip values above CHAR_MAX */ + register struct words *s; + qoverflo(&queue, &qsize); + s = w; + front = rear = 0; +init: if ((s->inp) != 0) { + queue[rear++] = s->nst; + if (rear >= qsize - 1) + qoverflo(&queue, &qsize); + } + if ((s = s->link) != 0) { + goto init; + } + + while (rear!=front) { + s = queue[front]; + if (front == qsize-1) + qoverflo(&queue, &qsize); + front++; + cloop: if ((c = s->inp) != 0) { + bstart = 0; + q = s->nst; + queue[rear] = q; + if (front < rear) { + if (rear >= qsize-1) + qoverflo(&queue, &qsize); + rear++; + } else + if (++rear == front) + qoverflo(&queue, &qsize); + state = s->fail; + floop: if (state == 0) { + state = w; + bstart = 1; + } + if (state->inp == c) { + qloop: q->fail = state->nst; + if ((state->nst)->out == 1) + q->out = 1; + if ((q = q->link) != 0) goto qloop; + } + else if ((state = state->link) != 0) + goto floop; + else if (bstart == 0) { + state = 0; + goto floop; + } + } + if ((s = s->link) != 0) + goto cloop; + } + free(queue); +} + +/*ARGSUSED*/ +static int +a0_match(const char *str, size_t sz) +{ + return 0; +} + +/*ARGSUSED*/ +static int +a1_match(const char *str, size_t sz) +{ + return 1; +} diff --git a/grep/alloc.c b/grep/alloc.c @@ -0,0 +1,81 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software.
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)alloc.c 1.3 (gritter) 4/17/03> */ + +/* + * Memory allocation routines. + */ + +#include <stdlib.h> +#include <unistd.h> +#include "alloc.h" + +/* + * Memory allocation with check. + * Returns the result of malloc(nbytes). If that is NULL, writes + * "Out of memory" to file descriptor 2 and exits with status 077. + * NOTE(review): a NULL result for a zero-byte request would take the + * same path -- confirm no caller passes 0. + */ +void * +smalloc(size_t nbytes) +{ + void *p; + + if ((p = (void *)malloc(nbytes)) == NULL) { + write(2, "Out of memory\n", 14); + exit(077); + } + return p; +} + +/* + * Memory reallocation with check. + * Returns the result of realloc(ptr, nbytes). If that is NULL, writes + * "Out of memory" to file descriptor 2 and exits with status 077. + */ +void * +srealloc(void *ptr, size_t nbytes) +{ + void *cp; + + if ((cp = (void *)realloc(ptr, nbytes)) == NULL) { + write(2, "Out of memory\n", 14); + exit(077); + } + return cp; +} + +/* + * Zero-filled allocation with check. + * Returns the result of calloc(nelem, elsize). If that is NULL, writes + * "Out of memory" to file descriptor 2 and exits with status 077. + */ +void * +scalloc(size_t nelem, size_t elsize) +{ + void *cp; + + if ((cp = calloc(nelem, elsize)) == NULL) { + write(2, "Out of memory\n", 14); + exit(077); + } + return cp; +} diff --git a/grep/alloc.h b/grep/alloc.h @@ -0,0 +1,34 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software.
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)alloc.h 1.3 (gritter) 4/17/03> */ + +#include <sys/types.h> +/* + * Allocation wrappers declared for use with "alloc.h". + * NOTE(review): presumably these are the checked variants defined in + * alloc.c, which exit instead of returning NULL -- confirm. + */ +extern void *smalloc(size_t); +extern void *srealloc(void *, size_t); +extern void *scalloc(size_t, size_t); diff --git a/grep/config.h b/grep/config.h @@ -0,0 +1,4 @@ +/* Auto-generated by make. Do not edit! */ +#include <wchar.h> +#include <wctype.h> +#define LONGLONG diff --git a/grep/egrep.1 b/grep/egrep.1 @@ -0,0 +1,388 @@ +'\" t +.\" Sccsid @(#)egrep.1 1.42 (gritter) 8/14/05 +.\" Parts taken from grep(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission.
+.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH EGREP 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +egrep \- search a file for a pattern using full regular expressions +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/5bin/egrep\fR [\fB\-e\fI\ pattern_list\fR\ ...] +[\fB\-f\fI\ pattern_file\fR] [\fB\-bchilnrRvz\fR] +[\fIpattern_list\fR] [\fIfile\fR\ ...] +.HP +.ad l +.PD 0 +\fB/usr/5bin/posix/egrep\fR \fB\-e\fI\ pattern_list\fR\ ... +[\fB\-f\fI\ pattern_file\fR] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/egrep\fR \fB\-f\fI\ pattern_file\fR +[\fB\-e\fI\ pattern_list\fR\ ...] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/egrep\fR [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] [\fB\-bhinsrRvxz\fR] +\fIpattern_list\fR [\fIfile\fR\ ...] +.br +.PD +.ad b +.hy 1 +.SH DESCRIPTION +The +.B egrep +command searches the lines of the specified files +(or of standard input) +for occurrences of +.I pattern. +The default behavior is to print each matching line to standard output. 
+.PP +The +.B /usr/5bin/egrep +command accepts full regular expressions; +it uses a deterministic algorithm with moderate space requirements. +.PP +The +.B /usr/5bin/posix/egrep +command accepts extended regular expressions. +It uses a deterministic algorithm with moderate space requirements +unless the expression includes multi-character collating elements, +which cause the use of a nondeterministic algorithm. +.PP +.B /usr/5bin/s42/egrep +and +.B /usr/5bin/posix2001/egrep +are identical to +.BR /usr/5bin/posix/egrep . +.SS "Full Regular Expressions" +.PP +In the following description `character' excludes +newline: +.IP 1. +A \fB\e\fR followed by a single character +matches that character. +.IP 2. +The character \fB^\fR +(\fB$\fR) matches the beginning (end) of a line +as an \fIanchor\fR. +.IP 3. +A +.B .\& +matches any character. +.IP 4. +A single character not otherwise endowed with special +meaning matches that character. +.IP 5. +A string enclosed in brackets \fB[\|]\fR +forms a \fIbracket expression\fR that +matches any single character from the string. +Ranges of ASCII character codes may be abbreviated +as in `\fIa\fB\-\fIz0\fB\-\fI9\fR'. +A ] +may occur only as the first character of the string. +A literal \- must be placed where it can't be +mistaken as a range indicator. +.IP 6. +A regular expression followed by \fB*\fR (\fB+\fR, \fB?\fR) matches a sequence +of 0 or more (1 or more, 0 or 1) +matches of the regular expression. +.IP 7. +Two regular expressions concatenated +match a match of the first followed by a match of +the second. +.IP 8. +Two regular expressions separated by \fB|\fR or newline +match either a match for the first or a match for the +second (\fIalternation\fR). +.IP 9. +A regular expression enclosed in parentheses \fB(\|)\fR +matches a match for the regular expression (\fIgrouping\fR). +.LP +The order of precedence of operators +is [\|] then (\|) then +*+? then concatenation then | and newline. 
+.SS "Extended Regular Expressions" +Extended Regular Expressions add the following features +to Full Regular Expressions: +.IP 10. +A regular expression +followed by \fB{\fIm\fB,\fIn\fB}\fR +forms an \fIinterval expression\fR that +matches a sequence of \fIm\fR through \fIn\fR matches, inclusive, +of the regular expression. +The values of \fIm\fR and \fIn\fR must be non-negative +and smaller than 255. +The form \fB{\fIm\fB}\fR matches exactly \fIm\fR occurrences, +\fB{\fIm\fB,}\fR matches at least \fIm\fR occurrences. +.IP 11. +In bracket expressions as described in 5., +the following character sequences are considered special: +.IP +Character class expressions of the form +\fB[:\fIclass\fB:]\fR. +In the C LC_CTYPE locale, +the classes +.sp +.TS +l l l l. +[:alnum:] [:cntrl:] [:lower:] [:space:] +[:alpha:] [:digit:] [:print:] [:upper:] +[:blank:] [:graph:] [:punct:] [:xdigit:] +.TE +.sp +are recognized; +further locale-specific classes may be available. +A character class expression matches any character +that belongs to the given class in the current LC_CTYPE locale. +.IP +Collating symbol expressions of the form +\fB[.\fIc\fB.]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. +A collating symbol expression +matches the specified collating symbol. +.IP +Equivalence class expressions of the form +\fB[=\fIc\fB=]\fR, +where \fIc\fR is a collating symbol +in the current LC_COLLATE locale. +An equivalence class expression +matches any character that has the same collating weight +as \fIc\fR. +.LP +The order of precedence of operators +is [=\|=] [:\|:] [.\|.] +then [\|] +then (\|) +then *+? {m,n} +then concatenation +then ^ $ +then | and newline. +.PP +Care should be taken when using the characters +$ * [ ^ | ? \' " ( ) and \e in the expression +as they are also meaningful to the Shell. +It is safest to enclose the entire expression +argument in single quotes \' \'. 
+.PP +Both +.B /usr/5bin/egrep +and +.B /usr/5bin/posix/egrep +accept the following options: +.TP +.B \-b +Each line is preceded by the block number on which it was found. +This is sometimes useful +in locating disk block numbers by context. +Block numbers start with 0. +.TP +.B \-c +Only a count of matching lines is printed. +.TP +.BI \-e\ pattern_list +Specifies one or more patterns, separated by newline characters. +A line is selected if one or more of the specified patterns are found. +.TP +.BI \-f\ pattern_file +One or more patterns, separated by newline +characters, are read from +.I pattern_file. +If multiple +.B \-e +or +.B \-f +options are supplied to +.BR /usr/5bin/posix/egrep , +all of the pattern lists will be evaluated. +.TP +.B \-h +Normally, the name of each input file is printed before a match +if there is more than one input file. +When this option is present, no file names are printed. +.TP +.B \-i +Upper- and lowercase differences are ignored when searching matches. +.TP +.B \-l +The names of files with matching lines are listed +(once) separated by newlines. +.TP +.B \-n +Each line is preceded by its line number in the file. +Line numbers start with 1. +.TP +.B \-v +All lines but those matching are printed. +.PP +The following options are supported by +.B /usr/5bin/posix/egrep +only: +.TP +.B \-q +Do not write anything to standard output. +.TP +.B \-s +Error messages for nonexistent or unreadable files are suppressed. +.TP +.B \-x +Consider only lines consisting of the pattern as a whole, +like a regular expression surrounded by +.I ^ +and +.I $. +.PP +The following options are supported as extensions: +.TP +.B \-r +With this option given, +.I egrep +does not directly search in each given file that is a directory, +but descends it recursively +and scans each regular file found below it. +Device files are ignored. +Symbolic links are followed.
+.TP +.B \-R +Operates recursively as with the +.I \-r +option, +but does not follow symbolic links that point to directories +unless they are explicitly specified as arguments. +.TP +.B \-z +If an input file is found to be compressed with +.IR compress (1), +.IR gzip (1), +or +.IR bzip2 (1), +the appropriate compression program is started, +and +.I egrep +searches for the pattern in its output. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in extended regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both full and extended regular expressions, +the availability and composition of character classes +in extended regular expressions, +and the case mapping for the +.B \-i +option. +.SH "SEE ALSO" +ed(1), +fgrep(1), +grep(1), +sed(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 if any matches are found, +1 if none, 2 for syntax errors or inaccessible files. +.SH NOTES +If a line contains a +.SM NUL +character, +only matches up to this character are found with +.BR /usr/5bin/posix/egrep . +The entire matching line will be printed. +.PP +The LC_COLLATE variable currently has no effect. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. +.PP +For portable programs, restrict textual data +to the US-ASCII character set, +set the LC_CTYPE and LC_COLLATE variables to `C' or `POSIX', +and use the constructs in the second column +instead of the character class expressions as follows: +.RS +.sp +.TS +l l.
+[[:alnum:]] [0\-9A\-Za\-z] +[[:alpha:]] [A\-Za\-z] +[[:blank:]] [\fI<tab><space>\fR] +[[:cntrl:]] [^\fI<space>\fR\-~] +[[:digit:]] [0\-9] +[[:graph:]] [!\-~] +[[:lower:]] [a\-z] +[[:print:]] [\fI<space>\fR\-~] +[[:punct:]] [!\-/:\-@[\-`{\-~] +[[:space:]] [\fI<tab><vt><ff><cr><space>\fR] +[[:upper:]] [A\-Z] +[[:xdigit:]] [0\-9a\-fA\-F] +.TE +.sp +.RE +.IR <tab> , +.IR <space> , +.IR <vt> , +.IR <ff> , +and +.I <cr> +indicate inclusion of +a literal tabulator, space, vertical tabulator, formfeed, +or carriage return character, respectively. +Do not put the +.IR <vt> , +.IR <ff> , +and +.I <cr> +characters into the range expression for the +.I space +class unless you actually want to match these characters. +.PP +Interval expressions were newly introduced +with extended regular expressions +and cannot be used in portable programs. +To put a literal +.RB ` { ' +character into an expression, +use +.IR [{] . diff --git a/grep/fgrep.1 b/grep/fgrep.1 @@ -0,0 +1,179 @@ +.\" +.\" Sccsid @(#)fgrep.1 1.24 (gritter) 1/24/05 +.\" Parts taken from grep(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. 
nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH FGREP 1 "1/24/05" "Heirloom Toolchest" "User Commands" +.SH NAME +fgrep \- search a file for a character string +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/5bin/fgrep\fR [\fB\-e\fI\ string_list\fR\ ...] +[\fB\-f\fI\ string_file\fR] [\fB\-bchilnrRvxz\fR] +[\fIstring_list\fR] [\fIfile\fR\ ...] +.HP +.ad l +.PD 0 +\fB/usr/5bin/posix/fgrep\fR \fB\-e\fI\ string_list\fR\ ... +[\fB\-f\fI\ string_file\fR] [\fB\-c\fR|\fB\-l\fR] +[\fB\-bhinrRvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/fgrep\fR \fB\-f\fI\ string_file\fR +[\fB\-e\fI\ string_list\fR\ ...] [\fB\-c\fR|\fB\-l\fR] +[\fB\-bhinrRvxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/fgrep\fR [\fB\-c\fR|\fB\-l\fR] [\fB\-bhinrRvxz\fR] +\fIstring_list\fR [\fIfile\fR\ ...] +.br +.PD +.ad b +.hy 1 +.SH DESCRIPTION +The +.B fgrep +command searches the lines of the specified files +(or of standard input) +for occurrences of any of the newline separated strings in +.I string_list. 
+The default behavior is to print each matching line to standard output. +.PP +Both +.B /usr/5bin/fgrep +and +.B /usr/5bin/posix/fgrep +accept the following options: +.TP +.B \-b +Each line is preceded by the block number on which it was found. +This is sometimes useful +in locating disk block numbers by context. +Block numbers start with 0. +.TP +.B \-c +Only a count of matching lines is printed. +.TP +.BI \-e\ string_list +Specifies one or more strings, separated by newline characters. +A line is selected if one or more of the specified strings are found. +.TP +.BI \-f\ string_file +One or more strings, separated by newline +characters, are read from +.I string_file. +If multiple +.B \-e +or +.B \-f +options are supplied to +.BR /usr/5bin/posix/fgrep , +all of the pattern lists will be evaluated. +.TP +.B \-h +Normally, the name of each input file is printed before a match +if there is more than one input file. +When this option is present, no file names are printed. +.TP +.B \-i +Upper- and lowercase differences are ignored when searching matches. +.TP +.B \-l +The names of files with matching lines are listed +(once) separated by newlines. +.TP +.B \-n +Each line is preceded by its line number in the file. +Line numbers start with 1. +.TP +.B \-v +All lines but those matching are printed. +.TP +.B \-x +(Exact) only lines matched in their entirety are printed. +.PP +The following options are supported as extensions: +.TP +.B \-r +With this option given, +.I fgrep +does not directly search in each given file that is a directory, +but descends it recursively +and scans each regular file found below it. +Device files are ignored. +Symbolic links are followed. +.TP +.B \-R +Operates recursively as with the +.I \-r +option, +but does not follow symbolic links that point to directories +unless they are explicitly specified as arguments.
+.TP +.B \-z +If an input file is found to be compressed with +.IR compress (1), +.IR gzip (1), +or +.IR bzip2 (1), +the appropriate compression program is started, +and +.I fgrep +searches for the pattern in its output. +.PP +.B /usr/5bin/s42/fgrep +and +.B /usr/5bin/posix2001/fgrep +are identical to +.BR /usr/5bin/posix/fgrep . +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +and the case mapping for the +.B \-i +option. +.SH "SEE ALSO" +ed(1), +egrep(1), +grep(1), +sed(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 if any matches are found, +1 if none, 2 for syntax errors or inaccessible files. diff --git a/grep/grep.1 b/grep/grep.1 @@ -0,0 +1,297 @@ +'\" t +.\" Sccsid @(#)grep.1 1.36 (gritter) 8/14/05 +.\" Parts taken from grep(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. 
+.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH GREP 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +grep \- search a file for a pattern +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/5bin/grep\fR [\fB\-bchilnrRsvwz\fR] +\fIpattern\fR [\fIfile\fR\ ...] +.HP +.PD 0 +.ad l +\fB/usr/5bin/posix/grep\fR [\fB\-E\fR|\fB\-F\fR] +\fB\-e\fI\ pattern_list\fR\ ... +[\fB\-f\fI\ pattern_file\fR] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvwxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/grep\fR [\fB\-E\fR|\fB\-F\fR] +\fB\-f\fI\ pattern_file\fR +[\fB\-e\fI\ pattern_list\fR\ ...] [\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] +[\fB\-bhinrRsvwxz\fR] [\fIfile\fR\ ...] +.HP +.ad l +\fB/usr/5bin/posix/grep\fR [\fB\-E\fR|\fB\-F\fR] +[\fB\-c\fR|\fB\-l\fR|\fB\-q\fR] [\fB\-bhinrRsvwxz\fR] +\fIpattern_list\fR [\fIfile\fR\ ...] +.br +.PD +.ad b +.hy 1 +.SH DESCRIPTION +The +.B grep +command searches the lines of the specified files +(or of standard input) +for occurrences of the regular expression +.I pattern. +The default behavior is to print each matching line to standard output. 
+.PP +The +.B /usr/5bin/grep +command accepts one pattern +that is treated as a simple regular expression; +it uses a compact nondeterministic algorithm. +.PP +The +.B /usr/5bin/posix/grep +command uses basic regular expressions by default +and accepts a newline-separated list of patterns +as described for the +.B \-e +option below. +It uses a deterministic algorithm with moderate space requirements +for most expressions; +backreferences, word delimiters, and multi-character collating elements +cause a nondeterministic algorithm to be used. +.PP +.B /usr/5bin/s42/grep +and +.B /usr/5bin/posix2001/grep +are identical to +.BR /usr/5bin/posix/grep . +.PP +See the description of +.IR ed (1) +for the specifications of simple and basic regular expressions. +.PP +Care should be taken when using the characters +$ * [ ^ | ? \' " ( ) and \e in the expression +as they are also meaningful to the Shell. +It is safest to enclose the entire expression +argument in single quotes \' \'. +.PP +Both +.B /usr/5bin/grep +and +.B /usr/5bin/posix/grep +accept the following options: +.TP +.B \-b +Each line is preceded by the block number on which it was found. +This is sometimes useful +in locating disk block numbers by context. +Block numbers start with 0. +.TP +.B \-c +Only a count of matching lines is printed. +.TP +.B \-h +Normally, the name of each input file is printed before a match +if there is more than one input file. +When this option is present, no file names are printed. +.TP +.B \-i +Upper- and lowercase differences are ignored when searching for matches. +.TP +.B \-l +The names of files with matching lines are listed +(once) separated by newlines. +.TP +.B \-n +Each line is preceded by its line number in the file. +Line numbers start with 1. +.TP +.B \-s +Error messages for nonexistent or unreadable files are suppressed. +.TP +.B \-v +All lines but those matching are printed. 
+.PP +The following options are supported by +.B /usr/5bin/posix/grep +only: +.TP +.BI \-e\ pattern_list +Specifies one or more patterns, separated by newline characters. +A line is selected if one or more of the specified patterns are found. +.TP +.B \-E +All patterns are interpreted as extended regular expressions +as described in +.IR egrep (1). +.TP +.BI \-f\ pattern_file +One or more patterns, separated by newline +characters, are read from +.I pattern_file. +.TP +.B \-F +All patterns are interpreted as fixed strings, +as with +.IR fgrep (1). +.TP +.B \-q +Do not write anything to standard output. +.TP +.B \-x +Consider only lines consisting of the pattern as a whole, +like a regular expression surrounded by +.I ^ +and +.I $. +.PP +The following options are supported as extensions: +.TP +.B \-r +With this option given, +.I grep +does not directly search in each given file that is a directory, +but descends it recursively +and scans each regular file found below it. +Device files are ignored. +Symbolic links are followed. +.TP +.B \-R +Operates recursively as with the +.I \-r +option, +but does not follow symbolic links that point to directories +unless they are explicitly specified as arguments. +.TP +.B \-w +Searches for the patterns treated as words, +as if they were surrounded by `\e<\ \e>'. +Only available if neither the +.I \-E +nor the +.I \-F +option is also supplied. +.TP +.B \-z +If an input file is found to be compressed with +.IR compress (1), +.IR gzip (1), +or +.IR bzip2 (1), +the appropriate compression program is started, +and +.I grep +searches for the pattern in its output. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. 
+.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both simple and basic regular expressions, +the availability and composition of character classes +in basic regular expressions, +and the case mapping for the +.B \-i +option. +.SH "SEE ALSO" +ed(1), +egrep(1), +fgrep(1), +sed(1), +locale(7) +.SH DIAGNOSTICS +Exit status is 0 if any matches are found, +1 if none, 2 for syntax errors or inaccessible files. +.SH NOTES +If a line contains a +.SM NUL +character, +only matches up to this character are found +(unless +.B /usr/5bin/posix/grep +is used with the +.I \-F +option). +The entire matching line will be printed. +.PP +The LC_COLLATE variable has currently no effect. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. +.PP +The options supported by +.B /usr/5bin/posix/grep +that are not accepted by +.B /usr/5bin/grep +can easily be replaced by portable constructs: +Use +.I egrep +instead of +.BR \-E , +.I fgrep +instead of +.BR \-F . +Use +.I egrep +if you need the +.B \-e +or +.B \-f +option, +use only one of them and that only once; +if necessary, use text processing tools +to generate a single expression list before. +Redirect standard output to +.I /dev/null +for +.B \-q +(the possible speedup with +.I \-q +is never worth human time spent with porting scripts), +and use the +.RI ` ^ ' +and +.RI ` $ ' +meta-characters instead of +.BR \-x . diff --git a/grep/grep.c b/grep/grep.c @@ -0,0 +1,727 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)grep.c 1.53 (gritter) 12/27/06> */ + +/* + * Code common to all grep flavors. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> +#include <limits.h> +#include <ctype.h> +#include <dirent.h> +#include <errno.h> + +#include "grep.h" +#include "alloc.h" + +/* + * Generic flags and the like. 
+ */ +int Eflag; /* use EREs */ +int Fflag; /* use fixed strings */ +int bflag; /* print buffer count */ +int cflag; /* print count only */ +int fflag; /* had pattern file argument */ +int hflag; /* do not print filenames */ +int iflag; /* ignore case */ +int lflag; /* print filenames only */ +int nflag; /* print line numbers */ +int qflag; /* no output at all */ +int (*rflag)(const char *, struct stat *); /* operate recursively */ +int sflag; /* avoid error messages */ +int vflag; /* inverse selection */ +int wflag; /* search for words */ +int xflag; /* match entire line */ +int zflag; /* decompress compressed files */ +int mb_cur_max; /* avoid multiple calls to MB_CUR_MAX */ +unsigned status = 1; /* exit status */ +off_t lmatch; /* count of line matches */ +off_t lineno; /* current line number */ +char *progname; /* argv[0] to main() */ +char *filename; /* name of current file */ +char *options; /* for getopt() */ +void (*build)(void); /* compile function */ +int (*match)(const char *, size_t); /* comparison function */ +int (*range)(struct iblok *, char *); /* grep range of lines */ + +/* + * Regexp variables. + */ +struct expr *e0; /* start of expression list */ +enum matchflags matchflags; /* matcher flags */ + +/* + * To avoid link loops with -r. + */ +static struct visit { + ino_t v_ino; + dev_t v_dev; +} *visited; +static int vismax; /* number of members in visited */ + +/* + * Lower-case a character string. 
+ */ +size_t +loconv(register char *dst, register char *src, size_t sz) +{ + char *odst = dst; + + if (mbcode) { + char mb[MB_LEN_MAX]; + wchar_t wc; + int len, i, nlen; + + while (sz > 0) { + if ((*src & 0200) == 0) { + *dst++ = tolower(*src); + src++; + sz--; + } else if ((len = mbtowc(&wc, src, sz)) <= 0 || + len > sz) { + *dst++ = *src++; + sz--; + } else { + wc = towlower(wc); + if (len >= mb_cur_max) { + if ((nlen = wctomb(dst, wc)) <= len) { + dst += nlen; + src += len; + sz -= len; + } else { + *dst++ = *src++; + sz--; + } + } else { + if ((nlen = wctomb(mb, wc)) <= len) { + sz -= len; + src += len; + for (i = 0; i < nlen; i++) + *dst++ = mb[i]; + } else { + *dst++ = *src++; + sz--; + } + } + } + } + } else { + while (sz--) { + *dst++ = tolower(*src & 0377); + src++; + } + } + return dst - odst; +} + +/* + * Determine if pat ends with an unescaped dollar sign. + */ +static int +termdollar(const char *pat, long len) +{ + int dollar = 1; + + if (len == 0 || pat[len - 1] != '$') + return 0; + pat += --len - 1; + while (len-- && *pat-- == '\\') + dollar = !dollar; + return dollar; +} + +/* + * Surround the pattern with \< \>. 
+ */ +void +wcomp(char **pat, long *len) +{ + char *wp = smalloc(*len + 5); + + memcpy(&wp[2], *pat, *len); + if ((*pat)[0] == '^') + memcpy(wp, "^\\<", 3); + else + memcpy(wp, "\\<", 2); + if (termdollar(*pat, *len)) + strcpy(&wp[*len-1+2], "\\>$"); + else + strcpy(&wp[*len+2], "\\>"); + *len += 4; + *pat = wp; +} + +static struct iblok * +redirect(struct iblok *ip, const char *arg0, const char *arg1) +{ + struct iblok *nip = NULL; + int pd[2]; + pid_t pid; + + if (pipe(pd) < 0) + return NULL; + switch (pid = fork()) { + case 0: + if (lseek(ip->ib_fd, -(ip->ib_end - ip->ib_cur), + SEEK_CUR) == (off_t)-1) { + int xpd[2]; + if (pipe(xpd) == 0 && fork() == 0) { + ssize_t rd, wo, wt; + close(xpd[0]); + for (;;) { + rd = ip->ib_end - ip->ib_cur; + wo = wt = 0; + do { + if ((wo = write(xpd[1], + &ip->ib_cur[wt], + rd - wt)) + <= 0) { + if (errno == EINTR) + continue; + _exit(0); + } + wt += wo; + } while (wt < rd); + if (ib_read(ip) == EOF) + break; + ip->ib_cur--; + } + _exit(0); + } else { + close(xpd[1]); + dup2(xpd[0], 0); + close(xpd[0]); + } + } else { + if (ip->ib_fd) + dup2(ip->ib_fd, 0); + } + if (ip->ib_fd) + ib_close(ip); + else + ib_free(ip); + dup2(pd[1], 1); + close(pd[0]); + close(pd[1]); + execlp(arg0, arg0, arg1, NULL); + fprintf(stderr, "%s: could not exec %s\n", progname, arg0); + _exit(0177); + /*NOTREACHED*/ + case -1: + fprintf(stderr, "%s: cannot fork()\n", progname); + status = 2; + return NULL; + default: + close(pd[1]); + nip = ib_alloc(pd[0], 0); + nip->ib_pid = pid; + return nip; + } +} + +/* + * Report a matching line. 
+ */ +void +report(const char *line, size_t llen, off_t bcnt, int addnl) +{ + if (filename && !hflag) + printf("%s:", filename); +#ifdef LONGLONG + if (bflag) + printf("%llu:", (long long)bcnt); + if (nflag) + printf("%llu:", (long long)lineno); +#else /* !LONGLONG */ + if (bflag) + printf("%lu:", (long)bcnt); + if (nflag) + printf("%lu:", (long)lineno); +#endif /* !LONGLONG */ + if (line && llen) + fwrite(line, sizeof *line, llen, stdout); + if (addnl) + putchar('\n'); +} + +/* + * Check line for match. If necessary, the line gets NUL-terminated (so + * its address range must be writable then). When ignoring character case, + * a lower-case-only copy of the line is made instead. If a match is found, + * statistics are printed. Returns 1 if main loop shall terminate, 0 else. + */ +static int +matchline(char *line, size_t sz, int putnl, struct iblok *ip) +{ + size_t csz = sz; + int terminate = 0; + char lbuf[512], *abuf = NULL, *cline = line; + + if (iflag && (matchflags & MF_LOCONV)) { + if (sz >= sizeof lbuf - 1) { + abuf = smalloc(sz + 1); + cline = abuf; + } else + cline = lbuf; + csz = loconv(cline, line, sz); + cline[csz] = '\0'; + } else if (matchflags & MF_NULTERM) + cline[sz] = '\0'; + lineno++; + if (match(cline, csz) ^ vflag) { + lmatch++; + if (qflag == 0) { + if (status == 1) + status = 0; + if (lflag) { + puts(filename ? filename : stdinmsg); + } else if (!cflag) + report(line, sz, (ib_offs(ip)-1) / BSZ, putnl); + } else + exit(0); + if (qflag || lflag) + terminate = 1; + } + if (abuf) + free(abuf); + return terminate; +} + +/* + * Check all lines within ip->ib_cur and last which contains the last + * newline. If the main loop shall terminate, 1 is returned. 
+ */ +static int +gn_range(struct iblok *ip, char *last) +{ + char *nl; + + while ((nl = memchr(ip->ib_cur, '\n', last + 1 - ip->ib_cur)) != NULL) { + if (matchline(ip->ib_cur, nl - ip->ib_cur, 1, ip)) + return 1; + if (nl == last) + return 0; + ip->ib_cur = nl + 1; + } + return 0; +} + +/* + * Main grep routine. The line buffer herein is only used for overlaps + * between file buffer fills. + */ +static struct iblok * +grep(struct iblok *ip) +{ + char *line = NULL; /* line buffer */ + register char *lastnl; /* last newline in file buffer */ + size_t sz = 0; /* length of line in line buffer */ + char *cp; + int hadnl; /* lastnl points to newline char */ + int oom = 0; /* got out of memory */ + + lineno = lmatch = 0; + if (ib_read(ip) == EOF) + goto endgrep; + ip->ib_cur--; + if (zflag) { + struct iblok *np; + for (;;) { + sz = ip->ib_end - ip->ib_cur; + if (sz > 3 && memcmp(ip->ib_cur, "BZh", 3) == 0) + np = redirect(ip, "bzip2", "-cd"); + else if (sz > 2 && + memcmp(ip->ib_cur, "\37\235", 2) == 0) + np = redirect(ip, "zcat", NULL); + else if (sz > 2 && + memcmp(ip->ib_cur, "\37\213", 2) == 0) + np = redirect(ip, "gzip", "-cd"); + else + break; + if (np == NULL) + break; + if (ip->ib_fd) + ib_close(ip); + else + ib_free(ip); + ip = np; + if (ib_read(ip) == EOF) + goto endgrep; + ip->ib_cur--; + } + } + for (;;) { + for (lastnl = ip->ib_end - 1; + *lastnl != '\n' && lastnl > ip->ib_cur; + lastnl--); + if (hadnl = (ip->ib_cur < ip->ib_end && *lastnl == '\n')) + if (range(ip, lastnl)) + break; + if (lastnl < ip->ib_end - hadnl) { + /* + * Copy the partial line from file buffer to line + * buffer. Allocate enough space to zero-terminate + * the line later if necessary. 
+ */ + sz = ip->ib_end - lastnl - hadnl; + line = smalloc(sz + 1); + memcpy(line, lastnl + hadnl, sz); + ip->ib_cur = lastnl + hadnl; + } else + line = NULL; +nextbuf: + if (ib_read(ip) == EOF) { + if (line) { + matchline(line, sz, sus, ip); + free(line); + line = NULL; + sz = 0; + } + break; + } + ip->ib_cur--; + if (line) { + /* + * Append the partial line at the beginning of the + * file buffer to the line buffer. + */ + size_t oldsz = sz; + if ((cp = memchr(ip->ib_cur, '\n', + ip->ib_end - ip->ib_cur)) == NULL) { + char *nline; + /* + * Ugh. This is really a huge line. Store the + * entire file buffer in the line buffer and + * read the next part of the file. + */ + sz += ip->ib_end - ip->ib_cur; + if ((nline = realloc(line, sz + 1)) == NULL) { + sz = oldsz; + cp = &ip->ib_end[-1]; + oom++; + } else { + line = nline; + memcpy(line + oldsz, ip->ib_cur, + ip->ib_end - ip->ib_cur); + goto nextbuf; + } + } + if ((sz = cp - ip->ib_cur) > 0) { + char *nline; + sz += oldsz; + if ((nline = realloc(line, sz + 1)) == NULL) { + sz = oldsz; + oom++; + } else { + line = nline; + memcpy(line + oldsz, ip->ib_cur, + cp - ip->ib_cur); + } + } else + sz = oldsz; + if (matchline(line, sz, 1, ip)) + break; + free(line); + line = NULL; + sz = 0; + ip->ib_cur = cp + (oom == 0); + oom = 0; + } + } +endgrep: + if (!qflag && cflag) { + if (filename && !hflag) + printf("%s:", filename); +#ifdef LONGLONG + printf("%llu\n", (long long)lmatch); +#else + printf("%lu\n", (long)lmatch); +#endif + } + return ip; +} + +/* + * Grep a named file. + */ +static void +fngrep(const char *fn, int level) +{ + struct iblok *ip; + struct stat st; + int i; + + if (rflag && fn && (level ? 
rflag : stat)(fn, &st) == 0) { + if (rflag != lstat) { /* -r follows links: guard against link loops */ + for (i = 0; i < level; i++) + if (st.st_dev == visited[i].v_dev && + st.st_ino == visited[i].v_ino) + return; + if (level >= vismax) { + vismax += 20; + visited = srealloc(visited, sizeof *visited * + vismax); + } + visited[level].v_dev = st.st_dev; + visited[level].v_ino = st.st_ino; + } + mode: switch (st.st_mode&S_IFMT) { +#define ignoring(t, s) fprintf(stderr, "%s: ignoring %s %s\n", progname, t, s) + case S_IFIFO: + ignoring("named pipe", fn); + return; + case S_IFBLK: + ignoring("block device", fn); + return; + case S_IFCHR: + ignoring("character device", fn); + return; +#ifdef S_IFSOCK + case S_IFSOCK: + ignoring("socket", fn); + return; +#endif /* S_IFSOCK */ + case S_IFLNK: + if (stat(fn, &st) < 0 || (st.st_mode&S_IFMT) == S_IFDIR) + return; + goto mode; + default: + break; + case S_IFDIR: { + char *path; + int pend, psize, i; + DIR *df; + struct dirent *dp; + + if (hflag == 2) + hflag = 0; + if ((df = opendir(fn)) == NULL) { + if (sflag == 0) + fprintf(stderr, "%s: can't open " + "directory %s\n", + progname, fn); + if (!qflag || status == 1) + status = 2; + return; + } + pend = strlen(fn); + path = smalloc(psize = pend + 2); /* room for fn, '/' and NUL */ + strcpy(path, fn); + path[pend++] = '/'; + while ((dp = readdir(df)) != NULL) { + if (dp->d_name[0] == '.' && + (dp->d_name[1] == '\0' || + dp->d_name[1] == '.' 
&& + dp->d_name[2] == '\0')) + continue; + i = 0; + do { + if (pend + i >= psize) + path = srealloc(path, + psize += 14); + path[pend+i] = dp->d_name[i]; + } while (dp->d_name[i++]); + filename = path; + fngrep(path, level+1); + } + free(path); + closedir(df); + return; + } + } + } + if (fn) { + if ((ip = ib_open(fn, 0)) == NULL) { + if (sflag == 0) + fprintf(stderr, "%s: can't open %s\n", + progname, fn); + if (!qflag || status == 1) + status = 2; + return; + } + } else + ip = ib_alloc(0, 0); + ip = grep(ip); + if (ip->ib_fd) { + pid_t pid = ip->ib_pid; /* read before ib_close(), which appears to release ip */ + int s = 0; + ib_close(ip); + if (zflag && pid) { + if (waitpid(pid, &s, 0) == pid && s) + status = 2; + } + } else + ib_free(ip); +} + +int +main(int argc, char **argv) +{ + int i, hadpat = 0; + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + progname = basename(argv[0]); + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + range = gn_range; + init(); + while ((i = getopt(argc, argv, options)) != EOF) { + switch (i) { + case 'E': + Eflag |= 1; + rc_select(); + break; + case 'F': + if (Eflag&2) + Eflag = 0; + Fflag |= 1; + ac_select(); + break; + case 'b': + bflag = 1; + break; + case 'c': + cflag = 1; + break; + case 'e': + patstring(optarg); + hadpat++; + break; + case 'f': + fflag++; + patfile(optarg); + hadpat++; + break; + case 'h': + hflag = 1; + break; + case 'i': + case 'y': + iflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'q': + qflag = 1; + break; + case 'r': + rflag = stat; + break; + case 'R': + rflag = lstat; + break; + case 's': + sflag = 1; + break; + case 'v': + vflag = 1; + break; + case 'w': + wflag = 1; + break; + case 'x': + xflag = 1; + break; + case 'z': + zflag = 1; + break; + default: + if (!(Fflag&2)) + usage(); + status = 2; + } + } + if (sus) { + if (Fflag == 2) { + if (sflag) { + optind = 1; + argv[1] = "-s"; + getopt(argc, argv, 
""); + usage(); + } + } + if (Fflag && status == 2) + usage(); + if (Eflag == 1 && Fflag == 1 || cflag + lflag + qflag > 1) + usage(); + if (wflag && (Eflag || Fflag)) + usage(); + } + if (cflag) + lflag = 0; + if (hadpat == 0) { + if (optind >= argc) + misop(); + patstring(argv[optind++]); + } else if (e0 == NULL) + patstring(NULL); + build(); + if (optind != argc) { + if (optind + 1 == argc) + hflag = 2; + do { + if (sus && argv[optind][0] == '-' && + argv[optind][1] == '\0') { + filename = NULL; + fngrep(NULL, 0); + } else { + filename = argv[optind]; + fngrep(argv[optind], 0); + } + } while (++optind < argc); + } else { + if (lflag && !sus && (Eflag || Fflag)) + exit(1); + fngrep(NULL, 0); + } + return status; +} diff --git a/grep/grep.h b/grep/grep.h @@ -0,0 +1,146 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* Sccsid @(#)grep.h 1.23 (gritter) 1/4/05> */ + +#include <sys/types.h> +#include <regex.h> + +#include "iblok.h" + +#include "config.h" + +#define BSZ 512 /* block size */ + +/* + * Expression flags. + */ +enum eflags { + E_NONE = 0, /* no flags set */ + E_NL = 1, /* pattern ends with newline */ + E_NULL = 2 /* no pattern, not even an empty one */ +}; + +/* + * List of search expressions; not used for compile() matching. + */ +struct expr { + struct expr *e_nxt; /* next item in list */ + char *e_pat; /* search pattern */ + regex_t *e_exp; /* compiled pattern from regcomp() */ + long e_len; /* pattern length */ + enum eflags e_flg; /* expression flags */ +}; + +/* + * Matcher flags. + */ +enum matchflags { + MF_NULTERM = 01, /* search string must be \0 terminated*/ + MF_LOCONV = 02 /* lower-case search string if -i is set */ +}; + +/* + * Variables in grep.c. + */ +extern int Eflag; /* use EREs */ +extern int Fflag; /* use fixed strings */ +extern int bflag; /* print buffer count */ +extern int cflag; /* print count only */ +extern int fflag; /* had pattern file argument */ +extern int hflag; /* do not print filenames */ +extern int iflag; /* ignore case */ +extern int lflag; /* print filenames only */ +extern int nflag; /* print line numbers */ +extern int qflag; /* no output at all */ +extern int sflag; /* avoid error messages */ +extern int vflag; /* inverse selection */ +extern int wflag; /* search for words */ +extern int xflag; /* match entire line */ +extern int mb_cur_max; /* MB_CUR_MAX */ +#define mbcode (mb_cur_max>1) /* multibyte characters in use */ +extern unsigned status; /* exit status */ +extern off_t lmatch; /* count of matching lines */ +extern off_t lineno; /* current line number */ +extern char *progname; /* argv[0] to main() */ +extern char *filename; /* name of current file */ +extern void (*build)(void); /* compile function */ +extern int (*match)(const char *, size_t); /* comparison */ +extern int (*range)(struct iblok *, char *); 
/* grep range */ +extern struct expr *e0; /* start of expression list */ +extern enum matchflags matchflags; /* matcher flags */ + +/* + * These differ amongst grep flavors. + */ +extern int sus; /* POSIX.2 command version in use */ +extern char *stdinmsg; /* name for standard input */ +extern char *usagemsg; /* usage string */ +extern char *options; /* for getopt() */ + +/* + * In grep.c. + */ +extern size_t loconv(char *, char *, size_t); +extern void wcomp(char **, long *); +extern void report(const char *, size_t, off_t, int); + +/* + * Flavor dependent. + */ +extern void usage(void); +extern void misop(void); +extern void rc_error(struct expr *, int); +extern void init(void); + +/* + * Traditional egrep only. + */ +extern void eg_select(void); + +/* + * Fgrep only. + */ +extern void ac_select(void); + +/* + * compile()/step()-related. + */ +extern void st_select(void); + +/* + * regcomp()/regexec()-related. + */ +extern void rc_select(void); + +/* + * Not for SVID3 grep. + */ +extern void patstring(char *); +extern void patfile(char *); +extern int nextch(void); +extern void outline(struct iblok *, char *, size_t); diff --git a/grep/grid.c b/grep/grid.c @@ -0,0 +1,50 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)grep.sl 2.51 (gritter) 12/27/06"; +/* SLIST */ +/* +ac.c:static const char sccsid[] USED = "@(#)fgrep.sl 2.10 (gritter) 5/29/05"; +alloc.c: Sccsid @(#)alloc.c 1.3 (gritter) 4/17/03> +alloc.h: Sccsid @(#)alloc.h 1.3 (gritter) 4/17/03> +egrep.y:static const char sccsid[] USED = "@(#)egrep.sl 2.22 (gritter) 5/29/05"; +fgrep.c: Sccsid @(#)fgrep.c 1.12 (gritter) 12/17/04> +ggrep.c: Sccsid @(#)ggrep.c 1.26 (gritter) 1/4/05> +grep.c: Sccsid @(#)grep.c 1.53 (gritter) 12/27/06> +grep.h: Sccsid @(#)grep.h 1.23 (gritter) 1/4/05> +plist.c: Sccsid @(#)plist.c 1.22 (gritter) 12/8/04> +rcomp.c: Sccsid @(#)rcomp.c 1.27 (gritter) 2/6/05> +sus.c: Sccsid @(#)sus.c 1.24 (gritter) 5/29/05> +svid3.c: Sccsid @(#)svid3.c 1.7 (gritter) 4/17/03> +*/ diff --git a/grep/mkfile b/grep/mkfile @@ -0,0 +1,49 @@ +BIN = grep +OBJ = alloc.o grep.o grid.o plist.o rcomp.o sus.o ac.o +LOCAL_CFLAGS = -DSU3 +INSTALL_BIN = grep +INSTALL_MAN1 = grep.1 egrep.1 fgrep.1 +INSTALL_SYMLINK = \ + grep /bin/egrep \ + grep /bin/fgrep +DEPS = libcommon + +<$mkbuild/mk.common + +grep: $OBJ + +config.h:Q: + echo '/* Auto-generated by make. Do not edit! */' >config.h + echo -n "Checking for wchar.h... " + echo '#include <wchar.h>' >___build$$$$.c + $CC $CFLAGS2 $CPPFLAGS $IWCHAR $ICOMMON $IUXRE $LARGEF -c ___build$$$$.c >/dev/null 2>&1 + if test $? 
= 0 && test -f ___build$$$$.o + then echo '#include <wchar.h>' >>config.h + else echo "not " + fi + rm -f ___build$$$$.o ___build$$$$.c + echo "found." + echo -n "Checking for wctype.h... " + echo '#include <wctype.h>' >___build$$$$.c + $CC $CFLAGS2 $CPPFLAGS $IWCHAR $ICOMMON $IUXRE $LARGEF -c ___build$$$$.c >/dev/null 2>&1 + if test $? = 0 && test -f ___build$$$$.o + then echo '#include <wctype.h>' >>config.h + else echo "not " + fi + rm -f ___build$$$$.o ___build$$$$.c + echo "found." + echo -n "Checking for long long... " + echo 'long long foo;' >___build$$$$.c + $CC $CFLAGS2 $CPPFLAGS $IWCHAR $ICOMMON $IUXRE $LARGEF -c ___build$$$$.c >/dev/null 2>&1 + if test $? = 0 && test -f ___build$$$$.o + then echo '#define LONGLONG' >>config.h + else echo "not " + fi + rm -f ___build$$$$.o ___build$$$$.c + echo "found." + +grep.o: grep.h config.h alloc.h +plist.o: grep.h config.h alloc.h +sus.o: grep.h config.h alloc.h +ac.o: alloc.h grep.h +rcomp.o: grep.h config.h alloc.h diff --git a/grep/plist.c b/grep/plist.c @@ -0,0 +1,213 @@ +/* + * grep - search a file for a pattern + * + * Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)plist.c 1.22 (gritter) 12/8/04> */ + +/* + * Pattern list routines. + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <ctype.h> + +#include "grep.h" +#include "alloc.h" + +/* + * Add a pattern starting at the given node of the expression list. + */ +static void +addpat(struct expr **e, char *pat, long len, enum eflags flg) +{ + if (e0) { + (*e)->e_nxt = (struct expr *)smalloc(sizeof **e); + (*e) = (*e)->e_nxt; + } else + e0 = (*e) = (struct expr *)smalloc(sizeof **e); + if (wflag) + wcomp(&pat, &len); + (*e)->e_nxt = NULL; + (*e)->e_pat = pat; + (*e)->e_len = len; + (*e)->e_flg = flg; +} + +/* + * Read patterns from pattern string. In traditional command versions, -f + * overrides all -e and all previous -f options. In POSIX.2 command versions, + * all -e and -f options are cumulated. + */ +void +patstring(char *cp) +{ + struct expr *e = NULL; + char *ep; + int nl; + + if (e0) { + if (sus) + for (e = e0; e->e_nxt; e = e->e_nxt); + else if (fflag) + return; + else + e0 = NULL; + } + if (cp) { + do { + if ((nl = (ep = strchr(cp, '\n')) != NULL) != 0) + *ep = 0; + addpat(&e, cp, ep ? ep - cp : strlen(cp), nl); + cp = ep + 1; + if (nl) + *ep = '\n'; + } while (ep); + } else + addpat(&e, strdup(""), 0, E_NULL); +} + +/* + * Read patterns from file. 
/*
 * Read patterns from file.
 *
 * fn is opened with ib_open(); if that or the following fstat() fails, a
 * message is printed and the program exits with status 2. In POSIX.2
 * versions (sus != 0) the patterns are appended after the current tail of
 * the list; otherwise the existing list is discarded first.
 */
void
patfile(char *fn)
{
	struct stat	st;
	struct expr	*e = NULL;
	char	*cp;
	struct iblok	*ip;
	size_t	sz, len;
	int	nl;

	if ((ip = ib_open(fn, 0)) == NULL || fstat(ip->ib_fd, &st) < 0) {
		fprintf(stderr, "%s: can't open %s\n", progname, fn);
		exit(2);
	}
	if (e0) {
		if (sus)
			for (e = e0; e->e_nxt; e = e->e_nxt);
		else
			e0 = NULL;
	}
	/*
	 * cp and sz are reset before every call so that each line ends up in
	 * a buffer of its own; the list keeps the pointer handed to addpat().
	 */
	while (cp = NULL, sz = 0,
			(len = ib_getlin(ip, &cp, &sz, srealloc)) > 0) {
		/* strip a trailing newline and remember that it was there */
		if ((nl = cp[len - 1] == '\n') != 0)
			cp[len - 1] = '\0';
		addpat(&e, cp, len - nl, nl);
	}
	ib_close(ip);
}

/*
 * getc() substitute operating on the pattern list.
 *
 * Each call returns the next character of the current pattern, lowercased
 * when iflag is set. Once a pattern is used up, the call that steps past
 * its end returns '\n' and moves on to the next list node; after the last
 * node it returns EOF instead unless that node carries E_NL. All later calls
 * return EOF. The position is kept in static variables, so the list can be
 * walked only once.
 */
int
nextch(void)
{
	static struct expr	*e;	/* current list node */
	static char	*cp;		/* read position inside e->e_pat */
	static long	len;		/* bytes left in the current pattern */
	static int	oneof;		/* set once the list is exhausted */
	wchar_t	wc;
	int	n;

	if (oneof)
		return EOF;
	if (e == NULL) {
		/* first call: start at the head of the list */
		e = e0;
		if (e->e_flg & E_NULL) {
			oneof++;
			return EOF;
		}
	}
	if (cp == NULL) {
		cp = e->e_pat;
		len = e->e_len;
	}
	if (mbcode && *cp & 0200) {
		/* byte with the high bit set: decode a multibyte character */
		if ((n = mbtowc(&wc, cp, MB_LEN_MAX)) < 0) {
			fprintf(stderr, "%s: illegal byte sequence\n",
					progname);
			exit(1);
		}
		cp += n;
		len -= n;
	} else {
		wc = *cp++ & 0377;
		len--;
	}
	/*
	 * len only drops below zero when the byte just consumed lay beyond
	 * the pattern, i.e. the pattern had already been fully returned.
	 */
	if (len >= 0)
		return iflag ? mbcode && wc & ~(wchar_t)0177 ?
			towlower(wc) : tolower(wc) : wc;
	cp = NULL;
	n = e->e_flg & E_NL;
	if ((e = e->e_nxt) == NULL) {
		oneof++;
		if (!n)
			return EOF;
	}
	return '\n';
}

/*
 * Print matching line based on ip->ib_cur and moff. Advance ip->ib_cur to start
 * of next line. Used from special rangematch functions.
 *
 * ip	input block; ip->ib_cur is read and updated.
 * last	last valid byte of the buffer; the end-of-line search stops there.
 * moff	offset from ip->ib_cur of a position inside the line to print.
 *
 * With qflag set the program exits with status 0 at once. With lflag set
 * only the file name is printed and ip->ib_cur is left unchanged.
 */
void
outline(struct iblok *ip, char *last, size_t moff)
{
	register char	*sol, *eol;	/* start and end of line */

	if (qflag == 0) {
		if (status == 1)
			status = 0;
		if (lflag) {
			puts(filename ? filename : stdinmsg);
		} else {
			lmatch++;
			sol = ip->ib_cur + moff;
			/*
			 * If the position is the line's own newline, step
			 * back first so the search below finds the previous
			 * newline rather than this one.
			 */
			if (*sol == '\n' && sol > ip->ib_cur)
				sol--;
			while (sol > ip->ib_cur && *sol != '\n')
				sol--;
			if (sol > ip->ib_cur)
				sol++;
			ip->ib_cur += moff;
			for (eol = ip->ib_cur; eol <= last
					&& *eol != '\n'; eol++);
			if (!cflag)
				report(sol, eol - sol, ib_offs(ip) / BSZ, 1);
			ip->ib_cur = eol + 1;
		}
	} else	/* qflag != 0 */
		exit(0);
}

/* diff --git a/grep/rcomp.c b/grep/rcomp.c (new file) */

/*
 * grep - search a file for a pattern
 *
 * Gunnar Ritter, Freiburg i. Br., Germany, April 2001.
 */
/*
 * Copyright (c) 2003 Gunnar Ritter
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute
 * it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 */

/*	Sccsid @(#)rcomp.c	1.27 (gritter) 2/6/05>	*/

/*
 * Code involving POSIX.2 regcomp()/regexpr() routines.
 */

#include	"grep.h"
#include	"alloc.h"
#include	<stdio.h>
#include	<stdlib.h>
#include	<string.h>
#include	<mbtowi.h>

/* set by rc_build() when the pattern list contains an empty pattern */
static int	emptypat;

#ifdef	UXRE
#include	<regdfa.h>
static int	rc_range(struct iblok *, char *);
static int	rc_rangew(struct iblok *, char *);
#endif
+ */ +static int +rc_match(const char *str, size_t sz) +{ +#ifndef UXRE + struct expr *e; +#endif + regmatch_t pmatch[1]; + int gotcha = 0; + + if (emptypat) { + if (xflag) { + if (*str == '\0') + return 1; + } else + return 1; + } +#ifdef UXRE + if (e0->e_exp) + gotcha = (regexec(e0->e_exp, str, 1, pmatch, 0) == 0); +#else /* !UXRE */ + for (e = e0; e; e = e->e_nxt) { + if (e->e_exp) { + gotcha = (regexec(e->e_exp, str, 1, pmatch, 0) == 0); + if (gotcha) + break; + } + } +#endif /* !UXRE */ + if (gotcha) + if (!xflag || (pmatch[0].rm_so == 0 + && pmatch[0].rm_eo == sz)) + return 1; + return 0; +} + +/* + * Compile a pattern structure using regcomp(). + */ +static void +rc_build(void) +{ + int rerror = REG_BADPAT; + int rflags = 0; + size_t sz; +#ifdef UXRE + char *pat, *cp; +#endif /* UXRE */ + struct expr *e; + + if ((e0->e_flg & E_NULL) == 0) { + for (sz = 0, e = e0; e; e = e->e_nxt) { + if (e->e_len > 0) + sz += e->e_len + 1; + else + emptypat = 1; + } + } else + sz = 1; + if ((e0->e_flg & E_NULL || emptypat) && sus == 0) + rc_error(e0, rerror); + if (sz == 0 || (emptypat && xflag == 0)) { + e0->e_exp = NULL; + return; + } +#ifdef UXRE + pat = smalloc(sz); + for (cp = pat, e = e0; e; e = e->e_nxt) { + if (e->e_len > 0) { + memcpy(cp, e->e_pat, e->e_len); + cp[e->e_len] = '\n'; + cp = &cp[e->e_len + 1]; + } + } + pat[sz - 1] = '\0'; + if (iflag) + rflags |= REG_ICASE; + if (Eflag) + rflags |= (sus ? REG_EXTENDED : REG_OLDERE|REG_NOI18N) | + REG_MTPARENBAD; + else { + rflags |= REG_ANGLES; + if (sus >= 3) + rflags |= REG_AVOIDNULL; + } + if (xflag) + rflags |= REG_ONESUB; + else + rflags |= REG_NOSUB; + if ((e = e0)->e_nxt) + rflags |= REG_NLALT; + e->e_exp = (regex_t *)smalloc(sizeof *e->e_exp); + if ((rerror = regcomp(e->e_exp, pat, rflags)) != 0) + rc_error(e, rerror); + free(pat); + if (!xflag && e->e_exp->re_flags & REG_DFA) + range = mbcode ? 
rc_rangew : rc_range; +#else /* !UXRE */ + if (iflag) + rflags |= REG_ICASE; + if (Eflag) + rflags |= REG_EXTENDED; + if (!xflag) + rflags |= REG_NOSUB; + for (e = e0; e; e = e->e_nxt) { + e->e_exp = (regex_t *)smalloc(sizeof *e->e_exp); + if ((rerror = regcomp(e->e_exp, e->e_pat, rflags)) != 0) + rc_error(e, rerror); + } +#endif /* !UXRE */ +} + +void +rc_select(void) +{ + build = rc_build; + match = rc_match; + matchflags |= MF_NULTERM; + matchflags &= ~MF_LOCONV; +} + +/* + * Derived from Unix 32V /usr/src/cmd/egrep.y + * + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, April 2001. + */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
/*
 * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   Redistributions of source code and documentation must retain the
 *    above copyright notice, this list of conditions and the following
 *    disclaimer.
 *   Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *   All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed or owned by Caldera
 *      International, Inc.
 *   Neither the name of Caldera International, Inc. nor the names of
 *    other contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA
 * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE
 * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifdef	UXRE
/*
 * Range search for singlebyte locales using the modified UNIX(R) Regular
 * Expression Library DFA.
 *
 * Scans from ip->ib_cur up to last, one DFA transition per byte, counting
 * lines in lineno and reporting lines through outline().
 *
 * Returns 1 right after a line has been reported while qflag or lflag is
 * set, and 0 once the scan position has moved past last.
 *
 * Labels:
 *   found	an accepting state was reached for the current line
 *   succeed	report the current line
 *   fail	give up on the current line and skip to the next one
 *   brk2	continue the outer scanning loop
 */
static int
rc_range(struct iblok *ip, char *last)
{
	char	*p;
	int	c, cstat, nstat;
	Dfa	*dp = e0->e_exp->re_dfa;

	p = ip->ib_cur;
	lineno++;
	cstat = dp->anybol;
	/* the start state may already be accepting */
	if (dp->acc[cstat])
		goto found;
	for (;;) {
		/* a zero table entry means the transition is not cached yet */
		if ((nstat = dp->trans[cstat][*p & 0377]) == 0) {
			/*
			 * '\0' is used to indicate end-of-line. If a '\0'
			 * character appears in input, it matches '$' but
			 * the DFA remains in dead state afterwards; there
			 * is thus no need to handle this condition
			 * specially to get the same behavior as in plain
			 * regexec().
			 */
			if ((c = *p & 0377) == '\n')
				c = '\0';
			if ((nstat = regtrans(dp, cstat, c, 1)) == 0)
				goto fail;
			/* make the newline entry follow the end-of-line one */
			dp->trans[cstat]['\n'] = dp->trans[cstat]['\0'];
		}
		/* table entries are stored as state number plus one */
		if (dp->acc[cstat = nstat - 1]) {
		found:	for (;;) {
				if (vflag == 0) {
		succeed:		outline(ip, last, p - ip->ib_cur);
					if (qflag || lflag)
						return 1;
				} else {
					/* -v: a matching line is skipped */
		fail:			ip->ib_cur = p;
					while (*ip->ib_cur++ != '\n');
				}
				if ((p = ip->ib_cur) > last)
					return 0;
				lineno++;
				if (dp->acc[cstat = dp->anybol] == 0)
					goto brk2;
			}
		}
		if (*p++ == '\n') {
			/* end of line reached without a match */
			if (vflag) {
				p--;
				goto succeed;
			}
			if ((ip->ib_cur = p) > last)
				return 0;
			lineno++;
			if (dp->acc[cstat = dp->anybol])
				goto found;
		}
	brk2:;
	}
}

/*
 * Range search for multibyte locales using the modified UNIX(R) Regular
 * Expression Library DFA.
 *
 * Same control flow and return values as rc_range(), but the input is
 * decoded into wide characters with mbtowi() first. A byte sequence that
 * cannot be decoded is consumed as a single byte with the value WEOF.
 */
static int
rc_rangew(struct iblok *ip, char *last)
{
	char	*p;
	int	n, cstat, nstat;
	wint_t	wc;
	Dfa	*dp = e0->e_exp->re_dfa;

	p = ip->ib_cur;
	lineno++;
	cstat = dp->anybol;
	if (dp->acc[cstat])
		goto found;
	for (;;) {
		if (*p & 0200) {
			if ((n = mbtowi(&wc, p, last + 1 - p)) < 0) {
				n = 1;
				wc = WEOF;
			}
		} else {
			wc = *p;
			n = 1;
		}
		/*
		 * Only values below NCHAR index the transition table;
		 * anything else always goes through regtrans().
		 */
		if ((wc & ~(wchar_t)(NCHAR-1)) != 0 ||
				(nstat = dp->trans[cstat][wc]) == 0) {
			/*
			 * '\0' is used to indicate end-of-line. If a '\0'
			 * character appears in input, it matches '$' but
			 * the DFA remains in dead state afterwards; there
			 * is thus no need to handle this condition
			 * specially to get the same behavior as in plain
			 * regexec().
			 */
			if (wc == '\n')
				wc = '\0';
			if ((nstat = regtrans(dp, cstat, wc, mb_cur_max)) == 0)
				goto fail;
			dp->trans[cstat]['\n'] = dp->trans[cstat]['\0'];
		}
		if (dp->acc[cstat = nstat - 1]) {
		found:	for (;;) {
				if (vflag == 0) {
		succeed:		outline(ip, last, p - ip->ib_cur);
					if (qflag || lflag)
						return 1;
				} else {
		fail:			ip->ib_cur = p;
					while (*ip->ib_cur++ != '\n');
				}
				if ((p = ip->ib_cur) > last)
					return 0;
				lineno++;
				if (dp->acc[cstat = dp->anybol] == 0)
					goto brk2;
			}
		}
		p += n;
		/* p[-n] is the first byte of the character just consumed */
		if (p[-n] == '\n') {
			if (vflag) {
				p--;
				goto succeed;
			}
			if ((ip->ib_cur = p) > last)
				return 0;
			lineno++;
			if (dp->acc[cstat = dp->anybol])
				goto found;
		}
	brk2:;
	}
}
#endif	/* UXRE */
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* Sccsid @(#)sus.c 1.24 (gritter) 5/29/05> */ + +/* + * Code for POSIX.2 command version only. + */ + +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "alloc.h" +#include "grep.h" + +#if defined (SU3) +int sus = 3; +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char su3id[] USED = "@(#)grep_su3.sl 1.24 (gritter) 5/29/05"; +#else +int sus = 1; +#endif +char *stdinmsg = "(standard input)"; + +/* + * Usage message. + */ +void +usage(void) +{ + char *sEF, *sq, *ss; + + if (*progname == 'f') { + sq = ""; + ss = ""; + } else { + sq = "|-q"; + ss = "s"; + } + if (*progname == 'f' || *progname == 'e') + sEF = ""; + else + sEF = "[-E|-F] "; + fprintf(stderr, "%s: Usage:\n\ + %s[-c|-l%s] [-bhin%svx] pattern [file ...]\n\ + %s[-c|-l%s] [-bhin%svx] -e pattern ... [-f file ...] [file ...]\n\ + %s[-c|-l%s] [-bhin%svx] -f file ... [-e pattern ...] 
[file ...]\n", + progname, + sEF, sq, ss, + sEF, sq, ss, + sEF, sq, ss); + exit(2); +} + +void +misop(void) +{ + usage(); +} + +void +rc_error(struct expr *e, int rerror) +{ + char *regerrs; + size_t resz; + + resz = regerror(rerror, e->e_exp, NULL, 0) + 1; + regerrs = smalloc(resz); + regerror(rerror, e->e_exp, regerrs, resz); + fprintf(stderr, "%s: RE error: %s\n", progname, regerrs); + exit(2); +} + +void +init(void) +{ + switch (*progname) { + case 'e': + Eflag = 2; + rc_select(); + options = "EFbce:f:hilnqrRsvxyz"; + break; + case 'f': + Fflag = 2; + ac_select(); + options = "Fbce:f:hilnqrRsvxyz"; + break; + default: + rc_select(); + options = "EFbce:f:hilnqrRsvwxyz"; + } +} + +void +eg_select(void) +{ +} + +void +st_select(void) +{ +} diff --git a/hd/hd.1 b/hd/hd.1 @@ -0,0 +1,160 @@ +.\" +.\" Sccsid @(#)hd.1 1.8 (gritter) 12/5/04 +.TH HD 1XNX "12/5/04" "Heirloom Toolchest" "XENIX System Compatibility" +.SH NAME +hd \- (XENIX) display files in hexadecimal format +.SH SYNOPSIS +\fBhd\fR +[\fB\-acbwlAxdov\fR] +[\fB\-t\fR] +[\fB\-s\fI\ offset\fR] +[\fB\-n\fI\ count\fR] +[\fIfiles\fR] +.SH DESCRIPTION +The +.I hd +command +displays the contents +of the given +.I files +(or of standard input +if no such argument is present) +in hexadecimal, decimal or octal format, +16 bytes per line. +It optionally displays printable ASCII characters in the rightmost column. +The file address is printed in the leftmost column. +.PP +Output format selection is done +with options; each format selection option consists +(optionally) of a format selector and a base selector. +.PP +The format selectors are as follows; +if no format selector is present, +the base selector is applied to all available format selectors: +.TP +.B \-b +Separates the input in bytes; +each byte is printed as a numerical value. 
+.TP +.B \-c +Separates the input in bytes; +each byte that is also a printable character +is printed as such, +certain characters are printed as C language escapes, +others as a numerical value. +.TP +.B \-w +Separates the input in 16-bit words; +each word is printed as a numerical value. +.TP +.B \-l +Separates the input in 32-bit long words; +each word is printed as a numerical value. +.PP +The following base selectors are accepted; +if the base selector is omitted, +all base selectors are applied to the chosen format selectors: +.TP +.B \-o +Selects octal output. +.TP +.B \-d +Selects unsigned decimal output. +.TP +.B \-x +Selects hexadecimal output. +.PP +The format of the addresses at the left +can also be selected by using the +.TP +.B \-a +option followed by one of the base selectors; +only one base selector is applied. +The default address base is hexadecimal. +.PP +The following options also affect the output format: +.TP +.B \-A +Causes bytes that are printable characters to be displayed +as such at the right; nonprintable bytes are printed as `.'. +.TP +.B \-t +Text file format; overrides all other format options except +.IR \-a . +Each line of the input is preceded by its address; +printable characters are displayed as such, +except for `\e', `^', and `~', which are prefixed by a `\e' character. +ASCII control characters are prefixed by `^'. +Bytes with the highest bit set that do not form a printable character +are indicated by `~', +followed by the corresponding ASCII character as above. +.TP +.B \-v +Unless +.I \-t +is used, +a set of immediately following identical 16-byte sets +is abbreviated by printing `*' for the second and further ones. +This option inhibits this behavior +and causes all lines to be displayed. +This option is an extension. +.PP +If no output selection is present, the default is +.IR \-bxA . +.PP +The following options affect the handling of input files: +.TP +\fB\-s\fI\ offset\fR +Selects a starting offset within each file. 
+.I Offset +is interpreted as a decimal value +unless prefixed by +.BR 0 , +which causes interpretation as an octal value, +or +.BR 0x , +which causes interpretation as a hexadecimal value. +The suffixes +.BR w , +.BR l , +.BR b , +and +.B k +cause multiplication by +2 (`words'), +4 (`long words'), +512 (`blocks'), +or +1024 (`kilobytes'), +respectively. +Value and suffix may be separated by a `*' character +to suppress interpretation of `b' as a hexadecimal digit. +.TP +\fB\-n\fI\ count\fR +Causes only +.I count +characters of the file to be displayed; +.I count +is handled as the +.I offset +for +.I \-s +described above. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Selects the set of printable single-byte characters +for the +.I \-A +and +.I \-c +options +and the set of printable characters for the +.I \-t +option. +.SH "SEE ALSO" +od(1) diff --git a/hd/hd.c b/hd/hd.c @@ -0,0 +1,715 @@ +/* + * hd - display files in hexadecimal format + * + * Gunnar Ritter, Freiburg i. Br., Germany, September 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)hd.sl 1.12 (gritter) 5/29/05"; + +#include <sys/types.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <libgen.h> +#include <stdarg.h> +#include <locale.h> +#include <ctype.h> +#include <wctype.h> +#include <wchar.h> +#include <inttypes.h> +#include <limits.h> +#include "atoll.h" +#include "mbtowi.h" + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +#ifdef _IO_putc_unlocked +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif +#endif /* __GLIBC__ */ + +enum base { + BASE_0 = 00, + BASE_X = 01, + BASE_D = 02, + BASE_O = 04 +}; + +union block { + int8_t b_c[16]; + int16_t b_w[8]; + int32_t b_l[4]; +}; + +static const struct fm { + int f_fmt; + enum base f_base; + char f_width; + char f_align[3]; + const char *f_prf; +} ofmt[] = { + { 'b', BASE_X, 2, {2,5,11}, "%02x" }, + { 'b', BASE_D, 3, {3,7,15}, "%3u" }, + { 'b', BASE_O, 3, {3,7,15}, "%03o" }, + { 'c', BASE_X, 2, {2,5,11}, "%02x" }, + { 'c', BASE_D, 3, {3,7,15}, "%3u" }, + { 'c', BASE_O, 3, {3,7,15}, "%03o" }, + { 'w', BASE_X, 4, {0,4, 9}, "%04x" }, + { 'w', BASE_D, 5, {0,5,11}, "%5u" }, + { 'w', BASE_O, 6, {0,6,13}, "%06o" }, + { 'l', BASE_X, 8, {0,0, 8}, "%08lx" }, + { 'l', BASE_D, 10, {0,0,10}, "%10lu" }, + { 'l', BASE_O, 11, {0,0,11}, "%011lo" }, + { 0, BASE_0, 0, {0,0, 0}, NULL } +}; + +static int Aflag; /* print ASCII at right */ +static enum base aflag; /* address format specifier */ +static enum base bflag; /* byte format specifier */ +static enum base cflag; /* print ASCII at center */ +static enum base lflag; /* long (32 bit) format specifier */ +static long long nflag; /* number of bytes to process */ +static long long sflag; /* start offset */ +static 
int tflag; /* print text file */ +static int vflag; /* no '*' for identical lines */ +static enum base wflag; /* word (16 bit) format specifier */ +static char align[3]; +static const char *progname; +static int status; +static int mb_cur_max; + +static void usage(void); +static void flag(int); +static void base(enum base, enum base *); +static long long count(const char *); +static void usage(void); +static void diag(const char *, ...); +static void hd(FILE *); +static void prna(long long); +static void prnb(union block *, int); +static void line(union block *, int, int, enum base, int); +static const struct fm *getfmt(int, enum base); +static void getalign(void); +static void prnt(FILE *, long long); +static void prnc(int); +static char *wcget(FILE *fp, wint_t *wc, int *len); + +int +main(int argc, char **argv) +{ + FILE *fp; + int i, j; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + for (i = 1; i < argc && argv[i][0] == '-'; i++) { + switch (argv[i][1]) { + case 's': + if (argv[i][2]) + sflag = count(&argv[i][2]); + else if (++i < argc) + sflag = count(argv[i]); + else + usage(); + break; + case 'n': + if (argv[i][2]) + nflag = count(&argv[i][2]); + else if (++i < argc) + nflag = count(argv[i]); + else + usage(); + break; + default: + for (j = 1; argv[i][j]; j++) + flag(argv[i][j]&0377); + flag(0); + } + } + if (tflag && (Aflag|bflag|cflag|lflag|wflag)) + diag("-t flag overrides other flags"); + if ((Aflag|bflag|cflag|lflag|wflag) == 0) + Aflag = 1; + if ((bflag|cflag|lflag|wflag) == 0) + bflag = BASE_X; + getalign(); + if (i < argc) { + j = i+1 < argc; + do { + if (access(argv[i], R_OK) < 0) { + diag("cannot access %s", argv[i]); + continue; + } + if ((fp = fopen(argv[i], "r")) == NULL) { + diag("open of %s failed", argv[i]); + continue; + } + if (j) + printf("%s:\n", argv[i]); + hd(fp); + fclose(fp); + if (i+1 < argc) + printf("\n"); + } while (++i < argc); + } else + hd(stdin); + return status; +} + +static void 
+flag(int c) +{ + static enum base *basep; + + switch (c) { + case '\0': + if (basep && basep != &aflag && *basep == BASE_0) + *basep |= BASE_O|BASE_D|BASE_X; + basep = NULL; + break; + case 'a': + basep = &aflag; + break; + case 'b': + basep = &bflag; + break; + case 'w': + basep = &wflag; + break; + case 'l': + basep = &lflag; + break; + case 'c': + basep = &cflag; + break; + case 'A': + Aflag = 1; + break; + case 'x': + base(BASE_X, basep); + break; + case 'd': + base(BASE_D, basep); + break; + case 'o': + base(BASE_O, basep); + break; + case 't': + tflag = 1; + break; + case 'v': + vflag = 1; + break; + default: + usage(); + } +} + +static void +base(enum base b, enum base *basep) +{ + if (basep) { + if (basep == &aflag) + *basep = b; + else + *basep |= b; + } else { + if (aflag == BASE_0) + aflag |= b; + cflag |= b; + bflag |= b; + wflag |= b; + lflag |= b; + } +} + +static long long +count(const char *s) +{ + long long c; + int bs = 10; + char *x; + + if (s[0] == '0' && s[1] == 'x') { + bs = 16; + s += 2; + } else if (s[0] == '0') { + bs = 8; + s++; + } + c = strtoll(s, &x, bs); + s = x; + if (*s == '*') + s++; + switch (*s) { + case 'w': + c *= 2; + s++; + break; + case 'l': + c *= 4; + s++; + break; + case 'b': + c *= 512; + s++; + break; + case 'k': + c *= 1024; + s++; + break; + } + if (*s) { + diag("bad count/offset value"); + exit(3); + } + return c; +} + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-acbwlAxdo] [-t] [-s offset[*][wlbk]] " + "[-n count[*][wlbk]] [file] ...\n", + progname); + exit(2); +} + +static void +diag(const char *fmt, ...) 
+{ + va_list ap; + + fprintf(stderr, "%s: ", progname); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + status |= 1; +} + +static void +hd(FILE *fp) +{ + long long of = 0, rd = 0; + union block b, ob; + size_t n, m, on = 0; + int star = 0; + + if (sflag) + while (of < sflag) { + getc(fp); + of++; + } + if (tflag) { + prnt(fp, of); + return; + } + do { + if (nflag == 0 || rd + sizeof b.b_c < nflag) + m = sizeof b.b_c; + else + m = nflag - rd; + if ((n = fread(b.b_c, 1, m, fp)) > 0) { + if (!vflag && n==on && memcmp(b.b_c, ob.b_c, n) == 0) { + if (star == 0) + printf("*\n"); + star = 1; + } else { + star = 0; + prna(of); + if (n < sizeof b.b_c) + memset(&b.b_c[n], 0, sizeof b.b_c - n); + prnb(&b, n); + } + } + rd += n; + of += n; + on = n; + ob = b; + } while (n == m && (nflag == 0 || rd < nflag)); + prna(of); + putchar('\n'); +} + +static void +prna(long long n) +{ + switch (aflag) { + case BASE_O: + printf("%06llo", n); + break; + case BASE_D: + printf("%05llu", n); + break; + case BASE_0: + case BASE_X: + printf("%04llx", n); + break; + } +} + +static void +prnb(union block *bp, int n) +{ + int cnt = 0; + + if (cflag&BASE_X) + line(bp, n, 'c', BASE_X, cnt++); + if (cflag&BASE_D) + line(bp, n, 'c', BASE_D, cnt++); + if (cflag&BASE_O) + line(bp, n, 'c', BASE_O, cnt++); + if (bflag&BASE_X) + line(bp, n, 'b', BASE_X, cnt++); + if (bflag&BASE_D) + line(bp, n, 'b', BASE_D, cnt++); + if (bflag&BASE_O) + line(bp, n, 'b', BASE_O, cnt++); + if (wflag&BASE_X) + line(bp, n, 'w', BASE_X, cnt++); + if (wflag&BASE_D) + line(bp, n, 'w', BASE_D, cnt++); + if (wflag&BASE_O) + line(bp, n, 'w', BASE_O, cnt++); + if (lflag&BASE_X) + line(bp, n, 'l', BASE_X, cnt++); + if (lflag&BASE_D) + line(bp, n, 'l', BASE_D, cnt++); + if (lflag&BASE_O) + line(bp, n, 'l', BASE_O, cnt++); +} + +static void +line(union block *bp, int n, int fmt, enum base base, int cnt) +{ + int c, i, j, k, col = 0; + const char *cp; + const struct fm *fmp; + + putchar('\t'); 
+ i = 0; + switch (fmt) { + case 'l': + fmp = getfmt('l', base); + for (j = i/4; j < (n>>2); j++, i += 4) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[2]; k++) { + putchar(' '); + col++; + } + col += printf(fmp->f_prf,(long)(bp->b_l[j]&0xffffffff)); + } + if (i == n) + break; + /*FALLTHRU*/ + case 'w': + fmp = getfmt('w', base); + for (j = i/2; j < (n>>1); j++, i += 2) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[1]; k++) { + putchar(' '); + col++; + } + col += printf(fmp->f_prf, (int)(bp->b_w[j]&0177777)); + } + if (i == n) + break; + /*FALLTHRU*/ + case 'b': + fmp = getfmt('b', base); + for (j = i; j < n; j++, i++) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[0]; k++) { + putchar(' '); + col++; + } + col += printf(fmp->f_prf, bp->b_c[j]&0377); + } + break; + case 'c': + fmp = getfmt('c', base); + for (i = 0; i < n; i++) { + if (col > 0) { + putchar(' '); + col++; + } + if (i == 8) { + putchar(' '); + col++; + } + for (k = fmp->f_width; k < align[0]; k++) { + putchar(' '); + col++; + } + c = bp->b_c[i]&0377; + cp = NULL; + if (c == '\b') + cp = "\\b"; + else if (c == '\t') + cp = "\\t"; + else if (c == '\n') + cp = "\\n"; + else if (c == '\f') + cp = "\\f"; + else if (c == '\r') + cp = "\\r"; + else if (!isprint(c)) { + col += printf(fmp->f_prf, c); + } else { + if (base != BASE_X) { + putchar(' '); + col++; + } + col += printf(" %c", c); + } + if (cp) { + if (base != BASE_X) { + putchar(' '); + col++; + } + printf(cp); + } + } + break; + } + if (cnt == 0 && Aflag) { + while (col++ < 51) + putchar(' '); + for (i = 0; i < n; i++) { + if ((bp->b_c[i]&0340) == 0 || bp->b_c[i] == 0177 || + !isprint(bp->b_c[i]&0377)) + putchar('.'); + else + putchar(bp->b_c[i]&0377); + } + } + putchar('\n'); +} + +static const struct fm * 
/*
 * Look up the output format entry for format letter fmt and base.
 * Returns NULL if ofmt[] has no such entry.
 */
static const struct fm *
getfmt(int fmt, enum base base)
{
	int	i;

	for (i = 0; ofmt[i].f_fmt; i++)
		if (ofmt[i].f_fmt == fmt && ofmt[i].f_base == base)
			return &ofmt[i];
	return NULL;
}

/*
 * Compute align[]: for each of its three slots, the largest f_align value
 * among all format/base combinations that were selected on the command
 * line. line() pads its cells up to these widths.
 */
static void
getalign(void)
{
	int	i, j;
	enum base	*basep;

	for (i = 0; ofmt[i].f_fmt; i++) {
		/* find the flag variable that belongs to this format letter */
		switch (ofmt[i].f_fmt) {
		case 'b':
			basep = &bflag;
			break;
		case 'c':
			basep = &cflag;
			break;
		case 'w':
			basep = &wflag;
			break;
		case 'l':
			basep = &lflag;
			break;
		default:
			basep = NULL;
		}
		if (basep && *basep & ofmt[i].f_base)
			for (j = 0; j < sizeof align; j++)
				if (ofmt[i].f_align[j] > align[j])
					align[j] = ofmt[i].f_align[j];
	}
}

/*
 * Text file output (-t).
 *
 * Prints the input line by line, each line preceded by its address and a
 * tab. Printable characters other than '\\', '^' and '~' are copied;
 * everything else goes through prnc(). Stops at end of input or after
 * nflag bytes when that is non-zero, then prints the final address.
 *
 * fp	input stream.
 * of	address of the first byte that will be read.
 */
static void
prnt(FILE *fp, long long of)
{
	wint_t	wc;
	char	b, *mb;
	int	c, lastc = '\n', n;
	long long	rd = 0;

	while ((nflag == 0 || rd < nflag)) {
		if (mb_cur_max > 1) {
			if ((mb = wcget(fp, &wc, &n)) == NULL)
				break;
		} else {
			/* single-byte locale: one byte is one character */
			if ((c = getc(fp)) == EOF)
				break;
			b = wc = c;
			mb = &b;
			n = 1;
		}
		/* the previous character ended a line: start a new one */
		if (lastc == '\n') {
			prna(of);
			putchar('\t');
		}
		of += n, rd += n;
		if (n == 1) {
			c = *mb&0377;
			lastc = c;
			if (wc != WEOF && isprint(c) && c != '\\' &&
					c != '^' && c != '~')
				putchar(c);
			else
				prnc(c);
			if (lastc == '\n')
				putchar('\n');
		} else {
			/* multibyte character: copy or escape byte by byte */
			lastc = c = EOF;
			if (wc != WEOF && iswprint(wc))
				while (n--) {
					putchar(*mb&0377);
					mb++;
				}
			else
				while (n--) {
					prnc(*mb&0377);
					mb++;
				}
		}
	}
	/* terminate an unfinished last line */
	if (lastc != '\n')
		putchar('\n');
	prna(of);
	putchar('\n');
}

/*
 * Print byte c in the escaped notation of -t: 0177 and 0377 as a backslash
 * followed by the octal value; otherwise '~' for a set high bit, '^' plus
 * the character OR 0100 for values below 040, and a '\\' in front of a
 * resulting '\\', '~' or '^'.
 */
static void
prnc(int c)
{
	if (c == 0177 || c == 0377) {
		printf("\\%o", c);
		return;
	}
	if (c & 0200) {
		putchar('~');
		c &= 0177;
	}
	if (c < 040) {
		putchar('^');
		c |= 0100;
	}
	if (c == '\\' || c == '~' || c == '^')
		putchar('\\');
	putchar(c);
}

/*
 * Read the next character from fp for multibyte locales.
 *
 * On success returns a pointer to the character's bytes in a static
 * buffer, with its length in *len and its value in *wc. A sequence
 * mbtowi() rejects is returned as one byte with *wc set to WEOF. Returns
 * NULL with *wc set to WEOF at end of input.
 *
 * Bytes that were read ahead but do not belong to the returned character
 * stay in the static buffer and are used first by the next call, so the
 * function must not be used on two streams alternately.
 */
static char *
wcget(FILE *fp, wint_t *wc, int *len)
{
	static char	mbuf[MB_LEN_MAX+1];
	static char	*mcur, *mend;	/* unconsumed read-ahead in mbuf */
	static int	incompl;	/* EOF was hit while reading ahead */
	size_t	rest;
	int	c, i, n;

	i = 0;
	rest = mend - mcur;
	if (rest && mcur > mbuf) {
		/* move the read-ahead left over from the last call to the front */
		do
			mbuf[i] = mcur[i];
		while (i++, --rest);
	} else if (incompl) {
		/* nothing left over, and the stream had already ended */
		incompl = 0;
		*wc = WEOF;
		mend = mcur = NULL;
		return NULL;
	}
	if (i == 0) {
		c = getc(fp);
		if (c == EOF) {
			*wc = WEOF;
			mend = mcur = NULL;
			return NULL;
		}
		mbuf[i++] = c;
	}
	if (mbuf[0] & 0200) {
		/*
		 * Possible multibyte character: fill the buffer up to
		 * mb_cur_max bytes, but never read past a newline.
		 */
		while (mbuf[i-1] != '\n' && i < mb_cur_max &&
				incompl == 0) {
			c = getc(fp);
			if (c != EOF)
				mbuf[i++] = c;
			else
				incompl = 1;
		}
		n = mbtowi(wc, mbuf, i);
		if (n < 0) {
			*len = 1;
			*wc = WEOF;
		} else if (n == 0) {
			*len = 1;
			*wc = '\0';
		} else
			*len = n;
	} else {
		*wc = mbuf[0];
		*len = n = 1;
	}
	/* remember what was read beyond the character being returned */
	mcur = &mbuf[*len];
	mend = &mcur[i - *len];
	return mbuf;
}
+ */ + +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "allprint.c 6.11 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)allprint.c 1.4 (gritter) 11/27/05 + */ + +#include <stdio.h> +#include <stdlib.h> +#include <ctype.h> +#ifdef __sun +#include <sys/euc.h> +#include <widec.h> +#endif +#include <wctype.h> +#include <wchar.h> + +extern FILE *yyout; + +#ifndef JLSLEX +#define CHR char +#endif + +#ifdef WOPTION +#define CHR wchar_t +#define sprint sprint_w +#define allprint allprint_w +#endif + +#ifdef EOPTION +#define CHR wchar_t +#endif + +void +allprint(CHR c) +{ + switch (c) { + case '\n': + fprintf(yyout, "\\n"); + break; + case '\t': + fprintf(yyout, "\\t"); + break; + case '\b': + fprintf(yyout, "\\b"); + break; + case ' ': + fprintf(yyout, "\\_"); + break; + default: + if (!iswprint(c)) + fprintf(yyout, "\\x%-2x", (int)c); + else + putwc(c, yyout); + break; + } +} + +void +sprint(CHR *s) +{ + while (*s) + allprint(*s++); +} diff --git a/lex/depsinc.mk b/lex/depsinc.mk @@ -0,0 +1 @@ +LEX = $lex_DEPDIR/lex -Y$lex_DEPDIR diff --git a/lex/getopt.c b/lex/getopt.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)getopt.c 1.10 (gritter) 12/16/07 + */ +/* from OpenSolaris "getopt.c 1.23 05/06/08 SMI" */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * See getopt(3C) and SUS/XPG getopt() for function definition and + * requirements. + * + * This actual implementation is a bit looser than the specification + * as it allows any character other than ':' to be used as an option + * character - The specification only guarantees the alnum characters + * ([a-z][A-Z][0-9]). + */ + +#include <sys/types.h> +#include <string.h> +#include <stdio.h> + +extern ssize_t write(int, const void *, size_t); + +char *optarg = NULL; +int optind = 1; +int opterr = 1; +int optopt = 0; + +#define ERR(s, c) err(s, c, optstring, argv[0]) +static void +err(const char *s, int c, const char *optstring, const char *argv0) +{ + char errbuf[256], *ep = errbuf; + const char *cp; + + if (opterr && optstring[0] != ':') { + for (cp = argv0; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = ": "; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = s; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = " -- "; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + if (ep<&errbuf[sizeof errbuf]) + *ep++ = c; + if (ep<&errbuf[sizeof errbuf]) + *ep++ = '\n'; + write(2, errbuf, ep - errbuf); + } +} + +/* + * getopt_sp is required to keep state between successive calls to getopt() + * while extracting aggregated options (ie: -abcd). 
Hence, getopt() is not + * thread safe or reentrant, but it really doesn't matter. + * + * So, why isn't this "static" you ask? Because the historical Bourne + * shell has actually latched on to this little piece of private data. + */ +int getopt_sp = 1; + +/* + * Determine if the specified character (c) is present in the string + * (optstring) as a regular, single character option. If the option is found, + * return a pointer into optstring pointing at the option character, + * otherwise return null. The character ':' is not allowed. + */ +static char * +parse(const char *optstring, const char c) +{ + char *cp = (char *)optstring; + + if (c == ':') + return (NULL); + do { + if (*cp == c) + return (cp); + } while (*cp++ != '\0'); + return (NULL); +} + +/* + * External function entry point. + */ +int +getopt(int argc, char *const *argv, const char *optstring) +{ + char c; + char *cp; + + /* + * Has the end of the options been encountered? The following + * implements the SUS requirements: + * + * If, when getopt() is called: + * argv[optind] is a null pointer + * *argv[optind] is not the character '-' + * argv[optind] points to the string "-" + * getopt() returns -1 without changing optind. If + * argv[optind] points to the string "--" + * getopt() returns -1 after incrementing optind. + */ + if (getopt_sp == 1) { + if (optind >= argc || argv[optind][0] != '-' || + argv[optind] == NULL || argv[optind][1] == '\0') + return (EOF); + else if (strcmp(argv[optind], "--") == 0) { + optind++; + return (EOF); + } + } + + /* + * Getting this far indicates that an option has been encountered. + * Note that the syntax of optstring applies special meanings to + * the characters ':' and '(', so they are not permissible as + * option letters. A special meaning is also applied to the ')' + * character, but its meaning can be determined from context. + * Note that the specification only requires that the alnum + * characters be accepted. 
+ */ + optopt = c = (unsigned char)argv[optind][getopt_sp]; + optarg = NULL; + if ((cp = parse(optstring, c)) == NULL) { + /* LINTED: variable format specifier */ + ERR("illegal option", c); + if (argv[optind][++getopt_sp] == '\0') { + optind++; + getopt_sp = 1; + } + return ('?'); + } + optopt = c = *cp; + + /* + * A valid option has been identified. If it should have an + * option-argument, process that now. SUS defines the setting + * of optarg as follows: + * + * 1. If the option was the last character in the string pointed to + * by an element of argv, then optarg contains the next element + * of argv, and optind is incremented by 2. If the resulting + * value of optind is not less than argc, this indicates a + * missing option-argument, and getopt() returns an error + * indication. + * + * 2. Otherwise, optarg points to the string following the option + * character in that element of argv, and optind is incremented + * by 1. + * + * The second clause allows -abcd (where b requires an option-argument) + * to be interpreted as "-a -b cd". + */ + if (*(cp + 1) == ':') { + /* The option takes an argument */ + if (argv[optind][getopt_sp+1] != '\0') { + optarg = &argv[optind++][getopt_sp+1]; + } else if (++optind >= argc) { + /* LINTED: variable format specifier */ + ERR("option requires an argument", c); + getopt_sp = 1; + optarg = NULL; + return (optstring[0] == ':' ? ':' : '?'); + } else + optarg = argv[optind++]; + getopt_sp = 1; + } else { + /* The option does NOT take an argument */ + if (argv[optind][++getopt_sp] == '\0') { + getopt_sp = 1; + optind++; + } + optarg = NULL; + } + return (c); +} /* getopt() */ + +#ifdef __APPLE__ +/* + * Starting with Mac OS 10.5 Leopard, <unistd.h> turns getopt() + * into getopt$UNIX2003() by default. Consequently, this function + * is called instead of the one defined above. However, optind is + * still taken from this file, so in effect, options are not + * properly handled. 
Defining an own getopt$UNIX2003() function + * works around this issue. + */ +int +getopt$UNIX2003(int argc, char *const argv[], const char *optstring) +{ + return getopt(argc, argv, optstring); +} +#endif /* __APPLE__ */ diff --git a/lex/header.c b/lex/header.c @@ -0,0 +1,409 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "header.c 6.22 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)header.c 1.12 (gritter) 9/23/06 + */ + +#include "ldefs.c" + +static void rhd1(void); +static void chd1(void); +static void chd2(void); +static void ctail(void); +static void rtail(void); + +void +phead1(void) +{ + ratfor ? 
rhd1() : chd1(); +} + +static void +chd1(void) +{ + if (*v_stmp == 'y') { + extern const char rel[]; + fprintf(fout, "\ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4\n\ +#define YYUSED __attribute__ ((used))\n\ +#elif defined __GNUC__\n\ +#define YYUSED __attribute__ ((unused))\n\ +#else\n\ +#define YYUSED\n\ +#endif\n\ +static const char yylexid[] USED = \"lex: %s\"\n", rel); + } + if (handleeuc) { + fprintf(fout, "#ifndef EUC\n"); + fprintf(fout, "#define EUC\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#include <stdio.h>\n"); + fprintf(fout, "#include <stdlib.h>\n"); + fprintf(fout, "#ifdef __sun\n"); + fprintf(fout, "#include <widec.h>\n"); + fprintf(fout, "#else /* !__sun */\n"); + fprintf(fout, "#include <wchar.h>\n"); + fprintf(fout, "#endif /* !__sun */\n"); + if (widecio) { /* -w option */ + fprintf(fout, "#define YYTEXT yytext\n"); + fprintf(fout, "#define YYLENG yyleng\n"); + fprintf(fout, "#ifndef __cplusplus\n"); + fprintf(fout, "#define YYINPUT input\n"); + fprintf(fout, "#define YYOUTPUT output\n"); + fprintf(fout, "#else\n"); + fprintf(fout, "#define YYINPUT lex_input\n"); + fprintf(fout, "#define YYOUTPUT lex_output\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#define YYUNPUT unput\n"); + } else { /* -e option */ + fprintf(fout, "#include <limits.h>\n"); + fprintf(fout, "#ifdef __sun\n"); + fprintf(fout, "#include <sys/euc.h>\n"); + fprintf(fout, "#endif /* __sun */\n"); + fprintf(fout, "#define YYLEX_E 1\n"); + fprintf(fout, "#define YYTEXT yywtext\n"); + fprintf(fout, "#define YYLENG yywleng\n"); + fprintf(fout, "#define YYINPUT yywinput\n"); + fprintf(fout, "#define YYOUTPUT yywoutput\n"); + fprintf(fout, "#define YYUNPUT yywunput\n"); + } + } else { /* ASCII compatibility mode. 
*/ + fprintf(fout, "#include <stdio.h>\n"); + fprintf(fout, "#include <stdlib.h>\n"); + } + if (ZCH > NCH) + fprintf(fout, "# define U(x) ((x)&0377)\n"); + else + fprintf(fout, "# define U(x) x\n"); + fprintf(fout, "# define NLSTATE yyprevious=YYNEWLINE\n"); + fprintf(fout, "# define BEGIN yybgin = yysvec + 1 +\n"); + fprintf(fout, "# define INITIAL 0\n"); + fprintf(fout, "# define YYLERR yysvec\n"); + fprintf(fout, "# define YYSTATE (yyestate-yysvec-1)\n"); + if (optim) + fprintf(fout, "# define YYOPTIM 1\n"); +#ifdef DEBUG + fprintf(fout, "# define LEXDEBUG 1\n"); +#endif + fprintf(fout, "# ifndef YYLMAX \n"); + fprintf(fout, "# define YYLMAX BUFSIZ\n"); + fprintf(fout, "# endif \n"); + fprintf(fout, "#ifndef __cplusplus\n"); + if (widecio) + fprintf(fout, "# define output(c) (void)putwc(c,yyout)\n"); + else + fprintf(fout, "# define output(c) (void)putc(c,yyout)\n"); + fprintf(fout, "#else\n"); + if (widecio) + fprintf(fout, "# define lex_output(c) (void)putwc(c,yyout)\n"); + else + fprintf(fout, "# define lex_output(c) (void)putc(c,yyout)\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "\n#if defined(__cplusplus) || defined(__STDC__)\n"); + fprintf(fout, "\n#if defined(__cplusplus) && defined(__EXTERN_C__)\n"); + fprintf(fout, "extern \"C\" {\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "\tint yyback(int *, int);\n"); /* ? */ + fprintf(fout, "\tint yyinput(void);\n"); /* ? */ + fprintf(fout, "\tint yylook(void);\n"); /* ? */ + fprintf(fout, "\tvoid yyoutput(int);\n"); /* ? */ + fprintf(fout, "\tint yyracc(int);\n"); /* ? */ + fprintf(fout, "\tint yyreject(void);\n"); /* ? */ + fprintf(fout, "\tvoid yyunput(int);\n"); /* ? 
*/ + fprintf(fout, "\tint yylex(void);\n"); + fprintf(fout, "#ifdef YYLEX_E\n"); + fprintf(fout, "\tvoid yywoutput(wchar_t);\n"); + fprintf(fout, "\twchar_t yywinput(void);\n"); + fprintf(fout, "\tvoid yywunput(wchar_t);\n"); + fprintf(fout, "#endif\n"); + + /* XCU4: type of yyless is int */ + fprintf(fout, "#ifndef yyless\n"); + fprintf(fout, "\tint yyless(int);\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#ifndef yywrap\n"); + fprintf(fout, "\tint yywrap(void);\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#ifdef LEXDEBUG\n"); + fprintf(fout, "\tvoid allprint(char);\n"); + fprintf(fout, "\tvoid sprint(char *);\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, + "#if defined(__cplusplus) && defined(__EXTERN_C__)\n"); + fprintf(fout, "}\n"); + fprintf(fout, "#endif\n\n"); + fprintf(fout, "#ifdef __cplusplus\n"); + fprintf(fout, "extern \"C\" {\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "\tvoid exit(int);\n"); + fprintf(fout, "#ifdef __cplusplus\n"); + fprintf(fout, "}\n"); + fprintf(fout, "#endif\n\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, + "# define unput(c)" + " {yytchar= (c);if(yytchar=='\\n')yylineno--;*yysptr++=yytchar;}\n"); + fprintf(fout, "# define yymore() (yymorfg=1)\n"); + if (widecio) { + fprintf(fout, "#ifndef __cplusplus\n"); + fprintf(fout, "%s%d%s\n", +"# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):yygetwchar())==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#else\n"); + fprintf(fout, "%s%d%s\n", +"# define lex_input() (((yytchar=yysptr>yysbuf?U(*--yysptr):yygetwchar())==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#endif\n"); + fprintf(fout, + "# define ECHO (void)fprintf(yyout, \"%%ls\",yytext)\n"); + fprintf(fout, + "# define REJECT { nstr = yyreject_w(); goto yyfussy;}\n"); + fprintf(fout, "#define yyless yyless_w\n"); + fprintf(fout, "int yyreject_w(void);\n"); + fprintf(fout, "int yyleng;\n"); + + /* + * XCU4: + * If %array, 
yytext[] contains the token. + * If %pointer, yytext is a pointer to yy_tbuf[]. + */ + + if (isArray) { + fprintf(fout, "#define YYISARRAY\n"); + fprintf(fout, "wchar_t yytext[YYLMAX];\n"); + } else { + fprintf(fout, "wchar_t yy_tbuf[YYLMAX];\n"); + fprintf(fout, "wchar_t * yytext = yy_tbuf;\n"); + fprintf(fout, "int yytextsz = YYLMAX;\n"); + fprintf(fout, "#ifndef YYTEXTSZINC\n"); + fprintf(fout, "#define YYTEXTSZINC 100\n"); + fprintf(fout, "#endif\n"); + } + } else { + fprintf(fout, "#ifndef __cplusplus\n"); + fprintf(fout, "%s%d%s\n", +"# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#else\n"); + fprintf(fout, "%s%d%s\n", +"# define lex_input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==", + ctable['\n'], +"?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + fprintf(fout, "#endif\n"); + fprintf(fout, "#define ECHO fprintf(yyout, \"%%s\",yytext)\n"); + if (handleeuc) { + fprintf(fout, +"# define REJECT { nstr = yyreject_e(); goto yyfussy;}\n"); + fprintf(fout, "int yyreject_e(void);\n"); + fprintf(fout, "int yyleng;\n"); + fprintf(fout, "size_t yywleng;\n"); + /* + * XCU4: + * If %array, yytext[] contains the token. + * If %pointer, yytext is a pointer to yy_tbuf[]. 
+ */ + if (isArray) { + fprintf(fout, "#define YYISARRAY\n"); + fprintf(fout, + "unsigned char yytext[YYLMAX*MB_LEN_MAX];\n"); + fprintf(fout, + "wchar_t yywtext[YYLMAX];\n"); + } else { + fprintf(fout, + "wchar_t yy_twbuf[YYLMAX];\n"); + fprintf(fout, + "wchar_t yy_tbuf[YYLMAX*MB_LEN_MAX];\n"); + fprintf(fout, + "unsigned char * yytext =" + "(unsigned char *)yy_tbuf;\n"); + fprintf(fout, + "wchar_t * yywtext = yy_twbuf;\n"); + fprintf(fout, + "int yytextsz = YYLMAX;\n"); + fprintf(fout, "#ifndef YYTEXTSZINC\n"); + fprintf(fout, "#define YYTEXTSZINC 100\n"); + fprintf(fout, "#endif\n"); + } + } else { + fprintf(fout, +"# define REJECT { nstr = yyreject(); goto yyfussy;}\n"); + fprintf(fout, "int yyleng;\n"); + + /* + * XCU4: + * If %array, yytext[] contains the token. + * If %pointer, yytext is a pointer to yy_tbuf[]. + */ + if (isArray) { + fprintf(fout, "#define YYISARRAY\n"); + fprintf(fout, "char yytext[YYLMAX];\n"); + } else { + fprintf(fout, "char yy_tbuf[YYLMAX];\n"); + fprintf(fout, + "char * yytext = yy_tbuf;\n"); + fprintf(fout, + "int yytextsz = YYLMAX;\n"); + fprintf(fout, "#ifndef YYTEXTSZINC\n"); + fprintf(fout, + "#define YYTEXTSZINC 100\n"); + fprintf(fout, "#endif\n"); + } + } + } + fprintf(fout, "int yymorfg;\n"); + if (handleeuc) + fprintf(fout, "extern wchar_t *yysptr, yysbuf[];\n"); + else + fprintf(fout, "extern char *yysptr, yysbuf[];\n"); + fprintf(fout, "int yytchar;\n"); + fprintf(fout, "FILE *yyin = (FILE *)-1, *yyout = (FILE *)-1;\n"); + fprintf(fout, "#if defined (__GNUC__)\n"); + fprintf(fout, + "static void _yyioinit(void) __attribute__ ((constructor));\n"); + fprintf(fout, "#elif defined (__SUNPRO_C)\n"); + fprintf(fout, "#pragma init (_yyioinit)\n"); + fprintf(fout, "#elif defined (__HP_aCC) || defined (__hpux)\n"); + fprintf(fout, "#pragma INIT \"_yyioinit\"\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "static void _yyioinit(void) {\n"); + fprintf(fout, "yyin = stdin; yyout = stdout; }\n"); + fprintf(fout, "extern int 
yylineno;\n"); + fprintf(fout, "struct yysvf { \n"); + fprintf(fout, "\tstruct yywork *yystoff;\n"); + fprintf(fout, "\tstruct yysvf *yyother;\n"); + fprintf(fout, "\tint *yystops;};\n"); + fprintf(fout, "struct yysvf *yyestate;\n"); + fprintf(fout, "extern struct yysvf yysvec[], *yybgin;\n"); +} + +static void +rhd1(void) +{ + fprintf(fout, "integer function yylex(dummy)\n"); + fprintf(fout, "define YYLMAX 200\n"); + fprintf(fout, "define ECHO call yyecho(yytext,yyleng)\n"); + fprintf(fout, + "define REJECT nstr = yyrjct(yytext,yyleng);goto 30998\n"); + fprintf(fout, "integer nstr,yylook,yywrap\n"); + fprintf(fout, "integer yyleng, yytext(YYLMAX)\n"); + fprintf(fout, "common /yyxel/ yyleng, yytext\n"); + fprintf(fout, + "common /yyldat/ yyfnd, yymorf, yyprev, yybgin, yylsp, yylsta\n"); + fprintf(fout, + "integer yyfnd, yymorf, yyprev, yybgin, yylsp, yylsta(YYLMAX)\n"); + fprintf(fout, "for(;;){\n"); + fprintf(fout, "\t30999 nstr = yylook(dummy)\n"); + fprintf(fout, "\tgoto 30998\n"); + fprintf(fout, "\t30000 k = yywrap(dummy)\n"); + fprintf(fout, "\tif(k .ne. 0){\n"); + fprintf(fout, "\tyylex=0; return; }\n"); + fprintf(fout, "\t\telse goto 30998\n"); +} + +void +phead2(void) +{ + if (!ratfor) + chd2(); +} + +static void +chd2(void) +{ + fprintf(fout, "if (yyin == (FILE *)-1) yyin = stdin;\n"); + fprintf(fout, "if (yyout == (FILE *)-1) yyout = stdout;\n"); + fprintf(fout, "#if defined (__cplusplus) || defined (__GNUC__)\n"); + fprintf(fout, + "/* to avoid CC and lint complaining yyfussy not being used ...*/\n"); + fprintf(fout, "{static int __lex_hack = 0;\n"); + fprintf(fout, "if (__lex_hack) { yyprevious = 0; goto yyfussy; } }\n"); + fprintf(fout, "#endif\n"); + fprintf(fout, "while((nstr = yylook()) >= 0)\n"); + fprintf(fout, "yyfussy: switch(nstr){\n"); + fprintf(fout, "case 0:\n"); + fprintf(fout, "if(yywrap()) return(0); break;\n"); +} + +void +ptail(void) +{ + if (!pflag) + ratfor ? 
rtail() : ctail(); + pflag = 1; +} + +static void +ctail(void) +{ + fprintf(fout, "case -1:\nbreak;\n"); /* for reject */ + fprintf(fout, "default:\n"); + fprintf(fout, + "(void)fprintf(yyout,\"bad switch yylook %%d\",nstr);\n"); + fprintf(fout, "} return(0); }\n"); + fprintf(fout, "/* end of yylex */\n"); +} + +static void +rtail(void) +{ + int i; + fprintf(fout, + "\n30998 if(nstr .lt. 0 .or. nstr .gt. %d)goto 30999\n", casecount); + fprintf(fout, "nstr = nstr + 1\n"); + fprintf(fout, "goto(\n"); + for (i = 0; i < casecount; i++) + fprintf(fout, "%d,\n", 30000+i); + fprintf(fout, "30999),nstr\n"); + fprintf(fout, "30997 continue\n"); + fprintf(fout, "}\nend\n"); +} + +void +statistics(void) +{ + fprintf(errorf, +"%d/%d nodes(%%e), %d/%d positions(%%p), %d/%d (%%n), %ld transitions,\n", + tptr, treesize, nxtpos-positions, maxpos, stnum + 1, nstates, rcount); + fprintf(errorf, + "%d/%d packed char classes(%%k), ", pcptr-pchar, pchlen); + if (optim) + fprintf(errorf, + " %d/%d packed transitions(%%a), ", nptr, ntrans); + fprintf(errorf, " %d/%d output slots(%%o)", yytop, outsize); + putc('\n', errorf); +} diff --git a/lex/ldefs.c b/lex/ldefs.c @@ -0,0 +1,309 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "ldefs.c 6.16 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)ldefs.c 1.7 (gritter) 4/14/07 + */ + +#include <stdio.h> +#include <stdlib.h> +#include <inttypes.h> + +#ifdef __sun +#include <widec.h> +#endif +#include <wctype.h> + +#define CHR wchar_t +#define BYTE char +#define Boolean char +#define LONG_WCHAR_T 1 + +#define PP 1 +#ifdef u370 +#define CWIDTH 8 +#define CMASK 0377 +#define ASCII 1 +#else + +#ifdef unix +#define CWIDTH 7 +#define CMASK 0177 +#define ASCII 1 +#endif + +#ifdef gcos +#define CWIDTH 9 +#define CMASK 0777 +#define ASCII 1 +#endif + +#ifdef ibm +#define CWIDTH 8 +#define CMASK 0377 +#define EBCDIC 1 +#endif +#endif + +#define NCH 256 +#define TOKENSIZE 10000 +#define DEFSIZE 1000 +#define DEFCHAR 2000 +#define BUF_SIZ 2000 +#define STARTCHAR 2560 +#define STARTSIZE 256 +#define CCLSIZE 20000 + + +#ifdef SMALL +#define TREESIZE 600 +#define NTRANS 1500 +#define NSTATES 300 +#define MAXPOS 1500 +#define MAXPOSSTATE 500 +#define NOUTPUT 1500 +#endif + +#ifndef SMALL +#define TREESIZE 1000 +#define NSTATES 500 +#define MAXPOS 2500 +#define MAXPOSSTATE 4*1000 +#define NTRANS 2000 +#define NOUTPUT 4*3000 +#endif +#define NACTIONS 4*1000 +#define ALITTLEEXTRA 300 + +#define RCCL 0x4000 +#define RNCCL 0x4001 +#define RSTR 0x4002 +#define RSCON 0x4003 +/* XCU4: add RXSCON */ +#define RXSCON 0x4011 +#define RNEWE 0x4004 +#define FINAL 0x4005 +#define RNULLS 0x4006 +#define RCAT 0x4007 +#define STAR 0x4008 +#define PLUS 0x4009 +#define QUEST 0x400a +#define 
DIV 0x400b +#define BAR 0x400c +#define CARAT 0x400d +#define S1FINAL 0x400e +#define S2FINAL 0x400f +#define DOT 0x4010 +#define ISOPERATOR(n) ((n & 0xc080) == 0x4000) + +/* + * New to JLE; this is not really a node tag. + * This is used in a string pointed to by + * the leaf of an RCCL or RNCCL node as a + * special prefix code that substitutes + * the infix '-' range operator. For + * example, a lex character class "[_0-9a-zA-Z]" + * would be translated to the intermediate + * form: + * RCCL + * | + * | + * v + * "_<RANGE>09<RANGE>a-z<RANGE>A-Z" + */ +#define RANGE 0x40ff + +#define MAXNCG 1000 +extern int ncgidtbl; +extern int ncg; /* ncg == ncgidtbl * 2 */ +typedef unsigned long lchar; +extern lchar yycgidtbl[]; +extern int yycgid(wchar_t); +extern Boolean handleeuc; /* TRUE iff -w or -e option is specified. */ +extern Boolean widecio; /* TRUE iff -w option is specified. */ + +#define DEFSECTION 1 +#define RULESECTION 2 +#define ENDSECTION 5 + +#define PC 1 +#define PS 1 + +#ifdef DEBUG +#define LINESIZE 110 +extern int yydebug; +extern int debug; /* 1 = on */ +extern int charc; +#endif + +#ifndef DEBUG +#define freturn(s) s +#endif + +#undef FALSE +#undef TRUE +enum { + FALSE, + TRUE +}; + + +extern int optind; +extern int no_input; +extern int sargc; +extern char **sargv; +extern char *v_stmp; +extern char *release_string; +extern CHR buf[]; +extern int ratfor; /* 1 = ratfor, 0 = C */ +extern int fatal; +extern int n_error; +extern int copy_line; +extern int yyline; /* line number of file */ +extern int sect; +extern int eof; +extern int lgatflg; +extern int divflg; +extern int funcflag; +extern int pflag; +extern int casecount; +extern int chset; /* 1 = CHR set modified */ +extern FILE *fin, *fout, *fother, *errorf; +extern int fptr; +extern char *ratname, *cname; +extern int prev; /* previous input character */ +extern int pres; /* present input character */ +extern int peek; /* next input character */ +extern int *name; +extern intptr_t *left; +extern 
intptr_t *right; +extern int *parent; +extern Boolean *nullstr; +extern int tptr; +extern CHR pushc[TOKENSIZE]; +extern CHR *pushptr; +extern CHR slist[STARTSIZE]; +extern CHR *slptr; +extern CHR **def, **subs, *dchar; +extern CHR **sname, *schar; +/* XCU4: %x exclusive start */ +extern int *exclusive; +extern CHR *ccl; +extern CHR *ccptr; +extern CHR *dp, *sp; +extern int dptr, sptr; +extern CHR *bptr; /* store input position */ +extern CHR *tmpstat; +extern int count; +extern int **foll; +extern int *nxtpos; +extern int *positions; +extern int *gotof; +extern int *nexts; +extern CHR *nchar; +extern int **state; +extern int *sfall; /* fallback state num */ +extern Boolean *cpackflg; /* true if state has been character packed */ +extern int *atable, aptr; +extern int nptr; +extern Boolean symbol[MAXNCG]; +extern CHR cindex[MAXNCG]; +extern int xstate; +extern int stnum; +extern int ctable[]; +extern int ZCH; +extern int ccount; +extern CHR match[MAXNCG]; +extern BYTE extra[]; +extern CHR *pcptr, *pchar; +extern int pchlen; +extern int nstates, maxpos; +extern int yytop; +extern int report; +extern int ntrans, treesize, outsize; +extern long rcount; +extern int optim; +extern int *verify, *advance, *stoff; +extern int scon; +extern CHR *psave; +extern CHR *getl(CHR *); +extern void *myalloc(int, int); + +void phead1(void); +void phead2(void); +void ptail(void); +void statistics(void); +void error_tail(void); +void error(const char *, ...); +void warning(const char *, ...); +void lgate(void); +void scopy(CHR *s, CHR *t); +void cclinter(int sw); +void cpycom(CHR *p); +void munput(int t, CHR *p); +void cfoll(int v); +void cgoto(void); +void mkmatch(void); +void layout(void); +void remch(wchar_t c); +void sortcgidtbl(void); +void repbycgid(void); +int gch(void); +int slength(CHR *s); +int yyparse(void); +int scomp(CHR *x, CHR *y); +int space(int ch); +int siconv(CHR *t); +int digit(int c); +int ctrans(CHR **ss); +int cpyact(void); +int lookup(CHR *s, CHR **t); +int 
usescape(int c); +int alpha(int c); +int mn2(int a, intptr_t d, intptr_t c); +int mn1(int a, intptr_t d); +int mn0(int a); +int dupl(int n); + +#undef getwc +#define getwc(f) lex_getwc(f) +extern wint_t lex_getwc(FILE *); +#undef putwc +#define putwc(c, f) lex_putwc(c, f) +extern wint_t lex_putwc(wchar_t, FILE *); + +#undef index +#define index lex_index + +extern int isArray; /* XCU4: for %array %pointer */ diff --git a/lex/lex.1 b/lex/lex.1 @@ -0,0 +1,131 @@ +.\" +.\" Sccsid @(#)lex.1 1.5 (gritter) 11/27/05 +.\" Derived from lex(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.TH LEX 1 "11/27/05" "Heirloom Development Tools" "User Commands" +.SH NAME +lex \- generator of lexical analysis programs +.SH SYNOPSIS +.HP +.ad l +.nh +\fBlex\fR [\fB\-ctvnVew\fR] [\fB\-Q\fR(\fBy\fR|\fBn\fR)] +[\fB\-Y\ \fIdirectory\fR] +[\fIfiles\fR\ ...] +.br +.ad b +.SH DESCRIPTION +.I Lex +generates programs to be used in simple lexical analysis of text. +The input +.I files +(standard input default) +contain regular expressions +to be searched for, and actions written in C to be executed when +expressions are found. +.PP +A C source program, `lex.yy.c' is generated, to be compiled thus: +.IP +cc lex.yy.c \-ll +.LP +This program, when run, copies unrecognized portions of +the input to the output, +and executes the associated +C action for each regular expression that is recognized. +.PP +The following +.I lex +program converts upper case to lower, +removes blanks at the end of lines, +and replaces multiple blanks by single blanks. +.IP "" +.nf +.ta \w'[A\-Z] 'u +%% +[A\-Z] putchar(yytext[0]+\'a\'\-\'A\'); +[ ]+$ +[ ]+ putchar(\' \'); +.fi +.PP +The options have the following meanings. +.TP +.B \-c +Generate output in the `C' language. +This is the default. +.TP +.B \-e +Generates output that can handle multibyte characters, +with \fIyytext[]\fR being of type \fIunsigned char[]\fR. +This option is an extension. +.TP +.B \-n +Opposite of +.BR \-v ; +.B \-n +is default. 
+.TP +\fB\-Q\fR(\fBy\fR|\fBn\fR) +With +.BR \-Qy , +a version identification variable is put into lex.yy.c. +With +.B \-Qn +(the default), no such variable is generated. +.TP +.B \-t +Place the result on the standard output instead of in file +`lex.yy.c'. +.TP +.B \-v +Print a one-line summary of statistics of the generated analyzer. +.TP +.B \-V +Causes version information for +.I lex +to be printed. +.TP +.B \-w +Generates output that can handle multibyte characters, +with \fIyytext[]\fR being of type \fIwchar_t[]\fR. +This option is an extension. +.TP +\fB\-Y \fIdirectory\fR +Use `\fIdirectory\fR' to locate driver files, +instead of the default `/usr/ccs/lib/lex'. +This option is an extension. +.SH "SEE ALSO" +yacc(1) +.br +M. E. Lesk and E. Schmidt, +.I LEX \- Lexical Analyzer Generator diff --git a/lex/libmain.c b/lex/libmain.c @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +/* from OpenSolaris "libmain.c 6.6 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)libmain.c 1.4 (gritter) 11/26/05 + */ + +#include "stdio.h" + +extern int yylex(void); /* scanner entry point; defined outside this file */ + +int +main(void) /* default main() for the lex library: libmain.o is listed in LOBJ (libl.a) in lex/mkfile */ +{ + yylex(); /* run the scanner; its return value is discarded */ + return (0); /* exit status is 0 whatever yylex() returned */ +} diff --git a/lex/lsearch.c b/lex/lsearch.c @@ -0,0 +1,71 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "lsearch.c 1.15 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)lsearch.c 1.4 (gritter) 11/26/05 + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * Linear search algorithm, generalized from Knuth (6.1) Algorithm Q. + * + * This version no longer has anything to do with Knuth's Algorithm Q, + * which first copies the new element into the table, then looks for it. 
+ * The assumption there was that the cost of checking for the end of the + * table before each comparison outweighed the cost of the comparison, which + * isn't true when an arbitrary comparison function must be called and when the + * copy itself takes a significant number of cycles. + * Actually, it has now reverted to Algorithm S, which is "simpler." + */ + +#include <sys/types.h> +#include <stddef.h> +#include <string.h> +#include "search.h" + +void * +xlsearch(const void *ky, void *bs, unsigned *nelp, unsigned width, + int (*compar)(const void *, const void *)) /* compar(key, entry) == 0 means "match" */ +{ + char *key = (char *)ky; + char *base = (char *)bs; + char *next = base + *nelp * width; /* one past the last of the *nelp entries, each width bytes */ + void *res; + + for (; base < next; base += width) + if ((*compar)(key, base) == 0) + return (base); /* Key found: return the existing entry */ + ++*nelp; /* Not found: count the new entry; no capacity check is made, so the caller must have left room for it */ + res = memcpy(base, key, width); /* base now == next, so the key is copied just past the old end */ + return (res); /* memcpy() returns its destination, i.e. the new entry */ +} diff --git a/lex/main.c b/lex/main.c @@ -0,0 +1,364 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* Copyright 1976, Bell Telephone Laboratories, Inc. */ + +/* from OpenSolaris "main.c 6.16 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)main.c 1.9 (gritter) 11/26/05 + */ + +#include <string.h> +#include "once.h" +#include "sgs.h" +#include <locale.h> +#include <limits.h> +#include <unistd.h> + +static wchar_t L_INITIAL[] = {'I', 'N', 'I', 'T', 'I', 'A', 'L', 0}; + +static void get1core(void); +static void free1core(void); +static void get2core(void); +static void free2core(void); +static void get3core(void); +#ifdef DEBUG +static void free3core(void); +#endif + +int +main(int argc, char **argv) +{ + int i; + int c; + char *path = NULL; + Boolean eoption = 0, woption = 0; + + sargv = argv; + sargc = argc; + errorf = stderr; + setlocale(LC_CTYPE, ""); +#ifdef DEBUG + while ((c = getopt(argc, argv, "dyctvnewVQ:Y:")) != EOF) { +#else + while ((c = getopt(argc, argv, "ctvnewVQ:Y:")) != EOF) { +#endif + switch (c) { +#ifdef DEBUG + case 'd': + debug++; + break; + case 'y': + yydebug = TRUE; + break; +#endif + case 'V': + fprintf(stderr, "lex:%s , %s\n", pkg, rel); + break; + case 'Q': + v_stmp = optarg; + if (*v_stmp != 'y' && *v_stmp != 'n') + error( + "lex: -Q should be followed by [y/n]"); + break; + case 'Y': + path = malloc(strlen(optarg) + + sizeof ("/nceucform") + 1); + path = strcpy(path, optarg); + break; + case 'c': + ratfor = FALSE; + break; + case 't': + fout = stdout; + break; + case 'v': + report = 1; + break; + case 'n': + report = 0; + break; + case 'w': + case 'W': + woption = 1; + handleeuc = 1; + widecio = 1; + break; + case 'e': + case 'E': + eoption = 1; + handleeuc = 1; + widecio = 0; + break; + default: + fprintf(stderr, + "Usage: lex [-ctvnV] [-Q(y/n)] [files...]\n"); + exit(1); + } + } + if (woption && eoption) { + error( + "You may not specify both -w and -e simultaneously."); + } + no_input = argc - optind; 
+ if (no_input) { + /* XCU4: recognize "-" file operand for stdin */ + if (strcmp(argv[optind], "-") == 0) + fin = stdin; + else { + fin = fopen(argv[optind], "r"); + if (fin == NULL) + error( + "Can't open input file -- %s", argv[optind]); + } + } else + fin = stdin; + + /* may be gotten: def, subs, sname, schar, ccl, dchar */ + gch(); + + /* may be gotten: name, left, right, nullstr, parent */ + get1core(); + + scopy(L_INITIAL, sp); + sname[0] = sp; + sp += slength(L_INITIAL) + 1; + sname[1] = 0; + + /* XCU4: %x exclusive start */ + exclusive[0] = 0; + + if (!handleeuc) { + /* + * Set ZCH and ncg to their default values + * as they may be needed to handle %t directive. + */ + ZCH = ncg = NCH; /* ncg behaves as constant in this mode. */ + } + + /* may be disposed of: def, subs, dchar */ + if (yyparse()) + exit(1); /* error return code */ + + if (handleeuc) { + ncg = ncgidtbl * 2; + ZCH = ncg; + if (ncg >= MAXNCG) + error( + "Too complex rules -- requires too many char groups."); + sortcgidtbl(); + } + repbycgid(); /* Call this even in ASCII compat. mode. 
*/ + + /* + * maybe get: + * tmpstat, foll, positions, gotof, nexts, + * nchar, state, atable, sfall, cpackflg + */ + free1core(); + get2core(); + ptail(); + mkmatch(); +#ifdef DEBUG + if (debug) + pccl(); +#endif + sect = ENDSECTION; + if (tptr > 0) + cfoll(tptr-1); +#ifdef DEBUG + if (debug) + pfoll(); +#endif + cgoto(); +#ifdef DEBUG + if (debug) { + printf("Print %d states:\n", stnum + 1); + for (i = 0; i <= stnum; i++) + stprt(i); + } +#endif + /* + * may be disposed of: + * positions, tmpstat, foll, state, name, + * left, right, parent, ccl, schar, sname + * maybe get: verify, advance, stoff + */ + free2core(); + get3core(); + layout(); + /* + * may be disposed of: + * verify, advance, stoff, nexts, nchar, + * gotof, atable, ccpackflg, sfall + */ + +#ifdef DEBUG + free3core(); +#endif + if (path == NULL) { + static char formpath[sizeof FORMPATH + 20] = FORMPATH; + path = formpath; + } + + if (handleeuc) { + if (ratfor) + error("Ratfor is not supported by -w or -e option."); + strcat(path, "/nceucform"); + } + else + strcat(path, ratfor ? 
"/nrform" : "/ncform"); + + fother = fopen(path, "r"); + if (fother == NULL) + error("Lex driver missing, file %s", path); + while ((i = getc(fother)) != EOF) + putc(i, fout); + fclose(fother); + fclose(fout); + if (report == 1) + statistics(); + fclose(stdout); + fclose(stderr); + return (0); /* success return code */ +} + +static void +get1core(void) +{ + ccptr = ccl = myalloc(CCLSIZE, sizeof (*ccl)); + pcptr = pchar = myalloc(pchlen, sizeof (*pchar)); + def = myalloc(DEFSIZE, sizeof (*def)); + subs = myalloc(DEFSIZE, sizeof (*subs)); + dp = dchar = myalloc(DEFCHAR, sizeof (*dchar)); + sname = myalloc(STARTSIZE, sizeof (*sname)); + /* XCU4: exclusive start array */ + exclusive = myalloc(STARTSIZE, sizeof (*exclusive)); + sp = schar = myalloc(STARTCHAR, sizeof (*schar)); + if (ccl == 0 || def == 0 || + pchar == 0 || subs == 0 || dchar == 0 || + sname == 0 || exclusive == 0 || schar == 0) + error("Too little core to begin"); +} + +static void +free1core(void) +{ + free(def); + free(subs); + free(dchar); +} + +static void +get2core(void) +{ + int i; + gotof = myalloc(nstates, sizeof (*gotof)); + nexts = myalloc(ntrans, sizeof (*nexts)); + nchar = myalloc(ntrans, sizeof (*nchar)); + state = myalloc(nstates, sizeof (*state)); + atable =myalloc(nstates, sizeof (*atable)); + sfall = myalloc(nstates, sizeof (*sfall)); + cpackflg = myalloc(nstates, sizeof (*cpackflg)); + tmpstat = myalloc(tptr+1, sizeof (*tmpstat)); + foll = myalloc(tptr+1, sizeof (*foll)); + nxtpos = positions = myalloc(maxpos, sizeof (*positions)); + if (tmpstat == 0 || foll == 0 || positions == 0 || + gotof == 0 || nexts == 0 || nchar == 0 || + state == 0 || atable == 0 || sfall == 0 || cpackflg == 0) + error("Too little core for state generation"); + for (i = 0; i <= tptr; i++) + foll[i] = 0; +} + +static void +free2core(void) +{ + free(positions); + free(tmpstat); + free(foll); + free(name); + free(left); + free(right); + free(parent); + free(nullstr); + free(state); + free(sname); + /* XCU4: 
exclusive start array */ + free(exclusive); + free(schar); + free(ccl); +} + +static void +get3core(void) +{ + verify = myalloc(outsize, sizeof (*verify)); + advance = myalloc(outsize, sizeof (*advance)); + stoff = myalloc(stnum+2, sizeof (*stoff)); + if (verify == 0 || advance == 0 || stoff == 0) + error("Too little core for final packing"); +} + +#ifdef DEBUG +static void +free3core(void) +{ + free(advance); + free(verify); + free(stoff); + free(gotof); + free(nexts); + free(nchar); + free(atable); + free(sfall); + free(cpackflg); +} +#endif + +void * +myalloc(int a, int b) +{ + void *i; + i = calloc(a, b); + if (i == NULL) + warning("calloc returns a 0"); + return (i); +} + +void +yyerror(char *s) +{ + fprintf(stderr, + "\"%s\":line %d: Error: %s\n", sargv[optind], yyline, s); +} diff --git a/lex/mkfile b/lex/mkfile @@ -0,0 +1,26 @@ +BIN = lex +LIB = libl.a +OBJ = main.o sub1.o sub2.o sub3.o header.o wcio.o parser.o getopt.o lsearch.o +LOBJ = allprint.o libmain.o reject.o yyless.o yywrap.o \ + allprint_w.o reject_w.o yyless_w.o reject_e.o yyless_e.o +LOCAL_CFLAGS = -DFORMPATH=\"$LIBDIR/lex\" +CLEAN_FILES = parser.c +INSTALL_BIN = lex +INSTALL_MAN1 = lex.1 +INSTALL_OTHER1 = nceucform ncform nrform +INSTALL_OTHER1_DIR = $LIBDIR/lex +DEPS = yacc + +WFLAGS = -DEUC -DJLSLEX -DWOPTION +EFLAGS = -DEUC -DJLSLEX -DEOPTION + +<$mkbuild/mk.default + +%_w.o:Q: %.c + echo CC $target + $CC -c $CFLAGS $CPPFLAGS $WFLAGS $prereq -o $target + +%_e.o:Q: %.c + echo CC $target + $CC -c $CFLAGS $CPPFLAGS $EFLAGS $prereq -o $target + diff --git a/lex/nceucform b/lex/nceucform @@ -0,0 +1,480 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* from OpenSolaris "nceucform 1.17 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)nceucform 1.6 (gritter) 11/18/05 + */ + +#if defined(__cplusplus) || defined(__STDC__) +int yycgid(wchar_t); +wint_t yygetwchar(void); +#endif +int yylineno =1; +int yygid; +#define LONG_WCHAR_T 1 +# define YYU(x) x +# define NLSTATE yyprevious=YYNEWLINE +wchar_t yysbuf[YYLMAX]; +wchar_t *yysptr = yysbuf; +struct yysvf *yylstate [YYLMAX], **yylsp, **yyolsp; +int *yyfnd; +extern struct yysvf *yyestate; +int yyprevious = YYNEWLINE; +#if defined(__cplusplus) || defined(__STDC__) +int yylook(void) +#else +yylook() +#endif +{ + struct yysvf *yystate, **lsp; + struct yywork *yyt; + struct yysvf *yyz; + int yych, yyfirst; + struct yywork *yyr; +# ifdef LEXDEBUG + int debug; +# endif + wchar_t *yylastch; + /* start off machines */ +# ifdef LEXDEBUG + debug = 0; +# endif + yyfirst=1; + if (!yymorfg) + yylastch = YYTEXT; + else { + yymorfg=0; + yylastch = YYTEXT+YYLENG; + } + for(;;){ + lsp = yylstate; + yyestate = yystate = yybgin; + if (yyprevious==YYNEWLINE) yystate++; + for (;;){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"state %d\n",yystate-yysvec-1); +# endif + yyt = yystate->yystoff; + if(yyt == yycrank && !yyfirst){ /* may not be any transitions */ + yyz = 
yystate->yyother; + if(yyz == 0)break; + if(yyz->yystoff == yycrank)break; + } + *yylastch++ = yych = YYINPUT(); +#ifdef YYISARRAY + if(yylastch > &YYTEXT[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } +#else + if (yylastch >= &YYTEXT[ yytextsz ]) { + int x = yylastch - YYTEXT; + + yytextsz += YYTEXTSZINC; +#ifdef YYLEX_E /* -e */ + if (YYTEXT == yy_twbuf) { + YYTEXT = (wchar_t *) + malloc(yytextsz * + sizeof (wchar_t)); + if (YYTEXT) memcpy(YYTEXT, yy_twbuf, /* copy only after a successful malloc; failure is reported below */ + sizeof (yy_twbuf)); + yytext = (wchar_t *) + malloc(yytextsz * + sizeof (wchar_t)); + if (yytext) memcpy(yytext, yy_tbuf, + sizeof (yy_tbuf)); /* NOTE(review): a failed yytext allocation is still not reported below - confirm and handle */ +#else + if (YYTEXT == yy_tbuf) { + YYTEXT = (wchar_t *) + malloc(yytextsz * + sizeof (wchar_t)); + if (YYTEXT) memcpy(YYTEXT, yy_tbuf, /* copy only after a successful malloc; failure is reported below */ + sizeof (yy_tbuf)); +#endif + } + else { + YYTEXT = (wchar_t *) + realloc(YYTEXT, yytextsz * sizeof (wchar_t)); /* yytextsz counts wchar_t elements; realloc() takes bytes */ +#ifdef YYLEX_E /* -e */ + yytext = (wchar_t *) + realloc(yytext, + yytextsz * sizeof (wchar_t)); +#endif + } + if (!YYTEXT) { + fprintf(yyout, + "Cannot realloc YYTEXT\n"); + exit(1); + } + yylastch = YYTEXT + x; + } +#endif + yygid = yycgid(yych); + yyfirst=0; + tryagain: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"wchar_t "); + allprint(yych); + fprintf(yyout," gid %d\n", yygid); + } +# endif + yyr = yyt; + if ( yyt > yycrank){ + yyt = yyr + yygid; + if (yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {YYUNPUT(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp >= &yylstate[YYLMAX]) { /* stop once yylstate[] is full, before the next store would overrun it */ + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } +# ifdef YYOPTIM + else if(yyt < yycrank) { /* r < yycrank */ + yyt = yyr = yycrank+(yycrank-yyt); +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"compressed state\n"); +# endif + yyt = yyt + yygid; + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {YYUNPUT(*--yylastch);break;} + *lsp++ = yystate = 
yyt->advance+yysvec; + if(lsp >= &yylstate[YYLMAX]) { /* stop once yylstate[] is full */ + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + yyt = yyr + YYU(yymatch[yygid]); +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"try fall back character "); + allprint_w(YYU(yymatch[yygid])); + fprintf(yyout," gid %d\n", yygid); + } +# endif + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transition */ + {YYUNPUT(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp >= &yylstate[YYLMAX]) { /* stop once yylstate[] is full */ + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } + if ((yystate = yystate->yyother) && (yyt= yystate->yystoff) != yycrank){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"fall back to state %d\n",yystate-yysvec-1); +# endif + goto tryagain; + } +# endif + else + {YYUNPUT(*--yylastch);break;} + contin: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"state %d wchar_t ",yystate-yysvec-1); + allprint_w(yych); + fprintf(yyout," gid %d\n", yygid); + } +# endif + ; + } +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"stopped at %d with ",*(lsp-1)-yysvec-1); + allprint_w(yych); + fprintf(yyout," gid %d\n", yygid); + } +# endif + while (lsp-- > yylstate){ + *yylastch-- = 0; + if (*lsp != 0 && (yyfnd= (*lsp)->yystops) && *yyfnd > 0){ + yyolsp = lsp; + if(yyextra[*yyfnd]){ /* must backup */ + while(yyback((*lsp)->yystops,-*yyfnd) != 1 && lsp > yylstate){ + lsp--; + YYUNPUT(*yylastch--); + } + } + yyprevious = YYU(*yylastch); + yylsp = lsp; + YYLENG = yylastch-YYTEXT+1; + YYTEXT[YYLENG] = 0; +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"\nmatch "); + sprint_w(YYTEXT); + fprintf(yyout," action %d\n",*yyfnd); + } +# endif +#ifdef YYLEX_E /* -e */ + yyleng=wcstombs((char *)yytext, YYTEXT, sizeof(yytext)-1); +#ifdef LEXDEBUG + if(yyleng>=sizeof(yytext)-1) + fprintf(yyout, "yytext[] too short\n"); +#endif +#endif + return(*yyfnd++); + } + YYUNPUT(*yylastch); + } + if (YYTEXT[0] == 0 
/* && feof(yyin) */) + { + yysptr=yysbuf; + return(0); + } + yyprevious = YYTEXT[0] = YYINPUT(); + if (yyprevious>0) + YYOUTPUT(yyprevious); + yylastch=YYTEXT; +# ifdef LEXDEBUG + if(debug)putchar('\n'); +# endif + } + } +int +#if defined(__cplusplus) || defined(__STDC__) +yyback(int *p, int m) +#else +yyback(p, m) + int *p; + int m; +#endif +{ + if (p==0) return(0); + while (*p) { + if (*p++ == m) + return(1); + } + return(0); +} + +#ifdef YYLEX_E /* -e */ +wchar_t +#if defined(__cplusplus) || defined(__STDC__) +yywinput(void){ +#else +yywinput(){ +#endif + static mbstate_t state; + wchar_t wc; + char b; + int n; + + for (;;) { + do { +#ifndef __cplusplus + b=input(); +#else + b=lex_input(); +#endif + } while ((n = mbrtowc(&wc, &b, 1, &state)) == (size_t)-2); + if (n == (size_t)-1) { + for (n = 0; n < sizeof state; n++) + ((char *)&state)[n] = 0; + continue; + } + break; + } + + return wc; +} + +#if defined(__cplusplus) || defined(__STDC__) +void +yywoutput(wchar_t wc) +#else +yywoutput(wc) + wchar_t wc; +#endif +{ + unsigned char eucbuf[MB_LEN_MAX]; + int n; + unsigned char *p=eucbuf; + + n=wctomb( (char *)eucbuf, wc ); +#ifndef __cplusplus + while(n-->0) output(*p++); +#else + while(n-->0) lex_output(*p++); +#endif +} + +#if defined(__cplusplus) || defined(__STDC__) +void +yywunput(wchar_t wc) +#else +yywunput(wc) + wchar_t wc; +#endif +{ + unsigned char eucbuf[MB_LEN_MAX]; + int n; + unsigned char *p; + + n=wctomb( (char *)eucbuf, wc ); + p=eucbuf+n; + while(n-->0) unput(*--p); +} +#endif + +#ifdef LONG_WCHAR_T +#define yylinearize(lc) lc +#else/*!LONG_WCHAR_T*/ +unsigned long +yylinearize(wc) + wchar_t wc; +{ + unsigned long prefix; + switch(wc&0x8080){ + case 0x0000: prefix=0x00000000; break; + case 0x0080: prefix=0x20000000; break; + case 0x8000: prefix=0x40000000; break; + case 0x8080: prefix=0x60000000; break; + } + return prefix|wc; +} +#endif/*!LONG_WCHAR_T*/ +int +yycgid(c) + wchar_t c; +{ + int first = 0; + int last = YYNCGIDTBL - 1; + unsigned long 
lc=yylinearize(c); + + if( yycgidtbl[YYNCGIDTBL-1] < lc ) return YYNCGIDTBL*2-1; + + while (last >= 0) { + int i = (first+last)/2; + if (lc == yycgidtbl[i]) + return (2*i); + else if ( yycgidtbl[i]<lc) { + if (lc<yycgidtbl[i+1]) + return (2*i+1); + else + first = i + 1; + }else + last = i - 1; + } + return 0; /*Error*/ +} + + + + + + /* the following are only used in the lex library */ +int +#if defined(__cplusplus) || defined(__STDC__) +yyinput(void) +#else +yyinput() +#endif +{ +#ifndef __cplusplus + return(input()); +#else + return(lex_input()); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void +yyoutput(int c) +#else +yyoutput(c) + int c; +#endif +{ +#ifndef __cplusplus + output(c); +#else + lex_output(c); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void +yyunput(int c) +#else +yyunput(c) + int c; +#endif +{ + unput(c); + } + +#ifndef YYLEX_E +/* + * Just strip all illegal byte sequences from input. + */ +wint_t +#if defined(__cplusplus) || defined(__STDC__) +yygetwchar(void) +#else +yygetwchar() +#endif +{ + static mbstate_t state; + char b; + int c, n = 0; /* n is tested below even when the very first getc() returns EOF */ + wchar_t wc = WEOF; + + for (;;) { + while ((c = getc(yyin)) != EOF) { + b = c; + if ((n = mbrtowc(&wc, &b, 1, &state)) != (size_t)-2) + break; + } + if (n == (size_t)-1 || n == (size_t)-2) { + if (c == EOF) + return WEOF; + for (n = 0; n < sizeof state; n++) + ((char *)&state)[n] = 0; + continue; + } + break; + } + return wc; +} +#endif /* !YYLEX_E */ diff --git a/lex/ncform b/lex/ncform @@ -0,0 +1,290 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "ncform 6.14 05/06/10 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)ncform 1.4 (gritter) 11/18/05 + */ + +int yylineno =1; +# define YYU(x) x +# define NLSTATE yyprevious=YYNEWLINE +struct yysvf *yylstate [YYLMAX], **yylsp, **yyolsp; +char yysbuf[YYLMAX]; +char *yysptr = yysbuf; +int *yyfnd; +extern struct yysvf *yyestate; +int yyprevious = YYNEWLINE; +#if defined(__cplusplus) || defined(__STDC__) +int yylook(void) +#else +yylook() +#endif +{ + register struct yysvf *yystate, **lsp; + register struct yywork *yyt; + struct yysvf *yyz; + int yych, yyfirst; + struct yywork *yyr; +# ifdef LEXDEBUG + int debug; +# endif + char *yylastch; + /* start off machines */ +# ifdef LEXDEBUG + debug = 0; +# endif + yyfirst=1; + if (!yymorfg) + yylastch = yytext; + else { + yymorfg=0; + yylastch = yytext+yyleng; + } + for(;;){ + lsp = yylstate; + yyestate = yystate = yybgin; + if (yyprevious==YYNEWLINE) yystate++; + for (;;){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"state %d\n",yystate-yysvec-1); +# endif + yyt = yystate->yystoff; + if(yyt == yycrank && !yyfirst){ /* may not be any transitions */ + yyz = yystate->yyother; + if(yyz == 0)break; + if(yyz->yystoff == yycrank)break; + } +#ifndef __cplusplus + *yylastch++ = yych = input(); +#else + *yylastch++ = yych = lex_input(); +#endif +#ifdef YYISARRAY + if(yylastch > &yytext[YYLMAX]) { + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } +#else + if (yylastch >= &yytext[ yytextsz ]) { + int 
x = yylastch - yytext; + + yytextsz += YYTEXTSZINC; + if (yytext == yy_tbuf) { + yytext = (char *) malloc(yytextsz); + if (yytext) memcpy(yytext, yy_tbuf, sizeof (yy_tbuf)); /* copy only after a successful malloc; failure is reported below */ + } + else + yytext = (char *) realloc(yytext, yytextsz); + if (!yytext) { + fprintf(yyout, + "Cannot realloc yytext\n"); + exit(1); + } + yylastch = yytext + x; + } +#endif + yyfirst=0; + tryagain: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"char "); + allprint(yych); + putchar('\n'); + } +# endif + yyr = yyt; + if ( yyt > yycrank){ + yyt = yyr + yych; + if (yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp >= &yylstate[YYLMAX]) { /* stop once yylstate[] is full, before the next store would overrun it */ + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } +# ifdef YYOPTIM + else if(yyt < yycrank) { /* r < yycrank */ + yyt = yyr = yycrank+(yycrank-yyt); +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"compressed state\n"); +# endif + yyt = yyt + yych; + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp >= &yylstate[YYLMAX]) { /* stop once yylstate[] is full */ + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + yyt = yyr + YYU(yymatch[yych]); +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"try fall back character "); + allprint(YYU(yymatch[yych])); + putchar('\n'); + } +# endif + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transition */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + if(lsp >= &yylstate[YYLMAX]) { /* stop once yylstate[] is full */ + fprintf(yyout,"Input string too long, limit %d\n",YYLMAX); + exit(1); + } + goto contin; + } + } + if ((yystate = yystate->yyother) && (yyt= yystate->yystoff) != yycrank){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"fall back to state %d\n",yystate-yysvec-1); +# 
endif + goto tryagain; + } +# endif + else + {unput(*--yylastch);break;} + contin: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"state %d char ",yystate-yysvec-1); + allprint(yych); + putchar('\n'); + } +# endif + ; + } +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"stopped at %d with ",*(lsp-1)-yysvec-1); + allprint(yych); + putchar('\n'); + } +# endif + while (lsp-- > yylstate){ + *yylastch-- = 0; + if (*lsp != 0 && (yyfnd= (*lsp)->yystops) && *yyfnd > 0){ + yyolsp = lsp; + if(yyextra[*yyfnd]){ /* must backup */ + while(yyback((*lsp)->yystops,-*yyfnd) != 1 && lsp > yylstate){ + lsp--; + unput(*yylastch--); + } + } + yyprevious = YYU(*yylastch); + yylsp = lsp; + yyleng = yylastch-yytext+1; + yytext[yyleng] = 0; +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"\nmatch "); + sprint(yytext); + fprintf(yyout," action %d\n",*yyfnd); + } +# endif + return(*yyfnd++); + } + unput(*yylastch); + } + if (yytext[0] == 0 /* && feof(yyin) */) + { + yysptr=yysbuf; + return(0); + } +#ifndef __cplusplus + yyprevious = yytext[0] = input(); + if (yyprevious>0) + output(yyprevious); +#else + yyprevious = yytext[0] = lex_input(); + if (yyprevious>0) + lex_output(yyprevious); +#endif + yylastch=yytext; +# ifdef LEXDEBUG + if(debug)putchar('\n'); +# endif + } + } +#if defined(__cplusplus) || defined(__STDC__) +int yyback(int *p, int m) +#else +yyback(p, m) + int *p; +#endif +{ + if (p==0) return(0); + while (*p) { + if (*p++ == m) + return(1); + } + return(0); +} + /* the following are only used in the lex library */ +#if defined(__cplusplus) || defined(__STDC__) +int yyinput(void) +#else +yyinput() +#endif +{ +#ifndef __cplusplus + return(input()); +#else + return(lex_input()); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void yyoutput(int c) +#else +yyoutput(c) + int c; +#endif +{ +#ifndef __cplusplus + output(c); +#else + lex_output(c); +#endif + } +#if defined(__cplusplus) || defined(__STDC__) +void yyunput(int c) +#else +yyunput(c) + int c; +#endif +{ + unput(c); + 
} diff --git a/lex/nrform b/lex/nrform @@ -0,0 +1,188 @@ +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License, Version 1.0 only +# (the "License"). You may not use this file except in compliance +# with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# +# +# Copyright (c) 1993 by Sun Microsystems, Inc. +# + +# from OpenSolaris "nrform 6.7 05/06/08 SMI" + +# +# Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany +# +# Sccsid @(#)nrform 1.4 (gritter) 10/20/06 +# + +block data +integer cshift, csize, yynlin +common /yyllib/ cshift, csize, yynlin +data yynlin/YYNEWLINE/ +end +block data +common /yyldat/ yyfnd, yymorf, yyprev, yybgin, yytop # member names now match the integer declaration below and the /yyldat/ list in yylook +integer yyfnd, yymorf, yyprev, yybgin, yytop +data yybgin/1/ +data yyprev/YYNEWLINE/ +data yytop/YYTOPVAL/ +end +integer function yylook(dummy) +common /Lverif/ verif +common /Ladvan/ advan +common /Lstoff/ stoff +common /Lsfall/ sfall +common /Latable/ atable +common /Lextra/ extra +common /Lvstop/ vstop +integer verif(Sverif), advan(Sadvan),stoff(Sstoff),match(Smatch) +integer sfall(Ssfall),atable(Satable),extra(Sextra), vstop(Svstop) +integer state, lsp, r +integer ch, n +common /yyldat/ yyfnd, yymorf, yyprev, yybgin, yytop, yylsp, yylsta(YYLMAX) +common /yyxel/ yyleng, yytext +integer yyfnd, yymorf, yylsta, yylsp, yytext, yyprev, yyleng, yytop +integer lexshf, yytext(YYLMAX), yyback, yybgin +integer z, t +if (yymorf .eq. 0) + yyleng = 0 +else + yymorf=0 +1776 + lsp = 1 + state = yybgin + if (yyprev .eq. YYNEWLINE) + state = state + 1 + for (;;){ + r = stoff(state) + if (r .eq. 0){ + z = sfall(state) + if (z .eq. 0) + break + if(stoff(z) == 0) break + } + ch = input(dummy) + ich = lexshf(ch) + yyleng = yyleng+1 + yytext(yyleng) = ch + 1984 + if(r .gt. 0){ + t = r + ich + if (t<= yytop){ + if (verif(t) .eq. state){ + if(advan(t) == YYERROR){ + call unput(yytext(yyleng)) + yyleng = yyleng - 1 + break + } + state = advan(t) + yylsta(lsp) = state + lsp = lsp +1 + goto 2001 + } + } + } + if(r < 0){ + t = r + ich + if (t <= yytop .and. verif(t) .eq. 
state){ + if(advan(t) == YYERROR){ + call unput(yytext(yyleng)) + yyleng = yyleng - 1 + break + } + state = advan(t) + yylsta(lsp) = state + lsp = lsp +1 + goto 2001 + } + t = r + match(ich) + if(t <= yytop && state == verif(t)){ + if(advan(t) == YYERROR){ + call unput(yytext(yyleng)) + yyleng = yyleng - 1 + break + } + state = advan(t) + yylsta(lsp) = state + lsp = lsp + 1 + goto 2001 + } + } + else { + if (state > 0) state = sfall(state) + if (state .gt. 0) r = stoff(state) + if (state .gt. 0 .and. r .ne. 0) + goto 1984 + call unput(yytext(yyleng)) + yyleng = yyleng -1 + break + } + 2001 + continue + } + while (lsp .gt. 1){ + lsp = lsp -1 + ilsp = yylsta(lsp) + yyfnd = atable(ilsp) + if (yyfnd .gt. 0) + if (vstop(yyfnd) .gt. 0){ + r = vstop(yyfnd) + if (extra(r) .ne. 0){ + for(;;){ + ilsp = yylsta(lsp) + if (yyback(atable(ilsp), -r) .eq. 1) + break + lsp= lsp -1 + call unput(yytext(yyleng)) + yyleng = yyleng -1 + } + } + yyprev = lexshf(yytext(yyleng)) + yylsp = lsp + yyfnd = yyfnd + 1 + yylook = r + yytext(yyleng+1) = 0 + return + } + call unput(yytext(yyleng)) + } + if (yytext(1) .eq. 0){ + yylook=0 + return + } + yyprev = input(dummy) + call output(yyprev) + yyprev = lexshf(yyprev) + yyleng = 0 + goto 1776 +end +integer function yyback (isub, n) # returns 1 if n occurs in the zero-terminated vstop run starting at isub, else 0 +common /Lvstop/ vstop +integer vstop(Svstop) +if (isub .ne. 0) +while (vstop(isub) .ne. 0){ + if (vstop(isub) .eq. n){ # compare against the n argument; the old code tested an undeclared m + yyback = 1 + return + } + isub = isub + 1 # NOTE(review): isub is a dummy argument; if it is passed by reference this also advances the caller's atable entry - confirm + } +yyback = 0 +return +end diff --git a/lex/once.h b/lex/once.h @@ -0,0 +1,166 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "once.h 6.9 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)once.h 1.4 (gritter) 11/26/05 + */ + +#include "ldefs.c" + +/* once.c */ + /* because of external definitions, this code should occur only once */ +int ctable[2*NCH] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, + 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, + 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, + 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, + 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, + 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, + 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, + 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, + 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, + 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, + 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, + 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, + 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, + 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, + 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, + 230, 231, 232, 233, 234, 235, 236, 
237, 238, 239, + 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, + 250, 251, 252, 253, 254, 255 +}; + +int ZCH = NCH; +FILE *fout = NULL, *errorf; +int sect = DEFSECTION; +int prev = '\n'; /* previous input character */ +int pres = '\n'; /* present input character */ +int peek = '\n'; /* next input character */ +CHR *pushptr = pushc; +CHR *slptr = slist; + +#ifndef CNAME +#define CNAME "./ncform" +#endif +#ifndef RATNAME +#define RATNAME "./nrform" +#endif +char *cname = CNAME; +char *ratname = RATNAME; + +int ccount = 1; +int casecount = 1; +int aptr = 1; +int nstates = NSTATES, maxpos = MAXPOS; +int treesize = TREESIZE, ntrans = NTRANS; +int yytop; +int outsize = NOUTPUT; +int sptr = 1; +int optim = TRUE; +int report = 2; +int debug; /* 1 = on */ +int charc; +char *v_stmp = "n"; +int no_input; +int copy_line; +int n_error = 0; +int fatal = 1; +int sargc; +char **sargv; +CHR buf[BUF_SIZ]; +int ratfor; /* 1 = ratfor, 0 = C */ +int yyline; /* line number of file */ +int eof; +int lgatflg; +int divflg; +int funcflag; +int pflag; +int chset; /* 1 = char set modified */ +FILE *fin, *fother; +int fptr; +int *name; +intptr_t *left; +intptr_t *right; +int *parent; +Boolean *nullstr; +int tptr; +CHR pushc[TOKENSIZE]; +CHR slist[STARTSIZE]; +CHR **def, **subs, *dchar; +/* XCU4: %x exclusive start */ +int *exclusive; +CHR **sname, *schar; +CHR *ccl; +CHR *ccptr; +CHR *dp, *sp; +int dptr; +CHR *bptr; /* store input position */ +CHR *tmpstat; +int count; +int **foll; +int *nxtpos; +int *positions; +int *gotof; +int *nexts; +CHR *nchar; +int **state; +int *sfall; /* fallback state num */ +Boolean *cpackflg; /* true if state has been character packed */ +int *atable; +int nptr; +Boolean symbol[MAXNCG]; +CHR cindex[MAXNCG]; +int xstate; +int stnum; +CHR match[MAXNCG]; +BYTE extra[NACTIONS]; +CHR *pchar, *pcptr; +int pchlen = TOKENSIZE; +long rcount; +int *verify, *advance, *stoff; +int scon; +CHR *psave; + +Boolean handleeuc = FALSE; +Boolean widecio = FALSE; + +int isArray = 
1; /* XCU4: for %array %pointer */ diff --git a/lex/parser.y b/lex/parser.y @@ -0,0 +1,978 @@ +%{ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +%} +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +%{ +/* from OpenSolaris "parser.y 6.15 05/06/10 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)parser.y 1.8 (gritter) 11/26/05 + */ + +void yyerror(char *); + +#include <ctype.h> +#include <wchar.h> +#include <inttypes.h> +#ifndef __sun +#define wcsetno(c) 0 +#endif + +%} +/* parser.y */ + +/* XCU4: add XSCON: %x exclusive start token */ +/* XCU4: add ARRAY: %a yytext is char array */ +/* XCU4: add POINTER: %p yytext is a pointer to char */ +%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS XSCON ARRAY POINTER + +%nonassoc ARRAY POINTER +%left XSCON SCON NEWE +%left '/' +/* + * XCU4: lower the precedence of $ and ^ to less than the or operator + * per Spec. 1170 + */ +%left '$' '^' +%left '|' +%left CHAR CCL NCCL '(' '.' STR NULLS +%left ITER +%left CAT +%left '*' '+' '?' 
+ +%{ +#include "ldefs.c" + +#define YYSTYPE union _yystype_ +union _yystype_ +{ + int i; + CHR *cp; +}; +int peekon = 0; /* need this to check if "^" came in a definition section */ + +%} +%% +%{ +int i; +int j,k; +int g; +CHR *p; +static wchar_t L_PctUpT[]= {'%', 'T', 0}; +static wchar_t L_PctLoT[]= {'%', 't', 0}; +static wchar_t L_PctCbr[]= {'%', '}', 0}; +%} +acc : lexinput + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + } + ; +lexinput: defns delim prods end + | defns delim end + ={ + if(!funcflag)phead2(); + funcflag = TRUE; + } + | error + ={ +# ifdef DEBUG + if(debug) { + sect1dump(); + sect2dump(); + } +# endif + fatal = 0; + n_error++; + error("Illegal definition"); + fatal = 1; + } + ; +end: delim | ; +defns: defns STR STR + ={ scopy($2.cp,dp); + def[dptr] = dp; + dp += slength($2.cp) + 1; + scopy($3.cp,dp); + subs[dptr++] = dp; + if(dptr >= DEFSIZE) + error("Too many definitions"); + dp += slength($3.cp) + 1; + if(dp >= dchar+DEFCHAR) + error("Definitions too long"); + subs[dptr]=def[dptr]=0; /* for lookup - require ending null */ + } + | + ; +delim: DELIM + ={ +# ifdef DEBUG + if(sect == DEFSECTION && debug) sect1dump(); +# endif + sect++; + } + ; +prods: prods pr + ={ $$.i = mn2(RNEWE,$1.i,$2.i); + } + | pr + ={ $$.i = $1.i;} + ; +pr: r NEWE + ={ + if(divflg == TRUE) + i = mn1(S1FINAL,casecount); + else i = mn1(FINAL,casecount); + $$.i = mn2(RCAT,$1.i,i); + divflg = FALSE; + if((++casecount)>NACTIONS) + error("Too many (>%d) pattern-action rules.", NACTIONS); + } + | error NEWE + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + fatal = 0; + yyline--; + n_error++; + error("Illegal rule"); + fatal = 1; + yyline++; + } +r: CHAR + ={ $$.i = mn0($1.i); } + | STR + ={ + p = (CHR *)$1.cp; + i = mn0((unsigned)(*p++)); + while(*p) + i = mn2(RSTR,i,(unsigned)(*p++)); + $$.i = i; + } + | '.' 
+ ={ + $$.i = mn0(DOT); + } + | CCL + ={ $$.i = mn1(RCCL,(intptr_t)$1.cp); } + | NCCL + ={ $$.i = mn1(RNCCL,(intptr_t)$1.cp); } + | r '*' + ={ $$.i = mn1(STAR,$1.i); } + | r '+' + ={ $$.i = mn1(PLUS,$1.i); } + | r '?' + ={ $$.i = mn1(QUEST,$1.i); } + | r '|' r + ={ $$.i = mn2(BAR,$1.i,$3.i); } + | r r %prec CAT + ={ $$.i = mn2(RCAT,$1.i,$2.i); } + | r '/' r + ={ if(!divflg){ + j = mn1(S2FINAL,-casecount); + i = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,i,$3.i); + } + else { + $$.i = mn2(RCAT,$1.i,$3.i); + error("illegal extra slash"); + } + divflg = TRUE; + } + | r ITER ',' ITER '}' + ={ if($2.i > $4.i){ + i = $2.i; + $2.i = $4.i; + $4.i = i; + } + if($4.i <= 0) + error("iteration range must be positive"); + else { + j = $1.i; + for(k = 2; k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + for(i = $2.i+1; i<=$4.i; i++){ + g = dupl($1.i); + for(k=2;k<=i;k++) + g = mn2(RCAT,g,dupl($1.i)); + j = mn2(BAR,j,g); + } + $$.i = j; + } + } + | r ITER '}' + ={ + if($2.i < 0)error("can't have negative iteration"); + else if($2.i == 0) $$.i = mn0(RNULLS); + else { + j = $1.i; + for(k=2;k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + $$.i = j; + } + } + | r ITER ',' '}' + ={ + /* from n to infinity */ + if($2.i < 0)error("can't have negative iteration"); + else if($2.i == 0) $$.i = mn1(STAR,$1.i); + else if($2.i == 1)$$.i = mn1(PLUS,$1.i); + else { /* >= 2 iterations minimum */ + j = $1.i; + for(k=2;k<$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + k = mn1(PLUS,dupl($1.i)); + $$.i = mn2(RCAT,j,k); + } + } + | SCON r + ={ $$.i = mn2(RSCON,$2.i,(intptr_t)$1.cp); } + + /* XCU4: add XSCON */ + | XSCON r + ={ $$.i = mn2(RXSCON,$2.i,(intptr_t)$1.cp); } + | '^' r + ={ $$.i = mn1(CARAT,$2.i); } + | r '$' + ={ i = mn0('\n'); + if(!divflg){ + j = mn1(S2FINAL,-casecount); + k = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,k,i); + } + else $$.i = mn2(RCAT,$1.i,i); + divflg = TRUE; + } + | '(' r ')' + ={ $$.i = $2.i; } + | NULLS + ={ $$.i = mn0(RNULLS); } + + /* XCU4: add ARRAY and POINTER */ + | ARRAY + ={ isArray = 1; }; 
+ | POINTER + ={ isArray = 0; }; + ; + +%% +int +yylex(void) +{ + CHR *p; + int i; + CHR *xp; + int lex_startcond_lookupval; + CHR *t, c; + int n, j = 0, k, x; + CHR ch; + static int sectbegin; + static CHR token[TOKENSIZE]; + static int iter; + int ccs; /* Current CodeSet. */ + CHR *ccp; + int exclusive_flag; /* XCU4: exclusive start flag */ + +# ifdef DEBUG + yylval.i = 0; +# endif + + if(sect == DEFSECTION) { /* definitions section */ + while(!eof) { + if(prev == '\n'){ /* next char is at beginning of line */ + getl(p=buf); + switch(*p){ + case '%': + switch(c= *(p+1)){ + case '%': + if(scomp(p, (CHR *)"%%")) { + p++; + while(*(++p)) + if(!space(*p)) { + warning("invalid string following %%%% be ignored"); + break; + } + } + lgate(); + if(!ratfor)fprintf(fout,"# "); + fprintf(fout,"define YYNEWLINE %d\n",ctable['\n']); + if(!ratfor) { + fprintf(fout,"int yylex(){\nint nstr = 0; extern int yyprevious;\n"); + } + sectbegin = TRUE; + i = treesize*(sizeof(*name)+sizeof(*left)+ + sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA; + p = myalloc(i,1); + if(p == NULL) + error("Too little core for parse tree"); + free(p); + name = myalloc(treesize,sizeof(*name)); + left = myalloc(treesize,sizeof(*left)); + right = myalloc(treesize,sizeof(*right)); + nullstr = myalloc(treesize,sizeof(*nullstr)); + parent = myalloc(treesize,sizeof(*parent)); + if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0) + error("Too little core for parse tree"); + return(freturn(DELIM)); + case 'p': case 'P': + /* %p or %pointer */ + if ((*(p+2) == 'o') || + (*(p+2) == 'O')) { + if(lgatflg) + error("Too late for %%pointer"); + while(*p && !iswspace(*p)) + p++; + isArray = 0; + continue; + } + /* has overridden number of positions */ + p += 2; + maxpos = siconv(p); + if (maxpos<=0)error("illegal position number"); +# ifdef DEBUG + if (debug) printf("positions (%%p) now %d\n",maxpos); +# endif + if(report == 2)report = 1; + continue; + case 'n': case 'N': /* has 
overridden number of states */ + p += 2; + nstates = siconv(p); + if(nstates<=0)error("illegal state number"); +# ifdef DEBUG + if(debug)printf( " no. states (%%n) now %d\n",nstates); +# endif + if(report == 2)report = 1; + continue; + case 'e': case 'E': /* has overridden number of tree nodes */ + p += 2; + treesize = siconv(p); + if(treesize<=0)error("illegal number of parse tree nodes"); +# ifdef DEBUG + if (debug) printf("treesize (%%e) now %d\n",treesize); +# endif + if(report == 2)report = 1; + continue; + case 'o': case 'O': + p += 2; + outsize = siconv(p); + if(outsize<=0)error("illegal size of output array"); + if (report ==2) report=1; + continue; + case 'a': case 'A': + /* %a or %array */ + if ((*(p+2) == 'r') || + (*(p+2) == 'R')) { + if(lgatflg) + error("Too late for %%array"); + while(*p && !iswspace(*p)) + p++; + isArray = 1; + continue; + } + /* has overridden number of transitions */ + p += 2; + ntrans = siconv(p); + if(ntrans<=0)error("illegal translation number"); +# ifdef DEBUG + if (debug)printf("N. 
trans (%%a) now %d\n",ntrans); +# endif + if(report == 2)report = 1; + continue; + case 'k': case 'K': /* overriden packed char classes */ + p += 2; + free(pchar); + pchlen = siconv(p); + if(pchlen<=0)error("illegal number of packed character class"); +# ifdef DEBUG + if (debug) printf( "Size classes (%%k) now %d\n",pchlen); +# endif + pchar=pcptr=myalloc(pchlen, sizeof(*pchar)); + if (report==2) report=1; + continue; + case 't': case 'T': /* character set specifier */ + if(handleeuc) + error("\ +Character table (%t) is supported only in ASCII compatibility mode.\n"); + ZCH = wcstol(p+2, NULL, 10); + if (ZCH < NCH) ZCH = NCH; + if (ZCH > 2*NCH) error("ch table needs redeclaration"); + chset = TRUE; + for(i = 0; i<ZCH; i++) + ctable[i] = 0; + while(getl(p) && scomp(p,L_PctUpT) != 0 && scomp(p,L_PctLoT) != 0){ + if((n = siconv(p)) <= 0 || n > ZCH){ + error("Character value %d out of range",n); + continue; + } + while(digit(*p)) p++; + if(!iswspace(*p)) error("bad translation format"); + while(iswspace(*p)) p++; + t = p; + while(*t){ + c = ctrans(&t); + if(ctable[(unsigned)c]){ + if (iswprint(c)) + warning("Character '%lc' used twice",c); + + else + error("Chararter %o used twice",c); + } + else ctable[(unsigned)c] = n; + t++; + } + p = buf; + } + { + char chused[2*NCH]; int kr; + for(i=0; i<ZCH; i++) + chused[i]=0; + for(i=0; i<NCH; i++) + chused[ctable[i]]=1; + for(kr=i=1; i<NCH; i++) + if (ctable[i]==0) + { + while (chused[kr] == 0) + kr++; + ctable[i]=kr; + chused[kr]=1; + } + } + lgate(); + continue; + case 'r': case 'R': + c = 'r'; + /* FALLTHRU */ + case 'c': case 'C': + if(lgatflg) + error("Too late for language specifier"); + ratfor = (c == 'r'); + continue; + case '{': + lgate(); + while(getl(p) && scomp(p, L_PctCbr) != 0) + if(p[0]=='/' && p[1]=='*') + cpycom(p); + else + fprintf(fout,"%ls\n",p); + if(p[0] == '%') continue; + if (*p) error("EOF before %%%%"); + else error("EOF before %%}"); + break; + + case 'x': case 'X': /* XCU4: exclusive start 
conditions */ + exclusive_flag = 1; + goto start; + + case 's': case 'S': /* start conditions */ + exclusive_flag = 0; +start: + lgate(); + + while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) p++; + n = TRUE; + while(n){ + while(*p && (iswspace(*p) || ((*p) == (wchar_t)','))) p++; + t = p; + while(*p && !iswspace(*p) && ((*p) != (wchar_t)',')) { + if(!isascii(*p)) + error("None-ASCII characters in start condition."); + p++; + } + if(!*p) n = FALSE; + *p++ = 0; + if (*t == 0) continue; + i = sptr*2; + if(!ratfor)fprintf(fout,"# "); + fprintf(fout,"define %ls %d\n",t,i); + scopy(t,sp); + sname[sptr] = sp; + /* XCU4: save exclusive flag with start name */ + exclusive[sptr++] = exclusive_flag; + sname[sptr] = 0; /* required by lookup */ + if(sptr >= STARTSIZE) + error("Too many start conditions"); + sp += slength(sp) + 1; + if(sp >= schar+STARTCHAR) + error("Start conditions too long"); + } + continue; + default: + error("Invalid request %s",p); + continue; + } /* end of switch after seeing '%' */ + break; + case ' ': case '\t': /* must be code */ + lgate(); + if( p[1]=='/' && p[2]=='*' ) cpycom(p); + else fprintf(fout, "%ls\n",p); + continue; + case '/': /* look for comments */ + lgate(); + if((*(p+1))=='*') cpycom(p); + /* FALLTHRU */ + default: /* definition */ + while(*p && !iswspace(*p)) p++; + if(*p == 0) + continue; + prev = *p; + *p = 0; + bptr = p+1; + yylval.cp = (CHR *)buf; + if(digit(buf[0])) + warning("Substitution strings may not begin with digits"); + return(freturn(STR)); + } + } else { /* still sect 1, but prev != '\n' */ + p = bptr; + while(*p && iswspace(*p)) p++; + if(*p == 0) + warning("No translation given - null string assumed"); + scopy(p,token); + yylval.cp = (CHR *)token; + prev = '\n'; + return(freturn(STR)); + } + } + error("unexpected EOF before %%%%"); + /* end of section one processing */ + } else if(sect == RULESECTION){ /* rules and actions */ + lgate(); + while(!eof){ + static int first_test=TRUE, first_value; + static int 
reverse=FALSE; + switch(c=gch()){ + case '\0': + if(n_error)error_tail(); + return(freturn(0)); + case '\n': + if(prev == '\n') continue; + x = NEWE; + break; + case ' ': + case '\t': + if(prev == '\n') copy_line = TRUE; + if(sectbegin == TRUE){ + cpyact(); + copy_line = FALSE; + while((c=gch()) && c != '\n'); + continue; + } + if(!funcflag)phead2(); + funcflag = TRUE; + if(ratfor)fprintf(fout,"%d\n",30000+casecount); + else fprintf(fout,"case %d:\n",casecount); + if(cpyact()){ + if(ratfor)fprintf(fout,"goto 30997\n"); + else fprintf(fout,"break;\n"); + } + while((c=gch()) && c != '\n') { + if (c=='/') { + if((c=gch())=='*') { + c=gch(); + while(c !=EOF) { + while (c=='*') + if ((c=gch()) == '/') goto w_loop; + c = gch(); + } + error("EOF inside comment"); + } else + warning("undefined string"); + } else if (c=='}') + error("illegal extra \"}\""); + w_loop: ; + } + /* while ((c=gch())== ' ' || c == '\t') ; */ + /* if (!space(c)) error("undefined action string"); */ + if(peek == ' ' || peek == '\t' || sectbegin == TRUE){ + fatal = 0; + n_error++; + error("executable statements should occur right after %%%%"); + fatal = 1; + continue; + } + x = NEWE; + break; + case '%': + if(prev != '\n') goto character; + if(peek == '{'){ /* included code */ + getl(buf); + while(!eof&& getl(buf) && scomp(L_PctCbr,buf)!=0) + if(buf[0]=='/' && buf[1]=='*') + cpycom(buf); + else + fprintf(fout,"%ls\n",buf); + continue; + } + if(peek == '%'){ + c = gch(); + c = gch(); + x = DELIM; + break; + } + goto character; + case '|': + if(peek == ' ' || peek == '\t' || peek == '\n'){ + if(ratfor)fprintf(fout,"%d\n",30000+casecount++); + else fprintf(fout,"case %d:\n",casecount++); + continue; + } + x = '|'; + break; + case '$': + if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){ + x = c; + break; + } + goto character; + case '^': + if(peekon && (prev == '}')){ + x = c; + break; + } + if(prev != '\n' && scon != TRUE) goto character; + /* valid only at line begin */ + x 
= c; + break; + case '?': + case '+': + case '*': + if(prev == '\n' ) { + fatal = 0; + n_error++; + error("illegal operator -- %c",c); + fatal = 1; + } + /* FALLTHRU */ + case '.': + case '(': + case ')': + case ',': + case '/': + x = c; + break; + case '}': + iter = FALSE; + x = c; + break; + case '{': /* either iteration or definition */ + if(digit(c=gch())){ /* iteration */ + iter = TRUE; + if(prev=='{') first_test = TRUE; + ieval: + i = 0; + while(digit(c)){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + yylval.i = siconv(token); + if(first_test) { + first_test = FALSE; + first_value = yylval.i; + } else + if(first_value>yylval.i)warning("the values between braces are reversed"); + ch = c; + munput('c',&ch); + x = ITER; + break; + } + else { /* definition */ + i = 0; + while(c && c!='}'){ + token[i++] = c; + if(i >= TOKENSIZE) + error("definition too long"); + c = gch(); + } + token[i] = 0; + i = lookup(token,def); + if(i < 0) + error("definition %ls not found",token); + else + munput('s',(CHR *)(subs[i])); + if (peek == '^') + peekon = 1; + continue; + } + case '<': /* start condition ? 
*/ + if(prev != '\n') /* not at line begin, not start */ + goto character; + t = slptr; + do { + i = 0; + if(!isascii(c = gch())) + error("Non-ASCII characters in start condition."); + while(c != ',' && c && c != '>'){ + token[i++] = c; + if(i >= TOKENSIZE) + error("string name too long"); + if(!isascii(c = gch())) + error("None-ASCII characters in start condition."); + } + token[i] = 0; + if(i == 0) + goto character; + i = lookup(token,sname); + lex_startcond_lookupval = i; + if(i < 0) { + fatal = 0; + n_error++; + error("undefined start condition %ls",token); + fatal = 1; + continue; + } + *slptr++ = i+1; + } while(c && c != '>'); + *slptr++ = 0; + /* check if previous value re-usable */ + for (xp=slist; xp<t; ) + { + if (scomp(xp, t)==0) + break; + while (*xp++); + } + if (xp<t) + { + /* re-use previous pointer to string */ + slptr=t; + t=xp; + } + if(slptr > slist+STARTSIZE) /* note not packed */ + error("Too many start conditions used"); + yylval.cp = (CHR *)t; + + /* XCU4: add XSCON */ + + if (exclusive[lex_startcond_lookupval]) + x = XSCON; + else + x = SCON; + break; + case '"': + i = 0; + while((c=gch()) && c != '"' && c != '\n'){ + if(c == '\\') c = usescape(c=gch()); + remch(c); + token[i++] = c; + if(i >= TOKENSIZE){ + warning("String too long"); + i = TOKENSIZE-1; + break; + } + } + if(c == '\n') { + yyline--; + warning("Non-terminated string"); + yyline++; + } + token[i] = 0; + if(i == 0)x = NULLS; + else if(i == 1){ + yylval.i = (unsigned)token[0]; + x = CHAR; + } + else { + yylval.cp = (CHR *)token; + x = STR; + } + break; + case '[': + reverse = FALSE; + x = CCL; + if((c = gch()) == '^'){ + x = NCCL; + reverse = TRUE; + c = gch(); + } + i = 0; + while(c != ']' && c){ + static int light=TRUE, ESCAPE=FALSE; + if(c == '-' && prev == '^' && reverse){ + symbol[(unsigned)c] = 1; + c = gch(); + continue; + } + if(c == '\\') { + c = usescape(c=gch()); + ESCAPE = TRUE; + } + if(c=='-' && !ESCAPE && prev!='[' && peek!=']'){ + /* range specified */ + if 
(light) { + c = gch(); + if(c == '\\') + c=usescape(c=gch()); + remch(c); + k = c; + ccs=wcsetno(k); + if(wcsetno(j)!=ccs) + error("\ +Character range specified between different codesets."); + if((unsigned)j > (unsigned)k) { + n = j; + j = k; + k = n; + } + if(!handleeuc) + if(!(('A'<=j && k<='Z') || + ('a'<=j && k<='z') || + ('0'<=j && k<='9'))) + warning("Non-portable Character Class"); + token[i++] = RANGE; + token[i++] = j; + token[i++] = k; + light = FALSE; + } else { + error("unmatched hyphen"); + if(symbol[(unsigned)c])warning("\"%c\" redefined inside brackets",c); + else symbol[(unsigned)c] = 1; + } + ESCAPE = FALSE; + } else { + j = c; + remch(c); + token[i++] = c; /* Remember whatever.*/ + light = TRUE; + ESCAPE = FALSE; + } + c = gch(); + } + /* try to pack ccl's */ + + token[i] = 0; + ccp = ccl; + while (ccp < ccptr && scomp(token, ccp) != 0) ccp++; + if (ccp < ccptr) { /* found in ccl */ + yylval.cp = ccp; + } else { /* not in ccl, add it */ + scopy(token,ccptr); + yylval.cp = ccptr; + ccptr += slength(token) + 1; + if(ccptr >= ccl+CCLSIZE) + error("Too many large character classes"); + } + break; + case '\\': + c = usescape(c=gch()); + default: + character: + if(iter){ /* second part of an iteration */ + iter = FALSE; + if('0' <= c && c <= '9') + goto ieval; + } + remch(c); + if(alpha(peek)){ + i = 0; + yylval.cp = (CHR *)token; + token[i++] = c; + while(alpha(peek)) { + remch(token[i++] = gch()); + if(i >= TOKENSIZE) { + warning("string too long"); + i = TOKENSIZE - 1; + break; + } + } + if(peek == '?' 
|| peek == '*' || peek == '+') + munput('c',&token[--i]); + token[i] = 0; + if(i == 1){ + yylval.i = (unsigned)(token[0]); + x = CHAR; + } + else x = STR; + } + else { + yylval.i = (unsigned)c; + x = CHAR; + } + } + scon = FALSE; + peekon = 0; + if((x == SCON) || (x == XSCON)) + scon = TRUE; + sectbegin = FALSE; + return(freturn(x)); + /* NOTREACHED */ + } + } + /* section three */ + lgate(); + ptail(); +# ifdef DEBUG + if(debug) + fprintf(fout,"\n/*this comes from section three - debug */\n"); +# endif + + if(getl(buf) && !eof) { + if (sargv[optind] == NULL) + fprintf(fout, "\n# line %d\n", yyline-1); + else + fprintf(fout, + "\n# line %d \"%s\"\n", yyline-1, sargv[optind]); + fprintf(fout,"%ls\n",buf); + while(getl(buf) && !eof) + fprintf(fout,"%ls\n",buf); + } + + return(freturn(0)); + } +/* end of yylex */ +# ifdef DEBUG +freturn(i) + int i; { + if(yydebug) { + printf("now return "); + if((unsigned)i < NCH) allprint(i); + else printf("%d",i); + printf(" yylval = "); + switch(i){ + case STR: case CCL: case NCCL: + strpt(yylval.cp); + break; + case CHAR: + allprint(yylval.i); + break; + default: + printf("%d",yylval.i); + break; + } + putchar('\n'); + } + return(i); + } +# endif diff --git a/lex/reject.c b/lex/reject.c @@ -0,0 +1,158 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* from OpenSolaris "reject.c 6.10 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)reject.c 1.4 (gritter) 11/27/05 + */ + +#include <stdio.h> + +#ifdef EUC +#ifdef __sun +#include <euc.h> +#include <widec.h> +#else /* !sun */ +#include <wchar.h> +#endif /* !sun */ +#include <limits.h> +#endif + + +#ifndef JLSLEX + +#define CHR char +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#define YYREJECT yyreject +#endif + +#ifdef WOPTION + +#define CHR wchar_t +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#define YYREJECT yyreject_w +#endif + +#ifdef EOPTION + +#define CHR wchar_t +#define YYTEXT yywtext +#define YYLENG yywleng +#define YYINPUT yywinput +#define YYUNPUT yywunput +#define YYOUTPUT yywoutput +#define YYREJECT yyreject_e +extern unsigned char yytext[]; +extern int yyleng; +#endif + +#if defined(__cplusplus) || defined(__STDC__) +extern int yyback(int *, int); +extern int YYINPUT(void); +extern void YYUNPUT(int); +#ifdef EUC + static int yyracc(int); +#else + extern int yyracc(int); +#endif +#ifdef EOPTION + extern size_t wcstombs(char *, const wchar_t *, size_t); +#endif +#endif + +extern FILE *yyout, *yyin; + +extern int yyprevious, *yyfnd; + +extern char yyextra[]; + +extern int YYLENG; +extern CHR YYTEXT[]; + +extern struct {int *yyaa, *yybb; int *yystops; } *yylstate[], **yylsp, **yyolsp; +#if defined(__cplusplus) || defined(__STDC__) +int +YYREJECT(void) +#else +YYREJECT() +#endif +{ + for (; yylsp < yyolsp; yylsp++) + YYTEXT[YYLENG++] = 
YYINPUT(); + if (*yyfnd > 0) + return (yyracc(*yyfnd++)); + while (yylsp-- > yylstate) { + YYUNPUT(YYTEXT[YYLENG-1]); + YYTEXT[--YYLENG] = 0; + if (*yylsp != 0 && (yyfnd = (*yylsp)->yystops) && *yyfnd > 0) + return (yyracc(*yyfnd++)); + } +#ifdef EOPTION + yyleng = wcstombs((char *)yytext, YYTEXT, YYLENG*MB_LEN_MAX); +#endif + if (YYTEXT[0] == 0) + return (0); + YYLENG = 0; +#ifdef EOPTION + yyleng = 0; +#endif + return (-1); +} + +#ifdef EUC +static +#endif +#if defined(__cplusplus) || defined(__STDC__) +int +yyracc(int m) +#else +yyracc(m) +#endif +{ + yyolsp = yylsp; + if (yyextra[m]) { + while (yyback((*yylsp)->yystops, -m) != 1 && yylsp > yylstate) { + yylsp--; + YYUNPUT(YYTEXT[--YYLENG]); + } + } + yyprevious = YYTEXT[YYLENG-1]; + YYTEXT[YYLENG] = 0; +#ifdef EOPTION + yyleng = wcstombs((char *)yytext, YYTEXT, YYLENG*MB_LEN_MAX); +#endif + return (m); +} diff --git a/lex/search.h b/lex/search.h @@ -0,0 +1,48 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2003 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. 
+ */ + +#ifndef SEARCH_H +#define SEARCH_H + +/* from OpenSolaris "search.h 1.19 05/06/08 SMI" SVr4.0 1.3.1.11 */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)search.h 1.4 (gritter) 11/26/05 + */ + +#include <sys/types.h> + + +void *xlsearch(const void *, void *, unsigned *, unsigned, + int (*)(const void *, const void *)); + +#endif /* SEARCH_H */ diff --git a/lex/sgs.h b/lex/sgs.h @@ -0,0 +1,51 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany + */ +const char sccsid[] = "@(#)lex.sl 2.13 (gritter) 4/14/07"; +const char pkg[] = "Heirloom Development Tools"; +const char rel[] = "2.13 (gritter) 4/14/07"; +/* SLIST */ +/* +allprint.c: * Sccsid @(#)allprint.c 1.4 (gritter) 11/27/05 +getopt.c: * Sccsid @(#)getopt.c 1.9 (gritter) 4/2/07 +header.c: * Sccsid @(#)header.c 1.12 (gritter) 9/23/06 +ldefs.c: * Sccsid @(#)ldefs.c 1.7 (gritter) 4/14/07 +libmain.c: * Sccsid @(#)libmain.c 1.4 (gritter) 11/26/05 +lsearch.c: * Sccsid @(#)lsearch.c 1.4 (gritter) 11/26/05 +main.c: * Sccsid @(#)main.c 1.9 (gritter) 11/26/05 +once.h: * Sccsid @(#)once.h 1.4 (gritter) 11/26/05 +parser.y: * Sccsid @(#)parser.y 1.8 (gritter) 11/26/05 +reject.c: * Sccsid @(#)reject.c 1.4 (gritter) 11/27/05 +search.h: * Sccsid @(#)search.h 1.4 (gritter) 11/26/05 +sub1.c: * Sccsid @(#)sub1.c 1.5 (gritter) 11/26/05 +sub2.c: * Sccsid @(#)sub2.c 1.7 (gritter) 01/12/07 +sub3.c: * Sccsid @(#)sub3.c 1.4 (gritter) 11/26/05 +wcio.c: * Sccsid @(#)wcio.c 1.1 (gritter) 6/25/05 +yyless.c: * Sccsid @(#)yyless.c 1.6 (gritter) 11/27/05 +yywrap.c: * Sccsid @(#)yywrap.c 1.3 (gritter) 6/18/05 +nceucform: * Sccsid @(#)nceucform 1.6 (gritter) 11/18/05 +ncform: * Sccsid @(#)ncform 1.4 (gritter) 11/18/05 +nrform:# Sccsid @(#)nrform 1.4 (gritter) 10/20/06 + +*/ diff --git a/lex/sub1.c b/lex/sub1.c @@ -0,0 +1,1017 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "sub1.c 6.18 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)sub1.c 1.5 (gritter) 11/26/05 + */ + +#include "ldefs.c" +#include <limits.h> +#include <wchar.h> +#include <ctype.h> +#include <stdarg.h> + +/* + * return next line of input, throw away trailing '\n' + * and also throw away trailing blanks (spaces and tabs) + * returns 0 if eof is had immediately + */ + +CHR * +getl(CHR *p) +{ + int c; + CHR *s, *t, *u = NULL; + int blank = 0; + + t = s = p; + while (((c = gch()) != 0) && c != '\n') { + if (t >= &p[BUF_SIZ]) + error("definitions too long"); + if (c == ' ' || c == '\t') { + if (!blank) { + blank = 1; + u = t; + } + } else + blank = 0; + + *t++ = c; + } + if (blank) + *u = 0; + else + *t = 0; + + if (c == 0 && s == t) + return ((CHR *) 0); + prev = '\n'; + pres = '\n'; + return (s); +} + +int +space(int ch) +{ + switch (ch) { + case ' ': + case '\t': + case '\n': + return (1); + } + return (0); +} + +int +digit(int c) +{ + return (c >= '0' && c <= '9'); +} + +/* VARARGS1 */ +void +error(const char *s, ...) +{ + va_list ap; + + /* if(!eof) */ + if (!yyline) + fprintf(errorf, "Command line: "); + else { + fprintf(errorf, !no_input ? 
"" : "\"%s\":", sargv[optind]); + fprintf(errorf, "line %d: ", yyline); + } + fprintf(errorf, "Error: "); + va_start(ap, s); + vfprintf(errorf, s, ap); + va_end(ap); + putc('\n', errorf); + if (fatal) + error_tail(); +} + +void +error_tail(void) +{ +#ifdef DEBUG + if (debug && sect != ENDSECTION) { + sect1dump(); + sect2dump(); + } +#endif + + if (report == 1) + statistics(); + exit(1); + /* NOTREACHED */ +} + +/* VARARGS1 */ +void +warning(const char *s, ...) +{ + va_list ap; + + if (!eof) + if (!yyline) + fprintf(errorf, "Command line: "); + else { + fprintf(errorf, !no_input?"":"\"%s\":", sargv[optind]); + fprintf(errorf, "line %d: ", yyline); + } + fprintf(errorf, "Warning: "); + va_start(ap, s); + vfprintf(errorf, s, ap); + va_end(ap); + putc('\n', errorf); + fflush(errorf); + if (fout) + fflush(fout); + fflush(stdout); +} + +int +index(int a, CHR *s) +{ + int k; + for (k = 0; s[k]; k++) + if (s[k] == a) + return (k); + return (-1); +} + +int +alpha(int c) +{ + return ('a' <= c && c <= 'z' || + 'A' <= c && c <= 'Z'); +} + +int +printable(int c) +{ + return (c > 040 && c < 0177); +} + +void +lgate(void) +{ + char fname[20]; + + if (lgatflg) + return; + lgatflg = 1; + if (fout == NULL) { + sprintf(fname, "lex.yy.%c", ratfor ? 'r' : 'c'); + fout = fopen(fname, "w"); + } + if (fout == NULL) + error("Can't open %s", fname); + if (ratfor) + fprintf(fout, "#\n"); + phead1(); +} + +/* + * scopy(ptr to str, ptr to str) - copy first arg str to second + * returns ptr to second arg + */ +void +scopy(CHR *s, CHR *t) +{ + CHR *i; + i = t; + while (*i++ = *s++); +} + +/* + * convert string t, return integer value + */ +int +siconv(CHR *t) +{ + int i, sw; + CHR *s; + s = t; + while (space(*s)) + s++; + if (!digit(*s) && *s != '-') + error("missing translation value"); + sw = 0; + if (*s == '-') { + sw = 1; + s++; + } + if (!digit(*s)) + error("incomplete translation format"); + i = 0; + while ('0' <= *s && *s <= '9') + i = i * 10 + (*(s++)-'0'); + return (sw ? 
-i : i); +} + +/* + * slength(ptr to str) - return integer length of string arg + * excludes '\0' terminator + */ +int +slength(CHR *s) +{ + int n; + CHR *t; + t = s; + for (n = 0; *t++; n++); + return (n); +} + +/* + * scomp(x,y) - return -1 if x < y, + * 0 if x == y, + * return 1 if x > y, all lexicographically + */ +int +scomp(CHR *x, CHR *y) +{ + CHR *a, *d; + a = (CHR *) x; + d = (CHR *) y; + while (*a || *d) { + if (*a > *d) + return (1); + if (*a < *d) + return (-1); + a++; + d++; + } + return (0); +} + +int +ctrans(CHR **ss) +{ + int c, k; + if ((c = **ss) != '\\') + return (c); + switch (c = *++*ss) { + case 'a': + c = '\a'; + warning("\\a is ANSI C \"alert\" character"); + break; + case 'v': c = '\v'; break; + case 'n': c = '\n'; break; + case 't': c = '\t'; break; + case 'r': c = '\r'; break; + case 'b': c = '\b'; break; + case 'f': c = 014; break; /* form feed for ascii */ + case '\\': c = '\\'; break; + case 'x': { + int dd; + warning("\\x is ANSI C hex escape"); + if (digit((dd = *++*ss)) || + ('a' <= dd && dd <= 'f') || + ('A' <= dd && dd <= 'F')) { + c = 0; + while (digit(dd) || + ('A' <= dd && dd <= 'F') || + ('a' <= dd && dd <= 'f')) { + if (digit(dd)) + c = c*16 + dd - '0'; + else if (dd >= 'a') + c = c*16 + 10 + dd - 'a'; + else + c = c*16 + 10 + dd - 'A'; + dd = *++*ss; + } + } else + c = 'x'; + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c -= '0'; + while ((k = *(*ss+1)) >= '0' && k <= '7') { + c = c*8 + k - '0'; + (*ss)++; + } + break; + } + return (c); +} + +void +cclinter(int sw) +{ + /* sw = 1 ==> ccl */ + int i, j, k; + int m; + if (!sw) { /* is NCCL */ + for (i = 1; i < ncg; i++) + symbol[i] ^= 1; /* reverse value */ + } + for (i = 1; i < ncg; i++) + if (symbol[i]) + break; + if (i >= ncg) + return; + i = cindex[i]; + /* see if ccl is already in our table */ + j = 0; + if (i) { + for (j = 1; j < ncg; j++) { + if ((symbol[j] && cindex[j] != i) || + (!symbol[j] && cindex[j] == i)) + 
break; + } + } + if (j >= ncg) + return; /* already in */ + m = 0; + k = 0; + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + if (!cindex[i]) { + cindex[i] = ccount; + symbol[i] = 0; + m = 1; + } else + k = 1; + } + } + /* m == 1 implies last value of ccount has been used */ + if (m) + ccount++; + if (k == 0) + return; /* is now in as ccount wholly */ + /* intersection must be computed */ + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + m = 0; + j = cindex[i]; /* will be non-zero */ + for (k = 1; k < ncg; k++) { + if (cindex[k] == j) { + if (symbol[k]) + symbol[k] = 0; + else { + cindex[k] = ccount; + m = 1; + } + } + } + if (m) + ccount++; + } + } +} + +int +usescape(int c) +{ + char d; + switch (c) { + case 'a': + c = '\a'; + warning("\\a is ANSI C \"alert\" character"); break; + case 'v': c = '\v'; break; + case 'n': c = '\n'; break; + case 'r': c = '\r'; break; + case 't': c = '\t'; break; + case 'b': c = '\b'; break; + case 'f': c = 014; break; /* form feed for ascii */ + case 'x': { + int dd; + if (digit((dd = gch())) || + ('A' <= dd && dd <= 'F') || + ('a' <= dd && dd <= 'f')) { + c = 0; + while (digit(dd) || + ('A' <= dd && dd <= 'F') || + ('a' <= dd && dd <= 'f')) { + if (digit(dd)) + c = c*16 + dd - '0'; + else if (dd >= 'a') + c = c*16 + 10 + dd - 'a'; + else + c = c*16 + 10 + dd - 'A'; + if (!digit(peek) && + !('A' <= peek && peek <= 'F') && + !('a' <= peek && peek <= 'f')) + break; + dd = gch(); + } + + } else + c = 'x'; + break; + } + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + c -= '0'; + while ('0' <= (d = gch()) && d <= '7') { + c = c * 8 + (d-'0'); + if (!('0' <= peek && peek <= '7')) break; + } + + break; + } + + if (handleeuc && !isascii(c)) { + char tmpchar = c & 0x00ff; + wchar_t wc; + mbtowc(&wc, &tmpchar, sizeof (tmpchar)); + c = wc; + } + return (c); +} + +int +lookup(CHR *s, CHR **t) +{ + int i; + i = 0; + while (*t) { + if (scomp(s, *t) == 0) + return (i); + i++; + t++; + } + return (-1); +} + 
+void +cpycom(CHR *p) +{ + static CHR *t; + static int c; + t = p; + + if (sargv[optind] == NULL) + fprintf(fout, "\n# line %d\n", yyline); + else + fprintf(fout, "\n# line %d \"%s\"\n", yyline, sargv[optind]); + + putc(*t, fout), t++; + putc(*t, fout), t++; + while (*t) { + while (*t == '*') { + putc(*t, fout), t++; + if (*t == '/') + goto backcall; + } + /* + * FIX BUG #1058428, not parsing comments correctly + * that span more than one line + */ + if (*t != '\0') + putc(*t, fout), t++; + } + putc('\n', fout); + while (c = gch()) { + while (c == '*') { + putc(c, fout); + if ((c = gch()) == '/') { + while ((c = gch()) == ' ' || c == '\t'); + if (!space(c)) + error("unacceptable statement"); + prev = '\n'; + goto backcall; + } + } + putc(c, fout); + } + error("unexpected EOF inside comment"); +backcall: + putc('/', fout); + putc('\n', fout); +} + +/* + * copy C action to the next ; or closing + */ +int +cpyact(void) +{ + int brac, c, mth; + static int sw, savline; + + brac = 0; + sw = TRUE; + savline = yyline; + + if (sargv[optind] == NULL) + fprintf(fout, "\n# line %d\n", yyline); + else + fprintf(fout, "\n# line %d \"%s\"\n", yyline, sargv[optind]); + + while (!eof) { + c = gch(); + swt: + switch (c) { + case '|': + if (brac == 0 && sw == TRUE) { + if (peek == '|') + gch(); /* eat up an extra '|' */ + return (0); + } + break; + case ';': + if (brac == 0) { + putwc(c, fout); + putc('\n', fout); + return (1); + } + break; + case '{': + brac++; + savline = yyline; + break; + case '}': + brac--; + if (brac == 0) { + putwc(c, fout); + putc('\n', fout); + return (1); + } + break; + case '/': + putwc(c, fout); + c = gch(); + if (c != '*') + goto swt; + putwc(c, fout); + savline = yyline; + while (c = gch()) { + while (c == '*') { + putwc(c, fout); + if ((c = gch()) == '/') { + putc('/', fout); + while ((c = gch()) == ' ' || + c == '\t' || c == '\n') + putwc(c, fout); + goto swt; + } + } + putc(c, fout); + } + yyline = savline; + error("EOF inside comment"); + /* 
NOTREACHED */ + break; + case '\'': /* character constant */ + case '"': /* character string */ + mth = c; + putwc(c, fout); + while (c = gch()) { + if (c == '\\') { + putwc(c, fout); + c = gch(); + } + else + if (c == mth) + goto loop; + putwc(c, fout); + if (c == '\n') { + yyline--; + error( +"Non-terminated string or character constant"); + } + } + error("EOF in string or character constant"); + /* NOTREACHED */ + break; + case '\0': + yyline = savline; + error("Action does not terminate"); + /* NOTREACHED */ + break; + default: + break; /* usual character */ + } + loop: + if (c != ' ' && c != '\t' && c != '\n') + sw = FALSE; + putwc(c, fout); + if (peek == '\n' && !brac && copy_line) { + putc('\n', fout); + return (1); + } + } + error("Premature EOF"); + return (0); +} + +int +gch(void) +{ + int c; + prev = pres; + c = pres = peek; + peek = pushptr > pushc ? *--pushptr : getwc(fin); + while (peek == EOF) { + if (no_input) { + if (!yyline) + error("Cannot read from -- %s", + sargv[optind]); + if (optind < sargc-1) { + yyline = 0; + if (fin != stdin) + fclose(fin); + fin = fopen(sargv[++optind], "r"); + if (fin == NULL) + error("Cannot open file -- %s", + sargv[optind]); + peek = getwc(fin); + } else + break; + } else { + if (fin != stdin) + fclose(fin); + if (!yyline) + error("Cannot read from -- standard input"); + else + break; + } + } + if (c == EOF) { + eof = TRUE; + return (0); + } + if (c == '\n') + yyline++; + return (c); +} + +int +mn2(int a, intptr_t d, intptr_t c) +{ + if (tptr >= treesize) { + tptr++; + error("Parse tree too big %s", + (treesize == TREESIZE ? 
"\nTry using %e num" : "")); + } + if (d >= treesize) { + error("Parse error"); + } + name[tptr] = a; + left[tptr] = d; + right[tptr] = c; + parent[tptr] = 0; + nullstr[tptr] = 0; + switch (a) { + case RSTR: + parent[d] = tptr; + break; + case BAR: + case RNEWE: + if (nullstr[d] || nullstr[c]) + nullstr[tptr] = TRUE; + parent[d] = parent[c] = tptr; + break; + case RCAT: + case DIV: + if (nullstr[d] && nullstr[c]) + nullstr[tptr] = TRUE; + parent[d] = parent[c] = tptr; + break; + /* XCU4: add RXSCON */ + case RXSCON: + case RSCON: + parent[d] = tptr; + nullstr[tptr] = nullstr[d]; + break; +#ifdef DEBUG + default: + warning("bad switch mn2 %d %d", a, d); + break; +#endif + } + return (tptr++); +} + +int +mn1(int a, intptr_t d) +{ + if (tptr >= treesize) { + tptr++; + error("Parse tree too big %s", + (treesize == TREESIZE ? "\nTry using %e num" : "")); + } + name[tptr] = a; + left[tptr] = d; + parent[tptr] = 0; + nullstr[tptr] = 0; + switch (a) { + case RCCL: + case RNCCL: + if (slength((CHR *)d) == 0) + nullstr[tptr] = TRUE; + break; + case STAR: + case QUEST: + nullstr[tptr] = TRUE; + parent[d] = tptr; + break; + case PLUS: + case CARAT: + nullstr[tptr] = nullstr[d]; + parent[d] = tptr; + break; + case S2FINAL: + nullstr[tptr] = TRUE; + break; +#ifdef DEBUG + case FINAL: + case S1FINAL: + break; + default: + warning("bad switch mn1 %d %d", a, d); + break; +#endif + } + return (tptr++); +} + +int +mn0(int a) +{ + if (tptr >= treesize) { + tptr++; + error("Parse tree too big %s", + (treesize == TREESIZE ? 
"\nTry using %e num" : "")); + } + + name[tptr] = a; + parent[tptr] = 0; + nullstr[tptr] = 0; + if (ISOPERATOR(a)) { + switch (a) { + case DOT: break; + case RNULLS: nullstr[tptr] = TRUE; break; +#ifdef DEBUG + default: + warning("bad switch mn0 %d", a); + break; +#endif + } + } + return (tptr++); +} + +void +munput(int t, CHR *p) +{ + int i, j; + if (t == 'c') { + *pushptr++ = peek; + peek = *p; + } else if (t == 's') { + *pushptr++ = peek; + peek = p[0]; + i = slength(p); + for (j = i - 1; j >= 1; j--) + *pushptr++ = p[j]; + } + if (pushptr >= pushc + TOKENSIZE) + error("Too many characters pushed"); +} + +int +dupl(int n) +{ + /* duplicate the subtree whose root is n, return ptr to it */ + int i; + i = name[n]; + if (!ISOPERATOR(i)) + return (mn0(i)); + switch (i) { + case DOT: + case RNULLS: + return (mn0(i)); + case RCCL: case RNCCL: case FINAL: case S1FINAL: case S2FINAL: + return (mn1(i, left[n])); + case STAR: case QUEST: case PLUS: case CARAT: + return (mn1(i, dupl(left[n]))); + + /* XCU4: add RXSCON */ + case RSTR: case RSCON: case RXSCON: + return (mn2(i, dupl(left[n]), right[n])); + case BAR: case RNEWE: case RCAT: case DIV: + return (mn2(i, dupl(left[n]), dupl(right[n]))); + } + return (0); +} + +#ifdef DEBUG +void +allprint(CHR c) +{ + switch (c) { + case 014: + printf("\\f"); + charc++; + break; + case '\n': + printf("\\n"); + charc++; + break; + case '\t': + printf("\\t"); + charc++; + break; + case '\b': + printf("\\b"); + charc++; + break; + case ' ': + printf("\\_"); + break; + default: + if (!iswprint(c)) { + printf("\\x%-2x", c); /* up to fashion. 
*/ + charc += 3; + } else + putwc(c, stdout); + break; + } + charc++; +} + +void +strpt(CHR *s) +{ + charc = 0; + while (*s) { + allprint(*s++); + if (charc > LINESIZE) { + charc = 0; + printf("\n\t"); + } + } +} + +void +sect1dump(void) +{ + int i; + printf("Sect 1:\n"); + if (def[0]) { + printf("str trans\n"); + i = -1; + while (def[++i]) + printf("%ls\t%ls\n", def[i], subs[i]); + } + if (sname[0]) { + printf("start names\n"); + i = -1; + while (sname[++i]) + printf("%ls\n", sname[i]); + } + if (chset == TRUE) { + printf("char set changed\n"); + for (i = 1; i < NCH; i++) { + if (i != ctable[i]) { + allprint(i); + putchar(' '); + iswprint(ctable[i]) ? + putwc(ctable[i], stdout) : + printf("%d", ctable[i]); + putchar('\n'); + } + } + } +} + +void +sect2dump(void) +{ + printf("Sect 2:\n"); + treedump(); +} + +void +treedump(void) +{ + int t; + CHR *p; + printf("treedump %d nodes:\n", tptr); + for (t = 0; t < tptr; t++) { + printf("%4d ", t); + parent[t] ? printf("p=%4d", parent[t]) : printf(" "); + printf(" "); + if (!ISOPERATOR(name[t])) { + allprint(name[t]); + } else + switch (name[t]) { + case RSTR: + printf("%ld ", (long)left[t]); + allprint(right[t]); + break; + case RCCL: + printf("ccl "); + strpt((CHR *)left[t]); + break; + case RNCCL: + printf("nccl "); + strpt((CHR *)left[t]); + break; + case DIV: + printf("/ %ld %ld", + (long)left[t], (long)right[t]); + break; + case BAR: + printf("| %ld %ld", + (long)left[t], (long)right[t]); + break; + case RCAT: + printf("cat %ld %ld", + (long)left[t], (long)right[t]); + break; + case PLUS: + printf("+ %ld", (long)left[t]); + break; + case STAR: + printf("* %ld", (long)left[t]); + break; + case CARAT: + printf("^ %ld", (long)left[t]); + break; + case QUEST: + printf("? 
%ld", (long)left[t]); + break; + case RNULLS: + printf("nullstring"); + break; + case FINAL: + printf("final %ld", (long)left[t]); + break; + case S1FINAL: + printf("s1final %ld", (long)left[t]); + break; + case S2FINAL: + printf("s2final %ld", (long)left[t]); + break; + case RNEWE: + printf("new %ld %ld", + (long)left[t], (long)right[t]); + break; + + /* XCU4: add RXSCON */ + case RXSCON: + p = (CHR *)right[t]; + printf("exstart %s", sname[*p++-1]); + while (*p) + printf(", %ls", sname[*p++-1]); + printf(" %ld", (long)left[t]); + break; + case RSCON: + p = (CHR *)right[t]; + printf("start %s", sname[*p++-1]); + while (*p) + printf(", %ls", sname[*p++-1]); + printf(" %ld", (long)left[t]); + break; + case DOT: + printf("dot"); + break; + default: + printf( + "unknown %d %ld %ld", name[t], + (long)left[t], (long)right[t]); + break; + } + if (nullstr[t]) + printf("\t(null poss.)"); + putchar('\n'); + } +} +#endif diff --git a/lex/sub2.c b/lex/sub2.c @@ -0,0 +1,1217 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2005 Sun Microsystems, Inc. + * All rights reserved. + * Use is subject to license terms. 
+ */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "sub2.c 6.15 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)sub2.c 1.7 (gritter) 01/12/07 + */ + +#include "ldefs.c" + +static void add(int **array, int n); +static void follow(int v); +static void first(int v); +static void nextstate(int s, int c); +static void packtrans(int st, CHR *tch, int *tst, int cnt, int tryit); +static void acompute(int s); +static void rprint(int *a, char *s, int n); +static void shiftr(int *a, int n); +static void upone(int *a, int n); +static void bprint(char *a, char *s, int n); +static int notin(int n); +static int member(int d, CHR *t); + +#ifdef PP +static void padd(int **array, int n); +#endif + +void +cfoll(int v) +{ + int i, j, k; + CHR *p; + i = name[v]; + if (!ISOPERATOR(i)) + i = 1; + switch (i) { + case 1: case RSTR: case RCCL: case RNCCL: case RNULLS: + for (j = 0; j < tptr; j++) + tmpstat[j] = FALSE; + count = 0; + follow(v); +#ifdef PP + padd(foll, v); /* packing version */ +#else + add(foll, v); /* no packing version */ +#endif + if (i == RSTR) + cfoll(left[v]); + else if (i == RCCL || i == RNCCL) { + for (j = 1; j < ncg; j++) + symbol[j] = (i == RNCCL); + p = (CHR *) left[v]; + while (*p) + symbol[*p++] = (i == RCCL); + p = pcptr; + for (j = 1; j < ncg; j++) + if (symbol[j]) { + for (k = 0; p + k < pcptr; k++) + if (cindex[j] == *(p + k)) + break; + if (p + k >= pcptr) + *pcptr++ = cindex[j]; + } + *pcptr++ = 0; + if (pcptr > pchar + pchlen) + error( + "Too many packed character classes"); + left[v] = (intptr_t)p; + name[v] = RCCL; /* RNCCL eliminated */ +#ifdef DEBUG + if (debug && *p) { + printf("ccl %d: %d", v, *p++); + while (*p) + printf(", %d", *p++); + putchar('\n'); + } +#endif + } + break; + case CARAT: + cfoll(left[v]); + break; + + /* XCU4: add RXSCON */ + case RXSCON: + + case STAR: case PLUS: case QUEST: case RSCON: + cfoll(left[v]); + break; + case BAR: 
case RCAT: case DIV: case RNEWE: + cfoll(left[v]); + cfoll(right[v]); + break; +#ifdef DEBUG + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning("bad switch cfoll %d", v); +#endif + } +} + +#ifdef DEBUG +void +pfoll(void) +{ + int i, k, *p; + int j; + /* print sets of chars which may follow positions */ + printf("pos\tchars\n"); + for (i = 0; i < tptr; i++) + if (p = foll[i]) { + j = *p++; + if (j >= 1) { + printf("%d:\t%d", i, *p++); + for (k = 2; k <= j; k++) + printf(", %d", *p++); + putchar('\n'); + } + } +} +#endif + +static void +add(int **array, int n) +{ + int i, *temp; + CHR *ctemp; + temp = nxtpos; + ctemp = tmpstat; + array[n] = nxtpos; /* note no packing is done in positions */ + *temp++ = count; + for (i = 0; i < tptr; i++) + if (ctemp[i] == TRUE) + *temp++ = i; + nxtpos = temp; + if (nxtpos >= positions+maxpos) + error( + "Too many positions %s", + (maxpos == MAXPOS ? "\nTry using %p num" : "")); +} + +static void +follow(int v) +{ + int p; + if (v >= tptr-1) + return; + p = parent[v]; + if (p == 0) + return; + switch (name[p]) { + /* will not be CHAR RNULLS FINAL S1FINAL S2FINAL RCCL RNCCL */ + case RSTR: + if (tmpstat[p] == FALSE) { + count++; + tmpstat[p] = TRUE; + } + break; + case STAR: case PLUS: + first(v); + follow(p); + break; + case BAR: case QUEST: case RNEWE: + follow(p); + break; + case RCAT: case DIV: + if (v == left[p]) { + if (nullstr[right[p]]) + follow(p); + first(right[p]); + } + else + follow(p); + break; + /* XCU4: add RXSCON */ + case RXSCON: + case RSCON: case CARAT: + follow(p); + break; +#ifdef DEBUG + default: + warning("bad switch follow %d", p); +#endif + } +} + +/* + * Check if I have a RXSCON in my upper node + */ +static int +check_me(int v) +{ + int tmp = parent[v]; + + while (name[tmp] != RNEWE) { + if (name[tmp] == RXSCON) + return (1); + tmp = parent[tmp]; + } + return (0); +} + +/* calculate set of positions with v as root which can be active initially */ +static void +first(int v) +{ + int 
i; + CHR *p; + i = name[v]; + if (!ISOPERATOR(i)) + i = 1; + switch (i) { + case 1: case RCCL: case RNCCL: + case RNULLS: case FINAL: + case S1FINAL: case S2FINAL: + /* + * XCU4: if we are working on an exclusive start state and + * the parent of this position is not RXSCON or RSTR this + * is not an active position. + * + * (There is a possibility that RSXCON appreas as the + * (parent)* node. Check it by check_me().) + */ + if ((exclusive[stnum/2]) && + ISOPERATOR(name[parent[v]]) && + (name[parent[v]] != RXSCON) && + (name[parent[v]] != RSTR) && + (check_me(v) == 0)) { + break; + } + if (tmpstat[v] == FALSE) { + count++; + tmpstat[v] = TRUE; + } + break; + case BAR: case RNEWE: + first(left[v]); + first(right[v]); + break; + case CARAT: + if (stnum % 2 == 1) + first(left[v]); + break; + /* XCU4: add RXSCON */ + case RXSCON: + case RSCON: + i = stnum/2 +1; + p = (CHR *) right[v]; + while (*p) + if (*p++ == i) { + first(left[v]); + break; + } + break; + case STAR: case QUEST: + case PLUS: case RSTR: + /* + * XCU4: if we are working on an exclusive start state and + * the parent of this position is not RXSCON or RSTR this + * is not an active position. + * + * (There is a possibility that RSXCON appreas as the + * (parent)* node. Check it by check_me().) 
+ */ + if ((exclusive[stnum/2]) && + ISOPERATOR(name[parent[v]]) && + (name[parent[v]] != RXSCON) && + (name[parent[v]] != RSTR) && + (check_me(v) == 0)) { + break; + } + first(left[v]); + break; + case RCAT: case DIV: + first(left[v]); + if (nullstr[left[v]]) + first(right[v]); + break; +#ifdef DEBUG + default: + warning("bad switch first %d", v); +#endif + } +} + +void +cgoto(void) +{ + int i, j; + static int s; + int npos, curpos, n; + int tryit; + CHR tch[MAXNCG]; + int tst[MAXNCG]; + CHR *q; + /* generate initial state, for each start condition */ + if (ratfor) { + fprintf(fout, "blockdata\n"); + fprintf(fout, "common /Lvstop/ vstop\n"); + fprintf(fout, "define Svstop %d\n", nstates+1); + fprintf(fout, "integer vstop(Svstop)\n"); + } else + fprintf(fout, "int yyvstop[] = {\n0,\n"); + while (stnum < 2 || stnum/2 < sptr) { + for (i = 0; i < tptr; i++) + tmpstat[i] = 0; + count = 0; + if (tptr > 0) + first(tptr-1); + add(state, stnum); +#ifdef DEBUG + if (debug) { + if (stnum > 1) + printf("%ls:\n", sname[stnum/2]); + pstate(stnum); + } +#endif + stnum++; + } + stnum--; + /* even stnum = might not be at line begin */ + /* odd stnum = must be at line begin */ + /* even states can occur anywhere, odd states only at line begin */ + for (s = 0; s <= stnum; s++) { + tryit = FALSE; + cpackflg[s] = FALSE; + sfall[s] = -1; + acompute(s); + for (i = 0; i < ncg; i++) + symbol[i] = 0; + npos = *state[s]; + for (i = 1; i <= npos; i++) { + curpos = *(state[s]+i); + if (!ISOPERATOR(name[curpos])) + symbol[name[curpos]] = TRUE; + else { + switch (name[curpos]) { + case RCCL: + tryit = TRUE; + q = (CHR *)left[curpos]; + while (*q) { + for (j = 1; j < ncg; j++) + if (cindex[j] == *q) + symbol[j] = TRUE; + q++; + } + break; + case RSTR: + symbol[right[curpos]] = TRUE; + break; +#ifdef DEBUG + case RNULLS: + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning( + "bad switch cgoto %d state %d", + curpos, s); + break; +#endif + } + } + } +#ifdef DEBUG + if 
(debug) { + printf("State %d transitions on char-group {", s); + charc = 0; + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + printf("%d,", i); + } + if (i == ncg-1) + printf("}\n"); + if (charc > LINESIZE/4) { + charc = 0; + printf("\n\t"); + } + } + } +#endif + /* for each char, calculate next state */ + n = 0; + for (i = 1; i < ncg; i++) { + if (symbol[i]) { + /* executed for each state, transition pair */ + nextstate(s, i); + xstate = notin(stnum); + if (xstate == -2) + warning("bad state %d %o", s, i); + else if (xstate == -1) { + if (stnum+1 >= nstates) { + stnum++; + error("Too many states %s", + (nstates == NSTATES ? + "\nTry using %n num":"")); + } + add(state, ++stnum); +#ifdef DEBUG + if (debug) + pstate(stnum); +#endif + tch[n] = i; + tst[n++] = stnum; + } else { /* xstate >= 0 ==> state exists */ + tch[n] = i; + tst[n++] = xstate; + } + } + } + tch[n] = 0; + tst[n] = -1; + /* pack transitions into permanent array */ + if (n > 0) + packtrans(s, tch, tst, n, tryit); + else + gotof[s] = -1; + } + ratfor ? fprintf(fout, "end\n") : fprintf(fout, "0};\n"); +} + +/* + * Beware -- 70% of total CPU time is spent in this subroutine - + * if you don't believe me - try it yourself ! 
+ */ +static void +nextstate(int s, int c) +{ + int j, *newpos; + CHR *temp, *tz; + int *pos, i, *f, num, curpos, number; + /* state to goto from state s on char c */ + num = *state[s]; + temp = tmpstat; + pos = state[s] + 1; + for (i = 0; i < num; i++) { + curpos = *pos++; + j = name[curpos]; + if ((!ISOPERATOR(j)) && j == c || + j == RSTR && c == right[curpos] || + j == RCCL && member(c, (CHR *) left[curpos])) { + f = foll[curpos]; + number = *f; + newpos = f+1; + for (j = 0; j < number; j++) + temp[*newpos++] = 2; + } + } + j = 0; + tz = temp + tptr; + while (temp < tz) { + if (*temp == 2) { + j++; + *temp++ = 1; + } + else + *temp++ = 0; + } + count = j; +} + +static int +notin(int n) +{ /* see if tmpstat occurs previously */ + int *j, k; + CHR *temp; + int i; + if (count == 0) + return (-2); + temp = tmpstat; + for (i = n; i >= 0; i--) { /* for each state */ + j = state[i]; + if (count == *j++) { + for (k = 0; k < count; k++) + if (!temp[*j++]) + break; + if (k >= count) + return (i); + } + } + return (-1); +} + +static void +packtrans(int st, CHR *tch, int *tst, int cnt, int tryit) +{ + /* + * pack transitions into nchar, nexts + * nchar is terminated by '\0', nexts uses cnt, followed by elements + * gotof[st] = index into nchr, nexts for state st + * sfall[st] = t implies t is fall back state for st + * == -1 implies no fall back + */ + + int cmin, cval, tcnt, diff, p, *ast; + int i, j, k; + CHR *ach; + int go[MAXNCG], temp[MAXNCG], index, c; + int swork[MAXNCG]; + CHR cwork[MAXNCG]; + int upper; + + rcount += (long)cnt; + cmin = -1; + cval = ncg; + ast = tst; + ach = tch; + /* try to pack transitions using ccl's */ + if (!optim) + goto nopack; /* skip all compaction */ + if (tryit) { /* ccl's used */ + for (i = 1; i < ncg; i++) { + go[i] = temp[i] = -1; + symbol[i] = 1; + } + for (i = 0; i < cnt; i++) { + index = (unsigned char) tch[i]; + if ((index >= 0) && (index < NCH)) { + go[index] = tst[i]; + symbol[index] = 0; + } else { + fprintf(stderr, 
+"lex`sub2`packtran: tch[%d] out of bounds (%ld)\n", + i, (long)tch[i]); + } + } + for (i = 0; i < cnt; i++) { + c = match[tch[i]]; + if (go[c] != tst[i] || c == tch[i]) + temp[tch[i]] = tst[i]; + } + /* fill in error entries */ + for (i = 1; i < ncg; i++) + if (symbol[i]) + temp[i] = -2; /* error trans */ + /* count them */ + k = 0; + for (i = 1; i < ncg; i++) + if (temp[i] != -1) + k++; + if (k < cnt) { /* compress by char */ +#ifdef DEBUG + if (debug) + printf( + "use compression %d, %d vs %d\n", st, k, cnt); +#endif + k = 0; + for (i = 1; i < ncg; i++) + if (temp[i] != -1) { + cwork[k] = i; + swork[k++] = + (temp[i] == -2 ? -1 : temp[i]); + } + cwork[k] = 0; +#ifdef PC + ach = cwork; + ast = swork; + cnt = k; + cpackflg[st] = TRUE; +#endif + } + } + /* + * get most similar state + * reject state with more transitions, + * state already represented by a third state, + * and state which is compressed by char if ours is not to be + */ + for (i = 0; i < st; i++) { + if (sfall[i] != -1) + continue; + if (cpackflg[st] == 1) + if (!(cpackflg[i] == 1)) + continue; + p = gotof[i]; + if (p == -1) /* no transitions */ + continue; + tcnt = nexts[p]; + if (tcnt > cnt) + continue; + diff = 0; + k = 0; + j = 0; + upper = p + tcnt; + while (ach[j] && p < upper) { + while (ach[j] < nchar[p] && ach[j]) { + diff++; + j++; + } + if (ach[j] == 0) + break; + if (ach[j] > nchar[p]) { + diff = ncg; + break; + } + /* ach[j] == nchar[p] */ + if (ast[j] != nexts[++p] || + ast[j] == -1 || + (cpackflg[st] && ach[j] != match[ach[j]])) + diff++; + j++; + } + while (ach[j]) { + diff++; + j++; + } + if (p < upper) + diff = ncg; + if (diff < cval && diff < tcnt) { + cval = diff; + cmin = i; + if (cval == 0) + break; + } + } + /* cmin = state "most like" state st */ +#ifdef DEBUG + if (debug) + printf("select st %d for st %d diff %d\n", + cmin, st, cval); +#endif +#ifdef PS + if (cmin != -1) { /* if we can use st cmin */ + gotof[st] = nptr; + k = 0; + sfall[st] = cmin; + p = gotof[cmin] + 1; + j 
= 0; + while (ach[j]) { + /* if cmin has a transition on c, then so will st */ + /* st may be "larger" than cmin, however */ + while (ach[j] < nchar[p-1] && ach[j]) { + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + j++; + } + if (nchar[p-1] == 0) + break; + if (ach[j] > nchar[p-1]) { + warning("bad transition %d %d", st, cmin); + goto nopack; + } + /* ach[j] == nchar[p-1] */ + if (ast[j] != nexts[p] || + ast[j] == -1 || + (cpackflg[st] && ach[j] != match[ach[j]])) { + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + } + p++; + j++; + } + while (ach[j]) { + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j++]; + k++; + } + nexts[gotof[st]] = cnt = k; + nchar[nptr++] = 0; + } else { +#endif +nopack: + /* stick it in */ + gotof[st] = nptr; + nexts[nptr] = cnt; + for (i = 0; i < cnt; i++) { + nchar[nptr] = ach[i]; + nexts[++nptr] = ast[i]; + } + nchar[nptr++] = 0; +#ifdef PS + } +#endif + if (cnt < 1) { + gotof[st] = -1; + nptr--; + } else + if (nptr > ntrans) + error( + "Too many transitions %s", + (ntrans == NTRANS ? 
"\nTry using %a num" : "")); +} + +#ifdef DEBUG +void +pstate(int s) +{ + int *p, i, j; + printf("State %d:\n", s); + p = state[s]; + i = *p++; + if (i == 0) + return; + printf("%4d", *p++); + for (j = 1; j < i; j++) { + printf(", %4d", *p++); + if (j%30 == 0) + putchar('\n'); + } + putchar('\n'); +} +#endif + +static int +member(int d, CHR *t) +{ + int c; + CHR *s; + c = d; + s = t; + c = cindex[c]; + while (*s) + if (*s++ == c) + return (1); + return (0); +} + +#ifdef DEBUG +void +stprt(int i) +{ + int p, t; + printf("State %d:", i); + /* print actions, if any */ + t = atable[i]; + if (t != -1) + printf(" final"); + putchar('\n'); + if (cpackflg[i] == TRUE) + printf("backup char in use\n"); + if (sfall[i] != -1) + printf("fall back state %d\n", sfall[i]); + p = gotof[i]; + if (p == -1) + return; + printf("(%d transitions)\n", nexts[p]); + while (nchar[p]) { + charc = 0; + if (nexts[p+1] >= 0) + printf("%d\t", nexts[p+1]); + else + printf("err\t"); + allprint(nchar[p++]); + while (nexts[p] == nexts[p+1] && nchar[p]) { + if (charc > LINESIZE) { + charc = 0; + printf("\n\t"); + } + allprint(nchar[p++]); + } + putchar('\n'); + } + putchar('\n'); +} +#endif + +/* compute action list = set of poss. 
actions */ +static void +acompute(int s) +{ + int *p, i, j; + int q, r; + int cnt, m; + int temp[MAXPOSSTATE], k, neg[MAXPOSSTATE], n; + k = 0; + n = 0; + p = state[s]; + cnt = *p++; + if (cnt > MAXPOSSTATE) + error("Too many positions for one state - acompute"); + for (i = 0; i < cnt; i++) { + q = *p; + if (name[q] == FINAL) + temp[k++] = left[q]; + else if (name[q] == S1FINAL) { + temp[k++] = left[q]; + if ((r = left[q]) >= NACTIONS) + error( + "INTERNAL ERROR:left[%d]==%d>=NACTIONS", q, r); + extra[r] = 1; + } else if (name[q] == S2FINAL) + neg[n++] = left[q]; + p++; + } + atable[s] = -1; + if (k < 1 && n < 1) + return; +#ifdef DEBUG + if (debug) + printf("final %d actions:", s); +#endif + /* sort action list */ + for (i = 0; i < k; i++) + for (j = i+1; j < k; j++) + if (temp[j] < temp[i]) { + m = temp[j]; + temp[j] = temp[i]; + temp[i] = m; + } + /* remove dups */ + for (i = 0; i < k-1; i++) + if (temp[i] == temp[i+1]) + temp[i] = 0; + /* copy to permanent quarters */ + atable[s] = aptr; +#ifdef DEBUG + if (!ratfor) + fprintf(fout, "/* actions for state %d */", s); +#endif + putc('\n', fout); + for (i = 0; i < k; i++) + if (temp[i] != 0) { + ratfor ? + fprintf(fout, "data vstop(%d)/%d/\n", aptr, temp[i]) : + fprintf(fout, "%d,\n", temp[i]); +#ifdef DEBUG + if (debug) + printf("%d ", temp[i]); +#endif + aptr++; + } + for (i = 0; i < n; i++) { /* copy fall back actions - all neg */ + ratfor ? + fprintf(fout, "data vstop(%d)/%d/\n", aptr, neg[i]) : + fprintf(fout, "%d,\n", neg[i]); + aptr++; +#ifdef DEBUG + if (debug) + printf("%d ", neg[i]); +#endif + } +#ifdef DEBUG + if (debug) + putchar('\n'); +#endif + ratfor ? 
fprintf(fout, "data vstop (%d)/0/\n", aptr) : + fprintf(fout, "0, \n"); + aptr++; +} + +#ifdef DEBUG +void +pccl(void) +{ + /* print character class sets */ + int i, j; + printf("char class intersection\n"); + for (i = 0; i < ccount; i++) { + charc = 0; + printf("class %d:\n\t", i); + for (j = 1; j < ncg; j++) + if (cindex[j] == i) { + allprint(j); + if (charc > LINESIZE) { + printf("\n\t"); + charc = 0; + } + } + putchar('\n'); + } + charc = 0; + printf("match:\n"); + for (i = 0; i < ncg; i++) { + allprint(match[i]); + if (charc > LINESIZE) { + putchar('\n'); + charc = 0; + } + } + putchar('\n'); +} +#endif + +void +mkmatch(void) +{ + int i; + CHR tab[MAXNCG]; + for (i = 0; i < ccount; i++) + tab[i] = 0; + for (i = 1; i < ncg; i++) + if (tab[cindex[i]] == 0) + tab[cindex[i]] = i; + /* tab[i] = principal char for new ccl i */ + for (i = 1; i < ncg; i++) + match[i] = tab[cindex[i]]; +} + +void +layout(void) +{ + /* format and output final program's tables */ + int i, j, k; + int top, bot, startup, omin; + startup = 0; + for (i = 0; i < outsize; i++) + verify[i] = advance[i] = 0; + omin = 0; + yytop = 0; + for (i = 0; i <= stnum; i++) { /* for each state */ + j = gotof[i]; + if (j == -1) { + stoff[i] = 0; + continue; + } + bot = j; + while (nchar[j]) + j++; + top = j - 1; +#if DEBUG + if (debug) { + printf("State %d: (layout)\n", i); + for (j = bot; j <= top; j++) { + printf(" %o", nchar[j]); + if (j % 10 == 0) + putchar('\n'); + } + putchar('\n'); + } +#endif + while (verify[omin+ZCH]) + omin++; + startup = omin; +#if DEBUG + if (debug) + printf( + "bot,top %d, %d startup begins %d\n", + bot, top, startup); +#endif + if (chset) { + do { + startup += 1; + if (startup > outsize - ZCH) + error("output table overflow"); + for (j = bot; j <= top; j++) { + k = startup+ctable[nchar[j]]; + if (verify[k]) + break; + } + } while (j <= top); +#if DEBUG + if (debug) + printf(" startup will be %d\n", startup); +#endif + /* have found place */ + for (j = bot; j <= top; j++) { + k 
= startup + ctable[nchar[j]]; + if (ctable[nchar[j]] <= 0) + printf( + "j %d nchar %ld ctable.nch %d\n", + j, (long)nchar[j], ctable[nchar[k]]); + verify[k] = i + 1; /* state number + 1 */ + advance[k] = nexts[j+1]+1; + if (yytop < k) + yytop = k; + } + } else { + do { + startup += 1; + if (startup > outsize - ZCH) + error("output table overflow"); + for (j = bot; j <= top; j++) { + k = startup + nchar[j]; + if (verify[k]) + break; + } + } while (j <= top); + /* have found place */ +#if DEBUG + if (debug) + printf(" startup going to be %d\n", startup); +#endif + for (j = bot; j <= top; j++) { + k = startup + nchar[j]; + verify[k] = i+1; /* state number + 1 */ + advance[k] = nexts[j+1] + 1; + if (yytop < k) + yytop = k; + } + } + stoff[i] = startup; + } + + /* stoff[i] = offset into verify, advance for trans for state i */ + /* put out yywork */ + if (ratfor) { + fprintf(fout, "define YYTOPVAL %d\n", yytop); + rprint(verify, "verif", yytop+1); + rprint(advance, "advan", yytop+1); + shiftr(stoff, stnum); + rprint(stoff, "stoff", stnum+1); + shiftr(sfall, stnum); + upone(sfall, stnum+1); + rprint(sfall, "fall", stnum+1); + bprint(extra, "extra", casecount+1); + bprint((char *)match, "match", ncg); + shiftr(atable, stnum); + rprint(atable, "atable", stnum+1); + } + fprintf(fout, + "# define YYTYPE %s\n", stnum+1 >= NCH ? 
"int" : "unsigned char"); + fprintf(fout, + "struct yywork { YYTYPE verify, advance; } yycrank[] = {\n"); + for (i = 0; i <= yytop; i += 4) { + for (j = 0; j < 4; j++) { + k = i+j; + if (verify[k]) + fprintf(fout, + "{ %d,%d },\t", verify[k], advance[k]); + else + fprintf(fout, "{ 0,0 },\t"); + } + putc('\n', fout); + } + fprintf(fout, "{0,0}};\n"); + + /* put out yysvec */ + + fprintf(fout, "struct yysvf yysvec[] = {\n"); + fprintf(fout, "{ 0,\t0,\t0 },\n"); + for (i = 0; i <= stnum; i++) { /* for each state */ + if (cpackflg[i]) + stoff[i] = -stoff[i]; + fprintf(fout, "{ yycrank+%d,\t", stoff[i]); + if (sfall[i] != -1) + fprintf(fout, + "yysvec+%d,\t", sfall[i]+1); /* state + 1 */ + else + fprintf(fout, "0,\t\t"); + if (atable[i] != -1) + fprintf(fout, "yyvstop+%d", atable[i]); + else + fprintf(fout, "0"); +#ifdef DEBUG + fprintf(fout, " },\t\t/* state %d */", i); +#endif + fprintf(fout, " },\t\t/* state %d */", i); + } + fprintf(fout, "{ 0,\t0,\t0}};\n"); + + /* put out yymatch */ + + fprintf(fout, "struct yywork *yytop = yycrank+%d;\n", yytop); + fprintf(fout, "struct yysvf *yybgin = yysvec+1;\n"); + if (optim) { + if (handleeuc) { + fprintf(fout, "int yymatch[] = {\n"); + } else { + fprintf(fout, "char yymatch[] = {\n"); + } + if (chset == 0) { /* no chset, put out in normal order */ + for (i = 0; i < ncg; i += 8) { + for (j = 0; j < 8; j++) { + int fbch; + fbch = match[i+j]; + fprintf(fout, "%3d, ", fbch); + } + putc('\n', fout); + } + } else { + int *fbarr; + fbarr = myalloc(2*MAXNCG, sizeof (*fbarr)); + if (fbarr == 0) + error("No space for char table reverse", 0); + for (i = 0; i < MAXNCG; i++) + fbarr[i] = 0; + for (i = 0; i < ncg; i++) + fbarr[ctable[i]] = ctable[match[i]]; + for (i = 0; i < ncg; i += 8) { + for (j = 0; j < 8; j++) + fprintf(fout, "0%-3o,", fbarr[i+j]); + putc('\n', fout); + } + free(fbarr); + } + fprintf(fout, "0};\n"); + } + /* put out yyextra */ + fprintf(fout, "char yyextra[] = {\n"); + for (i = 0; i < casecount; i += 8) { + for (j = 
0; j < 8; j++) + fprintf(fout, "%d,", i+j < NACTIONS ? + extra[i+j] : 0); + putc('\n', fout); + } + fprintf(fout, "0};\n"); + if (handleeuc) { + /* Put out yycgidtbl */ + fprintf(fout, "#define YYNCGIDTBL %d\n", ncgidtbl); + fprintf(fout, "\tunsigned long yycgidtbl[]={"); + /* + * Use "unsigned long" instead of "lchar" to minimize + * the name-space polution for the application program. + */ + for (i = 0; i < ncgidtbl; ++i) { + if (i%8 == 0) + fprintf(fout, "\n\t\t"); + fprintf(fout, "0x%08lx, ", yycgidtbl[i]); + } + fprintf(fout, "\n\t0};\n"); + } +} + +static void +rprint(int *a, char *s, int n) +{ + int i; + fprintf(fout, "block data\n"); + fprintf(fout, "common /L%s/ %s\n", s, s); + fprintf(fout, "define S%s %d\n", s, n); + fprintf(fout, "integer %s (S%s)\n", s, s); + for (i = 1; i <= n; i++) { + if (i%8 == 1) + fprintf(fout, "data "); + fprintf(fout, "%s (%d)/%d/", s, i, a[i]); + fprintf(fout, (i%8 && i < n) ? ", " : "\n"); + } + fprintf(fout, "end\n"); +} + +static void +shiftr(int *a, int n) +{ + int i; + for (i = n; i >= 0; i--) + a[i+1] = a[i]; +} + +static void +upone(int *a, int n) +{ + int i; + for (i = 0; i <= n; i++) + a[i]++; +} + +static void +bprint(char *a, char *s, int n) +{ + int i, j, k; + fprintf(fout, "block data\n"); + fprintf(fout, "common /L%s/ %s\n", s, s); + fprintf(fout, "define S%s %d\n", s, n); + fprintf(fout, "integer %s (S%s)\n", s, s); + for (i = 1; i < n; i += 8) { + fprintf(fout, "data %s (%d)/%d/", s, i, a[i]); + for (j = 1; j < 8; j++) { + k = i+j; + if (k < n) + fprintf(fout, ", %s (%d)/%d/", s, k, a[k]); + } + putc('\n', fout); + } + fprintf(fout, "end\n"); +} + +#ifdef PP +static void +padd(int **array, int n) +{ + int i, *j, k; + array[n] = nxtpos; + if (count == 0) { + *nxtpos++ = 0; + return; + } + for (i = tptr-1; i >= 0; i--) { + j = array[i]; + if (j && *j++ == count) { + for (k = 0; k < count; k++) + if (!tmpstat[*j++]) + break; + if (k >= count) { + array[n] = array[i]; + return; + } + } + } + add(array, n); +} 
+#endif
diff --git a/lex/sub3.c b/lex/sub3.c
@@ -0,0 +1,395 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright 2005 Sun Microsystems, Inc.
+ * All rights reserved.
+ * Use is subject to license terms.
+ */
+
+/* from OpenSolaris "sub3.c 1.8 05/06/08 SMI" */
+
+/*
+ * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany
+ *
+ * Sccsid @(#)sub3.c 1.4 (gritter) 11/26/05
+ */
+
+/*
+ * sub3.c ... ALE enhancement.
+ * Since a typical Asian language has a huge character set, it is not
+ * ideal to index an array by a character code itself, which requires
+ * as many as 2**16 entries per array.
+ * To get around this problem, we identify a set of characters that
+ * causes the same transition on all states and call it a character group.
+ * Every character in the same character group shares one number, called
+ * the character group id. A function yycgid(c) maps the character c (in
+ * process code) to the id. This mapping is determined by analyzing all
+ * regular expressions in the lex program.
+ *
+ */
+#include <stdlib.h>
+#ifdef __sun
+#include <widec.h>
+#endif
+#include "search.h"
+#include "ldefs.c"
+#include <ctype.h>
+
+/*
+ * "lchar" stands for linearized character. It is a variant of
+ * process code. AT&T's 16-bit process code has a drawback: for
+ * three process codes C, D and E where C <= D <= E,
+ * codeset(C)==codeset(E) does not mean codeset(D)==codeset(C).
+ * In other words, the four codesets alternate as the magnitude
+ * of the character increases.
+ * The lchar representation has this property:
+ * If three lchar C', D' and E' have the relationship C' < D' < E' and
+ * codeset(C') == codeset(E') then D' is guaranteed to belong to
+ * the same codeset as C' and E'.
+ * lchar is implemented as 32 bit entities and the function linearize()
+ * that maps a wchar_t to lchar is defined below. There is no
+ * reverse function for it though.
+ * The 32-bit process code by AT&T, used only for the Taiwanese version at
+ * the time of writing, has no such problem and we use it as it is.
+ */
+
+/*
+ * Table of group boundaries.  Only the first ncgidtbl entries are valid;
+ * remch() adds entries and sortcgidtbl() sorts them.
+ */
+lchar yycgidtbl[MAXNCG] = {
+	0, /* For ease of computation of the id. */
+	'\n', /* Newline is always special because '.' excludes it. */
+	0x000000ff, /* The upper limit of codeset 0. */
+	0x20ffffff, /* The upper limit of codeset 2. */
+	0x40ffffff /* The upper limit of codeset 3. */
+/* 0x60ffffff The upper limit of codeset 1. */
+	/* Above assumes the number of significant bits of wchar_t is <= 24. */
+};
+int ncgidtbl = 5; /* # elements in yycgidtbl. */
+int ncg; /* Should set to ncgidtbl*2; this is the largest value yycgid() */
+		/* returns plus 1. */
+
+static void setsymbol(int i);
+
+/*
+ * For given 16-bit wchar_t (See NOTE), lchar is computed as illustrated below:
+ *
+ * wc: axxxxxxbyyyyyyy
+ *
+ * returns: 0ab0000000000000axxxxxxxbyyyyyyy
+ *
+ * linearize() doesn't do anything if compiled with 32-bit wchar_t, use of
+ * which is flagged with LONG_WCHAR_T macro.
+ * NOTE: + * The implementation is highly depends on the process code representation. + * This function should be modified when 32-bit process code is used. + * There is no need to keep 'a' and 'b' bits in the lower half of lchar. + * You can actually omit these and squeeze the xxxxxx part one bit right. + * We don't do that here just in sake of speed. + */ +lchar +linearize(wchar_t wc) +{ +#ifdef LONG_WCHAR_T + return ((lchar)wc); /* Don't do anything. */ +#else + + lchar prefix; + switch (wc&0x8080) { + case 0x0000: prefix = 0x00000000; break; + case 0x0080: prefix = 0x20000000; break; + case 0x8000: prefix = 0x40000000; break; + case 0x8080: prefix = 0x60000000; break; + } + return (prefix|wc); +#endif +} + +/* compare liniear characters pointed to by pc1 and pc2 */ +int +cmplc(const void *arg1, const void *arg2) +{ + lchar *pc1 = (lchar *)arg1; + lchar *pc2 = (lchar *)arg2; + + if (*pc1 > *pc2) + return (1); + else if (*pc1 == *pc2) + return (0); + else + return (-1); +} + +void +remch(wchar_t c) +{ + lchar lc = linearize(c); + + /* + * User-friendliness consideration: + * Make sure no EUC chars are used in reg. exp. + */ + if (!handleeuc) { + if (!isascii(c)) + if (iswprint(c)) + warning( +"Non-ASCII character '%lc' in pattern; use -w or -e lex option.", c); + else warning( +"Non-ASCII character of value %#x in pattern; use -w or -e lex option.", c); + /* In any case, we don't need to construct ncgidtbl[]. */ + return; + } + + xlsearch(&lc, yycgidtbl, + (unsigned *)&ncgidtbl, sizeof (lchar), cmplc); +} + +void +sortcgidtbl(void) +{ + if (!handleeuc) + return; + qsort(yycgidtbl, ncgidtbl, sizeof (lchar), cmplc); +} + +/* + * int yycgid(wchar_t c) + * Takes c and returns its character group id, determind by the + * following algorithm. The program also uses the binary search + * algorithm, generalized from Knuth (6.2.1) Algorithm B. 
+ *
+ * This function computes the "character group id" based on
+ * a table yycgidtbl of which each lchar entry is pre-sorted
+ * in ascending sequence. The number of valid entries is given
+ * by YYNCGIDTBL. There are no duplicate entries in yycgidtbl.
+ * const int YYNCGIDTBL;
+ * lchar yycgidtbl[YYNCGIDTBL];
+ *
+ * yycgidtbl[0] is guaranteed to have zero.
+ *
+ * For given c, yycgid(c) returns:
+ * 2*i iff yycgidtbl[i] == lc
+ * 2*i+1 iff yycgidtbl[i] < lc < yycgidtbl[i+1]
+ * YYNCGIDTBL*2-1
+ * iff yycgidtbl[YYNCGIDTBL-1] < lc
+ * where lc=linearize(c).
+ *
+ * Some interesting properties:
+ * 1. For any c, 0 <= yycgid(c) <= 2*YYNCGIDTBL-1
+ * 2. yycgid(c) == 0 iff c == 0.
+ * 3. For any wchar_t c and d, if linearize(c) < linearize(d) then
+ * yycgid(c) <= yycgid(d).
+ * 4. For any wchar_t c and d, if yycgid(c) < yycgid(d) then
+ * linearize(c) < linearize(d).
+ *
+ * All of the above describes the handleeuc case; without handleeuc the
+ * function simply returns c.
+ */
+#define YYNCGIDTBL ncgidtbl
+
+int
+yycgid(wchar_t c)
+{
+	int first = 0;
+	int last = YYNCGIDTBL - 1;
+	lchar lc;
+
+	/*
+	 * In ASCII compat. mode, each character forms a "group" and the
+	 * group-id is itself...
+	 */
+	if (!handleeuc)
+		return (c);
+
+	lc = linearize(c);
+
+	/* An exceptional case: yycgidtbl[YYNCGIDTBL-1] < lc */
+	if (yycgidtbl[YYNCGIDTBL - 1] < lc)
+		return (YYNCGIDTBL*2 - 1);
+
+	/*
+	 * Binary search over [first, last].  Reading yycgidtbl[i+1] below
+	 * stays inside the valid entries: the early return above leaves
+	 * lc <= yycgidtbl[YYNCGIDTBL-1], so yycgidtbl[i] < lc rules out
+	 * i == YYNCGIDTBL-1.
+	 */
+	while (last >= 0) {
+		int i = (first+last)/2;
+		if (lc == yycgidtbl[i])
+			return (2*i); /* lc exactly matches an element. */
+		else if (yycgidtbl[i] < lc) {
+			if (lc < yycgidtbl[i+1])
+				return (2*i+1); /* lc is in between two elements. */
+			else
+				first = i + 1;
+		} else
+			last = i - 1;
+	}
+	/* only reached when last has dropped below 0 without a hit */
+	error(
+	"system error in yycgid():binary search failed for c=0x%04x\n", c);
+	return (0);
+}
+
+/*
+ * repbycgid --- replaces each character in the parsing tree by its
+ * character group id. This, however, should be called even in
+ * the ASCII compat. mode to process DOT nodes and to call cclinter()
+ * for the DOT and CCL nodes.
+ */ +void +repbycgid(void) +{ + int i, c; + + for (i = 0; i < tptr; ++i) { + c = name[i]; + if (!ISOPERATOR(c)) { + /* If not an operator, it must be a char. */ + name[i] = yycgid((wchar_t)c); /* So replace it. */ +#ifdef DEBUG + if (debug) { + printf("name[%d]:'%c'->%d;\n", i, c, name[i]); + } +#endif + } else if (c == RSTR) { + c = right[i]; + right[i] = yycgid((wchar_t)c); +#ifdef DEBUG + if (debug) { + printf( + "name[%d].right:'%c'->%d;\n", i, c, right[i]); + } +#endif + } else if ((c == RCCL) || (c == RNCCL)) { + CHR cc, *s; + int j; + CHR ccltoken[CCLSIZE]; + CHR *ccp; + int m; + /* + * This node represetns a character class RE [ccccc] + * s points to the string of characters that forms + * the class and/or a special prefix notation + * <RANGE>XY which corresponds to the RE X-Y, + * characters in the range of X and Y. Here, + * X <= Y is guranteed. + * We transform these characters into a string + * of sorted character group ids. + * + * There is another mechanism of packing tables + * that is inherited from the ASCII lex. Call of + * cclinter() is required for this packing. + * This used to be done as yylex() reads the lex + * rules but we have to do this here because the + * transition table is made to work on the char-group + * ids and the mapping cannot be determined until + * the entire file is read. + */ +#ifdef DEBUG + if (debug) { + printf("name[%d]:R[N]CCL of \"", i); + strpt((CHR *)left[i]); + printf(" -> {"); + } +#endif + /* Prepare symbol[] for cclinter(). */ + for (j = 0; j < ncg; ++j) + symbol[j] = FALSE; + + s = (CHR *) left[i]; + while (cc = *s++) { + if (cc == RANGE) { + int low, high, i; + /* + * Special form: <RANGE>XY + * This means the range X-Y. + * We mark all symbols[] + * elements for yycgid(X) thru + * yycgid(Y), inclusively. + */ + low = yycgid(*s++); + high = yycgid(*s++); + for (i = low; i <= high; ++i) + setsymbol(i); + } else { + setsymbol(yycgid(cc)); + } + } + + /* Now make a transformed string of cgids. 
*/ + s = ccptr; + m = 0; + for (j = 0; j < ncg; ++j) + if (symbol[j]) { + ccltoken[m++] = (CHR)j; +#ifdef DEBUG + if (debug) printf("%d, ", j); +#endif + } + +#ifdef DEBUG + if (debug) printf("}\n"); +#endif + ccltoken[m] = 0; + ccp = ccl; + while (ccp < ccptr && scomp(ccltoken, ccp) != 0) + ccp++; + if (ccp < ccptr) { /* character class found in ccl */ + left[i] = (intptr_t)ccp; + } else { /* not in ccl, add it */ + left[i] = (intptr_t)ccptr; + scopy(ccltoken, ccptr); + ccptr += slength(ccltoken) + 1; + if (ccptr > ccl + CCLSIZE) + error("Too many large character classes"); + } + cclinter(c == RCCL); + } else if (c == DOT) { + if (psave == 0) { /* First DOT node. */ + int j, nlid; + /* + * Make symbol[k]=TRUE for all k + * except k == yycgid('\n'). + */ + nlid = yycgid('\n'); + psave = ccptr; + for (j = 1; j < ncg; ++j) { + if (j == nlid) { + symbol[j] = FALSE; + } else { + symbol[j] = TRUE; + *ccptr++ = (CHR) j; + } + } + *ccptr++ = 0; + if (ccptr > ccl + CCLSIZE) + error("Too many large character classes"); + } + /* Mimic mn1(RCCL,psave)... */ + name[i] = RCCL; + left[i] = (intptr_t)psave; + cclinter(1); + } + } +#ifdef DEBUG + if (debug) { + printf("treedump after repbycgid().\n"); + treedump(); + } +#endif +} + +static void +setsymbol(int i) +{ + if (i > sizeof (symbol)) + error("setsymbol: (SYSERR) %d out of range", i); + symbol[i] = TRUE; +} diff --git a/lex/wcio.c b/lex/wcio.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany
+ *
+ * Sccsid @(#)wcio.c 1.1 (gritter) 6/25/05
+ */
+#include <stdlib.h>
+#include <wchar.h>
+#include <stdio.h>
+#include <limits.h>
+#include <errno.h>
+
+/* lex's error reporter, defined outside this file */
+extern int error(char *, ...);
+
+/*
+ * This is like getwc() but issues an error message when an illegal
+ * byte sequence is encountered.
+ *
+ * Returns the character read, or WEOF exactly as getwc() returned it.
+ * error() is called only when the stream's error indicator is set and
+ * errno is EILSEQ.
+ */
+wint_t
+lex_getwc(FILE *fp)
+{
+	wint_t wc;
+
+	if ((wc = getwc(fp)) != WEOF)
+		return wc;
+	if (ferror(fp) && errno == EILSEQ)
+		error("illegal byte sequence");
+	return wc;
+}
+
+/*
+ * A substitute for putwc(), to ensure that stdio output FILE objects
+ * are always byte-oriented.
+ *
+ * wc is converted with wctomb() and the resulting bytes are written
+ * with putc().  Returns wc on success.  Returns WEOF if a byte could
+ * not be written, or if wc could not be converted (errno is then set
+ * to EILSEQ).
+ */
+wint_t
+lex_putwc(wchar_t wc, FILE *fp)
+{
+	char mb[MB_LEN_MAX];
+	int i, n;
+
+	if ((n = wctomb(mb, wc)) < 0) {
+		/* NOTE(review): presumably resets wctomb()'s shift state -- confirm */
+		wctomb(mb, 0);
+		errno = EILSEQ;
+		return WEOF;
+	}
+	for (i = 0; i < n; i++)
+		if (putc(mb[i]&0377, fp) == EOF)
+			return WEOF;
+	return wc;
+}
diff --git a/lex/yyless.c b/lex/yyless.c
@@ -0,0 +1,137 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License, Version 1.0 only
+ * (the "License"). You may not use this file except in compliance
+ * with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "yyless.c 6.14 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)yyless.c 1.6 (gritter) 11/27/05 + */ + +#include <stdlib.h> +#ifdef __sun +#include <sys/euc.h> +#include <widec.h> +#endif +#include <limits.h> +#include <inttypes.h> +#include <unistd.h> + +extern int yyprevious; + +#ifndef JLSLEX +#define CHR char + +extern CHR yytext[]; + +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#endif + +#ifdef WOPTION +#define CHR wchar_t + +extern CHR yytext[]; + +#define YYTEXT yytext +#define YYLENG yyleng +#define YYINPUT yyinput +#define YYUNPUT yyunput +#define YYOUTPUT yyoutput +#define yyless yyless_w +#endif + +#ifdef EOPTION +#define CHR wchar_t + +extern int yyleng; +extern CHR yytext[]; +extern CHR yywtext[]; + +#define YYTEXT yywtext +#define YYLENG yywleng +#define YYINPUT yywinput +#define YYUNPUT yywunput +#define YYOUTPUT yywoutput +#define yyless yyless_e +#endif + +extern int YYLENG; +#if defined(__STDC__) + extern void YYUNPUT(int); +#endif + +#if defined(__cplusplus) || defined(__STDC__) +/* XCU4: type of yyless() changes to int */ +int +yyless(int x) +#else +yyless(x) +int x; +#endif +{ + register CHR *lastch, *ptr; + + lastch = YYTEXT+YYLENG; + if (x >= 0 && x <= YYLENG) + ptr = x + YYTEXT; + else { + if (sizeof (int) != sizeof (intptr_t)) { + static int seen = 0; + + if (!seen) { + write(2, + "warning: yyless pointer arg truncated\n", 39); + seen = 1; + } + } + /* + * The cast on the 
next line papers over an unconscionable nonportable + * glitch to allow the caller to hand the function a pointer instead of + * an integer and hope that it gets figured out properly. But it's + * that way on all systems. + */ + ptr = (CHR *)(intptr_t)x; + } + while (lastch > ptr) + YYUNPUT(*--lastch); + *lastch = 0; + if (ptr > YYTEXT) + yyprevious = *--lastch; + YYLENG = ptr-YYTEXT; +#ifdef EOPTION + yyleng = wcstombs((char *)yytext, YYTEXT, YYLENG*MB_LEN_MAX); +#endif + return (0); +} diff --git a/lex/yywrap.c b/lex/yywrap.c @@ -0,0 +1,41 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* from OpenSolaris "yywrap.c 6.4 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)yywrap.c 1.3 (gritter) 6/18/05 + */ + +#if defined(__cplusplus) || defined(__STDC__) +int yywrap(void) +#else +yywrap() +#endif +{ + return(1); +} diff --git a/libcommon/CHECK.c b/libcommon/CHECK.c @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2004 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+/* Sccsid @(#)CHECK.c 1.8 (gritter) 12/16/07 */
+
+#include <stdlib.h>
+
+/*
+ * Each section below sets NEED_ALLOCA_H, NEED_MALLOC_H and/or
+ * NEED_UTMPX_H to 1 for one platform, keyed on that platform's
+ * predefined macro.
+ */
+#ifdef __FreeBSD__
+#define NEED_ALLOCA_H 1
+#define NEED_MALLOC_H 1
+#define NEED_UTMPX_H 1
+#endif
+
+#ifdef __APPLE__
+#include <available.h>
+/* only deployment targets older than Mac OS X 10.5 ask for alloca.h */
+#if __MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_OS_X_VERSION_10_5
+#define NEED_ALLOCA_H 1
+#endif
+#define NEED_MALLOC_H 1
+#define NEED_UTMPX_H 1
+#endif
+
+#ifdef __DragonFly__
+#define NEED_ALLOCA_H 1
+#define NEED_MALLOC_H 1
+#define NEED_UTMPX_H 1
+#endif
+
+#ifdef __OpenBSD__
+#define NEED_ALLOCA_H 1
+#define NEED_MALLOC_H 1
+#define NEED_UTMPX_H 1
+#endif
+
+#ifdef __NetBSD__
+#define NEED_ALLOCA_H 1
+#define NEED_MALLOC_H 1
+#define NEED_UTMPX_H 1
+#endif
+
+#ifdef __dietlibc__
+#define NEED_MALLOC_H 1
+#define NEED_UTMPX_H 1
+#endif
+
+#ifdef __UCLIBC__
+#define NEED_UTMPX_H 1
+#endif
+
+/* anything not requested above defaults to 0 */
+#ifndef NEED_ALLOCA_H
+#define NEED_ALLOCA_H 0
+#endif
+
+#ifndef NEED_MALLOC_H
+#define NEED_MALLOC_H 0
+#endif
+
+#ifndef NEED_UTMPX_H
+#define NEED_UTMPX_H 0
+#endif
+
+/*
+ * The results as initialized globals: 1 = needed, 0 = not needed.
+ * NOTE(review): presumably the build inspects these to decide whether to
+ * provide the _alloca.h/_malloc.h/_utmpx.h replacements -- confirm
+ * against the mkfile.
+ */
+int alloca_h = NEED_ALLOCA_H;
+int malloc_h = NEED_MALLOC_H;
+int utmpx_h = NEED_UTMPX_H;
diff --git a/libcommon/_alloca.h b/libcommon/_alloca.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2003 Gunnar Ritter
+ *
+ * 
This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ *
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ *
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+/* Sccsid @(#)_alloca.h 1.5 (gritter) 1/22/06 */
+
+/*
+ * On the systems listed this header only pulls in <stdlib.h>.
+ * NOTE(review): presumably because alloca() is declared there on these
+ * systems -- confirm.
+ */
+#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \
+	defined (__DragonFly__) || defined (__APPLE__)
+#include <stdlib.h>
+#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */
diff --git a/libcommon/_malloc.h b/libcommon/_malloc.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2003 Gunnar Ritter
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute
+ * it freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ *
+ * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)_malloc.h 1.2 (gritter) 5/1/04 */ + +#include <stdlib.h> + +extern void *memalign(size_t, size_t); diff --git a/libcommon/_utmpx.h b/libcommon/_utmpx.h @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2004 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)_utmpx.h 1.9 (gritter) 1/22/06 */ + +#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \ + defined (__UCLIBC__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <sys/types.h> +#include <sys/time.h> +#include <utmp.h> + +#ifndef __dietlibc__ +struct utmpx { + char ut_user[UT_NAMESIZE]; + char ut_id[UT_LINESIZE]; + char ut_line[UT_LINESIZE]; + char ut_host[UT_HOSTSIZE]; + pid_t ut_pid; + short ut_type; + struct timeval ut_tv; + struct { + int e_termination; + int e_exit; + } ut_exit; +}; + +#ifndef EMPTY +#define EMPTY 0 +#endif +#ifndef BOOT_TIME +#define BOOT_TIME 1 +#endif +#ifndef OLD_TIME +#define OLD_TIME 2 +#endif +#ifndef NEW_TIME +#define NEW_TIME 3 +#endif +#ifndef USER_PROCESS +#define USER_PROCESS 4 +#endif +#ifndef INIT_PROCESS +#define INIT_PROCESS 5 +#endif +#ifndef LOGIN_PROCESS +#define LOGIN_PROCESS 6 +#endif +#ifndef DEAD_PROCESS +#define DEAD_PROCESS 7 +#endif +#ifndef RUN_LVL +#define RUN_LVL 8 +#endif +#ifndef ACCOUNTING +#define ACCOUNTING 9 +#endif +#else /* __dietlibc__ */ +#define utmpx utmp +#endif /* __dietlibc__ */ + +extern void endutxent(void); +extern struct utmpx *getutxent(void); +extern struct utmpx *getutxid(const struct utmpx *); +extern struct utmpx *getutxline(const struct utmpx *); +extern struct utmpx *pututxline(const struct utmpx *); +extern void setutxent(void); +extern int utmpxname(const char *); +extern void updwtmpx(const char *, const struct utmpx *); +#endif /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __UCLIBC__ || + __OpenBSD__ || __DragonFly__ || __APPLE__ */ diff --git a/libcommon/asciitype.c b/libcommon/asciitype.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)asciitype.c 1.4 (gritter) 4/17/03 */ + +#include "asciitype.h" + +const unsigned char class_char[] = { +/* 000 nul 001 soh 002 stx 003 etx 004 eot 005 enq 006 ack 007 bel */ + C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL, +/* 010 bs 011 ht 012 nl 013 vt 014 np 015 cr 016 so 017 si */ + C_CNTRL,C_BLANK,C_WHITE,C_SPACE,C_SPACE,C_SPACE,C_CNTRL,C_CNTRL, +/* 020 dle 021 dc1 022 dc2 023 dc3 024 dc4 025 nak 026 syn 027 etb */ + C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL, +/* 030 can 031 em 032 sub 033 esc 034 fs 035 gs 036 rs 037 us */ + C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL,C_CNTRL, +/* 040 sp 041 ! 042 " 043 # 044 $ 045 % 046 & 047 ' */ + C_BLANK,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 050 ( 051 ) 052 * 053 + 054 , 055 - 056 . 057 / */ + C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 060 0 061 1 062 2 063 3 064 4 065 5 066 6 067 7 */ + C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL,C_OCTAL, +/* 070 8 071 9 072 : 073 ; 074 < 075 = 076 > 077 ? 
*/ + C_DIGIT,C_DIGIT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 100 @ 101 A 102 B 103 C 104 D 105 E 106 F 107 G */ + C_PUNCT,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER, +/* 110 H 111 I 112 J 113 K 114 L 115 M 116 N 117 O */ + C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER, +/* 120 P 121 Q 122 R 123 S 124 T 125 U 126 V 127 W */ + C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER,C_UPPER, +/* 130 X 131 Y 132 Z 133 [ 134 \ 135 ] 136 ^ 137 _ */ + C_UPPER,C_UPPER,C_UPPER,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT, +/* 140 ` 141 a 142 b 143 c 144 d 145 e 146 f 147 g */ + C_PUNCT,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER, +/* 150 h 151 i 152 j 153 k 154 l 155 m 156 n 157 o */ + C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER, +/* 160 p 161 q 162 r 163 s 164 t 165 u 166 v 167 w */ + C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER,C_LOWER, +/* 170 x 171 y 172 z 173 { 174 | 175 } 176 ~ 177 del */ + C_LOWER,C_LOWER,C_LOWER,C_PUNCT,C_PUNCT,C_PUNCT,C_PUNCT,C_CNTRL +}; diff --git a/libcommon/asciitype.h b/libcommon/asciitype.h @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. 
/* This notice may not be removed or altered from any source distribution. */
/*	Sccsid @(#)asciitype.h	1.6 (gritter) 9/9/05	*/

/*
 * Locale-independent character classes.
 *
 * Each entry of class_char[] holds exactly one of the values below for
 * the corresponding 7-bit character code. C_CNTRL is zero, i.e. the
 * absence of every other class bit, so it has to be tested with '=='
 * and cannot be tested with '&'.
 */
enum {
	C_CNTRL	= 0000,
	C_BLANK	= 0001,
	C_WHITE	= 0002,
	C_SPACE	= 0004,
	C_PUNCT	= 0010,
	C_OCTAL	= 0020,
	C_DIGIT	= 0040,
	C_UPPER	= 0100,
	C_LOWER	= 0200
};

extern const unsigned char	class_char[];

/*
 * Classification macros. Every macro first checks asciichar(c), so
 * class_char[] is only indexed with values in the range 0..0177; any
 * other value (including negative ones, which become large after the
 * unsigned conversion) yields false.
 *
 * All of these evaluate their argument more than once; do not pass
 * expressions with side effects.
 */
#define	asciichar(c)	((unsigned)(c) <= 0177)
#define	alnumchar(c)	(asciichar(c)&&(class_char[c]&\
				(C_DIGIT|C_OCTAL|C_UPPER|C_LOWER)))
#define	alphachar(c)	(asciichar(c)&&(class_char[c]&(C_UPPER|C_LOWER)))
#define	blankchar(c)	(asciichar(c)&&(class_char[c]&(C_BLANK)))
/* Fixed: the closing parenthesis of the outer expression was missing. */
#define	cntrlchar(c)	(asciichar(c)&&(class_char[c]==C_CNTRL))
#define	digitchar(c)	(asciichar(c)&&(class_char[c]&(C_DIGIT|C_OCTAL)))
#define	lowerchar(c)	(asciichar(c)&&(class_char[c]&(C_LOWER)))
#define	punctchar(c)	(asciichar(c)&&(class_char[c]&(C_PUNCT)))
#define	spacechar(c)	(asciichar(c)&&(class_char[c]&(C_BLANK|C_SPACE|C_WHITE)))
#define	upperchar(c)	(asciichar(c)&&(class_char[c]&(C_UPPER)))
#define	whitechar(c)	(asciichar(c)&&(class_char[c]&(C_BLANK|C_WHITE)))
#define	octalchar(c)	(asciichar(c)&&(class_char[c]&(C_OCTAL)))
#define	graphchar(c)	(asciichar(c)&&(class_char[c]&\
				(C_UPPER|C_LOWER|C_DIGIT|C_OCTAL|C_PUNCT)))
/* A space is printable although its class is C_BLANK, hence the extra test. */
#define	printchar(c)	((c)==' ' || (asciichar(c)&&(class_char[c]&\
				(C_UPPER|C_LOWER|C_DIGIT|C_OCTAL|C_PUNCT))))

/*
 * Case conversion for ASCII letters only; any other value is returned
 * unchanged.
 */
#define	upperconv(c)	(lowerchar(c) ? (c)-'a'+'A' : (c))
/* The replacement text of lowerconv continues on the following line. */
#define	lowerconv(c)	(upperchar(c) ? 
(c)-'A'+'a' : (c)) diff --git a/libcommon/atoll.h b/libcommon/atoll.h @@ -0,0 +1,8 @@ +/* Sccsid @(#)atoll.h 1.4 (gritter) 7/18/04 */ + +#if defined (__hpux) || defined (_AIX) || \ + defined (__FreeBSD__) && (__FreeBSD__) < 5 +extern long long strtoll(const char *nptr, char **endptr, int base); +extern unsigned long long strtoull(const char *nptr, char **endptr, int base); +extern long long atoll(const char *nptr); +#endif /* __hpux || _AIX || __FreeBSD__ < 5 */ diff --git a/libcommon/blank.h b/libcommon/blank.h @@ -0,0 +1,38 @@ +/* + * isblank() and iswblank() are not available with many pre-XSH6 + * systems. Check whether isblank was defined, and assume it is + * not available if not. + */ +/* Sccsid @(#)blank.h 1.3 (gritter) 5/1/04 */ + +#ifndef __dietlibc__ +#ifndef LIBCOMMON_BLANK_H +#define LIBCOMMON_BLANK_H 1 + +#include <ctype.h> +#include <wctype.h> + +#ifndef isblank + +static +#ifdef __GNUC__ +__inline__ +#endif /* __GNUC__ */ +int +my_isblank(int c) +{ + return c == ' ' || c == '\t'; +} +#define isblank(c) my_isblank(c) + +static int +my_iswblank(wint_t c) +{ + return c == L' ' || c == L'\t'; +} +#undef iswblank +#define iswblank(c) my_iswblank(c) + +#endif /* !isblank */ +#endif /* !LIBCOMMON_BLANK_H */ +#endif /* !__dietlibc__ */ diff --git a/libcommon/depsinc.mk b/libcommon/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libcommon_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libcommon_DEPDIR -lcommon diff --git a/libcommon/getdir.c b/libcommon/getdir.c @@ -0,0 +1,197 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)getdir.c 1.20 (gritter) 5/14/06 */ + +#ifndef __linux__ +/* + * 32-bit Solaris and Open UNIX do not have 64-bit getdents(); but + * having _FILE_OFFSET_BITS=64 will make it use a dirent64 struct + * on Open UNIX -> SEGV. + */ +#undef _FILE_OFFSET_BITS +#endif /* !__linux__ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> + +#if defined (__UCLIBC__) +#include <linux/types.h> +#include <linux/dirent.h> +#define getdents(a, b, c) __getdents64(a, b, c) +#define dirent dirent64 +extern int getdents(int, struct dirent *, size_t); +#elif defined (__GLIBC__) || defined (__FreeBSD__) || defined (_AIX) || \ + defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <dirent.h> +#define getdents(a, b, c) getdirentries((a), (char *)(b), (c), &(db->g_offs)) +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#undef d_ino +#endif /* __FreeBSD__ || __NetBSD__ || __OpenBSD__ || __DragonFly__ + || __APPLE__ */ +#elif defined (__dietlibc__) +#include <dirent.h> +#include <unistd.h> +#else /* !__GLIBC__, !__dietlibc__ */ +#ifdef __hpux +#define _KERNEL +#endif /* __hpux */ +#include <dirent.h> +#ifdef __hpux +#ifndef _INO64_T +typedef unsigned long long uint64_t; +typedef uint64_t ino64_t; +#endif /* !_INO64_T */ +#ifdef __LP64__ +#define dirent __dirent64 +#else /* !__LP64__ */ +#define dirent __dirent32 
+#endif /* !__LP64__ */ +#define d_reclen __d_reclen +#define d_name __d_name +#define d_ino __d_ino +#endif /* __hpux */ +#endif /* !__GLIBC__, !__dietlibc__ */ + +#include "getdir.h" + +#define DIBSIZE 5120 + +struct getdb { +#if !defined (__FreeBSD__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__DragonFly__) && !defined (__APPLE__) + off_t g_offs; +#else /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + long g_offs; +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + struct dirent *g_dirp; + const char *g_path; + struct direc g_dic; + union { + char g_dirbuf[DIBSIZE+1]; + struct dirent g_dummy[1]; + } g_u; + int g_num; + int g_fd; +}; + +struct getdb * +getdb_alloc(const char *path, int fd) +{ + struct getdb *db; + + if ((db = malloc(sizeof *db)) == NULL) + return NULL; + db->g_dirp = NULL; + db->g_offs = 0; + db->g_fd = fd; + db->g_path = path; + return db; +} + +void +getdb_free(struct getdb *db) +{ + free(db); +} + +struct direc * +getdir(struct getdb *db, int *err) +{ + int reclen; + + *err = 0; + while (db->g_dirp == NULL) + { + /*LINTED*/ + db->g_num = getdents(db->g_fd, + (struct dirent *)db->g_u.g_dirbuf, + DIBSIZE); + if (db->g_num <= 0) { + if (db->g_num < 0) + *err = errno; + db->g_offs = 0; + return NULL; + } + /*LINTED*/ + db->g_dirp = (struct dirent *)db->g_u.g_dirbuf; + while (db->g_dirp && +#if !defined (__FreeBSD__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__DragonFly__) && !defined (__APPLE__) + db->g_dirp->d_ino == 0 +#else /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + (db->g_dirp->d_fileno == 0 +#ifdef DT_WHT + || db->g_dirp->d_type == DT_WHT +#endif + ) +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + ) + { + next: +#ifndef __DragonFly__ + reclen = db->g_dirp->d_reclen; +#else + reclen = _DIRENT_DIRSIZ(db->g_dirp); +#endif + if ((db->g_num -= reclen) == 0 || reclen == 0) + db->g_dirp = 
NULL; + else + db->g_dirp = + /*LINTED*/ + (struct dirent *)((char *)db->g_dirp + + reclen); + } + } +#if !defined (__FreeBSD__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__DragonFly__) && !defined (__APPLE__) + if (db->g_dirp->d_ino == 0) + goto next; + db->g_dic.d_ino = db->g_dirp->d_ino; +#else /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + if (db->g_dirp->d_fileno == 0 +#ifdef DT_WHT + || db->g_dirp->d_type == DT_WHT +#endif + ) + { + goto next; + } + db->g_dic.d_ino = db->g_dirp->d_fileno; +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + db->g_dic.d_name = db->g_dirp->d_name; +#ifndef __DragonFly__ + reclen = db->g_dirp->d_reclen; +#else + reclen = _DIRENT_DIRSIZ(db->g_dirp); +#endif + if ((db->g_num -= reclen) == 0 || reclen == 0) + db->g_dirp = NULL; + else + /*LINTED*/ + db->g_dirp = (struct dirent *)((char *)db->g_dirp + reclen); + return &(db->g_dic); +} diff --git a/libcommon/getdir.h b/libcommon/getdir.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)getdir.h 1.4 (gritter) 10/19/03 */ + +#include <sys/types.h> + +struct direc { + unsigned long long d_ino; + char *d_name; +}; + +extern struct getdb *getdb_alloc(const char *, int); +extern void getdb_free(struct getdb *); +extern struct direc *getdir(struct getdb *, int *); diff --git a/libcommon/getopt.c b/libcommon/getopt.c @@ -0,0 +1,141 @@ +/* + * getopt() - command option parsing + * + * Gunnar Ritter, Freiburg i. Br., Germany, March 2002. + */ + +/* Sccsid @(#)getopt.c 1.6 (gritter) 12/16/07 */ + +#include <sys/types.h> +#include <alloca.h> +#include <string.h> +#include "msgselect.h" + +/* + * One should not think that re-implementing this is necessary, but + * + * - Some libcs print weird messages. + * + * - GNU libc getopt() is totally brain-damaged, as it requires special + * care _not_ to reorder parameters and can't be told to work correctly + * with ':' as first optstring character at all. + */ + +char *optarg = 0; +int optind = 1; +int opterr = 1; +int optopt = 0; +extern char *pfmt_label__; + +static void +error(const char *s, int c) +{ + /* + * Avoid including <unistd.h>, in case its getopt() declaration + * conflicts. 
+ */ + extern ssize_t write(int, const void *, size_t); + const char *msg = 0; + char *buf, *bp; + + if (pfmt_label__) + s = pfmt_label__; + switch (c) { + case '?': + msg = ": " msgselect("I","i") "llegal option -- "; + break; + case ':': + msg = ": " msgselect("O","o") "ption requires an argument -- "; + break; + } + bp = buf = alloca(strlen(s) + strlen(msg) + 2); + while (*s) + *bp++ = *s++; + while (*msg) + *bp++ = *msg++; + *bp++ = optopt; + *bp++ = '\n'; + write(2, buf, bp - buf); +} + +int +getopt(int argc, char *const argv[], const char *optstring) +{ + int colon; + static const char *lastp; + const char *curp; + + if (optstring[0] == ':') { + colon = 1; + optstring++; + } else + colon = 0; + if (lastp) { + curp = lastp; + lastp = 0; + } else { + if (optind >= argc || argv[optind] == 0 || + argv[optind][0] != '-' || + argv[optind][1] == '\0') + return -1; + if (argv[optind][1] == '-' && argv[optind][2] == '\0') { + optind++; + return -1; + } + curp = &argv[optind][1]; + } + optopt = curp[0] & 0377; + while (optstring[0]) { + if (optstring[0] == ':') { + optstring++; + continue; + } + if ((optstring[0] & 0377) == optopt) { + if (optstring[1] == ':') { + if (curp[1] != '\0') { + optarg = (char *)&curp[1]; + optind++; + } else { + if ((optind += 2) > argc) { + if (!colon && opterr) + error(argv[0], ':'); + return colon ? ':' : '?'; + } + optarg = argv[optind - 1]; + } + } else { + if (curp[1] != '\0') + lastp = &curp[1]; + else + optind++; + optarg = 0; + } + return optopt; + } + optstring++; + } + if (!colon && opterr) + error(argv[0], '?'); + if (curp[1] != '\0') + lastp = &curp[1]; + else + optind++; + optarg = 0; + return '?'; +} + +#ifdef __APPLE__ +/* + * Starting with Mac OS 10.5 Leopard, <unistd.h> turns getopt() + * into getopt$UNIX2003() by default. Consequently, this function + * is called instead of the one defined above. However, optind is + * still taken from this file, so in effect, options are not + * properly handled. 
Defining an own getopt$UNIX2003() function + * works around this issue. + */ +int +getopt$UNIX2003(int argc, char *const argv[], const char *optstring) +{ + return getopt(argc, argv, optstring); +} +#endif /* __APPLE__ */ diff --git a/libcommon/gmatch.c b/libcommon/gmatch.c @@ -0,0 +1,136 @@ +/* + * Derived from /usr/src/cmd/sh/expand.c, Unix 7th Edition: + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)gmatch.sl 1.5 (gritter) 5/29/05"; + +#include <stdlib.h> +#include <wchar.h> +#include <limits.h> + +#include "mbtowi.h" + +#define fetch(wc, s, n) ((mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowi(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc = WEOF, 1) : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)), (s) += (n), (wc)) + +int +gmatch(const char *s, const char *p) +{ + const char *bs = s; + int mb_cur_max = MB_CUR_MAX; + wint_t c, scc; + int n; + + if (fetch(scc, s, n) == WEOF) + return (0); + switch (fetch(c, p, n)) { + + case '[': { + int ok = 0, excl; + unsigned long lc = ULONG_MAX; + const char *bp; + + if (*p == '!') { + p++; + excl = 1; + } else + excl = 0; + fetch(c, p, n); + bp = p; + while (c != '\0') { + if (c == ']' && p > bp) + return (ok ^ excl ? gmatch(s, p) : 0); + else if (c == '-' && p > bp && *p != ']') { + if (*p == '\\') + p++; + if (fetch(c, p, n) == '\0') + break; + if (lc <= scc && scc <= c) + ok = 1; + } else { + if (c == '\\') { + if (fetch(c, p, n) == '\0') + break; + } + if (scc == (lc = c)) + ok = 1; + } + fetch(c, p, n); + } + return (0); + } + + case '\\': + fetch(c, p, n); + if (c == '\0') + return (0); + /*FALLTHRU*/ + + default: + if (c != scc) + return (0); + /*FALLTHRU*/ + + case '?': + return (scc ? 
gmatch(s, p) : 0); + + case '*': + if (*p == '\0') + return (1); + s = bs; + while (*s) { + if (gmatch(s, p)) + return (1); + fetch(scc, s, n); + } + return (0); + + case '\0': + return (scc == '\0'); + + case WEOF: + return (0); + + } +} diff --git a/libcommon/ib_alloc.c b/libcommon/ib_alloc.c @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_alloc.c 1.5 (gritter) 3/12/05 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> +#include <malloc.h> + +#include "memalign.h" +#include "iblok.h" + +struct iblok * +ib_alloc(int fd, unsigned blksize) +{ + static long pagesize; + struct iblok *ip; + struct stat st; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if (blksize == 0) { + if (fstat(fd, &st) < 0) + return NULL; + blksize = st.st_blksize > 0 ? 
st.st_blksize : 512; + } + if ((ip = calloc(1, sizeof *ip)) == NULL) + return NULL; + if ((ip->ib_blk = memalign(pagesize, blksize)) == NULL) { + free(ip); + return NULL; + } + ip->ib_blksize = blksize; + ip->ib_fd = fd; + ip->ib_mb_cur_max = MB_CUR_MAX; + return ip; +} diff --git a/libcommon/ib_close.c b/libcommon/ib_close.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_close.c 1.2 (gritter) 4/17/03 */ + +#include <unistd.h> + +#include "iblok.h" + +int +ib_close(struct iblok *ip) +{ + int fd; + + fd = ip->ib_fd; + ib_free(ip); + return close(fd); +} diff --git a/libcommon/ib_free.c b/libcommon/ib_free.c @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_free.c 1.2 (gritter) 4/17/03 */ + +#include <stdlib.h> + +#include "iblok.h" + +void +ib_free(struct iblok *ip) +{ + free(ip->ib_blk); + free(ip); +} diff --git a/libcommon/ib_getlin.c b/libcommon/ib_getlin.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)ib_getlin.c 1.2 (gritter) 4/17/03 */ + +#include <string.h> +#include <stdlib.h> +#include "iblok.h" + +size_t +ib_getlin(struct iblok *ip, char **line, size_t *alcd, + void *(*reallc)(void *, size_t)) +{ + char *nl; + size_t sz, llen = 0, nllen; + + for (;;) { + if (ip->ib_cur >= ip->ib_end) { + if (ip->ib_incompl) { + ip->ib_incompl = 0; + return 0; + } + if (ib_read(ip) == EOF) { + if (llen) { + ip->ib_incompl++; + (*line)[llen] = '\0'; + return llen; + } else + return 0; + } + /* + * ib_read() advances ib_cur since *ib_cur++ gives + * better performance than *++ib_cur for ib_get(). + * Go back again. + */ + ip->ib_cur--; + } + sz = ip->ib_end - ip->ib_cur; + if ((nl = memchr(ip->ib_cur, '\n', sz)) != NULL) { + sz = nl - ip->ib_cur + 1; + if ((nllen = llen + sz + 1) > *alcd) { + *line = reallc(*line, nllen); + *alcd = nllen; + } + memcpy(&(*line)[llen], ip->ib_cur, sz); + (*line)[llen + sz] = '\0'; + ip->ib_cur = nl + 1; + return llen + sz; + } + if ((nllen = llen + sz + 1) > *alcd) { + *line = reallc(*line, nllen); + *alcd = nllen; + } + memcpy(&(*line)[llen], ip->ib_cur, sz); + llen += sz; + ip->ib_cur = ip->ib_end; + } + /*NOTREACHED*/ + return 0; +} diff --git a/libcommon/ib_getw.c b/libcommon/ib_getw.c @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_getw.c 1.5 (gritter) 7/16/04 */ + +#include <stdlib.h> +#include <string.h> +#include "iblok.h" +#include "mbtowi.h" + +char * +ib_getw(struct iblok *ip, wint_t *wc, int *len) +{ + size_t rest; + int c, i, n; + + i = 0; + rest = ip->ib_mend - ip->ib_mcur; + if (rest && ip->ib_mcur > ip->ib_mbuf) { + do + ip->ib_mbuf[i] = ip->ib_mcur[i]; + while (i++, --rest); + } else if (ip->ib_incompl) { + ip->ib_incompl = 0; + *wc = WEOF; + ip->ib_mend = ip->ib_mcur = NULL; + return NULL; + } + if (i == 0) { + c = ib_get(ip); + if (c == EOF) { + *wc = WEOF; + ip->ib_mend = ip->ib_mcur = NULL; + return NULL; + } + ip->ib_mbuf[i++] = (char)c; + } + if (ip->ib_mbuf[0] & 0200) { + while (ip->ib_mbuf[i-1] != '\n' && i < ip->ib_mb_cur_max && + ip->ib_incompl == 0) { + c = ib_get(ip); + if (c != EOF) + ip->ib_mbuf[i++] = (char)c; + else + ip->ib_incompl = 1; + } + n = mbtowi(wc, ip->ib_mbuf, i); + if (n < 0) { + *len = 1; + *wc = WEOF; + } else if (n == 0) { + *len = 1; + *wc = '\0'; + } else + *len = n; + } else { + *wc = ip->ib_mbuf[0]; + *len = n = 1; + } + ip->ib_mcur = &ip->ib_mbuf[*len]; + ip->ib_mend = &ip->ib_mcur[i - *len]; + return ip->ib_mbuf; +} diff --git a/libcommon/ib_open.c b/libcommon/ib_open.c @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_open.c 1.2 (gritter) 4/17/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +#include "iblok.h" + +struct iblok * +ib_open(const char *name, unsigned blksize) +{ + struct iblok *ip; + int fd, err; + + if ((fd = open(name, O_RDONLY)) < 0) + return NULL; + if ((ip = ib_alloc(fd, blksize)) == NULL) { + err = errno; + close(fd); + errno = err; + } + return ip; +} diff --git a/libcommon/ib_popen.c b/libcommon/ib_popen.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)ib_popen.c 1.2 (gritter) 4/17/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> +#include <signal.h> + +#include "iblok.h" + +struct iblok * +ib_popen(const char *cmd, unsigned blksize) +{ + struct iblok *ip; + int fd[2], err; + pid_t pid; + char *shell; + + if (pipe(fd) < 0) + return NULL; + switch (pid = fork()) { + case -1: + return NULL; + case 0: + close(fd[0]); + dup2(fd[1], 1); + close(fd[1]); + if ((shell = getenv("SHELL")) == NULL) + shell = "/bin/sh"; + execl(shell, shell, "-c", cmd, NULL); + _exit(0177); + /*NOTREACHED*/ + } + close(fd[1]); + if ((ip = ib_alloc(fd[0], blksize)) == NULL) { + err = errno; + close(fd[0]); + errno = err; + } + ip->ib_pid = pid; + return ip; +} + +int +ib_pclose(struct iblok *ip) +{ + struct sigaction oldhup, oldint, oldquit, act; + int status; + + close(ip->ib_fd); + act.sa_handler = SIG_IGN; + sigemptyset(&act.sa_mask); + act.sa_flags = 0; + sigaction(SIGHUP, &act, &oldhup); + sigaction(SIGINT, &act, &oldint); + sigaction(SIGQUIT, &act, &oldquit); + while (waitpid(ip->ib_pid, &status, 0) < 0 && errno == EINTR); + sigaction(SIGHUP, &oldhup, NULL); + sigaction(SIGINT, &oldint, NULL); + sigaction(SIGQUIT, &oldquit, NULL); + return status; +} diff --git a/libcommon/ib_read.c b/libcommon/ib_read.c @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)ib_read.c 1.2 (gritter) 4/17/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +#include "iblok.h" + +int +ib_read(struct iblok *ip) +{ + ssize_t sz; + + do { + if ((sz = read(ip->ib_fd, ip->ib_blk, ip->ib_blksize)) > 0) { + ip->ib_endoff += sz; + ip->ib_cur = ip->ib_blk; + ip->ib_end = &ip->ib_blk[sz]; + return *ip->ib_cur++ & 0377; + } + } while (sz < 0 && errno == EINTR); + if (sz < 0) + ip->ib_errno = errno; + ip->ib_cur = ip->ib_end = NULL; + return EOF; +} diff --git a/libcommon/ib_seek.c b/libcommon/ib_seek.c @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)ib_seek.c 1.4 (gritter) 5/8/03 */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +#include "iblok.h" + +off_t +ib_seek(struct iblok *ip, off_t off, int whence) +{ + if (whence == SEEK_CUR) { + off = ip->ib_endoff - (ip->ib_end - ip->ib_cur); + whence = SEEK_SET; + } + if (ip->ib_seekable && whence == SEEK_SET && ip->ib_cur && ip->ib_end && + off < ip->ib_endoff && + off >= ip->ib_endoff - (ip->ib_end - ip->ib_blk)) { + ip->ib_cur = ip->ib_end - (ip->ib_endoff - off); + return off; + } + if ((off = lseek(ip->ib_fd, off, whence)) == (off_t)-1) + return -1; + ip->ib_cur = ip->ib_end = NULL; + ip->ib_endoff = off; + ip->ib_seekable = 1; + return off; +} diff --git a/libcommon/iblok.h b/libcommon/iblok.h @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)iblok.h 1.5 (gritter) 7/16/04 */ + +/* + * Functions to read a file sequentially. 
/*
 * Functions to read a file sequentially.
 */

/*
 * Guard against repeated inclusion; struct iblok must not be defined
 * twice in one translation unit.
 */
#ifndef	LIBCOMMON_IBLOK_H
#define	LIBCOMMON_IBLOK_H

#include <sys/types.h>		/* for off_t, pid_t */
#include <stdio.h>		/* for EOF */
#include <wchar.h>		/* for wchar_t */
#include <limits.h>		/* for MB_LEN_MAX */

struct	iblok {
	long long	ib_endoff;	/* offset of endc from start of file */
	char	ib_mbuf[MB_LEN_MAX+1];	/* multibyte overflow buffer */
	char	*ib_mcur;		/* next byte to read in ib_mbuf */
	char	*ib_mend;		/* one beyond last byte in ib_mbuf */
	char	*ib_blk;		/* buffered data */
	char	*ib_cur;		/* next character in ib_blk */
	char	*ib_end;		/* one beyond last byte in ib_blk */
	int	ib_fd;			/* input file descriptor */
	int	ib_errno;		/* errno on error, or 0 */
	int	ib_incompl;		/* had an incomplete last line */
	int	ib_mb_cur_max;		/* MB_CUR_MAX at time of ib_alloc() */
	int	ib_seekable;		/* had a successful lseek() */
	pid_t	ib_pid;			/* child from ib_popen() */
	unsigned	ib_blksize;	/* buffer size */
};

/*
 * Allocate an input buffer with file descriptor fd. blksize may be
 * either the size of a buffer to allocate in ib_blk, or 0 if the
 * size is determined automatically. On error, NULL is returned and
 * errno indicates the offending error.
 */
extern struct iblok	*ib_alloc(int fd, unsigned blksize);

/*
 * Deallocate the passed input buffer. The file descriptor is not
 * closed.
 */
extern void	ib_free(struct iblok *ip);

/*
 * Open file name and do ib_alloc() on the descriptor.
 */
extern struct iblok	*ib_open(const char *name, unsigned blksize);

/*
 * Close the file descriptor in ip and do ib_free(). Return value is
 * the result of close().
 */
extern int	ib_close(struct iblok *ip);

/*
 * A workalike of popen(cmd, "r") using iblok facilities.
 */
extern struct iblok	*ib_popen(const char *cmd, unsigned blksize);

/*
 * Close an iblok opened with ib_popen().
 */
extern int	ib_pclose(struct iblok *ip);

/*
 * Read new input buffer. Returns the next character (or EOF) and advances
 * ib_cur by one above the bottom of the buffer.
 */
extern int	ib_read(struct iblok *ip);

/*
 * Get next character. Return EOF at end-of-file or read error.
 * The argument is evaluated more than once.
 */
#define	ib_get(ip)	((ip)->ib_cur < (ip)->ib_end ? *(ip)->ib_cur++ & 0377 :\
				ib_read(ip))

/*
 * Unget a character. Note that this implementation alters the read buffer.
 * Caution: Calling this macro more than once might underflow ib_blk.
 */
#define	ib_unget(c, ip)	(*(--(ip)->ib_cur) = (char)(c))

/*
 * Get file offset of last read character.
 */
#define	ib_offs(ip)	((ip)->ib_endoff - ((ip)->ib_end - (ip)->ib_cur - 1))

/*
 * Read a wide character using ib_get() facilities. *wc is used to store
 * the wide character, or WEOF if an invalid byte sequence was found.
 * The number of bytes consumed is stored in *len. Return value is the
 * corresponding byte sequence, or NULL at end-of-file in input.
 *
 * Note that it is not possible to mix calls to ib_getw() with calls to
 * ib_get(), ib_unget() or ib_seek() unless the last character read by
 * ib_getw() was L'\n'.
 */
extern char	*ib_getw(struct iblok *ip, wint_t *wc, int *len);

/*
 * Get a line from ip, returning the line length. Further arguments are either
 * the pointer to a malloc()ed buffer and a pointer to its size, or (NULL, 0)
 * if ib_getlin() shall allocate the buffer itself. ib_getlin() will use
 * the realloc-style function reallc() to increase the buffer if necessary;
 * this function is expected never to fail (i. e., it must longjmp() or abort
 * if it cannot allocate a buffer of the demanded size).
 * On end-of-file or error, 0 is returned.
 */
extern size_t	ib_getlin(struct iblok *ip, char **line, size_t *alcd,
			void *(*reallc)(void *, size_t));

#endif	/* !LIBCOMMON_IBLOK_H */
/*
 * Like lseek().
 */
extern off_t	ib_seek(struct iblok *ip, off_t off, int whence);

/*	Sccsid @(#)mbtowi.h	1.2 (gritter) 7/16/04	*/

#ifndef	LIBCOMMON_MBTOWI_H
#define	LIBCOMMON_MBTOWI_H

#include <stdlib.h>	/* for mbtowc() */
#include <wchar.h>	/* for wint_t, WEOF */

/*
 * mbtowc() with a wint_t result. Returns what mbtowc() returns. If
 * mbtowc() reports an error (negative result), WEOF is stored in *pwi
 * instead of an uninitialized value.
 */
static
#if defined (__GNUC__) || defined (__USLC__) || defined (__INTEL_COMPILER) || \
		defined (__IBMC__) || defined (__SUNPRO_C)
	inline
#endif
	int
mbtowi(wint_t *pwi, const char *s, size_t n)
{
	wchar_t	wc = L'\0';
	int	i;

	i = mbtowc(&wc, s, n);
	*pwi = i < 0 ? WEOF : (wint_t)wc;
	return i;
}

#endif	/* !LIBCOMMON_MBTOWI_H */

/*	Sccsid @(#)memalign.c	1.7 (gritter) 1/22/06	*/

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (_AIX) || \
	defined (__NetBSD__) || defined (__OpenBSD__) || \
	defined (__DragonFly__) || defined (__APPLE__)
/*
 * FreeBSD malloc(3) promises to page-align the return of malloc() calls
 * if size is at least a page. This serves for a poor man's memalign()
 * implementation that matches our needs.
 */
#include <unistd.h>
#include <stdlib.h>

#include "memalign.h"

/*
 * Only page alignment is supported: any other alignment yields NULL.
 * Requests smaller than a page are rounded up to one page.
 */
void *
memalign(size_t alignment, size_t size)
{
	static long	pagesize;

	if (pagesize == 0)
		if ((pagesize = sysconf(_SC_PAGESIZE)) < 0)
			pagesize = 4096;	/* same fallback as ob_alloc() */
	if (alignment != (size_t)pagesize)
		return NULL;
	if (size < (size_t)pagesize)
		size = pagesize;
	return malloc(size);
}
#endif	/* __FreeBSD__ || __dietlibc__ || _AIX || __NetBSD__ || __OpenBSD__ ||
	__DragonFly__ || __APPLE__ */
+ */ +/* Sccsid @(#)memalign.h 1.7 (gritter) 1/22/06 */ + +#ifndef LIBCOMMON_MEMALIGN_H +#define LIBCOMMON_MEMALIGN_H + +#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (_AIX) || \ + defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) +#include <stdlib.h> + +extern void *memalign(size_t, size_t); +#endif /* __FreeBSD__ || __dietlibc__ || _AIX || __NetBSD__ || __OpenBSD__ || + __DragonFly__ || __APPLE__ */ +#endif /* !LIBCOMMON_MEMALIGN_H */ diff --git a/libcommon/mkfile b/libcommon/mkfile @@ -0,0 +1,61 @@ +LIB = libcommon.a +LOBJ = asciitype.o ib_alloc.o ib_close.o ib_free.o ib_getlin.o ib_getw.o \ + ib_open.o ib_popen.o ib_read.o ib_seek.o oblok.o sfile.o strtol.o \ + getdir.o regexpr.o gmatch.o utmpx.o memalign.o pathconf.o \ + sigset.o signal.o sigrelse.o sighold.o sigignore.o sigpause.o \ + getopt.o pfmt.o vpfmt.o setlabel.o setuxlabel.o pfmt_label.o sysv3.o +TARG = CHECK +CLEAN_FILES = alloca.h malloc.h utmpx.h + +<$mkbuild/mk.common + +libcommon.a:Q: headers $LOBJ + echo AR $target + $AR -rv $target $LOBJ + echo RANLIB $target + $RANLIB $target + +CHECK:Q: CHECK.c + echo CC CHECK + $CC $CFLAGS $CPPFLAGS -E CHECK.c >CHECK + +headers:Q: CHECK + one() { + echo "" + rm -f "$1.h" + if grep "$1_h[ ]*=[ ]*[^0][ ]*;" CHECK >/dev/null; + then + ln -s "_$1.h" "$1.h" + fi + } + one alloca + one malloc + one utmpx + +asciitype.o: asciitype.h +ib_alloc.o: iblok.h +ib_close.o: iblok.h +ib_free.o: iblok.h +ib_getlin.o: iblok.h +ib_getw.o: iblok.h +ib_open.o: iblok.h +ib_read.o: iblok.h +ib_seek.o: iblok.h +iblok.o: iblok.h +oblok.o: oblok.h +sfile.o: sfile.h +getdir.o: getdir.h +regexpr.o: regexpr.h regexp.h +pfmt.o: pfmt.h +vpfmt.o: pfmt.h +setlabel.o: pfmt.h +setuxlabel.o: pfmt.h msgselect.h +getopt.o: msgselect.h +sighold.o: sigset.h +sigignore.o: sigset.h +sigpause.o: sigset.h +sigrelse.o: sigset.h +sigset.o: sigset.h +signal.o: sigset.h +pathconf.o: pathconf.h + diff --git a/libcommon/msgselect.h 
/*
 * Copyright (c) 2003 Gunnar Ritter
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute
 * it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 */
/*	Sccsid @(#)msgselect.h	1.2 (gritter) 9/21/03	*/

/*
 * Compile-time choice between two alternatives: msgselect(a, b)
 * expands to a if MSG_LEVEL is 1 and to b otherwise. MSG_LEVEL
 * defaults to 0 and may be predefined, e. g. on the compiler command
 * line, before this header is included.
 *
 * NOTE(review): the expansion is left unparenthesized as in the
 * original; callers may depend on that (for instance for string
 * literal concatenation) -- confirm before changing it.
 */
#ifndef	MSG_LEVEL
#define	MSG_LEVEL	0
#endif

#if MSG_LEVEL == 1
#define	msgselect(a, b)	a
#else
#define	msgselect(a, b)	b
#endif
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)oblok.c 1.7 (gritter) 7/16/04 */ + +#include <sys/types.h> +#include <unistd.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <malloc.h> + +#include "memalign.h" +#include "oblok.h" + +struct list { + struct list *l_nxt; + struct oblok *l_op; +}; + +static struct list *bloks; +static int exitset; + +int +ob_clear(void) +{ + struct list *lp; + int val = 0; + + for (lp = bloks; lp; lp = lp->l_nxt) { + if (ob_flush(lp->l_op) < 0) + val = -1; + else if (val >= 0) + val++; + } + return val; +} + +static void +add(struct oblok *op) +{ + struct list *lp, *lq; + + if ((lp = calloc(1, sizeof *lp)) != NULL) { + lp->l_nxt = NULL; + lp->l_op = op; + if (bloks) { + for (lq = bloks; lq->l_nxt; lq = lq->l_nxt); + lq->l_nxt = lp; + } else + bloks = lp; + if (exitset == 0) { + exitset = 1; + atexit((void (*)(void))ob_clear); + } + } +} + +static void +del(struct oblok *op) +{ + struct list *lp, *lq = NULL; + + if (bloks) { + for (lp = bloks; lp && lp->l_op != op; lp = lp->l_nxt) + lq = lp; + if (lp) { + if (lq) + lq->l_nxt = lp->l_nxt; + if (lp == bloks) + bloks = bloks->l_nxt; + free(lp); + } + } +} + +struct oblok * +ob_alloc(int fd, enum ob_mode bf) +{ + static long pagesize; + struct oblok *op; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if ((op = memalign(pagesize, sizeof *op)) == NULL) + return NULL; + memset(op, 0, sizeof *op); + op->ob_fd = fd; + switch (bf) { + case OB_EBF: + op->ob_bf = isatty(fd) ? 
OB_LBF : OB_FBF; + break; + default: + op->ob_bf = bf; + } + add(op); + return op; +} + +ssize_t +ob_free(struct oblok *op) +{ + ssize_t wrt; + + wrt = ob_flush(op); + del(op); + free(op); + return wrt; +} + +static ssize_t +swrite(int fd, const char *data, size_t sz) +{ + ssize_t wo, wt = 0; + + do { + if ((wo = write(fd, data + wt, sz - wt)) < 0) { + if (errno == EINTR) + continue; + else + return wt; + } + wt += wo; + } while (wt < sz); + return sz; +} + +ssize_t +ob_write(struct oblok *op, const char *data, size_t sz) +{ + ssize_t wrt; + size_t di, isz; + + switch (op->ob_bf) { + case OB_NBF: + wrt = swrite(op->ob_fd, data, sz); + op->ob_wrt += wrt; + if (wrt != sz) { + op->ob_bf = OB_EBF; + writerr(op, sz, wrt>0?wrt:0); + return -1; + } + return wrt; + case OB_LBF: + case OB_FBF: + isz = sz; + while (op->ob_pos + sz > (OBLOK)) { + di = (OBLOK) - op->ob_pos; + sz -= di; + if (op->ob_pos > 0) { + memcpy(&op->ob_blk[op->ob_pos], data, di); + wrt = swrite(op->ob_fd, op->ob_blk, (OBLOK)); + } else + wrt = swrite(op->ob_fd, data, (OBLOK)); + op->ob_wrt += wrt; + if (wrt != (OBLOK)) { + op->ob_bf = OB_EBF; + writerr(op, (OBLOK), wrt>0?wrt:0); + return -1; + } + data += di; + op->ob_pos = 0; + } + if (op->ob_bf == OB_LBF) { + const char *cp; + + cp = data; + while (cp < &data[sz]) { + if (*cp == '\n') { + di = cp - data + 1; + sz -= di; + if (op->ob_pos > 0) { + memcpy(&op->ob_blk[op->ob_pos], + data, di); + wrt = swrite(op->ob_fd, + op->ob_blk, + op->ob_pos + di); + } else + wrt = swrite(op->ob_fd, + data, di); + op->ob_wrt += wrt; + if (wrt != op->ob_pos + di) { + op->ob_bf = OB_EBF; + writerr(op, di, wrt>0?wrt:0); + return -1; + } + op->ob_pos = 0; + data += di; + cp = data; + } + cp++; + } + } + if (sz == (OBLOK)) { + wrt = swrite(op->ob_fd, data, sz); + op->ob_wrt += wrt; + if (wrt != sz) { + op->ob_bf = OB_EBF; + writerr(op, sz, wrt>0?wrt:0); + return -1; + } + } else if (sz) { + memcpy(&op->ob_blk[op->ob_pos], data, sz); + op->ob_pos += sz; + } + return isz; + 
case OB_EBF: + ; + } + return -1; +} + +ssize_t +ob_flush(struct oblok *op) +{ + ssize_t wrt = 0; + + if (op->ob_pos) { + wrt = swrite(op->ob_fd, op->ob_blk, op->ob_pos); + op->ob_wrt += wrt; + if (wrt != op->ob_pos) { + op->ob_bf = OB_EBF; + writerr(op, op->ob_pos, wrt>0?wrt:0); + wrt = -1; + } + op->ob_pos = 0; + } + return wrt; +} + +int +ob_chr(int c, struct oblok *op) +{ + char b; + ssize_t wrt; + + b = (char)c; + wrt = ob_write(op, &b, 1); + return wrt < 0 ? EOF : c; +} diff --git a/libcommon/oblok.h b/libcommon/oblok.h @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
/*	Sccsid @(#)oblok.h	1.3 (gritter) 4/17/03	*/

/*
 * Guard against repeated inclusion. The #ifndef OBLOK below cannot do
 * that, since OBLOK is an enumeration constant and not a macro unless
 * the including code predefines it.
 */
#ifndef	LIBCOMMON_OBLOK_H
#define	LIBCOMMON_OBLOK_H

#include <sys/types.h>

#ifndef	OBLOK
enum	{
	OBLOK = 4096
};
#endif	/* !OBLOK */

enum	ob_mode {
	OB_EBF = 0,		/* error or mode unset */
	OB_NBF = 1,		/* not buffered */
	OB_LBF = 2,		/* line buffered */
	OB_FBF = 3		/* fully buffered */
};

struct	oblok {
	char	ob_blk[OBLOK];		/* buffered data */
	long long	ob_wrt;		/* amount of data written */
	int	ob_pos;			/* position of first empty data byte */
	int	ob_fd;			/* file descriptor to write to */
	enum ob_mode	ob_bf;		/* buffering mode */
};

/*
 * Allocate an output buffer with file descriptor fd and buffer mode bf.
 * If bf is OB_EBF, the choice is made dependent upon the file type.
 * NULL is returned if no memory is available.
 */
extern struct oblok	*ob_alloc(int fd, enum ob_mode bf);

/*
 * Deallocate the passed output buffer, flushing all data. The file
 * descriptor is not closed. Returns -1 if flushing fails.
 */
extern ssize_t	ob_free(struct oblok *op);

/*
 * Write data of length sz to the passed output buffer. Returns -1 on
 * error or the amount of data written.
 */
extern ssize_t	ob_write(struct oblok *op, const char *data, size_t sz);

/*
 * Flush all data in the passed output buffer. Returns -1 on error or
 * the amount of data written; 0 is success and means 'nothing to flush'.
 * The underlying device is not flushed (i. e. no fsync() is performed).
 */
extern ssize_t	ob_flush(struct oblok *op);

/*
 * Flush all output buffers. Called automatically using atexit(). Returns
 * -1 on error or the number of buffers flushed; 0 is success.
 */
extern int	ob_clear(void);

/*
 * putc() workalike. Both arguments are evaluated more than once.
 * The stored byte is returned as an unsigned char converted to int, so
 * that the byte 0377 is not confused with EOF where char is signed.
 */
#define	ob_put(c, op)	((op)->ob_bf != OB_FBF || (op)->ob_pos >= (OBLOK) - 1 ?\
				ob_chr((c), (op)) : \
				(int)(unsigned char)((op)->ob_blk[(op)->ob_pos++] = (char)(c)))

#endif	/* !LIBCOMMON_OBLOK_H */
/*
 * fputc() workalike.
 */
extern int	ob_chr(int c, struct oblok *op);

/*
 * This function must be supplied by the calling code; it is called on
 * write error.
 */
extern void	writerr(struct oblok *op, int count, int written);

/*
 * Copyright (c) 2004 Gunnar Ritter
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute
 * it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 */
/*	Sccsid @(#)pathconf.c	1.2 (gritter) 5/1/04	*/

#ifdef	__dietlibc__
#include <unistd.h>
#include "pathconf.h"

/*
 * Fixed answers shared by fpathconf() and pathconf(): 1024 for
 * _PC_PATH_MAX, 0 for _PC_VDISABLE, -1 for every other name.
 */
static long
pc(int name)
{
	if (name == _PC_PATH_MAX)
		return 1024;
	if (name == _PC_VDISABLE)
		return 0;
	return -1;
}

/* The descriptor is not examined; the answer depends on name only. */
long
fpathconf(int fildes, int name)
{
	return pc(name);
}

/* The path is not examined; the answer depends on name only. */
long
pathconf(const char *path, int name)
{
	return pc(name);
}
#endif	/* __dietlibc__ */

/*
 * Copyright (c) 2004 Gunnar Ritter
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute
 * it freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 *
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 *
 * 3. This notice may not be removed or altered from any source distribution.
 */
/*	Sccsid @(#)pathconf.h	1.2 (gritter) 5/1/04	*/

/* Prototypes for the replacements in pathconf.c (dietlibc builds only). */
#ifdef	__dietlibc__
#include <unistd.h>

extern long	fpathconf(int, int);
extern long	pathconf(const char *, int);
#endif	/* __dietlibc__ */
+ */ +/* Sccsid @(#)pfmt.c 1.2 (gritter) 9/21/03 */ + +#include <stdio.h> +#include <stdarg.h> + +#include "pfmt.h" + +int +pfmt(FILE *stream, long flags, const char *fmt, ...) +{ + va_list ap; + int i; + + va_start(ap, fmt); + i = vpfmt(stream, flags, fmt, ap); + va_end(ap); + return i; +} diff --git a/libcommon/pfmt.h b/libcommon/pfmt.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
/*	Sccsid @(#)pfmt.h	1.2 (gritter) 9/21/03	*/

#include <stdio.h>
#include <stdarg.h>

/* Formatted message output; pfmt() is the variadic form of vpfmt(). */
extern int	pfmt(FILE *stream, long flags, const char *format, ...);
extern int	vpfmt(FILE *stream, long flags, const char *format,
			va_list ap);

/* Values for the flags argument of pfmt() and vpfmt(). */
#define	MM_ERROR	0x000
#define	MM_HALT		0x001
#define	MM_WARNING	0x002
#define	MM_INFO		0x004
#define	MM_ACTION	0x100
#define	MM_STD		0x000
#define	MM_NOSTD	0x200
#define	MM_GET		0x000
#define	MM_NOGET	0x400

extern int	setlabel(const char *label);
extern int	setuxlabel(const char *label);

/* Both macros simply yield their string argument. */
#define	setcat(s)	(s)
#define	gettxt(n, s)	(s)

/* libcommon/pfmt_label.c */
char	*pfmt_label__;
nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define REGEXP_H_USED __attribute__ ((used)) +#elif defined __GNUC__ +#define REGEXP_H_USED __attribute__ ((unused)) +#else +#define REGEXP_H_USED +#endif +static const char regexp_h_sccsid[] REGEXP_H_USED = + "@(#)regexp.sl 1.56 (gritter) 5/29/05"; + +#if !defined (REGEXP_H_USED_FROM_VI) && !defined (__dietlibc__) +#define REGEXP_H_WCHARS +#endif + +#define CBRA 2 +#define CCHR 4 +#define CDOT 8 +#define CCL 12 +/* CLNUM 14 used in sed */ +/* CEND 16 used in sed */ +#define CDOL 20 +#define CCEOF 22 +#define CKET 24 +#define CBACK 36 +#define CNCL 40 +#define CBRC 44 +#define CLET 48 +#define CCH1 52 +#define CCH2 56 +#define CCH3 60 + +#define STAR 01 +#define RNGE 03 +#define REGEXP_H_LEAST 0100 + +#ifdef REGEXP_H_WCHARS +#define CMB 0200 +#else /* !REGEXP_H_WCHARS */ +#define CMB 0 +#endif /* !REGEXP_H_WCHARS */ + +#define NBRA 9 + +#define PLACE(c) ep[c >> 3] |= bittab[c & 07] +#define ISTHERE(c) (ep[c >> 3] & bittab[c & 
07]) + +#ifdef REGEXP_H_WCHARS +#define REGEXP_H_IS_THERE(ep, c) ((ep)[c >> 3] & bittab[c & 07]) +#endif + +#include <ctype.h> +#include <string.h> +#include <limits.h> +#ifdef REGEXP_H_WCHARS +#include <stdlib.h> +#include <wchar.h> +#include <wctype.h> +#endif /* REGEXP_H_WCHARS */ + +#define regexp_h_uletter(c) (isalpha(c) || (c) == '_') +#ifdef REGEXP_H_WCHARS +#define regexp_h_wuletter(c) (iswalpha(c) || (c) == L'_') + +/* + * Used to allocate memory for the multibyte star algorithm. + */ +#ifndef regexp_h_malloc +#define regexp_h_malloc(n) malloc(n) +#endif +#ifndef regexp_h_free +#define regexp_h_free(p) free(p) +#endif + +/* + * Can be predefined to 'inline' to inline some multibyte functions; + * may improve performance for files that contain many multibyte + * sequences. + */ +#ifndef regexp_h_inline +#define regexp_h_inline +#endif + +/* + * Mask to determine whether the first byte of a sequence possibly + * starts a multibyte character. Set to 0377 to force mbtowc() for + * any byte sequence (except 0). + */ +#ifndef REGEXP_H_MASK +#define REGEXP_H_MASK 0200 +#endif +#endif /* REGEXP_H_WCHARS */ + +/* + * For regexpr.h. 
+ */ +#ifndef regexp_h_static +#define regexp_h_static +#endif +#ifndef REGEXP_H_STEP_INIT +#define REGEXP_H_STEP_INIT +#endif +#ifndef REGEXP_H_ADVANCE_INIT +#define REGEXP_H_ADVANCE_INIT +#endif + +char *braslist[NBRA]; +char *braelist[NBRA]; +int nbra; +char *loc1, *loc2, *locs; +int sed; +int nodelim; + +regexp_h_static int circf; +regexp_h_static int low; +regexp_h_static int size; + +regexp_h_static unsigned char bittab[] = { + 1, + 2, + 4, + 8, + 16, + 32, + 64, + 128 +}; +static int regexp_h_advance(register const char *lp, + register const char *ep); +static void regexp_h_getrnge(register const char *str, int least); + +static const char *regexp_h_bol; /* beginning of input line (for \<) */ + +#ifdef REGEXP_H_WCHARS +static int regexp_h_wchars; +static int regexp_h_mbcurmax; + +static const char *regexp_h_firstwc; /* location of first + multibyte character + on input line */ + +#define regexp_h_getwc(c) { \ + if (regexp_h_wchars) { \ + char mbbuf[MB_LEN_MAX + 1], *mbptr; \ + wchar_t wcbuf; \ + int mb, len; \ + mbptr = mbbuf; \ + do { \ + mb = GETC(); \ + *mbptr++ = mb; \ + *mbptr = '\0'; \ + } while ((len = mbtowc(&wcbuf, mbbuf, regexp_h_mbcurmax)) < 0 \ + && mb != eof && mbptr < mbbuf + MB_LEN_MAX); \ + if (len == -1) \ + ERROR(67); \ + c = wcbuf; \ + } else { \ + c = GETC(); \ + } \ +} + +#define regexp_h_store(wc, mb, me) { \ + int len; \ + if (wc == WEOF) \ + ERROR(67); \ + if ((len = me - mb) <= regexp_h_mbcurmax) { \ + char mt[MB_LEN_MAX]; \ + if (wctomb(mt, wc) >= len) \ + ERROR(50); \ + } \ + switch (len = wctomb(mb, wc)) { \ + case -1: \ + ERROR(67); \ + case 0: \ + mb++; \ + break; \ + default: \ + mb += len; \ + } \ +} + +static regexp_h_inline wint_t +regexp_h_fetchwc(const char **mb, int islp) +{ + wchar_t wc; + int len; + + if ((len = mbtowc(&wc, *mb, regexp_h_mbcurmax)) < 0) { + (*mb)++; + return WEOF; + } + if (islp && regexp_h_firstwc == NULL) + regexp_h_firstwc = *mb; + /*if (len == 0) { + (*mb)++; + return L'\0'; + } handled in 
singlebyte code */ + *mb += len; + return wc; +} + +#define regexp_h_fetch(mb, islp) ((*(mb) & REGEXP_H_MASK) == 0 ? \ + (*(mb)++&0377): \ + regexp_h_fetchwc(&(mb), islp)) + +static regexp_h_inline wint_t +regexp_h_showwc(const char *mb) +{ + wchar_t wc; + + if (mbtowc(&wc, mb, regexp_h_mbcurmax) < 0) + return WEOF; + return wc; +} + +#define regexp_h_show(mb) ((*(mb) & REGEXP_H_MASK) == 0 ? (*(mb)&0377): \ + regexp_h_showwc(mb)) + +/* + * Return the character immediately preceding mb. Since no byte is + * required to be the first byte of a character, the longest multibyte + * character ending at &[mb-1] is searched. + */ +static regexp_h_inline wint_t +regexp_h_previous(const char *mb) +{ + const char *p = mb; + wchar_t wc, lastwc = WEOF; + int len, max = 0; + + if (regexp_h_firstwc == NULL || mb <= regexp_h_firstwc) + return (mb > regexp_h_bol ? (mb[-1] & 0377) : WEOF); + while (p-- > regexp_h_bol) { + mbtowc(NULL, NULL, 0); + if ((len = mbtowc(&wc, p, mb - p)) >= 0) { + if (len < max || len < mb - p) + break; + max = len; + lastwc = wc; + } else if (len < 0 && max > 0) + break; + } + return lastwc; +} + +#define regexp_h_cclass(set, c, af) \ + ((c) == 0 || (c) == WEOF ? 0 : ( \ + ((c) > 0177) ? \ + regexp_h_cclass_wc(set, c, af) : ( \ + REGEXP_H_IS_THERE((set)+1, (c)) ? 
(af) : !(af) \ + ) \ + ) \ + ) + +static regexp_h_inline int +regexp_h_cclass_wc(const char *set, register wint_t c, int af) +{ + register wint_t wc, wl = WEOF; + const char *end; + + end = &set[18] + set[0] - 1; + set += 17; + while (set < end) { + wc = regexp_h_fetch(set, 0); +#ifdef REGEXP_H_VI_BACKSLASH + if (wc == '\\' && set < end && + (*set == ']' || *set == '-' || + *set == '^' || *set == '\\')) { + wc = regexp_h_fetch(set, 0); + } else +#endif /* REGEXP_H_VI_BACKSLASH */ + if (wc == '-' && wl != WEOF && set < end) { + wc = regexp_h_fetch(set, 0); +#ifdef REGEXP_H_VI_BACKSLASH + if (wc == '\\' && set < end && + (*set == ']' || *set == '-' || + *set == '^' || *set == '\\')) { + wc = regexp_h_fetch(set, 0); + } +#endif /* REGEXP_H_VI_BACKSLASH */ + if (c > wl && c < wc) + return af; + } + if (c == wc) + return af; + wl = wc; + } + return !af; +} +#else /* !REGEXP_H_WCHARS */ +#define regexp_h_wchars 0 +#define regexp_h_getwc(c) { c = GETC(); } +#endif /* !REGEXP_H_WCHARS */ + +regexp_h_static char * +compile(char *instring, char *ep, const char *endbuf, int seof) +{ + INIT /* Dependent declarations and initializations */ + register int c; + register int eof = seof; + char *lastep = instring; + int cclcnt; + char bracket[NBRA], *bracketp; + int closed; + char neg; + int lc; + int i, cflg; + +#ifdef REGEXP_H_WCHARS + char *eq; + regexp_h_mbcurmax = MB_CUR_MAX; + regexp_h_wchars = regexp_h_mbcurmax > 1 ? 
CMB : 0; +#endif + lastep = 0; + bracketp = bracket; + if((c = GETC()) == eof || c == '\n') { + if (c == '\n') { + UNGETC(c); + nodelim = 1; + } + if(*ep == 0 && !sed) + ERROR(41); + if (bracketp > bracket) + ERROR(42); + RETURN(ep); + } + circf = closed = nbra = 0; + if (c == '^') + circf++; + else + UNGETC(c); + for (;;) { + if (ep >= endbuf) + ERROR(50); + regexp_h_getwc(c); + if(c != '*' && ((c != '\\') || (PEEKC() != '{'))) + lastep = ep; + if (c == eof) { + *ep++ = CCEOF; + if (bracketp > bracket) + ERROR(42); + RETURN(ep); + } + switch (c) { + + case '.': + *ep++ = CDOT|regexp_h_wchars; + continue; + + case '\n': + if (sed == 0) { + UNGETC(c); + *ep++ = CCEOF; + nodelim = 1; + RETURN(ep); + } + ERROR(36); + case '*': + if (lastep==0 || *lastep==CBRA || *lastep==CKET || + *lastep==(CBRC|regexp_h_wchars) || + *lastep==(CLET|regexp_h_wchars)) + goto defchar; + *lastep |= STAR; + continue; + + case '$': + if(PEEKC() != eof) + goto defchar; + *ep++ = CDOL; + continue; + + case '[': +#ifdef REGEXP_H_WCHARS + if (regexp_h_wchars == 0) { +#endif + if(&ep[33] >= endbuf) + ERROR(50); + + *ep++ = CCL; + lc = 0; + for(i = 0; i < 32; i++) + ep[i] = 0; + + neg = 0; + if((c = GETC()) == '^') { + neg = 1; + c = GETC(); + } + + do { + c &= 0377; + if(c == '\0' || c == '\n') + ERROR(49); +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && ((c = PEEKC()) == ']' || + c == '-' || c == '^' || + c == '\\')) { + c = GETC(); + c &= 0377; + } else +#endif /* REGEXP_H_VI_BACKSLASH */ + if(c == '-' && lc != 0) { + if ((c = GETC()) == ']') { + PLACE('-'); + break; + } +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && + ((c = PEEKC()) == ']' || + c == '-' || + c == '^' || + c == '\\')) + c = GETC(); +#endif /* REGEXP_H_VI_BACKSLASH */ + c &= 0377; + while(lc < c) { + PLACE(lc); + lc++; + } + } + lc = c; + PLACE(c); + } while((c = GETC()) != ']'); + if(neg) { + for(cclcnt = 0; cclcnt < 32; cclcnt++) + ep[cclcnt] ^= 0377; + ep[0] &= 0376; + } + + ep += 32; +#ifdef REGEXP_H_WCHARS + } else { + if 
(&ep[18] >= endbuf) + ERROR(50); + *ep++ = CCL|CMB; + *ep++ = 0; + lc = 0; + for (i = 0; i < 16; i++) + ep[i] = 0; + eq = &ep[16]; + regexp_h_getwc(c); + if (c == L'^') { + regexp_h_getwc(c); + ep[-2] = CNCL|CMB; + } + do { + if (c == '\0' || c == '\n') + ERROR(49); +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && ((c = PEEKC()) == ']' || + c == '-' || c == '^' || + c == '\\')) { + regexp_h_store(c, eq, endbuf); + regexp_h_getwc(c); + } else +#endif /* REGEXP_H_VI_BACKSLASH */ + if (c == '-' && lc != 0 && lc <= 0177) { + regexp_h_store(c, eq, endbuf); + regexp_h_getwc(c); + if (c == ']') { + PLACE('-'); + break; + } +#ifdef REGEXP_H_VI_BACKSLASH + if(c == '\\' && + ((c = PEEKC()) == ']' || + c == '-' || + c == '^' || + c == '\\')) { + regexp_h_store(c, eq, + endbuf); + regexp_h_getwc(c); + } +#endif /* REGEXP_H_VI_BACKSLASH */ + while (lc < (c & 0177)) { + PLACE(lc); + lc++; + } + } + lc = c; + if (c <= 0177) + PLACE(c); + regexp_h_store(c, eq, endbuf); + regexp_h_getwc(c); + } while (c != L']'); + if ((i = eq - &ep[16]) > 255) + ERROR(50); + lastep[1] = i; + ep = eq; + } +#endif /* REGEXP_H_WCHARS */ + + continue; + + case '\\': + regexp_h_getwc(c); + switch(c) { + + case '(': + if(nbra >= NBRA) + ERROR(43); + *bracketp++ = nbra; + *ep++ = CBRA; + *ep++ = nbra++; + continue; + + case ')': + if(bracketp <= bracket) + ERROR(42); + *ep++ = CKET; + *ep++ = *--bracketp; + closed++; + continue; + + case '<': + *ep++ = CBRC|regexp_h_wchars; + continue; + + case '>': + *ep++ = CLET|regexp_h_wchars; + continue; + + case '{': + if(lastep == (char *) (0)) + goto defchar; + *lastep |= RNGE; + cflg = 0; + nlim: + c = GETC(); + i = 0; + do { + if ('0' <= c && c <= '9') + i = 10 * i + c - '0'; + else + ERROR(16); + } while(((c = GETC()) != '\\') && (c != ',')); + if (i > 255) + ERROR(11); + *ep++ = i; + if (c == ',') { + if(cflg++) + ERROR(44); + if((c = GETC()) == '\\') { + *ep++ = (char)255; + *lastep |= REGEXP_H_LEAST; + } else { + UNGETC(c); + goto nlim; /* get 2'nd number */ 
+ } + } + if(GETC() != '}') + ERROR(45); + if(!cflg) /* one number */ + *ep++ = i; + else if((ep[-1] & 0377) < (ep[-2] & 0377)) + ERROR(46); + continue; + + case '\n': + ERROR(36); + + case 'n': + c = '\n'; + goto defchar; + + default: + if(c >= '1' && c <= '9') { + if((c -= '1') >= closed) + ERROR(25); + *ep++ = CBACK; + *ep++ = c; + continue; + } + } + /* Drop through to default to use \ to turn off special chars */ + + defchar: + default: + lastep = ep; +#ifdef REGEXP_H_WCHARS + if (regexp_h_wchars == 0) { +#endif + *ep++ = CCHR; + *ep++ = c; +#ifdef REGEXP_H_WCHARS + } else { + char mbbuf[MB_LEN_MAX]; + + switch (wctomb(mbbuf, c)) { + case 1: *ep++ = CCH1; + break; + case 2: *ep++ = CCH2; + break; + case 3: *ep++ = CCH3; + break; + default: + *ep++ = CCHR|CMB; + } + regexp_h_store(c, ep, endbuf); + } +#endif /* REGEXP_H_WCHARS */ + } + } +} + +int +step(const char *p1, const char *p2) +{ + register int c; +#ifdef REGEXP_H_WCHARS + register int d; +#endif /* REGEXP_H_WCHARS */ + + REGEXP_H_STEP_INIT /* get circf */ + regexp_h_bol = p1; +#ifdef REGEXP_H_WCHARS + regexp_h_firstwc = NULL; +#endif /* REGEXP_H_WCHARS */ + if (circf) { + loc1 = (char *)p1; + return(regexp_h_advance(p1, p2)); + } + /* fast check for first character */ + if (*p2==CCHR) { + c = p2[1] & 0377; + do { + if ((*p1 & 0377) != c) + continue; + if (regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + } while (*p1++); + return(0); + } +#ifdef REGEXP_H_WCHARS + else if (*p2==CCH1) { + do { + if (p1[0] == p2[1] && regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + c = regexp_h_fetch(p1, 1); + } while (c); + return(0); + } else if (*p2==CCH2) { + do { + if (p1[0] == p2[1] && p1[1] == p2[2] && + regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + c = regexp_h_fetch(p1, 1); + } while (c); + return(0); + } else if (*p2==CCH3) { + do { + if (p1[0] == p2[1] && p1[1] == p2[2] && p1[2] == p2[3]&& + regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); 
+ } + c = regexp_h_fetch(p1, 1); + } while (c); + return(0); + } else if ((*p2&0377)==(CCHR|CMB)) { + d = regexp_h_fetch(p2, 0); + do { + c = regexp_h_fetch(p1, 1); + if (c == d && regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + } while(c); + return(0); + } + /* regular algorithm */ + if (regexp_h_wchars) + do { + if (regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + c = regexp_h_fetch(p1, 1); + } while (c); + else +#endif /* REGEXP_H_WCHARS */ + do { + if (regexp_h_advance(p1, p2)) { + loc1 = (char *)p1; + return(1); + } + } while (*p1++); + return(0); +} + +#ifdef REGEXP_H_WCHARS +/* + * It is painfully slow to read character-wise backwards in a + * multibyte string (see regexp_h_previous() above). For the star + * algorithm, we therefore keep track of every character as it is + * read in forward direction. + * + * Don't use alloca() for stack blocks since there is no measurable + * speedup and huge amounts of memory are used up for long input + * lines. + */ +#ifndef REGEXP_H_STAKBLOK +#define REGEXP_H_STAKBLOK 1000 +#endif + +struct regexp_h_stack { + struct regexp_h_stack *s_nxt; + struct regexp_h_stack *s_prv; + const char *s_ptr[REGEXP_H_STAKBLOK]; +}; + +#define regexp_h_push(sb, sp, sc, lp) (regexp_h_wchars ? 
\ + regexp_h_pushwc(sb, sp, sc, lp) : (void)0) + +static regexp_h_inline void +regexp_h_pushwc(struct regexp_h_stack **sb, + struct regexp_h_stack **sp, + const char ***sc, const char *lp) +{ + if (regexp_h_firstwc == NULL || lp < regexp_h_firstwc) + return; + if (*sb == NULL) { + if ((*sb = regexp_h_malloc(sizeof **sb)) == NULL) + return; + (*sb)->s_nxt = (*sb)->s_prv = NULL; + *sp = *sb; + *sc = &(*sb)->s_ptr[0]; + } else if (*sc >= &(*sp)->s_ptr[REGEXP_H_STAKBLOK]) { + if ((*sp)->s_nxt == NULL) { + struct regexp_h_stack *bq; + + if ((bq = regexp_h_malloc(sizeof *bq)) == NULL) + return; + bq->s_nxt = NULL; + bq->s_prv = *sp; + (*sp)->s_nxt = bq; + *sp = bq; + } else + *sp = (*sp)->s_nxt; + *sc = &(*sp)->s_ptr[0]; + } + *(*sc)++ = lp; +} + +static regexp_h_inline const char * +regexp_h_pop(struct regexp_h_stack **sb, struct regexp_h_stack **sp, + const char ***sc, const char *lp) +{ + if (regexp_h_firstwc == NULL || lp <= regexp_h_firstwc) + return &lp[-1]; + if (*sp == NULL) + return regexp_h_firstwc; + if (*sc == &(*sp)->s_ptr[0]) { + if ((*sp)->s_prv == NULL) { + regexp_h_free(*sp); + *sp = NULL; + *sb = NULL; + return regexp_h_firstwc; + } + *sp = (*sp)->s_prv; + regexp_h_free((*sp)->s_nxt); + (*sp)->s_nxt = NULL ; + *sc = &(*sp)->s_ptr[REGEXP_H_STAKBLOK]; + } + return *(--(*sc)); +} + +static void +regexp_h_zerostak(struct regexp_h_stack **sb, struct regexp_h_stack **sp) +{ + for (*sp = *sb; *sp && (*sp)->s_nxt; *sp = (*sp)->s_nxt) + if ((*sp)->s_prv) + regexp_h_free((*sp)->s_prv); + if (*sp) { + if ((*sp)->s_prv) + regexp_h_free((*sp)->s_prv); + regexp_h_free(*sp); + } + *sp = *sb = NULL; +} +#else /* !REGEXP_H_WCHARS */ +#define regexp_h_push(sb, sp, sc, lp) +#endif /* !REGEXP_H_WCHARS */ + +static int +regexp_h_advance(const char *lp, const char *ep) +{ + register const char *curlp; + int c, least; +#ifdef REGEXP_H_WCHARS + int d; + struct regexp_h_stack *sb = NULL, *sp = NULL; + const char **sc; +#endif /* REGEXP_H_WCHARS */ + char *bbeg; + int ct; + + 
for (;;) switch (least = *ep++ & 0377, least & ~REGEXP_H_LEAST) { + + case CCHR: +#ifdef REGEXP_H_WCHARS + case CCH1: +#endif + if (*ep++ == *lp++) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CCHR|CMB: + if (regexp_h_fetch(ep, 0) == regexp_h_fetch(lp, 1)) + continue; + return(0); + + case CCH2: + if (ep[0] == lp[0] && ep[1] == lp[1]) { + ep += 2, lp += 2; + continue; + } + return(0); + + case CCH3: + if (ep[0] == lp[0] && ep[1] == lp[1] && ep[2] == lp[2]) { + ep += 3, lp += 3; + continue; + } + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CDOT: + if (*lp++) + continue; + return(0); +#ifdef REGEXP_H_WCHARS + case CDOT|CMB: + if ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CDOL: + if (*lp==0) + continue; + return(0); + + case CCEOF: + loc2 = (char *)lp; + return(1); + + case CCL: + c = *lp++ & 0377; + if(ISTHERE(c)) { + ep += 32; + continue; + } + return(0); + +#ifdef REGEXP_H_WCHARS + case CCL|CMB: + case CNCL|CMB: + c = regexp_h_fetch(lp, 1); + if (regexp_h_cclass(ep, c, (ep[-1] & 0377) == (CCL|CMB))) { + ep += (*ep & 0377) + 17; + continue; + } + return 0; +#endif /* REGEXP_H_WCHARS */ + + case CBRA: + braslist[*ep++ & 0377] = (char *)lp; + continue; + + case CKET: + braelist[*ep++ & 0377] = (char *)lp; + continue; + + case CBRC: + if (lp == regexp_h_bol && locs == NULL) + continue; + if ((isdigit(lp[0] & 0377) || regexp_h_uletter(lp[0] & 0377)) + && !regexp_h_uletter(lp[-1] & 0377) + && !isdigit(lp[-1] & 0377)) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CBRC|CMB: + c = regexp_h_show(lp); + d = regexp_h_previous(lp); + if ((iswdigit(c) || regexp_h_wuletter(c)) + && !regexp_h_wuletter(d) + && !iswdigit(d)) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CLET: + if (!regexp_h_uletter(lp[0] & 0377) && !isdigit(lp[0] & 0377)) + continue; + return(0); + +#ifdef REGEXP_H_WCHARS + case CLET|CMB: + c = regexp_h_show(lp); + if (!regexp_h_wuletter(c) && 
!iswdigit(c)) + continue; + return(0); +#endif /* REGEXP_H_WCHARS */ + + case CCHR|RNGE: + c = *ep++; + regexp_h_getrnge(ep, least); + while(low--) + if(*lp++ != c) + return(0); + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if(*lp++ != c) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 2; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCHR|RNGE|CMB: + case CCH1|RNGE: + case CCH2|RNGE: + case CCH3|RNGE: + c = regexp_h_fetch(ep, 0); + regexp_h_getrnge(ep, least); + while (low--) + if (regexp_h_fetch(lp, 1) != c) + return 0; + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if (regexp_h_fetch(lp, 1) != c) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += 2; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CDOT|RNGE: + regexp_h_getrnge(ep, least); + while(low--) + if(*lp++ == '\0') + return(0); + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if(*lp++ == '\0') + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 2; + goto star; + +#ifdef REGEXP_H_WCHARS + case CDOT|RNGE|CMB: + regexp_h_getrnge(ep, least); + while (low--) + if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) + return 0; + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + if ((c = regexp_h_fetch(lp, 1)) == L'\0' || c == WEOF) + break; + } + if (size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += 2; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCL|RNGE: + regexp_h_getrnge(ep + 32, least); + while(low--) { + c = *lp++ & 0377; + if(!ISTHERE(c)) + return(0); + } + curlp = lp; + while(size--) { + regexp_h_push(&sb, &sp, &sc, lp); + c = *lp++ & 0377; + if(!ISTHERE(c)) + break; + } + if(size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + lp++; + } + ep += 34; /* 32 + 2 */ + goto star; + +#ifdef REGEXP_H_WCHARS + case CCL|RNGE|CMB: + case CNCL|RNGE|CMB: + 
regexp_h_getrnge(ep + (*ep & 0377) + 17, least); + while (low--) { + c = regexp_h_fetch(lp, 1); + if (!regexp_h_cclass(ep, c, + (ep[-1] & 0377 & ~REGEXP_H_LEAST) + == (CCL|RNGE|CMB))) + return 0; + } + curlp = lp; + while (size--) { + regexp_h_push(&sb, &sp, &sc, lp); + c = regexp_h_fetch(lp, 1); + if (!regexp_h_cclass(ep, c, + (ep[-1] & 0377 & ~REGEXP_H_LEAST) + == (CCL|RNGE|CMB))) + break; + } + if (size < 0) { + regexp_h_push(&sb, &sp, &sc, lp); + regexp_h_fetch(lp, 1); + } + ep += (*ep & 0377) + 19; + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CBACK: + bbeg = braslist[*ep & 0377]; + ct = braelist[*ep++ & 0377] - bbeg; + + if(strncmp(bbeg, lp, ct) == 0) { + lp += ct; + continue; + } + return(0); + + case CBACK|STAR: + bbeg = braslist[*ep & 0377]; + ct = braelist[*ep++ & 0377] - bbeg; + curlp = lp; + while(strncmp(bbeg, lp, ct) == 0) + lp += ct; + + while(lp >= curlp) { + if(regexp_h_advance(lp, ep)) return(1); + lp -= ct; + } + return(0); + + + case CDOT|STAR: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while (*lp++); + goto star; + +#ifdef REGEXP_H_WCHARS + case CDOT|STAR|CMB: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while ((c = regexp_h_fetch(lp, 1)) != L'\0' && c != WEOF); + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCHR|STAR: + curlp = lp; + do + regexp_h_push(&sb, &sp, &sc, lp); + while (*lp++ == *ep); + ep++; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCHR|STAR|CMB: + case CCH1|STAR: + case CCH2|STAR: + case CCH3|STAR: + curlp = lp; + d = regexp_h_fetch(ep, 0); + do + regexp_h_push(&sb, &sp, &sc, lp); + while (regexp_h_fetch(lp, 1) == d); + goto star; +#endif /* REGEXP_H_WCHARS */ + + case CCL|STAR: + curlp = lp; + do { + regexp_h_push(&sb, &sp, &sc, lp); + c = *lp++ & 0377; + } while(ISTHERE(c)); + ep += 32; + goto star; + +#ifdef REGEXP_H_WCHARS + case CCL|STAR|CMB: + case CNCL|STAR|CMB: + curlp = lp; + do { + regexp_h_push(&sb, &sp, &sc, lp); + c = regexp_h_fetch(lp, 1); + } while (regexp_h_cclass(ep, c, 
(ep[-1] & 0377) + == (CCL|STAR|CMB))); + ep += (*ep & 0377) + 17; + goto star; +#endif /* REGEXP_H_WCHARS */ + + star: +#ifdef REGEXP_H_WCHARS + if (regexp_h_wchars == 0) { +#endif + do { + if(--lp == locs) + break; + if (regexp_h_advance(lp, ep)) + return(1); + } while (lp > curlp); +#ifdef REGEXP_H_WCHARS + } else { + do { + lp = regexp_h_pop(&sb, &sp, &sc, lp); + if (lp <= locs) + break; + if (regexp_h_advance(lp, ep)) { + regexp_h_zerostak(&sb, &sp); + return(1); + } + } while (lp > curlp); + regexp_h_zerostak(&sb, &sp); + } +#endif /* REGEXP_H_WCHARS */ + return(0); + + } +} + +static void +regexp_h_getrnge(register const char *str, int least) +{ + low = *str++ & 0377; + size = least & REGEXP_H_LEAST ? /*20000*/INT_MAX : (*str & 0377) - low; +} + +int +advance(const char *lp, const char *ep) +{ + REGEXP_H_ADVANCE_INIT /* skip past circf */ + regexp_h_bol = lp; +#ifdef REGEXP_H_WCHARS + regexp_h_firstwc = NULL; +#endif /* REGEXP_H_WCHARS */ + return regexp_h_advance(lp, ep); +} diff --git a/libcommon/regexpr.c b/libcommon/regexpr.c @@ -0,0 +1,90 @@ +/* + * Simple Regular Expression functions. Derived from Unix 7th Edition, + * /usr/src/cmd/expr.y + * + * Modified by Gunnar Ritter, Freiburg i. Br., Germany, January 2003. + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)regexpr.c 1.8 (gritter) 10/13/04 */ + +#include <stdlib.h> +#include "regexpr.h" + +int regerrno, reglength; +static int circf; + +static char *regexpr_compile(char *, char *, const char *, int); +/* + * compile: compile the regular expression instring. + * + * If ep is 0 a buffer is obtained from malloc(); its size is two bytes + * per pattern byte, plus 32 for every [ in the pattern, plus 5. If that + * allocation fails regerrno is set to 11 and 0 is returned. Otherwise + * the caller supplies ep and endbuf. + * + * The compiled form is produced by regexpr_compile() starting at ep[1]; + * ep[0] then receives circf and reglength is set to the distance from + * ep to the end pointer returned by regexpr_compile(). + * + * Returns ep when the buffer was allocated here, else the end pointer + * returned by regexpr_compile(). Returns 0 if regexpr_compile() fails; + * a buffer allocated here is freed first. + */ +char * +compile(const char *instring, char *ep, char *endbuf) +{ + char *cp; + int sz = 0; /* stays 0 when the caller supplied ep */ + + if (ep == 0) { + for (cp = (char *)instring; *cp != '\0'; cp++) + if (*cp == '[') + sz += 32; + sz += 2 * (cp - instring) + 5; + if ((ep = malloc(sz)) == 0) { + regerrno = 11; + return 0; + } + endbuf = &ep[sz]; + ep[1] = '\0'; /* fresh buffer: first byte of the compiled area starts out as 0 */ + } + if ((cp=regexpr_compile((char *)instring, &ep[1], endbuf, '\0')) == 0) { + if (sz) + free(ep); + return 0; + } + ep[0] = circf; + reglength = cp - ep; + return sz ? 
ep : cp; +} +/* + * Glue for the inclusion of regexp.h below. + * + * INIT, GETC, PEEKC and UNGETC make the included code read the pattern + * through the local pointer sp, which starts at instring. RETURN + * returns its argument and ERROR stores its code in regerrno and + * returns 0. The name compile is redirected to regexpr_compile and + * regexp_h_static is defined as static, so the function defined in + * regexp.h becomes the static regexpr_compile() declared earlier in + * this file. The two *_INIT hooks load circf from the first byte of + * the compiled expression and step past it. + */ +#define INIT register char *sp = instring; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) return (c); +#define ERROR(c) { regerrno = c; return 0; } + +#define compile(a, b, c, d) regexpr_compile(a, b, c, d) +#define regexp_h_static static +#define REGEXP_H_STEP_INIT circf = *p2++; +#define REGEXP_H_ADVANCE_INIT circf = *ep++; + +#include "regexp.h" diff --git a/libcommon/regexpr.h b/libcommon/regexpr.h @@ -0,0 +1,53 @@ +/* + * Simple Regular Expression functions. Derived from Unix 7th Edition, + * /usr/src/cmd/expr.y + * + * Modified by Gunnar Ritter, Freiburg i. Br., Germany, January 2003. + * + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Sccsid @(#)regexpr.h 1.2 (gritter) 1/11/03 */ + +#define NBRA 9 + +extern char *braslist[NBRA]; +extern char *braelist[NBRA]; +extern int nbra; +extern int regerrno, reglength; +extern char *loc1, *loc2, *locs; +extern int sed; + +extern char *compile(const char *, char *, char *); +extern int step(const char *, const char *); +extern int advance(const char *, const char *); diff --git a/libcommon/setlabel.c b/libcommon/setlabel.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)setlabel.c 1.1 (gritter) 9/21/03 */ + +extern char *pfmt_label__; +/* + * setlabel: set the label pointer pfmt_label__ from s. + * + * A non-empty s is copied into a static 26-byte buffer, truncated to at + * most 25 characters and NUL-terminated, and pfmt_label__ is pointed at + * that buffer. A null or empty s sets pfmt_label__ to 0. The buffer is + * static, so every call overwrites what the previous call stored. + * + * Always returns 0. + */ +int +setlabel(const char *s) +{ + static char lbuf[26]; + char *lp; + + if (s && s[0]) { + for (lp = lbuf; *s && lp < &lbuf[sizeof lbuf-1]; s++, lp++) /* stop before the last byte so the NUL always fits */ + *lp = *s; + *lp = '\0'; + pfmt_label__ = lbuf; + } else + pfmt_label__ = 0; + return 0; +} diff --git a/libcommon/setuxlabel.c b/libcommon/setuxlabel.c @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)setuxlabel.c 1.1 (gritter) 9/21/03 */ + +#include "msgselect.h" + +extern char *pfmt_label__; + +int +setuxlabel(const char *s) +{ + static char lbuf[msgselect(29,26)]; + char *lp, *mp; + + if (s && s[0]) { + lp = lbuf; + mp = msgselect("UX:",""); + while (*mp) + *lp++ = *mp++; + lbuf[0] = 'U', lbuf[1] = 'X', lbuf[2] = ':'; + while (*s && lp < &lbuf[sizeof lbuf-1]) + *lp++ = *s++; + *lp = '\0'; + pfmt_label__ = lbuf; + } else + pfmt_label__ = 0; + return 0; +} diff --git a/libcommon/sfile.c b/libcommon/sfile.c @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ +/* Sccsid @(#)sfile.c 1.9 (gritter) 6/7/04 */ + +#ifdef __linux__ +#undef _FILE_OFFSET_BITS + +#include <sys/types.h> +#include <sys/sendfile.h> +#include <sys/stat.h> +#include <unistd.h> +#include <limits.h> +#include <errno.h> +#include "sfile.h" + +long long +sfile(int dfd, int sfd, mode_t mode, long long count) +{ + static int enosys, einval, success; + off_t offset; + ssize_t sent, total; + extern void writerr(void *, int, int); + /* + * A process is not interruptible while executing a sendfile() + * system call. So it is not advisable to to send an entire + * file with one call; it is sent in parts so signals can + * be delivered in between. + */ + const ssize_t chunk = 196608; + + /* + * If a previous call returned ENOSYS, the operating system does + * not support sendfile() at all and it makes no sense to try it + * again. + * + * If a previous call returned EINVAL and there was no successful + * call yet, it is very likely that this is a permanent error + * condition (on Linux 2.6.0-test4, sendfile() may be used for + * socket targets only; older versions don't support tmpfs as + * source file system etc.). + */ + if (enosys || !success && einval || + (mode&S_IFMT) != S_IFREG || count > SSIZE_MAX) + return 0; + offset = lseek(sfd, 0, SEEK_CUR); + sent = 0, total = 0; + while (count > 0 && (sent = sendfile(dfd, sfd, &offset, + count > chunk ? chunk : count)) > 0) { + count -= sent, total += sent; + } + if (total && lseek(sfd, offset, SEEK_SET) == (off_t)-1) + return -1; + if (count == 0 || sent == 0) { + success = 1; + return total; + } + switch (errno) { + case ENOSYS: + enosys = 1; + return 0; + case EINVAL: + einval = 1; + return 0; + case ENOMEM: + return 0; + default: + writerr(NULL, count > chunk ? 
chunk : count, 0); + return -1; + } +} +#else /* !__linux__ */ +#include <sys/types.h> + +/*ARGSUSED*/ +long long +sfile(int dfd, int sfd, mode_t mode, long long count) +{ + return 0; +} +#endif /* __linux__ */ diff --git a/libcommon/sfile.h b/libcommon/sfile.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)sfile.h 1.4 (gritter) 4/17/03 */ + +/* + * Return values: + * + * src_size The entire range has been copied. The file offset of both + * dst_fd and src_fd have been set to this position. The + * operation has been completed successfully. + * + * >0 Number of bytes written. The file offset of both dst_fd + * and src_fd have been set to this position. The operation + * may continue using read()/write(). + * + * 0 No data was written; operation may continue. + * + * -1 An error occured; operation may not continue. 
#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Add sig to the set of blocked signals.
 *
 * Returns -1 without touching the mask when sig is not positive,
 * otherwise whatever sigprocmask() returns.
 */
int
sighold(int sig)
{
    sigset_t block, previous;
    int rc = -1;

    if (sig > 0) {
        sigemptyset(&block);
        sigaddset(&block, sig);
        rc = sigprocmask(SIG_BLOCK, &block, &previous);
    }
    return rc;
}
#endif  /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
    __DragonFly__ || __APPLE__ */
#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Set the disposition of sig to SIG_IGN.
 *
 * For SIGCHLD the SA_NOCLDSTOP and SA_NOCLDWAIT flags are requested
 * as well.  Returns -1 when sig is not positive, otherwise the result
 * of sigaction().
 */
int
sigignore(int sig)
{
    struct sigaction ign;

    if (sig <= 0)
        return -1;
    ign.sa_handler = SIG_IGN;
    ign.sa_flags = (sig == SIGCHLD) ? SA_NOCLDSTOP|SA_NOCLDWAIT : 0;
    sigemptyset(&ign.sa_mask);
    sigaddset(&ign.sa_mask, sig);
    return sigaction(sig, &ign, (struct sigaction *)0);
}
#endif  /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
    __DragonFly__ || __APPLE__ */
#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * signal() implemented on top of sigaction().
 *
 * The handler is installed with SA_RESETHAND|SA_NODEFER and an empty
 * handler mask; ignoring SIGCHLD additionally requests
 * SA_NOCLDSTOP|SA_NOCLDWAIT.  Returns the previous handler, or
 * SIG_ERR when sig is not positive or sigaction() fails.
 */
void (*signal(int sig, void (*func)(int)))(int)
{
    struct sigaction wanted, previous;
    int flags = SA_RESETHAND|SA_NODEFER;

    if (sig <= 0)
        return SIG_ERR;
    if (sig == SIGCHLD && func == SIG_IGN)
        flags |= SA_NOCLDSTOP|SA_NOCLDWAIT;
    wanted.sa_handler = func;
    wanted.sa_flags = flags;
    sigemptyset(&wanted.sa_mask);
    if (sigaction(sig, &wanted, &previous) == -1)
        return SIG_ERR;
    return previous.sa_handler;
}
#endif  /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
    __DragonFly__ || __APPLE__ */
#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Unblock sig, suspend with an empty signal mask until a signal
 * arrives, then restore the mask that was in effect on entry.
 *
 * Returns -1 when sig is not positive or when either sigprocmask()
 * call fails; otherwise the result of sigsuspend().
 */
int
sigpause(int sig)
{
    sigset_t unblock, nothing, saved;
    int result;

    if (sig <= 0)
        return -1;
    sigemptyset(&unblock);
    sigaddset(&unblock, sig);
    if (sigprocmask(SIG_UNBLOCK, &unblock, &saved) < 0)
        return -1;
    sigemptyset(&nothing);
    result = sigsuspend(&nothing);
    /* Put the caller's original mask back. */
    if (sigprocmask(SIG_SETMASK, &saved, (sigset_t *)0) < 0)
        return -1;
    return result;
}
#endif  /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
    __DragonFly__ || __APPLE__ */
#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * Remove sig from the set of blocked signals.
 *
 * Returns -1 without touching the mask when sig is not positive,
 * otherwise whatever sigprocmask() returns.
 */
int
sigrelse(int sig)
{
    sigset_t release, previous;
    int rc = -1;

    if (sig > 0) {
        sigemptyset(&release);
        sigaddset(&release, sig);
        rc = sigprocmask(SIG_UNBLOCK, &release, &previous);
    }
    return rc;
}
#endif  /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
    __DragonFly__ || __APPLE__ */
#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__)
#include <signal.h>
#include "sigset.h"

/*
 * sigset() implemented on top of sigprocmask() and sigaction().
 *
 * func == SIG_HOLD blocks sig and leaves its disposition alone (the
 * current one is only queried).  Any other func unblocks sig and
 * installs func with sig added to the handler mask; ignoring SIGCHLD
 * additionally requests SA_NOCLDSTOP|SA_NOCLDWAIT.
 *
 * Returns SIG_HOLD if sig was blocked on entry, otherwise the previous
 * handler; SIG_ERR when sig is not positive or a system call fails.
 */
void (*sigset(int sig, void (*func)(int)))(int)
{
    struct sigaction wanted, previous;
    sigset_t only_sig, old_mask;
    const int hold = (func == SIG_HOLD);

    if (sig <= 0)
        return SIG_ERR;
    sigemptyset(&only_sig);
    sigaddset(&only_sig, sig);
    if (sigprocmask(hold ? SIG_BLOCK : SIG_UNBLOCK, &only_sig, &old_mask) < 0)
        return SIG_ERR;

    wanted.sa_handler = func;
    wanted.sa_flags = 0;
    if (sig == SIGCHLD && func == SIG_IGN)
        wanted.sa_flags |= SA_NOCLDSTOP|SA_NOCLDWAIT;
    sigemptyset(&wanted.sa_mask);
    sigaddset(&wanted.sa_mask, sig);
    if (sigaction(sig, hold ? (struct sigaction *)0 : &wanted, &previous)
            == -1)
        return SIG_ERR;

    return sigismember(&old_mask, sig) ? SIG_HOLD : previous.sa_handler;
}
#endif  /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __OpenBSD__ ||
    __DragonFly__ || __APPLE__ */
/*
 * Common conversion routine behind strtoll(), strtoull() and atoll().
 *
 * nptr    string to convert; leading white space and one optional
 *         sign are skipped.
 * endptr  if not null, receives a pointer to the first character that
 *         was not converted, or nptr itself if no digit was found.
 * base    2..36, or 0 to derive 8/10/16 from the "0"/"0x" prefix.
 * flags   bit 0 (1): report errors through errno (EINVAL for a bad
 *                    base or no digits, ERANGE on overflow) and clamp
 *                    the result on overflow;
 *         bit 1 (2): the result is an unsigned long long.
 *
 * On overflow (with bit 0 set) the result is the maximum unsigned
 * value for unsigned conversions, and the maximum or minimum long long
 * value, depending on the sign, for signed ones.  The value is
 * accumulated in unsigned arithmetic so that no signed overflow can
 * occur.
 */
static long long
internal(const char *nptr, char **endptr, int base, int flags)
{
    const unsigned long long umax = ~0ULL;
    const char *pp = nptr, *bptr;
    unsigned long long uv = 0, limit;
    int negative = 0;
    int overflow = 0;
    int c;

    /* XXX
     * iswspace() should be used.
     */
    for (bptr = nptr; isspace(*bptr&0377); bptr++)
        ;
    if (*bptr == '-') {
        negative = 1;
        bptr++;
    } else if (*bptr == '+')
        bptr++;
    if (base == 0) {
        if (*bptr >= '1' && *bptr <= '9')
            base = 10;
        else if (*bptr == '0') {
            if (bptr[1] == 'x' || bptr[1] == 'X')
                base = 16;
            else
                base = 8;
        } else {
            if (flags&1)
                errno = EINVAL;
            goto out;
        }
    }
    if (base < 2 || base > 36) {
        if (flags&1)
            errno = EINVAL;
        goto out;
    }
    if (base == 16 && bptr[0] == '0' &&
            (bptr[1] == 'x' || bptr[1] == 'X'))
        bptr += 2;

    /* Largest magnitude the requested result type can represent. */
    if (flags&2)
        limit = umax;
    else
        limit = (umax >> 1) + (negative ? 1 : 0);

    pp = bptr;
    for (;;) {
        if (*pp >= '0' && *pp <= '9')
            c = *pp - '0';
        else if (*pp >= 'a' && *pp <= 'z')
            c = *pp - 'a' + 10;
        else if (*pp >= 'A' && *pp <= 'Z')
            c = *pp - 'A' + 10;
        else
            break;
        if (c >= base)
            break;
        pp++;
        if (overflow)
            continue;   /* keep consuming digits for endptr */
        /*
         * Test before multiplying: the first comparison guarantees
         * that uv * base itself cannot exceed limit.
         */
        if ((flags&1) && (uv > limit / base ||
                uv * base > limit - c)) {
            errno = ERANGE;
            uv = limit;
            /* limit already carries the sign of the result. */
            negative = 0;
            overflow = 1;
        } else
            uv = uv * base + c;
    }
out:
    if (pp <= bptr) {
        if (flags&1)
            errno = EINVAL;
        if (endptr)
            *endptr = (char *)nptr;
    } else if (endptr)
        *endptr = (char *)pp;
    /* Negate in unsigned arithmetic, then reinterpret as signed. */
    return (long long)(negative ? 0 - uv : uv);
}

/* Convert nptr to long long with error reporting through errno. */
long long
strtoll(const char *nptr, char **endptr, int base)
{
    return internal(nptr, endptr, base, 1);
}

/* Convert nptr to unsigned long long with error reporting through errno. */
unsigned long long
strtoull(const char *nptr, char **endptr, int base)
{
    return (unsigned long long)internal(nptr, endptr, base, 3);
}

/* Convert a decimal string to long long; errno is never set. */
long long
atoll(const char *nptr)
{
    return internal(nptr, NULL, 10, 0);
}
#include <stdio.h>

#if defined (__FreeBSD__) || defined (__dietlibc__) || defined (__NetBSD__) || \
    defined (__UCLIBC__) || defined (__OpenBSD__) || \
    defined (__DragonFly__) || \
    defined (__APPLE__) && \
        (__MAC_OS_X_VERSION_MIN_REQUIRED < __MAC_OS_X_VERSION_10_5)
#include <sys/types.h>
#include <sys/time.h>
#include <utmp.h>
#include <string.h>

#include "utmpx.h"

/*
 * utmpx interface emulated on top of a file of struct utmp records.
 */

static FILE *utfp;                          /* open utmp file, or NULL */
static struct utmpx utx;                    /* result buffer handed to callers */
static const char *utmpfile = _PATH_UTMP;   /* file used by init() */

/*
 * Open the utmp file unless it is open already: read/write if
 * possible, read-only otherwise.  Returns the stream or NULL.
 */
static FILE *
init(void)
{
    if (utfp == NULL && (utfp = fopen(utmpfile, "r+")) == NULL)
        if ((utfp = fopen(utmpfile, "r")) == NULL)
            return NULL;
    return utfp;
}

/*
 * Convert a utmp record to utmpx form.  The entry type is derived
 * from the special line names "~" (boot), "|" (old time) and
 * "{" (new time) -- the same names utmpx2utmp() writes -- and from
 * whether the user name is empty.  Returns ux.
 */
static struct utmpx *
utmp2utmpx(struct utmpx *ux, const struct utmp *up)
{
#ifndef __dietlibc__
    memset(ux, 0, sizeof *ux);
    ux->ut_tv.tv_sec = up->ut_time;
    memcpy(ux->ut_line, up->ut_line, UT_LINESIZE);
    memcpy(ux->ut_user, up->ut_name, UT_NAMESIZE);
    memcpy(ux->ut_host, up->ut_host, UT_HOSTSIZE);
    if (strcmp(up->ut_line, "~") == 0)
        ux->ut_type = BOOT_TIME;
    else if (strcmp(up->ut_line, "|") == 0)
        ux->ut_type = OLD_TIME;
    else if (strcmp(up->ut_line, "{") == 0)
        ux->ut_type = NEW_TIME;
    else if (*up->ut_name == 0)
        ux->ut_type = DEAD_PROCESS;
    else
        ux->ut_type = USER_PROCESS;
#else   /* __dietlibc__ */
    *ux = *up;
#endif  /* __dietlibc__ */
    return ux;
}

/*
 * Convert a utmpx record to utmp form.  Returns up, or NULL for entry
 * types that are not converted (EMPTY, INIT_PROCESS, LOGIN_PROCESS,
 * RUN_LVL, ACCOUNTING and unknown ones).
 */
static struct utmp *
utmpx2utmp(struct utmp *up, const struct utmpx *ux)
{
#ifndef __dietlibc__
    memset(up, 0, sizeof *up);
    up->ut_time = ux->ut_tv.tv_sec;
    switch (ux->ut_type) {
    case DEAD_PROCESS:
        memcpy(up->ut_line, ux->ut_line, UT_LINESIZE);
        break;
    default:
    case EMPTY:
    case INIT_PROCESS:
    case LOGIN_PROCESS:
    case RUN_LVL:
    case ACCOUNTING:
        return NULL;
    case BOOT_TIME:
        strcpy(up->ut_name, "reboot");
        strcpy(up->ut_line, "~");
        break;
    case OLD_TIME:
        strcpy(up->ut_name, "date");
        strcpy(up->ut_line, "|");
        break;
    case NEW_TIME:
        strcpy(up->ut_name, "date");
        strcpy(up->ut_line, "{");
        break;
    case USER_PROCESS:
        memcpy(up->ut_line, ux->ut_line, UT_LINESIZE);
        memcpy(up->ut_name, ux->ut_user, UT_NAMESIZE);
        memcpy(up->ut_host, ux->ut_host, UT_HOSTSIZE);
    }
#else   /* __dietlibc__ */
    *up = *ux;
#endif  /* __dietlibc__ */
    return up;
}

/*
 * Read the next record that is not all zero bytes and return it in
 * utmpx form, or NULL at end of file or if the file cannot be opened.
 */
struct utmpx *
getutxent(void)
{
    static struct utmp zero;
    struct utmp ut;

    if (init() == NULL)
        return NULL;
    do {
        if (fread(&ut, sizeof ut, 1, utfp) != 1)
            return NULL;
    } while (memcmp(&ut, &zero, sizeof ut) == 0);
    return utmp2utmpx(&utx, &ut);
}

/*
 * Search from the start of the file for a LOGIN_PROCESS or
 * USER_PROCESS record whose line equals ux->ut_line.  Returns the
 * record in utmpx form or NULL if there is none.
 */
struct utmpx *
getutxline(const struct utmpx *ux)
{
    struct utmp ut;

    if (init() == NULL)
        return NULL;
    fseek(utfp, 0, SEEK_SET);
    while (fread(&ut, sizeof ut, 1, utfp) == 1) {
        utmp2utmpx(&utx, &ut);
        /*
         * Compare against the caller's line; strncmp() because
         * only UT_LINESIZE bytes are copied from the file record.
         */
        if ((utx.ut_type == LOGIN_PROCESS ||
                utx.ut_type == USER_PROCESS) &&
                strncmp(ux->ut_line, utx.ut_line,
                    UT_LINESIZE) == 0)
            return &utx;
    }
    return NULL;
}

/*
 * Search from the start of the file for a record matching ux by type
 * (time entries) or by type and id (process entries).  Only
 * implemented for dietlibc; elsewhere NULL is always returned.
 */
struct utmpx *
getutxid(const struct utmpx *ux)
{
#ifdef  __dietlibc__
    struct utmp ut;
#endif

    if (init() == NULL)
        return NULL;
#ifdef  __dietlibc__
    fseek(utfp, 0, SEEK_SET);
    while (fread(&ut, sizeof ut, 1, utfp) == 1) {
        utmp2utmpx(&utx, &ut);
        switch (ux->ut_type) {
        case BOOT_TIME:
        case OLD_TIME:
        case NEW_TIME:
            if (ux->ut_type == utx.ut_type)
                return &utx;
            break;
        case INIT_PROCESS:
        case LOGIN_PROCESS:
        case USER_PROCESS:
        case DEAD_PROCESS:
            if (ux->ut_type == utx.ut_type &&
                    ux->ut_id == utx.ut_id)
                return &utx;
            break;
        }
    }
#endif  /* __dietlibc__ */
    return NULL;
}

/* Rewind to the first record, opening the file if necessary. */
void
setutxent(void)
{
    if (init() == NULL)
        return;
    fseek(utfp, 0, SEEK_SET);
}

/* Close the utmp file if it is open. */
void
endutxent(void)
{
    FILE *fp;

    /* Nothing to close; do not open the file just to close it again. */
    if (utfp == NULL)
        return;
    fp = utfp;
    utfp = NULL;
    fclose(fp);
}

/*
 * Select the file used by subsequent opens.  Returns 0 on success and
 * -1 if name is null or cannot be duplicated, in which case the
 * previous file name stays in effect.  Earlier copies are not
 * released.
 */
int
utmpxname(const char *name)
{
    char *copy;

    if (name == NULL || (copy = strdup(name)) == NULL)
        return -1;
    utmpfile = copy;
    return 0;
}

/*
 * Write up to the utmp file, replacing the next record with the same
 * line if there is one and appending otherwise.  Returns a pointer to
 * a static copy of the entry, or NULL on failure or if the entry type
 * cannot be stored.
 */
extern struct utmpx *
pututxline(const struct utmpx *up)
{
    struct utmp ut;
    struct utmpx *rp;

    if (init() == NULL)
        return NULL;
    /*
     * Cannot use getutxid() because there is no id field. Use
     * the equivalent of getutxline() instead.
     */
    while (fread(&ut, sizeof ut, 1, utfp) == 1) {
        if (strncmp(ut.ut_line, up->ut_line, UT_LINESIZE) == 0) {
            /* Step back over the matching record. */
            fseek(utfp, -(long)sizeof ut, SEEK_CUR);
            break;
        }
    }
    fflush(utfp);
    if (utmpx2utmp(&ut, up) == NULL)
        rp = NULL;
    else if (fwrite(&ut, sizeof ut, 1, utfp) == 1) {
        utx = *up;
        rp = &utx;
    } else
        rp = NULL;
    fflush(utfp);
    return rp;
}

/*
 * Append the entry to the file called name.
 * NOTE(review): the record is written as a struct utmpx, whereas
 * pututxline() converts to struct utmp first -- confirm which format
 * readers of this file expect.
 */
extern void
updwtmpx(const char *name, const struct utmpx *up)
{
    FILE *fp;

    if ((fp = fopen(name, "a")) == NULL)
        return;
    fwrite(up, sizeof *up, 1, fp);
    fclose(fp);
}

#endif /* __FreeBSD__ || __dietlibc__ || __NetBSD__ || __UCLIBC__ ||
    __OpenBSD__ || __DragonFly__ || __APPLE__ */
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ +/* Sccsid @(#)vpfmt.c 1.2 (gritter) 9/21/03 */ + +#include <stdio.h> +#include <stdarg.h> + +#include "pfmt.h" + +extern char *pfmt_label__; + +/* + * Strip catalog and msgnum from s, but only if they actually appear. + */ +static const char * +begin(const char *s, long flags) +{ + const char *sp; + + if (flags & MM_NOGET) + return s; + sp = s; + if (*sp && *sp != ':') { + sp++; + while (*sp && *sp != '/' && *sp != ':' && sp - s < 14) + sp++; + } + if (*sp++ != ':') + return s; + while (*sp >= '0' && *sp <= '9') + sp++; + if (*sp++ != ':' || *sp == '\0') + return s; + return sp; +} + +int +vpfmt(FILE *stream, long flags, const char *fmt, va_list ap) +{ + int n = 0; + const char *severity = NULL; + char sevbuf[25]; + + if ((flags&MM_NOSTD) == 0) { + if (flags & MM_ACTION) + severity = "TO FIX"; + else switch (flags & 0377) { + case MM_HALT: + severity = "HALT"; + break; + case MM_WARNING: + severity = "WARNING"; + break; + case MM_INFO: + severity = "INFO"; + break; + case MM_ERROR: + severity = "ERROR"; + break; + default: + snprintf(sevbuf, sizeof sevbuf, "SEV=%ld", flags&0377); + severity = sevbuf; + } + if (pfmt_label__) + n = fprintf(stream, "%s: ", pfmt_label__); + if (severity) + n += fprintf(stream, "%s: ", severity); + } + n += vfprintf(stream, begin(fmt, flags), ap); + return n; 
+} diff --git a/libuxre/COPYING.LGPL b/libuxre/COPYING.LGPL @@ -0,0 +1,504 @@ + GNU LESSER GENERAL PUBLIC LICENSE + Version 2.1, February 1999 + + Copyright (C) 1991, 1999 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts + as the successor of the GNU Library Public License, version 2, hence + the version number 2.1.] + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +Licenses are intended to guarantee your freedom to share and change +free software--to make sure the software is free for all its users. + + This license, the Lesser General Public License, applies to some +specially designated software packages--typically libraries--of the +Free Software Foundation and other authors who decide to use it. You +can use it too, but we suggest you first think carefully about whether +this license or the ordinary General Public License is the better +strategy to use in any particular case, based on the explanations below. + + When we speak of free software, we are referring to freedom of use, +not price. Our General Public Licenses are designed to make sure that +you have the freedom to distribute copies of free software (and charge +for this service if you wish); that you receive source code or can get +it if you want it; that you can change the software and use pieces of +it in new free programs; and that you are informed that you can do +these things. + + To protect your rights, we need to make restrictions that forbid +distributors to deny you these rights or to ask you to surrender these +rights. These restrictions translate to certain responsibilities for +you if you distribute copies of the library or if you modify it. 
+ + For example, if you distribute copies of the library, whether gratis +or for a fee, you must give the recipients all the rights that we gave +you. You must make sure that they, too, receive or can get the source +code. If you link other code with the library, you must provide +complete object files to the recipients, so that they can relink them +with the library after making changes to the library and recompiling +it. And you must show them these terms so they know their rights. + + We protect your rights with a two-step method: (1) we copyright the +library, and (2) we offer you this license, which gives you legal +permission to copy, distribute and/or modify the library. + + To protect each distributor, we want to make it very clear that +there is no warranty for the free library. Also, if the library is +modified by someone else and passed on, the recipients should know +that what they have is not the original version, so that the original +author's reputation will not be affected by problems that might be +introduced by others. + + Finally, software patents pose a constant threat to the existence of +any free program. We wish to make sure that a company cannot +effectively restrict the users of a free program by obtaining a +restrictive license from a patent holder. Therefore, we insist that +any patent license obtained for a version of the library must be +consistent with the full freedom of use specified in this license. + + Most GNU software, including some libraries, is covered by the +ordinary GNU General Public License. This license, the GNU Lesser +General Public License, applies to certain designated libraries, and +is quite different from the ordinary General Public License. We use +this license for certain libraries in order to permit linking those +libraries into non-free programs. 
+ + When a program is linked with a library, whether statically or using +a shared library, the combination of the two is legally speaking a +combined work, a derivative of the original library. The ordinary +General Public License therefore permits such linking only if the +entire combination fits its criteria of freedom. The Lesser General +Public License permits more lax criteria for linking other code with +the library. + + We call this license the "Lesser" General Public License because it +does Less to protect the user's freedom than the ordinary General +Public License. It also provides other free software developers Less +of an advantage over competing non-free programs. These disadvantages +are the reason we use the ordinary General Public License for many +libraries. However, the Lesser license provides advantages in certain +special circumstances. + + For example, on rare occasions, there may be a special need to +encourage the widest possible use of a certain library, so that it becomes +a de-facto standard. To achieve this, non-free programs must be +allowed to use the library. A more frequent case is that a free +library does the same job as widely used non-free libraries. In this +case, there is little to gain by limiting the free library to free +software only, so we use the Lesser General Public License. + + In other cases, permission to use a particular library in non-free +programs enables a greater number of people to use a large body of +free software. For example, permission to use the GNU C Library in +non-free programs enables many more people to use the whole GNU +operating system, as well as its variant, the GNU/Linux operating +system. + + Although the Lesser General Public License is Less protective of the +users' freedom, it does ensure that the user of a program that is +linked with the Library has the freedom and the wherewithal to run +that program using a modified version of the Library. 
+ + The precise terms and conditions for copying, distribution and +modification follow. Pay close attention to the difference between a +"work based on the library" and a "work that uses the library". The +former contains code derived from the library, whereas the latter must +be combined with the library in order to run. + + GNU LESSER GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License Agreement applies to any software library or other +program which contains a notice placed by the copyright holder or +other authorized party saying it may be distributed under the terms of +this Lesser General Public License (also called "this License"). +Each licensee is addressed as "you". + + A "library" means a collection of software functions and/or data +prepared so as to be conveniently linked with application programs +(which use some of those functions and data) to form executables. + + The "Library", below, refers to any such software library or work +which has been distributed under these terms. A "work based on the +Library" means either the Library or any derivative work under +copyright law: that is to say, a work containing the Library or a +portion of it, either verbatim or with modifications and/or translated +straightforwardly into another language. (Hereinafter, translation is +included without limitation in the term "modification".) + + "Source code" for a work means the preferred form of the work for +making modifications to it. For a library, complete source code means +all the source code for all modules it contains, plus any associated +interface definition files, plus the scripts used to control compilation +and installation of the library. + + Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running a program using the Library is not restricted, and output from +such a program is covered only if its contents constitute a work based +on the Library (independent of the use of the Library in a tool for +writing it). Whether that is true depends on what the Library does +and what the program that uses the Library does. + + 1. You may copy and distribute verbatim copies of the Library's +complete source code as you receive it, in any medium, provided that +you conspicuously and appropriately publish on each copy an +appropriate copyright notice and disclaimer of warranty; keep intact +all the notices that refer to this License and to the absence of any +warranty; and distribute a copy of this License along with the +Library. + + You may charge a fee for the physical act of transferring a copy, +and you may at your option offer warranty protection in exchange for a +fee. + + 2. You may modify your copy or copies of the Library or any portion +of it, thus forming a work based on the Library, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) The modified work must itself be a software library. + + b) You must cause the files modified to carry prominent notices + stating that you changed the files and the date of any change. + + c) You must cause the whole of the work to be licensed at no + charge to all third parties under the terms of this License. + + d) If a facility in the modified Library refers to a function or a + table of data to be supplied by an application program that uses + the facility, other than as an argument passed when the facility + is invoked, then you must make a good faith effort to ensure that, + in the event an application does not supply such function or + table, the facility still operates, and performs whatever part of + its purpose remains meaningful. 
+ + (For example, a function in a library to compute square roots has + a purpose that is entirely well-defined independent of the + application. Therefore, Subsection 2d requires that any + application-supplied function or table used by this function must + be optional: if the application does not supply it, the square + root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Library, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Library, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote +it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library +with the Library (or with a work based on the Library) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may opt to apply the terms of the ordinary GNU General Public +License instead of this License to a given copy of the Library. To do +this, you must alter all the notices that refer to this License, so +that they refer to the ordinary GNU General Public License, version 2, +instead of to this License. (If a newer version than version 2 of the +ordinary GNU General Public License has appeared, then you can specify +that version instead if you wish.) Do not make any other change in +these notices. 
+ + Once this change is made in a given copy, it is irreversible for +that copy, so the ordinary GNU General Public License applies to all +subsequent copies and derivative works made from that copy. + + This option is useful when you wish to copy part of the code of +the Library into a program that is not a library. + + 4. You may copy and distribute the Library (or a portion or +derivative of it, under Section 2) in object code or executable form +under the terms of Sections 1 and 2 above provided that you accompany +it with the complete corresponding machine-readable source code, which +must be distributed under the terms of Sections 1 and 2 above on a +medium customarily used for software interchange. + + If distribution of object code is made by offering access to copy +from a designated place, then offering equivalent access to copy the +source code from the same place satisfies the requirement to +distribute the source code, even though third parties are not +compelled to copy the source along with the object code. + + 5. A program that contains no derivative of any portion of the +Library, but is designed to work with the Library by being compiled or +linked with it, is called a "work that uses the Library". Such a +work, in isolation, is not a derivative work of the Library, and +therefore falls outside the scope of this License. + + However, linking a "work that uses the Library" with the Library +creates an executable that is a derivative of the Library (because it +contains portions of the Library), rather than a "work that uses the +library". The executable is therefore covered by this License. +Section 6 states terms for distribution of such executables. + + When a "work that uses the Library" uses material from a header file +that is part of the Library, the object code for the work may be a +derivative work of the Library even though the source code is not. 
+Whether this is true is especially significant if the work can be +linked without the Library, or if the work is itself a library. The +threshold for this to be true is not precisely defined by law. + + If such an object file uses only numerical parameters, data +structure layouts and accessors, and small macros and small inline +functions (ten lines or less in length), then the use of the object +file is unrestricted, regardless of whether it is legally a derivative +work. (Executables containing this object code plus portions of the +Library will still fall under Section 6.) + + Otherwise, if the work is a derivative of the Library, you may +distribute the object code for the work under the terms of Section 6. +Any executables containing that work also fall under Section 6, +whether or not they are linked directly with the Library itself. + + 6. As an exception to the Sections above, you may also combine or +link a "work that uses the Library" with the Library to produce a +work containing portions of the Library, and distribute that work +under terms of your choice, provided that the terms permit +modification of the work for the customer's own use and reverse +engineering for debugging such modifications. + + You must give prominent notice with each copy of the work that the +Library is used in it and that the Library and its use are covered by +this License. You must supply a copy of this License. If the work +during execution displays copyright notices, you must include the +copyright notice for the Library among them, as well as a reference +directing the user to the copy of this License. 
Also, you must do one +of these things: + + a) Accompany the work with the complete corresponding + machine-readable source code for the Library including whatever + changes were used in the work (which must be distributed under + Sections 1 and 2 above); and, if the work is an executable linked + with the Library, with the complete machine-readable "work that + uses the Library", as object code and/or source code, so that the + user can modify the Library and then relink to produce a modified + executable containing the modified Library. (It is understood + that the user who changes the contents of definitions files in the + Library will not necessarily be able to recompile the application + to use the modified definitions.) + + b) Use a suitable shared library mechanism for linking with the + Library. A suitable mechanism is one that (1) uses at run time a + copy of the library already present on the user's computer system, + rather than copying library functions into the executable, and (2) + will operate properly with a modified version of the library, if + the user installs one, as long as the modified version is + interface-compatible with the version that the work was made with. + + c) Accompany the work with a written offer, valid for at + least three years, to give the same user the materials + specified in Subsection 6a, above, for a charge no more + than the cost of performing this distribution. + + d) If distribution of the work is made by offering access to copy + from a designated place, offer equivalent access to copy the above + specified materials from the same place. + + e) Verify that the user has already received a copy of these + materials or that you have already sent this user a copy. + + For an executable, the required form of the "work that uses the +Library" must include any data and utility programs needed for +reproducing the executable from it. 
However, as a special exception, +the materials to be distributed need not include anything that is +normally distributed (in either source or binary form) with the major +components (compiler, kernel, and so on) of the operating system on +which the executable runs, unless that component itself accompanies +the executable. + + It may happen that this requirement contradicts the license +restrictions of other proprietary libraries that do not normally +accompany the operating system. Such a contradiction means you cannot +use both them and the Library together in an executable that you +distribute. + + 7. You may place library facilities that are a work based on the +Library side-by-side in a single library together with other library +facilities not covered by this License, and distribute such a combined +library, provided that the separate distribution of the work based on +the Library and of the other library facilities is otherwise +permitted, and provided that you do these two things: + + a) Accompany the combined library with a copy of the same work + based on the Library, uncombined with any other library + facilities. This must be distributed under the terms of the + Sections above. + + b) Give prominent notice with the combined library of the fact + that part of it is a work based on the Library, and explaining + where to find the accompanying uncombined form of the same work. + + 8. You may not copy, modify, sublicense, link with, or distribute +the Library except as expressly provided under this License. Any +attempt otherwise to copy, modify, sublicense, link with, or +distribute the Library is void, and will automatically terminate your +rights under this License. However, parties who have received copies, +or rights, from you under this License will not have their licenses +terminated so long as such parties remain in full compliance. + + 9. You are not required to accept this License, since you have not +signed it. 
However, nothing else grants you permission to modify or +distribute the Library or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Library (or any work based on the +Library), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Library or works based on it. + + 10. Each time you redistribute the Library (or any work based on the +Library), the recipient automatically receives a license from the +original licensor to copy, distribute, link with or modify the Library +subject to these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties with +this License. + + 11. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Library at all. For example, if a patent +license would not permit royalty-free redistribution of the Library by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any +particular circumstance, the balance of the section is intended to apply, +and the section as a whole is intended to apply in other circumstances. 
+ +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 12. If the distribution and/or use of the Library is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Library under this License may add +an explicit geographical distribution limitation excluding those countries, +so that distribution is permitted only in or among countries not thus +excluded. In such case, this License incorporates the limitation as if +written in the body of this License. + + 13. The Free Software Foundation may publish revised and/or new +versions of the Lesser General Public License from time to time. +Such new versions will be similar in spirit to the present version, +but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library +specifies a version number of this License which applies to it and +"any later version", you have the option of following the terms and +conditions either of that version or of any later version published by +the Free Software Foundation. If the Library does not specify a +license version number, you may choose any version ever published by +the Free Software Foundation. + + 14. 
If you wish to incorporate parts of the Library into other free +programs whose distribution conditions are incompatible with these, +write to the author to ask for permission. For software which is +copyrighted by the Free Software Foundation, write to the Free +Software Foundation; we sometimes make exceptions for this. Our +decision will be guided by the two goals of preserving the free status +of all derivatives of our free software and of promoting the sharing +and reuse of software generally. + + NO WARRANTY + + 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO +WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. +EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR +OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY +KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE +LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME +THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN +WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY +AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU +FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR +CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE +LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING +RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A +FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF +SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +DAMAGES. 
+ + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Libraries + + If you develop a new library, and you want it to be of the greatest +possible use to the public, we recommend making it free software that +everyone can redistribute and change. You can do so by permitting +redistribution under these terms (or, alternatively, under the terms of the +ordinary General Public License). + + To apply these terms, attach the following notices to the library. It is +safest to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least the +"copyright" line and a pointer to where the full notice is found. + + <one line to give the library's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. 
+ + <signature of Ty Coon>, 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + diff --git a/libuxre/NOTES b/libuxre/NOTES @@ -0,0 +1,14 @@ +Notes for the modified 'UNIX(R) Regular Expression Library' +============================================================ + +The code this is based on was released by Caldera as 'osutils-0.1a' +and is available at <http://unixtools.sourceforge.net/>. Notable +changes include: + +- Support for multibyte characters was enabled again. +- Support for traditional extended regular expression syntax was added. +- Fix: With REG_ICASE, [B-z] matches 'A', 'a', and '[' according to + POSIX.2. +- Some speed improvements. + + Gunnar Ritter 9/22/03 diff --git a/libuxre/_collelem.c b/libuxre/_collelem.c @@ -0,0 +1,119 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "colldata.h" +#include <stddef.h> + +#define CCE(p) ((const CollElem *)(p)) +#define CCM(p) ((const CollMult *)(p)) + +LIBUXRE_STATIC const CollElem * +libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc) +{ + const char *tbl; + size_t hi, lo, cur; + const CollMult *cmp; + const CollElem *cep; + long diff; + int sz; + + /* + * ELEM_ENCODED is returned when the collation is entirely + * based on the encoded value of the character. + */ + if (col == 0 || col->flags & CHF_ENCODED + || (tbl = (const char *)col->maintbl) == 0) + { + return ELEM_ENCODED; + } + if ((wuchar_type)wc <= UCHAR_MAX) + { + indexed:; + cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]); + if (cep->weight[0] == WGHT_SPECIAL) + return ELEM_BADCHAR; + return cep; + } + if (col->flags & CHF_INDEXED) + { + if ((wuchar_type)wc >= col->nmain) + return ELEM_BADCHAR; + goto indexed; + } + /* + * Binary search for a match. Could speed up the search if + * some interpolation was used, but keep it simple for now. + * Note that this is actually a table of CollMult's. + * + * To save space in the file, sequences of similar elements + * are sometimes compressed into a single CollMult that + * describes many entries. This is denoted by a subnbeg + * with the SUBN_SPECIAL bit set. The rest of the bits give + * the range covered by this entry. 
+ */ + sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem)); + tbl += (1 + UCHAR_MAX) * col->elemsize; + lo = 0; + hi = col->nmain - UCHAR_MAX; + while (lo < hi) + { + if ((cur = (hi + lo) >> 1) < lo) /* hi+lo overflowed */ + cur |= ~(~(size_t)0 >> 1); /* lost high order bit */ + cmp = CCM(&tbl[cur * sz]); + if ((diff = wc - cmp->ch) < 0) + hi = cur; + else if (cmp->elem.subnbeg & SUBN_SPECIAL) + { + if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL)) + lo = cur + 1; + else /* create an entry from the sequence in spare */ + { + spare->multbeg = cmp->elem.multbeg; + spare->subnbeg = 0; + spare->weight[0] = cmp->elem.weight[0] + diff; + for (lo = 1; lo < col->nweight; lo++) + { + wuchar_type w; + + if ((w = cmp->elem.weight[lo]) + == WGHT_SPECIAL) + { + w = spare->weight[0]; + } + spare->weight[lo] = w; + } + return spare; + } + } + else if (diff == 0) + return &cmp->elem; + else + lo = cur + 1; + } + return ELEM_BADCHAR; +} diff --git a/libuxre/_collmult.c b/libuxre/_collmult.c @@ -0,0 +1,55 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)_collmult.c 1.4 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "colldata.h" +#include <stddef.h> + +#define CCM(p) ((const CollMult *)(p)) + +LIBUXRE_STATIC const CollElem * +libuxre_collmult(struct lc_collate *col, const CollElem *cep, wchar_t wc) +{ + const char *tbl; + size_t sz; + w_type ch; + + if (col == 0 || cep->multbeg == 0 + || (tbl = (const char *)col->multtbl) == 0) + { + return ELEM_BADCHAR; + } + sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem)); + tbl += sz * cep->multbeg; + while ((ch = CCM(tbl)->ch) != wc) + { + if (ch == 0) + return ELEM_BADCHAR; /* end of list */ + tbl += sz; + } + return &CCM(tbl)->elem; +} diff --git a/libuxre/bracket.c b/libuxre/bracket.c @@ -0,0 +1,829 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)bracket.c 1.14 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include "re.h" + +/* +* Build and match the [...] part of REs. 
+* +* In general, each compiled bracket construct holds a set of mapped +* wide character values and a set of character classifications. +* The mapping applied (when the current LC_COLLATE is not CHF_ENCODED) +* is the "basic" weight (cep->weight[0]); otherwise the actual wide +* character is used. +* +* To support simplified range handling, this code assumes that a w_type, +* a signed integer type, can hold all valid basic weight values (as well +* as all wide character values for CHF_ENCODED locales) and that these +* are all positive. Negative values indicate error conditions (BKT_*); +* zero (which must be the same as WGHT_IGNORE) indicates success, but +* that the item installed is not a range endpoint. +*/ + +static int +addwide(Bracket *bp, wchar_t ord) +{ + unsigned int nw; + + if ((nw = bp->nwide) < NWIDE) + bp->wide[nw] = ord; + else + { + if (nw % NWIDE == 0 && (bp->exwide = + realloc(bp->exwide, nw * sizeof(wchar_t))) == 0) + { + return BKT_ESPACE; + } + nw -= NWIDE; + bp->exwide[nw] = ord; + } + bp->nwide++; + return 0; +} + +#if USHRT_MAX == 65535 /* have 16 bits */ +#define PLIND(n) ((n) >> 4) +#define PLBIT(n) (1 << ((n) & 0xf)) +#else +#define PLIND(n) ((n) / CHAR_BIT) +#define PLBIT(n) (1 << ((n) % CHAR_BIT)) +#endif + +#define RANGE ((wchar_t)'-') /* separates wide chars in ranges */ + +static int +addrange(Bracket *bp, wchar_t ord, w_type prev) +{ + int ret; + + if (prev > 0 && prev != ord) /* try for range */ + { + if (prev > ord) + { + if (bp->flags & BKT_ODDRANGE) /* prev only - done */ + return 0; + else if ((bp->flags & BKT_BADRANGE) == 0) + return BKT_ERANGE; + } + else + { + if (++prev <= UCHAR_MAX) /* "prev" already there */ + { + do + { + bp->byte[PLIND(prev)] |= PLBIT(prev); + if (prev == ord) + return 0; + } while (++prev <= UCHAR_MAX); + } + if ((ret = addwide(bp, prev)) != 0) + return ret; + if (++prev > ord) + return 0; + if (prev < ord && (ret = addwide(bp, RANGE)) != 0) + return ret; + return addwide(bp, ord); + } + } + if (ord 
<= UCHAR_MAX) + { + bp->byte[PLIND(ord)] |= PLBIT(ord); + return 0; + } + if (prev == ord) /* don't bother */ + return 0; + return addwide(bp, ord); +} + +static w_type +place(Bracket *bp, wchar_t wc, w_type prev, int mb_cur_max) +{ + const CollElem *cep; + CollElem spare; + int ret; + + if ((cep = libuxre_collelem(bp->col, &spare, wc)) != ELEM_ENCODED) + { + if (cep == ELEM_BADCHAR) + return BKT_BADCHAR; + wc = cep->weight[0]; + } + if ((ret = addrange(bp, wc, prev)) != 0) + return ret; + return wc; +} + +#ifndef CHARCLASS_NAME_MAX +# define CHARCLASS_NAME_MAX 127 +#endif + +static w_type +chcls(Bracket *bp, const unsigned char *s, int n) +{ + char clsstr[CHARCLASS_NAME_MAX + 1]; + unsigned int nt; + wctype_t wct; + + if (n > CHARCLASS_NAME_MAX) + return BKT_ECTYPE; + (void)memcpy(clsstr, s, n); + clsstr[n] = '\0'; + if ((wct = wctype(clsstr)) == 0) + return BKT_ECTYPE; + if ((nt = bp->ntype) < NTYPE) + bp->type[nt] = wct; + else + { + if (nt % NTYPE == 0 && (bp->extype = + realloc(bp->extype, nt * sizeof(wctype_t))) == 0) + { + return BKT_ESPACE; + } + nt -= NTYPE; + bp->extype[nt] = wct; + } + bp->ntype++; + return 0; /* cannot be end point of a range */ +} + + /* + * The purpose of mcce() and its Mcce structure is to locate + * the next full collation element from "wc" and "s". It is + * called both at compile and execute time. These two differ + * primarily in that at compile time there is an exact number + * of bytes to be consumed, while at execute time the longest + * valid collation element is to be found. + * + * When BKT_ONECASE is set, MCCEs become particularly messy. + * There is no guarantee that all possible combinations of + * upper/lower case are defined as MCCEs. Thus, this code + * tries both lower- and uppercase (in that order) for each + * character that might be part of an MCCE. 
+ */ + +typedef struct +{ + const unsigned char *max; /* restriction by caller */ + const unsigned char *aft; /* longest successful */ + Bracket *bp; /* readonly */ + struct lc_collate *col; /* readonly */ + const CollElem *cep; /* entry matching longest */ + wchar_t ch; /* initial character (if any) */ + w_type wc; /* character matching "aft" */ +} Mcce; + +static int +mcce(Mcce *mcp, const CollElem *cep, const unsigned char *s, int mb_cur_max, + int compile_time) +{ + const CollElem *nxt; + CollElem spare; + w_type ch, wc; + int i; + + /* + * Get next character. + */ + if ((wc = mcp->ch) != '\0') + { + mcp->ch = '\0'; + } + else if (ISONEBYTE(wc = *s++)) + { + if (wc == '\0') + return 0; + } + else if ((i = libuxre_mb2wc(&wc, s)) > 0) + { + s += i; + if (mcp->max != 0 && s > mcp->max) + return 0; + } + else if (i < 0) + return BKT_ILLSEQ; + /* + * Try out this character as part of an MCCE. + * If BKT_ONECASE is set, this code tries both the lower- and + * uppercase version, continuing if it matches so far. + */ + ch = wc; + if (mcp->bp->flags & BKT_ONECASE) + { + if ((wc = to_lower(wc)) == ch) + ch = to_upper(wc); + } + for (;;) /* at most twice */ + { + if (cep == ELEM_BADCHAR) /* first character */ + { + if ((nxt = libuxre_collelem(mcp->col, &spare, wc)) + == ELEM_ENCODED + || (mcp->col->flags & CHF_MULTICH) == 0 + || s == mcp->max) + { + mcp->aft = s; + mcp->cep = nxt; + mcp->wc = wc; + break; + } + } + else + { + nxt = libuxre_collmult(mcp->col, cep, wc); + } + if (nxt != ELEM_BADCHAR) + { + /* + * Okay so far. Record this collating element + * if it's really one (not WGHT_IGNORE) and + * we've reached a new high point or it's the + * first match. + * + * If there's a possibility for more, call mcce() + * recursively for the subsequent characters. 
+ */ + if (nxt->weight[0] != WGHT_IGNORE + && (mcp->aft < s || mcp->cep == ELEM_BADCHAR)) + { + mcp->aft = s; + mcp->cep = nxt; + mcp->wc = wc; + } + if (nxt->multbeg != 0 + && (mcp->max == 0 || s < mcp->max)) + { + if ((i = mcce(mcp, nxt, s, mb_cur_max, + compile_time)) != 0) + return i; + } + } + if (wc == ch) + break; + wc = ch; + } + return 0; +} + +static w_type +eqcls(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max) +{ + w_type last; + Mcce mcbuf; + int err; + + mcbuf.max = &s[n]; + mcbuf.aft = &s[0]; + mcbuf.bp = bp; + mcbuf.col = bp->col; + mcbuf.cep = ELEM_BADCHAR; + mcbuf.ch = '\0'; + if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0) + return err; + if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max) + return BKT_EEQUIV; + last = mcbuf.wc; + if (mcbuf.cep != ELEM_ENCODED && mcbuf.col->nweight > 1) + { + const CollElem *cep; + + /* + * The first and last weight[0] values for equivalence + * classes are stuffed into the terminator for the + * multiple character lists. If these values are + * scattered (elements that are not part of this + * equivalence class have weight[0] values between the + * two end points), then SUBN_SPECIAL is placed in + * this terminator. Note that weight[1] of the + * terminator must be other than WGHT_IGNORE, too. + */ + last = mcbuf.cep->weight[0]; + if ((cep = libuxre_collmult(bp->col, mcbuf.cep, 0)) + != ELEM_BADCHAR + && cep->weight[1] != WGHT_IGNORE) + { + last = cep->weight[1]; + if (cep->subnbeg == SUBN_SPECIAL) + { + unsigned int nq; + + /* + * Permit ranges up to the first and + * after the last. + */ + if (prev > 0 && prev != cep->weight[0] + && (prev = addrange(bp, + cep->weight[0], prev)) != 0) + { + return prev; + } + /* + * Record the equivalence class by storing + * the primary weight. 
+ */ + if ((nq = bp->nquiv) < NQUIV) + bp->quiv[nq] = mcbuf.cep->weight[1]; + else + { + if (nq % NQUIV == 0) + { + wuchar_type *grown; + + /* + * Grow through a temporary so the old + * list is not lost if realloc() fails, + * and report the failure as a negative + * BKT_* value: a positive REG_ESPACE + * would be taken by the caller for a + * valid range end point. + */ + if ((grown = realloc(bp->exquiv, + nq * sizeof(wuchar_type))) + == 0) + { + return BKT_ESPACE; + } + bp->exquiv = grown; + } + nq -= NQUIV; + bp->exquiv[nq] = mcbuf.cep->weight[1]; + } + bp->nquiv++; + return last; + } + mcbuf.cep = cep; + } + mcbuf.wc = mcbuf.cep->weight[0]; + } + /* + * Determine range, if any, to install. + * + * If there's a pending low (prev > 0), then try to use it. + * + * Otherwise, try to use mcbuf.wc as the low end of the range. + * Since addrange() assumes that the low point has already been + * placed, we try to fool it by using a prev of one less than + * mcbuf.wc. But, if that value would not look like a valid + * low point of a range, we have to explicitly place mcbuf.wc. + */ + if (prev <= 0 && (prev = mcbuf.wc - 1) <= 0) + { + if ((prev = addrange(bp, mcbuf.wc, 0)) != 0) + return prev; + } + if ((mcbuf.wc = addrange(bp, last, prev)) != 0) + return mcbuf.wc; + return last; +} + +static w_type +clsym(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max) +{ + Mcce mcbuf; + int err; + + mcbuf.max = &s[n]; + mcbuf.aft = &s[0]; + mcbuf.bp = bp; + mcbuf.col = bp->col; + mcbuf.cep = ELEM_BADCHAR; + mcbuf.ch = '\0'; + if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0) + return err; + if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max) + return BKT_ECOLLATE; + if (mcbuf.cep != ELEM_ENCODED) + mcbuf.wc = mcbuf.cep->weight[0]; + if ((err = addrange(bp, mcbuf.wc, prev)) != 0) + return err; + return mcbuf.wc; +} + + /* + * Scans the rest of a bracket construction within a regular + * expression and fills in a description for it. + * The leading [ and the optional set complement indicator + * were handled already by the caller. + * Returns: + * <0 error (a BKT_* value) + * >0 success; equals how many bytes were scanned. 
+ */ +LIBUXRE_STATIC int +libuxre_bktmbcomp(Bracket *bp, const unsigned char *pat0, + int flags, int mb_cur_max) +{ + static const Bracket zero = {0}; + const unsigned char *pat = pat0; + struct lc_collate *savecol; + w_type n, wc, prev = 0; + + /* + * Set represented set to empty. Easiest to copy an empty + * version over the caller's, (re)setting col and flags. + */ + savecol = bp->col; + *bp = zero; + bp->col = savecol; + bp->flags = flags + & (BKT_NEGATED | BKT_ONECASE | BKT_NOTNL | BKT_BADRANGE | + BKT_ODDRANGE); + /* + * Handle optional "empty" brackets; typically only used + * in combination with BKT_QUOTE or BKT_ESCAPE. + */ + if ((wc = *pat) == ']' && (flags & BKT_EMPTY) != 0) + return 1; + /* + * Populate *bp. + */ + for (;; prev = n) + { + switch (wc) + { + case '\0': + ebrack:; + n = BKT_EBRACK; + goto err; + case '\n': + if (flags & BKT_NLBAD) + goto ebrack; + goto regular; + case '/': + if (flags & BKT_SLASHBAD) + goto ebrack; + goto regular; + case '\\': + if ((flags & (BKT_ESCAPE | BKT_QUOTE + | BKT_ESCNL | BKT_ESCSEQ)) == 0) + { + goto regular; + } + switch (wc = *++pat) + { + default: + noesc:; + if ((flags & BKT_ESCAPE) == 0) + { + wc = '\\'; + pat--; + } + break; + case '\\': + case ']': + case '-': + case '^': + if ((flags & BKT_QUOTE) == 0) + goto noesc; + break; + case 'a': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & BKT_OLDESC)) + goto noesc; + wc = '\a'; + break; + case 'b': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\b'; + break; + case 'f': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\f'; + break; + case 'n': + if ((flags & (BKT_ESCSEQ | BKT_ESCNL)) == 0) + goto noesc; + wc = '\n'; + break; + case 'r': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\r'; + break; + case 't': + if ((flags & BKT_ESCSEQ) == 0) + goto noesc; + wc = '\t'; + break; + case 'v': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & BKT_OLDESC)) + goto noesc; + wc = '\v'; + break; + case 'x': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & 
BKT_OLDESC)) + goto noesc; + if (!isxdigit(wc = *++pat)) + { + pat--; + goto noesc; + } + /* + * Take as many hex digits as possible, + * ignoring overflows. + * Any positive result is okay. + * Letters stand for 10..15, so the + * amount subtracted is the letter base + * minus 10 (subtracting base plus 10 + * would yield negative digit values and + * reject escapes such as \x1b). + */ + n = 0; + do + { + if (isdigit(wc)) + wc -= '0'; + else if (isupper(wc)) + wc -= 'A' - 10; + else + wc -= 'a' - 10; + n <<= 4; + n |= wc; + } while (isxdigit(wc = *++pat)); + pat--; + if ((wc = n) <= 0) + { + n = BKT_BADESC; + goto err; + } + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if ((flags & BKT_ESCSEQ) == 0 || + (flags & BKT_OLDESC)) + goto noesc; + /* + * For compatibility (w/awk), + * permit "octal" 8 and 9. + */ + n = wc - '0'; + if ((wc = *++pat) >= '0' && wc <= '9') + { + n <<= 3; + n += wc - '0'; + if ((wc = *++pat) >= '0' && wc <= '9') + { + n <<= 3; + n += wc - '0'; + } + } + pat--; + if ((wc = n) <= 0) + { + n = BKT_BADESC; + goto err; + } + break; + } + goto regular; + case '[': + if (((wc = *++pat) == ':' || wc == '=' || wc == '.') && + (flags & BKT_NOI18N) == 0) + { + n = 0; + while (*++pat != wc || pat[1] != ']') + { + if (*pat == '\0') + { + badpat:; + n = BKT_BADPAT; + goto err; + } + else if (*pat == '/') + { + if (flags & BKT_SLASHBAD) + goto badpat; + } + else if (*pat == '\n') + { + if (flags & BKT_NLBAD) + goto badpat; + } + n++; + } + if (n == 0) + { + n = BKT_EMPTYSUBBKT; + goto err; + } + if (wc == ':') + n = chcls(bp, &pat[-n], n); + else if (wc == '=') + n = eqcls(bp, &pat[-n], n, prev, + mb_cur_max); + else /* wc == '.' 
*/ + n = clsym(bp, &pat[-n], n, prev, + mb_cur_max); + pat++; + break; + } + wc = '['; + pat--; + goto regular; + default: + if (!ISONEBYTE(wc) && + (n = libuxre_mb2wc(&wc, pat + 1)) > 0) + pat += n; + regular:; + n = place(bp, wc, prev, mb_cur_max); + break; + } + if (n < 0) { + n = BKT_ILLSEQ; + goto err; + } + if ((wc = *++pat) == ']') + break; + if (wc == '-' && n != 0) + { + if (prev == 0 || (flags & BKT_SEPRANGE) == 0) + { + if ((wc = *++pat) != ']') + continue; /* valid range */ + wc = '-'; + pat--; + } + } + n = 0; /* no range this time */ + } + return pat - pat0 + 1; +err:; + libuxre_bktfree(bp); + return n; +} + +LIBUXRE_STATIC void +libuxre_bktfree(Bracket *bp) +{ + if (bp->extype != 0) + free(bp->extype); + if (bp->exquiv != 0) + free(bp->exquiv); + if (bp->exwide != 0) + free(bp->exwide); +} + +LIBUXRE_STATIC int +libuxre_bktmbexec(Bracket *bp, wchar_t wc, + const unsigned char *str, int mb_cur_max) +{ + unsigned int i; + wchar_t lc, uc; + Mcce mcbuf; + + mcbuf.aft = str; /* in case of match in character classes */ + mcbuf.ch = wc; + /* + * First: check the single wc against any character classes. + * Since multiple character collating elements are not part + * of this world, they don't apply here. + */ + if ((i = bp->ntype) != 0) + { + wctype_t *wctp = &bp->type[0]; + + if (bp->flags & BKT_ONECASE) + { + if ((wc = to_lower(wc)) == mcbuf.ch) + mcbuf.ch = to_upper(wc); + } + for (;;) + { + if (iswctype(mb_cur_max==1?btowc(wc):wc, *wctp)) + goto match; + if (wc != mcbuf.ch && + iswctype(mb_cur_max==1?btowc(mcbuf.ch):mcbuf.ch, + *wctp)) + goto match; + if (--i == 0) + break; + if (++wctp == &bp->type[NTYPE]) + wctp = &bp->extype[0]; + } + } + /* + * The main match is determined by the weight[0] value + * of the character (or characters, if the input can be + * taken as a multiple character collating element). 
+ */ + mcbuf.max = 0; + mcbuf.bp = bp; + mcbuf.col = bp->col; + mcbuf.cep = ELEM_BADCHAR; + mcce(&mcbuf, ELEM_BADCHAR, str, mb_cur_max, 0); + if (mcbuf.cep == ELEM_BADCHAR) + return -1; /* never matches */ + if (mcbuf.cep != ELEM_ENCODED) + mcbuf.wc = mcbuf.cep->weight[0]; + /* + * POSIX.2 demands that both a character and its case counterpart + * can match if REG_ICASE is set. This means that [B-z] matches + * 'A', 'a', and '['. + */ + if (bp->flags & BKT_ONECASE) + { + lc = to_lower(mcbuf.wc); + uc = to_upper(mcbuf.wc); + } + else + lc = uc = mcbuf.wc; + /* + * See if it's in the set. Note that the list of true wide + * character values has explicit ranges. + */ + if (mcbuf.wc <= UCHAR_MAX) + { + if (bp->byte[PLIND(lc)] & PLBIT(lc)) + goto match; + if (lc != uc && (bp->byte[PLIND(uc)] & PLBIT(uc))) + goto match; + } + else if ((i = bp->nwide) != 0) + { + wchar_t *wcp = &bp->wide[0]; + long lcmp, ucmp; + + for (;;) + { + if ((lcmp = lc - *wcp) == 0) + goto match; + ucmp = uc - *wcp; + if (lc != uc && ucmp == 0) + goto match; + if (--i == 0) + break; + if (++wcp == &bp->wide[NWIDE]) + wcp = &bp->exwide[0]; + if (*wcp == RANGE) + { + if (++wcp == &bp->wide[NWIDE]) + wcp = &bp->exwide[0]; + /* + * The high end point belongs to the + * range for both case variants; it is + * skipped below, so it is never tested + * for equality on its own. + */ + if (lcmp > 0 && lc <= *wcp) + goto match; + if (lc != uc && ucmp > 0 && uc <= *wcp) + goto match; + if ((i -= 2) == 0) + break; + if (++wcp == &bp->wide[NWIDE]) + wcp = &bp->exwide[0]; + } + } + } + /* + * The last chance for a match is if an equivalence class + * was specified for which the primary weights are scattered + * through the weight[0]s. + */ + if ((i = bp->nquiv) != 0 && mcbuf.cep != ELEM_ENCODED) + { + wuchar_type *wucp = &bp->quiv[0]; + + mcbuf.wc = mcbuf.cep->weight[1]; + for (;;) + { + if (mcbuf.wc == *wucp) + goto match; + if (--i == 0) + break; + if (++wucp == &bp->quiv[NQUIV]) + wucp = &bp->exquiv[0]; + } + } + /* + * Only here when no match against the set was found. + * One final special case w/r/t newline. 
+ */ + if (bp->flags & BKT_NEGATED) + { + if (wc != '\n' || (bp->flags & BKT_NOTNL) == 0) + return mcbuf.aft - str; + } + return -1; +match:; + /* + * Only here when a match against the described set is found. + */ + if (bp->flags & BKT_NEGATED) + return -1; + return mcbuf.aft - str; +} diff --git a/libuxre/colldata.h b/libuxre/colldata.h @@ -0,0 +1,226 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_COLLDATA_H +#define LIBUXRE_COLLDATA_H + +typedef struct +{ + long coll_offst; /* offset to xnd table */ + long sub_cnt; /* length of subnd table */ + long sub_offst; /* offset to subnd table */ + long str_offst; /* offset to strings for subnd table */ + long flags; /* nonzero if reg.exp. 
used */ +} hd; + +typedef struct +{ + unsigned char ch; /* character or number of followers */ + unsigned char pwt; /* primary weight */ + unsigned char swt; /* secondary weight */ + unsigned char ns; /* index of follower state list */ +} xnd; + +typedef struct +{ + char *exp; /* expression to be replaced */ + long explen; /* length of expression */ + char *repl; /* replacement string */ +} subnd; + +/*----------------------------------*/ + +#include <wcharm.h> +#include <limits.h> +/* #include <stdlock.h> */ + +/* +* Structure of a collation file: +* 1. CollHead (maintbl is 0 if CHF_ENCODED) +* if !CHF_ENCODED then +* 2. CollElem[bytes] (256 for 8 bit bytes) +* 3. if CHF_INDEXED then +* CollElem[wides] (nmain-256 for 8 bit bytes) +* else +* CollMult[wides] +* 4. CollMult[*] (none if multtbl is 0) +* 5. wuchar_type[*] (none if repltbl is 0) +* 6. CollSubn[*] (none if subntbl is 0) +* 7. strings (first is pathname for .so if CHF_DYNAMIC) +* +* The actual location of parts 2 through 7 is not important. +* +* The main table is in encoded value order. +* +* All indeces/offsets must be nonzero to be effective; zero is reserved +* to indicate no-such-entry. This implies either that an unused initial +* entry is placed in each of (4) through (7), or that the "start offset" +* given by the header is artificially pushed back by an entry size. +* +* Note that if CHF_ENCODED is not set, then nweight must be positive. +* +* If an element can begin a multiple character element, it contains a +* nonzero multbeg which is the initial index into (4) for its list; +* the list is terminated by a CollMult with a ch of zero. +* +* If there are elements with the same primary weight (weight[1]), then +* for each such element, it must have a CollMult list. The CollMult +* that terminates the list (ch==0) notes the lowest and highest basic +* weights for those elements with that same primary weight value +* respectively in weight[0] and weight[1]. 
If there are some basic +* weights between these values that do not have the same primary +* weight--are not in the equivalence class--then the terminator also +* has a SUBN_SPECIAL mark. Note that this list terminator should be +* shared when the elements are not multiple character collating +* elements because they wouldn't otherwise have a CollMult list. +* +* WGHT_IGNORE is used to denote ignored collating elements for a +* particular collation ordering pass. All main table entries other +* than for '\0' will have a non-WGHT_IGNORE weight[0]. However, it is +* possible for a CollMult entries from (4) to have a WGHT_IGNORE +* weight[0]: If, for example, "xyz" is a multiple character collating +* element, but "xy" is not, then the CollMult for "y" will have a +* WGHT_IGNORE weight[0]. Also, WGHT_IGNORE is used to terminate each +* list of replacement weights. +* +* Within (3), it is possible to describe a sequence of unremarkable +* collating elements with a single CollMult entry. If the SUBN_SPECIAL +* bit is set, the rest of subnbeg represents the number of collating +* elements covered by this entry. The weight[0] values are determined +* by adding the difference between the encoded value and the entry's ch +* value to the entry's weight[0]. This value is then substituted for +* any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem() +* hides any match to such an entry by filling in a "spare" CollElem. +* +* If there are substitution strings, then for each character that begins +* a string, it has a nonzero subnbeg which is similarly the initial +* index into (6). The indeces in (6) refer to offsets within (7). +*/ + +#define TOPBIT(t) (((t)1) << (sizeof(t) * CHAR_BIT - 1)) + +#define CHF_ENCODED 0x1 /* collation by encoded values only */ +#define CHF_INDEXED 0x2 /* main table indexed by encoded values */ +#define CHF_MULTICH 0x4 /* a multiple char. coll. elem. 
exists */ +#define CHF_DYNAMIC 0x8 /* shared object has collation functions */ + +#define CWF_BACKWARD 0x1 /* reversed ordering for this weight */ +#define CWF_POSITION 0x2 /* weight takes position into account */ + +#define CLVERS 1 /* most recent version */ + +#define WGHT_IGNORE 0 /* ignore this collating element */ +#define WGHT_SPECIAL TOPBIT(wuchar_type) +#define SUBN_SPECIAL TOPBIT(unsigned short) + +#ifndef COLL_WEIGHTS_MAX +#define COLL_WEIGHTS_MAX 1 +#endif + +typedef struct +{ + unsigned long maintbl; /* start of main table */ + unsigned long multtbl; /* start of multi-char table */ + unsigned long repltbl; /* start of replacement weights */ + unsigned long subntbl; /* start of substitutions */ + unsigned long strstbl; /* start of sub. strings */ + unsigned long nmain; /* # entries in main table */ + unsigned short flags; /* CHF_* bits */ + unsigned short version; /* handle future changes */ + unsigned char elemsize; /* # bytes/element (w/padding) */ + unsigned char nweight; /* # weights/element */ + unsigned char order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */ +} CollHead; + +typedef struct +{ + unsigned short multbeg; /* start of multi-chars */ + unsigned short subnbeg; /* start of substitutions */ + wuchar_type weight[COLL_WEIGHTS_MAX]; +} CollElem; + +typedef struct +{ + wchar_t ch; /* "this" character (of sequence) */ + CollElem elem; /* its full information */ +} CollMult; + +typedef struct +{ + unsigned short strbeg; /* start of match string */ + unsigned short length; /* length of match string */ + unsigned short repbeg; /* start of replacement */ +} CollSubn; + +struct lc_collate +{ + const unsigned char *strstbl; + const wuchar_type *repltbl; + const CollElem *maintbl; + const CollMult *multtbl; + const CollSubn *subntbl; +#ifdef DSHLIB + void *handle; + void (*done)(struct lc_collate *); + int (*strc)(struct lc_collate *, const char *, const char *); + int (*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *); + size_t (*strx)(struct 
lc_collate *, char *, const char *, size_t); + size_t (*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t); +#endif + const char *mapobj; + size_t mapsize; + unsigned long nmain; + short nuse; + unsigned short flags; + unsigned char elemsize; + unsigned char nweight; + unsigned char order[COLL_WEIGHTS_MAX]; +}; + +#define ELEM_BADCHAR ((CollElem *)0) +#define ELEM_ENCODED ((CollElem *)-1) + +/* +LIBUXRE_STATIC int libuxre_old_collate(struct lc_collate *); +LIBUXRE_STATIC int libuxre_strqcoll(struct lc_collate *, const char *, + const char *); +LIBUXRE_STATIC int libuxre_wcsqcoll(struct lc_collate *, const wchar_t *, + const wchar_t *); +*/ +extern struct lc_collate *libuxre_lc_collate(struct lc_collate *); +LIBUXRE_STATIC const CollElem *libuxre_collelem(struct lc_collate *, + CollElem *, wchar_t); +LIBUXRE_STATIC const CollElem *libuxre_collmult(struct lc_collate *, + const CollElem *, wchar_t); +/* +LIBUXRE_STATIC const CollElem *libuxre_collmbs(struct lc_collate *, + CollElem *, const unsigned char **); +LIBUXRE_STATIC const CollElem *libuxre_collwcs(struct lc_collate *, + CollElem *, const wchar_t **); +*/ + +#endif /* !LIBUXRE_COLLDATA_H */ diff --git a/libuxre/depsinc.mk b/libuxre/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libuxre_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libuxre_DEPDIR -luxre diff --git a/libuxre/mkfile b/libuxre/mkfile @@ -0,0 +1,19 @@ +LIB = libuxre.a +LOBJ = onefile.o regfree.o regerror.o +LOCAL_CFLAGS = -I. 
+ +<$mkbuild/mk.default + +_collelem.o: colldata.h re.h regex.h wcharm.h +_collmult.o: colldata.h re.h regex.h wcharm.h +bracket.o: colldata.h re.h regex.h wcharm.h +regcomp.o: colldata.h re.h regex.h wcharm.h +regdfa.o: colldata.h regdfa.h re.h regex.h wcharm.h +regerror.o: colldata.h re.h regex.h wcharm.h +regexec.o: colldata.h re.h regex.h wcharm.h +regfree.o: colldata.h re.h regex.h wcharm.h +regnfa.o: colldata.h re.h regex.h wcharm.h +regparse.o: colldata.h re.h regex.h wcharm.h +stubs.o: colldata.h wcharm.h +onefile.o: _collelem.c _collmult.c bracket.c regcomp.c regdfa.c regexec.c +onefile.o: regfree.c regnfa.c regparse.c stubs.c diff --git a/libuxre/onefile.c b/libuxre/onefile.c @@ -0,0 +1,38 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)onefile.c 1.1 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#define LIBUXRE_STATIC static + +#include "_collelem.c" +#include "_collmult.c" +#include "stubs.c" +#include "bracket.c" +#include "regdfa.c" +#include "regnfa.c" +#include "regparse.c" +#include "regcomp.c" +#include "regexec.c" diff --git a/libuxre/re.h b/libuxre/re.h @@ -0,0 +1,228 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)re.h 1.15 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_RE_H +#define LIBUXRE_RE_H + + /* + * Maps safe external tag to internal one + */ +#define re_coll_ lc_collate /* <regex.h> */ +/* #define __fnm_collate lc_collate */ /* <fnmatch.h> */ + +#include <limits.h> +#include <regex.h> +/* #include <fnmatch.h> */ +#include <colldata.h> + +#define NBSHT (sizeof(unsigned short) * CHAR_BIT) +#define NBYTE (((1 << CHAR_BIT) + NBSHT - 1) / NBSHT) +#define NTYPE 4 +#define NWIDE 32 +#define NQUIV 4 + +typedef struct +{ + struct lc_collate *col; /* only member set by caller */ + wctype_t *extype; + wuchar_type *exquiv; + wchar_t *exwide; + wctype_t type[NTYPE]; + wuchar_type quiv[NQUIV]; + wchar_t wide[NWIDE]; + unsigned short byte[NBYTE]; + unsigned short ntype; + unsigned short nquiv; + unsigned short nwide; + unsigned int flags; +} Bracket; + +#define BKT_NEGATED 0x001 /* complemented set */ +#define BKT_ONECASE 0x002 /* uppercase same as lowercase */ +#define BKT_NOTNL 0x004 /* do not match newline when BKT_NEGATED */ +#define BKT_BADRANGE 0x008 /* accept [m-a] ranges as [ma] */ +#define BKT_SEPRANGE 0x010 /* disallow [a-m-z] style ranges */ +#define BKT_NLBAD 0x020 /* newline disallowed */ +#define BKT_SLASHBAD 0x040 /* slash disallowed (for pathnames) */ +#define BKT_EMPTY 0x080 /* take leading ] is end (empty set) */ +#define BKT_ESCAPE 0x100 /* allow \ as quote for next anything */ +#define BKT_QUOTE 0x200 /* allow \ as quote for \\, \^, \- or \] */ +#define BKT_ESCNL 0x400 /* take \n as the newline character */ +#define BKT_ESCSEQ 0x800 /* otherwise, take \ as in C escapes */ +#define BKT_ODDRANGE 0x1000 /* oawk oddity: [m-a] means [m] */ +#define BKT_NOI18N 0x2000 /* disable [::] [==] [..] */ +#define BKT_OLDESC 0x4000 /* enable \b \f \n \r \t only */ + + /* + * These error returns for libuxre_bktmbcomp() are directly tied to + * the error returns for regcomp() for convenience. 
+ */ +#define BKT_BADPAT (-REG_BADPAT) +#define BKT_ECOLLATE (-REG_ECOLLATE) +#define BKT_ECTYPE (-REG_ECTYPE) +#define BKT_EEQUIV (-REG_EEQUIV) +#define BKT_BADCHAR (-REG_EBKTCHAR) +#define BKT_EBRACK (-REG_EBRACK) +#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT) +#define BKT_ERANGE (-REG_ERANGE) +#define BKT_ESPACE (-REG_ESPACE) +#define BKT_BADESC (-REG_BADESC) +#define BKT_ILLSEQ (-REG_ILLSEQ) + + /* + * These must be distinct from the flags in <fnmatch.h>. + */ +#define FNM_COLLATE 0x2000 /* have collation information */ +#define FNM_CURRENT 0x4000 /* have full-sized fnm_t structure */ + + /* + * These must be distinct from the flags in <regex.h>. + */ +#define REG_NFA 0x20000000 +#define REG_DFA 0x40000000 +#define REG_GOTBKT 0x80000000 + +#define BRACE_INF USHRT_MAX +#define BRACE_MAX 5100 /* arbitrary number < SHRT_MAX */ +#define BRACE_DFAMAX 255 /* max amount for r.e. duplication */ + +typedef union /* extra info always kept for some tokens/nodes */ +{ + Bracket *bkt; /* ROP_BKT */ + size_t sub; /* ROP_LP (ROP_RP), ROP_REF */ + unsigned short num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */ +} Info; + +typedef struct /* lexical context while parsing */ +{ + Info info; + const unsigned char *pat; + unsigned char *clist; + struct lc_collate *col; + unsigned long flags; + w_type tok; + size_t maxref; + size_t nleft; + size_t nright; + size_t nclist; + int bktflags; + int err; + int mb_cur_max; +} Lex; + +typedef struct t_tree Tree; /* RE parse tree node */ +struct t_tree +{ + union + { + Tree *ptr; /* unary & binary nodes */ + size_t pos; /* position for DFA leaves */ + } left; + union + { + Tree *ptr; /* binary nodes */ + Info info; + } right; + Tree *parent; + w_type op; /* positive => char. 
to match */ +}; + +typedef struct re_dfa_ Dfa; /* DFA engine description */ +typedef struct re_nfa_ Nfa; /* NFA engine description */ + +typedef struct +{ + const unsigned char *str; + regmatch_t *match; + size_t nmatch; + unsigned long flags; + int mb_cur_max; +} Exec; + + /* + * Regular expression operators. Some only used internally. + * All are negative, to distinguish them from the regular + * "match this particular wide character" operation. + */ +#define BINARY_ROP 0x02 +#define UNARY_ROP 0x01 +#define LEAF_ROP 0x00 + +#define MAKE_ROP(k, v) (-((v) | ((k) << 4))) +#define KIND_ROP(v) ((-(v)) >> 4) + +#define ROP_OR MAKE_ROP(BINARY_ROP, 1) +#define ROP_CAT MAKE_ROP(BINARY_ROP, 2) + +#define ROP_STAR MAKE_ROP(UNARY_ROP, 1) +#define ROP_PLUS MAKE_ROP(UNARY_ROP, 2) +#define ROP_QUEST MAKE_ROP(UNARY_ROP, 3) +#define ROP_BRACE MAKE_ROP(UNARY_ROP, 4) +#define ROP_LP MAKE_ROP(UNARY_ROP, 5) +#define ROP_RP MAKE_ROP(UNARY_ROP, 6) + +#define ROP_NOP MAKE_ROP(LEAF_ROP, 1) /* temporary */ +#define ROP_BOL MAKE_ROP(LEAF_ROP, 2) /* ^ anchor */ +#define ROP_EOL MAKE_ROP(LEAF_ROP, 3) /* $ anchor */ +#define ROP_ALL MAKE_ROP(LEAF_ROP, 4) /* anything (added) */ +#define ROP_ANYCH MAKE_ROP(LEAF_ROP, 5) /* . w/\n */ +#define ROP_NOTNL MAKE_ROP(LEAF_ROP, 6) /* . w/out \n */ +#define ROP_EMPTY MAKE_ROP(LEAF_ROP, 7) /* empty string */ +#define ROP_NONE MAKE_ROP(LEAF_ROP, 8) /* match failure */ +#define ROP_BKT MAKE_ROP(LEAF_ROP, 9) /* [...] */ +#define ROP_BKTCOPY MAKE_ROP(LEAF_ROP, 10) /* [...] 
(duplicated) */ +#define ROP_LT MAKE_ROP(LEAF_ROP, 11) /* \< word begin */ +#define ROP_GT MAKE_ROP(LEAF_ROP, 12) /* \> word end */ +#define ROP_REF MAKE_ROP(LEAF_ROP, 13) /* \digit */ +#define ROP_END MAKE_ROP(LEAF_ROP, 14) /* final (added) */ + + /* + * Return values: + * libuxre_bktmbcomp() + * <0 error (see BKT_* above); >0 #bytes scanned + * libuxre_bktmbexec() + * <0 doesn't match; >=0 matches, #extra bytes scanned + */ +LIBUXRE_STATIC void libuxre_bktfree(Bracket *); +LIBUXRE_STATIC int libuxre_bktmbcomp(Bracket *, const unsigned char *, + int, int); +LIBUXRE_STATIC int libuxre_bktmbexec(Bracket *, wchar_t, + const unsigned char *, int); + +LIBUXRE_STATIC void libuxre_regdeltree(Tree *, int); +LIBUXRE_STATIC Tree *libuxre_reg1tree(w_type, Tree *); +LIBUXRE_STATIC Tree *libuxre_reg2tree(w_type, Tree *, Tree *); +LIBUXRE_STATIC Tree *libuxre_regparse(Lex *, const unsigned char *, int); + +extern void libuxre_regdeldfa(Dfa *); +LIBUXRE_STATIC int libuxre_regdfacomp(regex_t *, Tree *, Lex *); +LIBUXRE_STATIC int libuxre_regdfaexec(Dfa *, Exec *); + +extern void libuxre_regdelnfa(Nfa *); +LIBUXRE_STATIC int libuxre_regnfacomp(regex_t *, Tree *, Lex *); +LIBUXRE_STATIC int libuxre_regnfaexec(Nfa *, Exec *); +#endif /* !LIBUXRE_RE_H */ diff --git a/libuxre/regcomp.c b/libuxre/regcomp.c @@ -0,0 +1,77 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regcomp.c 1.6 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regcomp = _regcomp */ + +int +regcomp(regex_t *ep, const char *pat, int flags) +{ + Tree *tp; + Lex lex; + + if ((tp=libuxre_regparse(&lex, (const unsigned char *)pat, flags)) == 0) + goto out; + ep->re_nsub = lex.nleft; + ep->re_flags = lex.flags & ~(REG_NOTBOL | REG_NOTEOL | REG_NONEMPTY); + ep->re_col = lex.col; + ep->re_mb_cur_max = lex.mb_cur_max; + /* + * Build the engine(s). The factors determining which are built: + * 1. If the pattern built insists on an NFA, then only build NFA. + * 2. If flags include REG_NOSUB or REG_ONESUB and not (1), + * then only build DFA. + * 3. Otherwise, build both. + * Since libuxre_regdfacomp() modifies the tree and libuxre_regnfacomp() + * doesn't, libuxre_regnfacomp() must be called first, if both are to + * be called. + */ + if (ep->re_nsub != 0 && (flags & (REG_NOSUB | REG_ONESUB)) == 0 + || lex.flags & REG_NFA) + { + ep->re_flags |= REG_NFA; + if ((lex.err = libuxre_regnfacomp(ep, tp, &lex)) != 0) + goto out; + } + if ((lex.flags & REG_NFA) == 0) + { + ep->re_flags |= REG_DFA; + if ((lex.err = libuxre_regdfacomp(ep, tp, &lex)) != 0) + { + if (ep->re_flags & REG_NFA) + libuxre_regdelnfa(ep->re_nfa); + } + } +out:; + if (lex.err != 0 && lex.col != 0) + (void)libuxre_lc_collate(lex.col); + if (tp != 0) + libuxre_regdeltree(tp, lex.err); + return lex.err; +} diff --git a/libuxre/regdfa.c b/libuxre/regdfa.c @@ -0,0 +1,877 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. 
Br., Germany, November 2002. + * + * Sccsid @(#)regdfa.c 1.9 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include "regdfa.h" + +/* +* Deterministic Finite Automata. +*/ + + /* + * Postorder traversal that returns a copy of the subtree, + * except that ROP_BKT becomes ROP_BKTCOPY (since they + * share the same pointed to Bracket object). 
+ */ +static Tree * +copy(regex_t *ep, Tree *tp) +{ + Tree *np; + + if ((np = malloc(sizeof(Tree))) == 0) + return 0; + switch (np->op = tp->op) /* almost always correct */ + { + case ROP_BKT: + np->op = ROP_BKTCOPY; + /*FALLTHROUGH*/ + case ROP_BKTCOPY: + np->right.info.bkt = tp->right.info.bkt; + /*FALLTHROUGH*/ + default: + np->left.pos = ep->re_dfa->nposn++; + /*FALLTHROUGH*/ + case ROP_EMPTY: + return np; + case ROP_CAT: + case ROP_OR: + if ((np->right.ptr = copy(ep, tp->right.ptr)) == 0) + { + free(np); + return 0; + } + np->right.ptr->parent = np; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_LP: + if ((np->left.ptr = copy(ep, tp->left.ptr)) == 0) + break; + np->left.ptr->parent = np; + return np; + } + libuxre_regdeltree(np, 1); + return 0; +} + + /* + * Postorder traversal. + * Assign unique ascending integer values to the leaves. + * Since the right child is traversed before the left, + * the position for ROP_END is guaranteed to be zero. + * The parse tree is rewritten in two cases: + * - Each ROP_BRACE is replaced by an equivalent--sometimes + * large--subtree using only ROP_CAT, ROP_QUEST, and + * ROP_PLUS. + * - If REG_ICASE, replace each simple character that has + * an uppercase equivalent with a ROP_OR subtree over the + * two versions. + * Since these rewrites occur bottom up, they have already + * been applied before any subtrees passed to copy(). 
/*
 * findposn() numbers the leaves of the tree rooted at tp via
 * ep->re_dfa->nposn and rewrites ROP_BRACE and (under REG_ICASE)
 * single-character nodes in place.  It returns the possibly replaced
 * subtree root, or 0 if a node could not be allocated.  mb_cur_max is
 * only handed down to the recursive calls.
 */
static Tree *
findposn(regex_t *ep, Tree *tp, int mb_cur_max)
{
	unsigned int lo, hi;
	Tree *ptr, *par;
	w_type wc;

	switch (tp->op)
	{
	default:
		if (ep->re_flags & REG_ICASE
			&& (wc = to_upper(tp->op)) != tp->op)
		{
			/* turn the leaf into OR(original, uppercase) */
			if ((ptr = libuxre_reg1tree(tp->op, 0)) == 0)
				return 0;
			ptr->parent = tp;
			ptr->left.pos = ep->re_dfa->nposn++;
			tp->op = ROP_OR;
			tp->left.ptr = ptr;
			ptr = libuxre_reg1tree(wc, 0);
			if ((tp->right.ptr = ptr) == 0)
				return 0;
			ptr->parent = tp;
			ptr->left.pos = ep->re_dfa->nposn++;
			return tp;
		}
		/*FALLTHROUGH*/
	case ROP_BOL:
	case ROP_EOL:
	case ROP_ALL:
	case ROP_ANYCH:
	case ROP_NOTNL:
	case ROP_NONE:
	case ROP_BKT:
	case ROP_BKTCOPY:
	case ROP_END:
		tp->left.pos = ep->re_dfa->nposn++;
		return tp;
	case ROP_EMPTY:
		return tp;
	case ROP_OR:
	case ROP_CAT:
		if ((tp->right.ptr = findposn(ep, tp->right.ptr,
			mb_cur_max)) == 0)
			return 0;
		/*FALLTHROUGH*/
	case ROP_STAR:
	case ROP_PLUS:
	case ROP_QUEST:
	case ROP_LP:
		if ((tp->left.ptr = findposn(ep, tp->left.ptr,
			mb_cur_max)) == 0)
			return 0;
		return tp;
	case ROP_BRACE:
		if ((tp->left.ptr = findposn(ep, tp->left.ptr,
			mb_cur_max)) == 0)
			return 0;
		break;
	}
	/*
	* ROP_BRACE as is cannot be handled in a DFA.  This code
	* duplicates the ROP_BRACE subtree as a left-towering
	* series of ROP_CAT nodes, the first "lo" of which are
	* direct copies of the original subtree.  The tail of
	* the series are either some number of ROP_QUESTs over
	* copies of the original subtree, or a single ROP_PLUS
	* over a copy (when "hi" is infinity).
	*
	* All interesting cases {lo,hi}:
	*  {0,0} -> ROP_EMPTY, parsing, temporary
	*  {0,1} -> ROP_QUEST, parsing
	*  {0,2} -> CAT(QUEST(left), QUEST(copy))
	*  {0,n} -> CAT({0,n-1}, QUEST(copy))
	*  {0,}  -> ROP_STAR, parsing
	*
	*  {1,1} -> ROP_NOP, parsing, temporary
	*  {1,2} -> CAT(left, QUEST(copy))
	*  {1,n} -> CAT({1,n-1}, QUEST(copy))
	*  {1,}  -> ROP_PLUS, parsing
	*
	*  {2,2} -> CAT(left, copy)
	*  {2,n} -> CAT({2,n-1}, QUEST(copy))
	*  {2,}  -> CAT(left, PLUS(copy))
	*
	*  {3,3} -> CAT({2,2}, copy)
	*  {3,n} -> CAT({3,n-1}, QUEST(copy))
	*  {3,}  -> CAT({2,2}, PLUS(copy))
	*
	*  {n,}  -> CAT({n-1,n-1}, PLUS(copy))
	*
	* In all cases, the ROP_BRACE node is turned into the
	* left-most ROP_CAT, and a copy of its original subtree
	* is connected as the right child.  Note that the bottom-
	* up nature of this duplication guarantees that copy()
	* never sees a ROP_BRACE node.
	*/
	par = tp->parent;	/* restored on the final root below */
	lo = tp->right.info.num[0];
	hi = tp->right.info.num[1];
	if ((ptr = copy(ep, tp->left.ptr)) == 0)
		return 0;
	ptr->parent = tp;
	tp->op = ROP_CAT;
	tp->right.ptr = ptr;
	if (lo == 0)
	{
		/*
		* NOTE(review): here "hi" is left untouched, so the code
		* below appears to add hi-1 further optional copies on top
		* of the two already present, which does not obviously
		* agree with the {0,n} rows of the table above -- confirm
		* against what the parser stores for lo == 0.
		*/
		if ((tp->left.ptr = libuxre_reg1tree(ROP_QUEST, tp->left.ptr))
			== 0)
			return 0;
		tp->left.ptr->parent = tp;
	}
	else
	{
		/* from here on "hi" counts only the optional repetitions */
		if (hi == BRACE_INF || (hi -= lo) == 0)
			lo--;	/* lo > 1; no extra needed */
		/*
		* NOTE(review): the result of copy() is passed on without
		* a null check; whether libuxre_reg2tree() copes with a
		* null child is not visible here.
		*/
		while (--lo != 0)
		{
			if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
				== 0)
				return 0;
		}
	}
	if (hi == BRACE_INF)
	{
		if ((tp->right.ptr = libuxre_reg1tree(ROP_PLUS, tp->right.ptr))
			== 0)
			return 0;
		tp->right.ptr->parent = tp;
	}
	else if (hi != 0)
	{
		if ((tp->right.ptr = libuxre_reg1tree(ROP_QUEST, tp->right.ptr))
			== 0)
			return 0;
		ptr = tp->right.ptr;	/* further copies include the QUEST */
		ptr->parent = tp;
		while (--hi != 0)
		{
			if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
				== 0)
				return 0;
		}
	}
	tp->parent = par;
	return tp;
}

	/*
	* Postorder traversal, but not always entire subtree.
+ * For each leaf reachable by the empty string, add it + * to the set. Return 0 if the subtree can match empty. + */ +static int +first(Dfa *dp, Tree *tp) +{ + switch (tp->op) + { + case ROP_BOL: + if (dp->flags & REG_NOTBOL) + return 0; + break; + case ROP_EOL: + if (dp->flags & REG_NOTEOL) + return 0; + break; + case ROP_EMPTY: + return 0; + case ROP_OR: + return first(dp, tp->left.ptr) & first(dp, tp->right.ptr); + case ROP_CAT: + if (first(dp, tp->left.ptr) != 0) + return 1; + return first(dp, tp->right.ptr); + case ROP_BRACE: + if (tp->right.info.num[0] != 0 && first(dp, tp->left.ptr) != 0) + return 1; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_QUEST: + first(dp, tp->left.ptr); + return 0; + case ROP_LP: + case ROP_PLUS: + return first(dp, tp->left.ptr); + } + if (dp->posset[tp->left.pos] == 0) + { + dp->posset[tp->left.pos] = 1; + dp->nset++; + } + return 1; +} + + /* + * Walk from leaf up (most likely not to root). + * Determine follow set for the leaf by filling + * set[] with the positions reachable. + */ +static void +follow(Dfa *dp, Tree *tp) +{ + Tree *pp; + + switch ((pp = tp->parent)->op) + { + case ROP_CAT: + if (pp->left.ptr == tp && first(dp, pp->right.ptr) != 0) + break; + /*FALLTHROUGH*/ + case ROP_OR: + case ROP_QUEST: + case ROP_LP: + follow(dp, pp); + break; + case ROP_STAR: + case ROP_PLUS: + case ROP_BRACE: + first(dp, tp); + follow(dp, pp); + break; + } +} + + /* + * Postorder traversal. + * At each leaf, copy it into posn[] and assign its follow set. + * Because the left-most subtree is ROP_ALL under ROP_STAR, the + * follow set for its leaf (position dp->nposn-1) is the same + * as the initial state's signature (prior to any ROP_BOL). 
+ */ +static int +posnfoll(Dfa *dp, Tree *tp) +{ + unsigned char *s; + size_t i, n; + size_t *fp; + Posn *p; + int ret; + + switch (tp->op) + { + case ROP_OR: + case ROP_CAT: + if ((ret = posnfoll(dp, tp->right.ptr)) != 0) + return ret; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_LP: + if ((ret = posnfoll(dp, tp->left.ptr)) != 0) + return ret; + return 0; + case ROP_END: /* keeps follow() from walking above the root */ + p = &dp->posn[tp->left.pos]; + p->op = tp->op; + p->seti = 0; + p->nset = 0; + return 0; + case ROP_BKT: + case ROP_BKTCOPY: + p = &dp->posn[tp->left.pos]; + p->bkt = tp->right.info.bkt; + goto skip; + case ROP_BOL: + dp->flags |= REG_NOTBOL; /* adjacent ROP_BOLs match empty */ + break; + case ROP_EOL: + dp->flags |= REG_NOTEOL; /* adjacent ROP_EOLs match empty */ + break; + } + p = &dp->posn[tp->left.pos]; +skip:; + p->op = tp->op; + memset(dp->posset, 0, dp->nposn); + dp->nset = 0; + follow(dp, tp); + dp->flags &= ~(REG_NOTBOL | REG_NOTEOL); + fp = dp->posfoll; + if ((p->nset = dp->nset) > dp->avail) /* need more */ + { + if ((n = p->nset << 1) < dp->nposn) + n = dp->nposn; + dp->avail += n; + if ((fp = realloc(dp->posfoll, + sizeof(size_t) * (dp->avail + dp->used))) == 0) + { + return REG_ESPACE; + } + dp->posfoll = fp; + } + p->seti = dp->used; + if ((i = dp->nset) != 0) + { + dp->used += i; + dp->avail -= i; + fp += p->seti; + s = dp->posset; + n = 0; + do + { + if (*s++ != 0) + { + *fp++ = n; + if (--i == 0) + break; + } + } while (++n != dp->nposn); + } + return 0; +} + +static int +addstate(Dfa *dp) /* install state if unique; return its index */ +{ + size_t *sp, *fp; + size_t t, n, i; + int flushed; + + /* + * Compare dp->nset/dp->cursig[] against remembered states. 
+ */ + t = dp->top; + do + { + if (dp->nsig[--t] != dp->nset) + continue; + if ((n = dp->nset) != 0) + { + fp = &dp->sigfoll[dp->sigi[t]]; + sp = &dp->cursig[0]; + loop:; + if (*fp++ != *sp++) + continue; /* to the do-while */ + if (--n != 0) + goto loop; + } + return t + 1; + } while (t != 0); + /* + * Not in currently cached states; add it. + */ + flushed = 0; + if ((t = dp->top) >= CACHESZ) /* need to flush the cache */ + { + flushed = 1; + n = dp->anybol; + n = dp->sigi[n] + dp->nsig[n]; /* past invariant states */ + dp->avail += dp->used - n; + dp->used = n; + dp->top = n = dp->nfix; + memset((void *)&dp->trans, 0, sizeof(dp->trans)); + memset((void *)&dp->acc[n], 0, CACHESZ - n); + t = n; + } + dp->top++; + fp = dp->sigfoll; + if ((n = dp->nset) > dp->avail) /* grow strip */ + { + i = dp->avail + n << 1; + if ((fp = realloc(fp, sizeof(size_t) * (i + dp->used))) == 0) + return 0; + dp->avail = i; + dp->sigfoll = fp; + } + dp->acc[t] = 0; + if ((dp->nsig[t] = n) != 0) + { + sp = dp->cursig; + if (sp[0] == 0) + dp->acc[t] = 1; + dp->sigi[t] = i = dp->used; + dp->used += n; + dp->avail -= n; + fp += i; + do + *fp++ = *sp++; + while (--n != 0); + } + t++; + if (flushed) + return -t; + return t; +} + +void +libuxre_regdeldfa(Dfa *dp) +{ + Posn *pp; + size_t np; + + if (dp->posfoll != 0) + free(dp->posfoll); + if (dp->sigfoll != 0) + free(dp->sigfoll); + if (dp->cursig != 0) + free(dp->cursig); + if ((pp = dp->posn) != 0) + { + /* + * Need to walk the positions list to free any + * space used for ROP_BKTs. 
+ */ + np = dp->nposn; + do + { + if (pp->op == ROP_BKT) + { + libuxre_bktfree(pp->bkt); + free(pp->bkt); + } + } while (++pp, --np != 0); + free(dp->posn); + } + free(dp); +} + +int +regtrans(Dfa *dp, int st, w_type wc, int mb_cur_max) +{ + const unsigned char *s; + size_t *fp, *sp; + size_t i, n; + Posn *pp; + int nst; + + if ((n = dp->nsig[st]) == 0) /* dead state */ + return st + 1; /* stay here */ + memset(dp->posset, 0, dp->nposn); + dp->nset = 0; + fp = &dp->sigfoll[dp->sigi[st]]; + do + { + pp = &dp->posn[*fp]; + switch (pp->op) + { + case ROP_EOL: + if (wc == '\0' && (dp->flags & REG_NOTEOL) == 0) + break; + /*FALLTHROUGH*/ + case ROP_BOL: + default: + if (pp->op == wc) + break; + /*FALLTHROUGH*/ + case ROP_END: + case ROP_NONE: + continue; + case ROP_NOTNL: + if (wc == '\n') + continue; + /*FALLTHROUGH*/ + case ROP_ANYCH: + if (wc <= '\0') + continue; + break; + case ROP_ALL: + if (wc == '\0') + continue; + break; + case ROP_BKT: + case ROP_BKTCOPY: + /* + * Note that multiple character bracket matches + * are precluded from DFAs. (See regparse.c and + * regcomp.c.) Thus, the continuation string + * argument is not used in libuxre_bktmbexec(). + */ + if (wc > '\0' && + libuxre_bktmbexec(pp->bkt, wc, 0, mb_cur_max) == 0) + break; + continue; + } + /* + * Current character matches this position. + * For each position in its follow list, + * add that position to the new state's signature. + */ + i = pp->nset; + sp = &dp->posfoll[pp->seti]; + do + { + if (dp->posset[*sp] == 0) + { + dp->posset[*sp] = 1; + dp->nset++; + } + } while (++sp, --i != 0); + } while (++fp, --n != 0); + /* + * Move the signature (if any) into cursig[] and install it. 
+ */ + if ((i = dp->nset) != 0) + { + fp = dp->cursig; + s = dp->posset; + for (n = 0;; n++) + { + if (*s++ != 0) + { + *fp++ = n; + if (--i == 0) + break; + } + } + } + if ((nst = addstate(dp)) < 0) /* flushed cache */ + nst = -nst; + else if (nst > 0 && (wc & ~(long)(NCHAR - 1)) == 0) + dp->trans[st][wc] = nst; + return nst; +} + +LIBUXRE_STATIC int +libuxre_regdfacomp(regex_t *ep, Tree *tp, Lex *lxp) +{ + Tree *lp; + Dfa *dp; + Posn *p; + int st; + + /* + * It's convenient to insert an STAR(ALL) subtree to the + * immediate left of the current tree. This makes the + * "any match" libuxre_regdfaexec() not a special case, + * and the initial state signature will fall out when + * building the follow sets for all the leaves. + */ + if ((lp = libuxre_reg1tree(ROP_ALL, 0)) == 0 + || (lp = libuxre_reg1tree(ROP_STAR, lp)) == 0 + || (tp->left.ptr = lp + = libuxre_reg2tree(ROP_CAT, lp, tp->left.ptr)) == 0) + { + return REG_ESPACE; + } + lp->parent = tp; + if ((dp = calloc(1, sizeof(Dfa))) == 0) + return REG_ESPACE; + ep->re_dfa = dp; + /* + * Just in case null pointers aren't just all bits zero... + */ + dp->posfoll = 0; + dp->sigfoll = 0; + dp->cursig = 0; + dp->posn = 0; + /* + * Assign position values to each of the tree's leaves + * (the important parts), meanwhile potentially rewriting + * the parse tree so that it fits within the restrictions + * of our DFA. + */ + if ((tp = findposn(ep, tp, lxp->mb_cur_max)) == 0) + goto err; + /* + * Get space for the array of positions and current set, + * now that the number of positions is known. + */ + if ((dp->posn = malloc(sizeof(Posn) * dp->nposn + dp->nposn)) == 0) + goto err; + dp->posset = (unsigned char *)&dp->posn[dp->nposn]; + /* + * Get follow sets for each position. + */ + if (posnfoll(dp, tp) != 0) + goto err; + /* + * Set up the special invariant states: + * - dead state (no valid transitions); index 0. + * - initial state for any match [STAR(ALL) follow set]; index 1. 
+ * - initial state for any match after ROP_BOL. + * - initial state for left-most longest if REG_NOTBOL. + * - initial state for left-most longest after ROP_BOL. + * The final two are not allocated if leftmost() cannot be called. + * The pairs of initial states are the same if there is no + * explicit ROP_BOL transition. + */ + dp->avail += dp->used; + dp->used = 0; + if ((dp->sigfoll = malloc(sizeof(size_t) * dp->avail)) == 0) + goto err; + p = &dp->posn[dp->nposn - 1]; /* same as first(root) */ + dp->cursig = &dp->posfoll[p->seti]; + dp->nset = p->nset; + dp->top = 1; /* index 0 is dead state */ + addstate(dp); /* must be state index 1 (returns 2) */ + if ((dp->cursig = malloc(sizeof(size_t) * dp->nposn)) == 0) + goto err; + dp->nfix = 2; + if ((st = regtrans(dp, 1, ROP_BOL, lxp->mb_cur_max)) == 0) + goto err; + if ((dp->anybol = st - 1) == 2) /* new state */ + dp->nfix = 3; + if ((ep->re_flags & REG_NOSUB) == 0) /* leftmost() might be called */ + { + /* + * leftmost() initial states are the same as the + * "any match" ones without the STAR(ALL) position. 
+ */ + dp->sigi[dp->nfix] = 0; + dp->nsig[dp->nfix] = dp->nsig[1] - 1; + dp->acc[dp->nfix] = dp->acc[1]; + dp->leftbol = dp->leftmost = dp->nfix; + dp->nfix++; + if (dp->anybol != 1) /* distinct state w/BOL */ + { + dp->sigi[dp->nfix] = dp->sigi[2]; + dp->nsig[dp->nfix] = dp->nsig[2] - 1; + dp->acc[dp->nfix] = dp->acc[2]; + dp->leftbol = dp->nfix; + dp->nfix++; + } + dp->top = dp->nfix; + } + return 0; +err:; + libuxre_regdeldfa(dp); + return REG_ESPACE; +} + +static int +leftmost(Dfa *dp, Exec *xp) +{ + const unsigned char *s, *beg, *end; + int i, nst, st, mb_cur_max; + w_type wc; + + mb_cur_max = xp->mb_cur_max; + beg = s = xp->str; + end = 0; + st = dp->leftbol; + if (xp->flags & REG_NOTBOL) + st = dp->leftmost; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + end = s; /* initial empty match allowed */ + for (;;) + { + if ((wc = *s++) == '\n') + { + if (xp->flags & REG_NEWLINE) + wc = ROP_EOL; + } + else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if ((wc & ~(long)(NCHAR - 1)) != 0 + || (nst = dp->trans[st][wc]) == 0) + { + if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0) + return REG_ESPACE; + if (wc == ROP_EOL) /* REG_NEWLINE only */ + { + if (dp->acc[nst - 1]) + { + if (end == 0 || end < s) + end = s; + break; + } + beg = s; + st = dp->leftbol; + goto newst; + } + } + if ((st = nst - 1) == 0) /* dead state */ + { + if (end != 0) + break; + if ((wc = *beg++) == '\0') + return REG_NOMATCH; + else if (!ISONEBYTE(wc) && + (i = libuxre_mb2wc(&wc, beg)) > 0) + beg += i; + s = beg; + st = dp->leftmost; + goto newst; + } + if (wc == '\0') + { + if (dp->acc[st]) + { + s--; /* don't include \0 */ + if (end == 0 || end < s) + end = s; + break; + } + if (end != 0) + break; + return REG_NOMATCH; + } + newst:; + if (dp->acc[st]) + { + if (end == 0 || end < s) + end = s; + } + } + xp->match[0].rm_so = beg - xp->str; + xp->match[0].rm_eo = end - xp->str; + return 0; +} + +/* +* Optimization by simplification: singlebyte locale and REG_NEWLINE 
not set. +* Performance gain for grep is 25% so it's worth the hack. +*/ +static int +regdfaexec_opt(Dfa *dp, Exec *xp) +{ + const unsigned char *s; + int nst, st; + + s = xp->str; + st = dp->anybol; + if (xp->flags & REG_NOTBOL) + st = 1; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + return 0; /* initial empty match allowed */ + do + { + if ((nst = dp->trans[st][*s]) == 0) + { + if ((nst = regtrans(dp, st, *s, 1)) == 0) + return REG_ESPACE; + } + if (dp->acc[st = nst - 1]) + return 0; + } while (*s++ != '\0'); /* st != 0 */ + return REG_NOMATCH; +} + +LIBUXRE_STATIC int +libuxre_regdfaexec(Dfa *dp, Exec *xp) +{ + const unsigned char *s; + int i, nst, st, mb_cur_max; + w_type wc; + + dp->flags = xp->flags & REG_NOTEOL; /* for regtrans() */ + mb_cur_max = xp->mb_cur_max; + if (xp->nmatch != 0) + return leftmost(dp, xp); + if (mb_cur_max == 1 && (xp->flags & REG_NEWLINE) == 0) + return regdfaexec_opt(dp, xp); + s = xp->str; + st = dp->anybol; + if (xp->flags & REG_NOTBOL) + st = 1; + if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0) + return 0; /* initial empty match allowed */ + for (;;) + { + if ((wc = *s++) == '\n') + { + if (xp->flags & REG_NEWLINE) + wc = ROP_EOL; + } + else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0) + s += i; + if ((wc & ~(long)(NCHAR - 1)) != 0 + || (nst = dp->trans[st][wc]) == 0) + { + if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0) + return REG_ESPACE; + if (wc == ROP_EOL) /* REG_NEWLINE only */ + { + if (dp->acc[nst - 1]) + return 0; + if (dp->acc[st = dp->anybol]) + return 0; + continue; + } + } + if (dp->acc[st = nst - 1]) + return 0; + if (wc == '\0') /* st == 0 */ + return REG_NOMATCH; + } +} diff --git a/libuxre/regdfa.h b/libuxre/regdfa.h @@ -0,0 +1,75 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. 
+ * + * Sccsid @(#)regdfa.h 1.3 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ + +/* +* Deterministic Finite Automata. +*/ + +#ifndef LIBUXRE_REGDFA_H +#define LIBUXRE_REGDFA_H + +#include <re.h> + +typedef struct +{ + Bracket *bkt; /* extra info for ROP_BKT */ + size_t nset; /* number of items in the follow set */ + size_t seti; /* index into the follow set strip */ + w_type op; /* the leaf match operation */ +} Posn; + +#define CACHESZ 32 /* max. 
states to remember (must fit in uchar) */ +#define NCHAR (1 << CHAR_BIT) + +struct re_dfa_ /*Dfa*/ +{ + unsigned char *posset; /* signatures built here */ + size_t *posfoll; /* follow strip for posn[] */ + size_t *sigfoll; /* follow strip for sigi[] */ + size_t *cursig; /* current state's signature */ + Posn *posn; /* important positions */ + size_t nposn; /* length of posn,cursig,posset */ + size_t used; /* used portion of follow strip */ + size_t avail; /* unused part of follow strip */ + size_t nset; /* # items nonzero in posset[] */ + size_t nsig[CACHESZ]; /* number of items in signature */ + size_t sigi[CACHESZ]; /* index into sigfoll[] */ + unsigned char acc[CACHESZ]; /* nonzero for accepting states */ + unsigned char leftmost; /* leftmost() start, not BOL */ + unsigned char leftbol; /* leftmost() start, w/BOL */ + unsigned char anybol; /* any match start, w/BOL */ + unsigned char nfix; /* number of invariant states */ + unsigned char top; /* next state index available */ + unsigned char flags; /* interesting flags */ + unsigned char trans[CACHESZ][NCHAR]; /* goto table */ +}; + +extern int regtrans(Dfa *, int, w_type, int); + +#endif /* !LIBUXRE_REGDFA_H */ diff --git a/libuxre/regerror.c b/libuxre/regerror.c @@ -0,0 +1,95 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regerror.c 1.4 (gritter) 3/29/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <string.h> +#include "re.h" +/* include "_locale.h" */ + +/* #pragma weak regerror = _regerror */ + +size_t +regerror(int err, const regex_t *ep, char *str, size_t max) +{ + const struct + { + int index; + const char *str; + } unk = + { + 88, "unknown regular expression error" + }, msgs[] = + { + /*ENOSYS*/ { 89, "feature not implemented" }, + /*0*/ { 0, "" }, + /*NOMATCH*/ { 90, "regular expression failed to match" }, + /*BADPAT*/ { 91, "invalid regular expression" }, + /*ECOLLATE*/ { 92, "invalid collating element construct" }, + /*ECTYPE*/ { 93, "invalid character class construct" }, + /*EEQUIV*/ { 94, "invalid equivalence class construct" }, + /*EBKTCHAR*/ { 95, "invalid character in '[ ]' construct" }, + /*EESCAPE*/ { 96, "trailing \\ in pattern" }, + /*ESUBREG*/ { 97, "'\\digit' out of range" }, + /*EBRACK*/ { 98, "'[ ]' imbalance" }, + /*EMPTYSUBBKT*/ { 99, "empty nested '[ ]' construct" }, + /*EMPTYPAREN*/ { 100, "empty '\\( \\)' or '( )'" }, + /*NOPAT*/ { 101, "empty pattern" }, + /*EPAREN*/ { 102, "'\\( \\)' or '( )' imbalance" }, + /*EBRACE*/ { 103, "'\\{ \\} or '{ }' imbalance" }, + /*BADBR*/ { 104, "invalid '\\{ \\}' or '{ }'" }, + /*ERANGE*/ { 105, "invalid endpoint in range" }, + /*ESPACE*/ { 106, "out of regular expression memory" }, + /*BADRPT*/ { 107, "invalid *, +, ?, \\{\\} or {} operator" }, + /*BADESC*/ { 108, "invalid escape sequence (e.g. 
\\0)" }, + /*ILLSEQ*/ { 109, "illegal byte sequence"} + }; + const char *p; + size_t len; + int i; + + if (err < REG_ENOSYS || REG_ILLSEQ < err) + { + i = unk.index; + p = unk.str; + } + else + { + i = msgs[err - REG_ENOSYS].index; + p = msgs[err - REG_ENOSYS].str; + } +/* p = __gtxt(_str_uxlibc, i, p); */ + len = strlen(p) + 1; + if (max != 0) + { + if (max > len) + max = len; + else if (max < len) + str[--max] = '\0'; + memcpy(str, p, max); + } + return len; +} diff --git a/libuxre/regex.h b/libuxre/regex.h @@ -0,0 +1,153 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regex.h 1.13 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef LIBUXRE_REGEX_H +#define LIBUXRE_REGEX_H +/* from unixsrc:usr/src/common/head/regex.h /main/uw7_nj/1 */ + +#include <sys/types.h> /* really only want [s]size_t */ + + /* + * Official regexec() flags. + */ +#define REG_NOTBOL 0x000001 /* start of string does not match ^ */ +#define REG_NOTEOL 0x000002 /* end of string does not match $ */ + + /* + * Additional regexec() flags. 
+ */ +#define REG_NONEMPTY 0x000004 /* do not match empty at start of string */ + + /* + * Extensions to provide individual control over each + * of the differences between basic and extended REs. + */ +#define REG_OR 0x0000001 /* enable | operator */ +#define REG_PLUS 0x0000002 /* enable + operator */ +#define REG_QUEST 0x0000004 /* enable ? operator */ +#define REG_BRACES 0x0000008 /* use {m,n} (instead of \{m,n\}) */ +#define REG_PARENS 0x0000010 /* use (...) [instead of \(...\)] */ +#define REG_ANCHORS 0x0000020 /* ^ and $ are anchors anywhere */ +#define REG_NOBACKREF 0x0000040 /* disable \digit */ +#define REG_NOAUTOQUOTE 0x0000080 /* no automatic quoting of REG_BADRPTs */ + + /* + * Official regcomp() flags. + */ +#define REG_EXTENDED (REG_OR | REG_PLUS | REG_QUEST | REG_BRACES | \ + REG_PARENS | REG_ANCHORS | \ + REG_NOBACKREF | REG_NOAUTOQUOTE) +#define REG_ICASE 0x0000100 /* ignore case */ +#define REG_NOSUB 0x0000200 /* only success/fail for regexec() */ +#define REG_NEWLINE 0x0000400 /* take \n as line separator for ^ and $ */ + + /* + * Additional regcomp() flags. + * Some of these assume that int is >16 bits! + * Beware: 0x20000000 and above are used in re.h. 
+ */ +#define REG_ONESUB 0x0000800 /* regexec() only needs pmatch[0] */ +#define REG_MTPARENFAIL 0x0001000 /* take empty \(\) or () as match failure */ +#define REG_MTPARENBAD 0x0002000 /* disallow empty \(\) or () */ +#define REG_BADRANGE 0x0004000 /* accept [m-a] ranges as [ma] */ +#define REG_ODDRANGE 0x0008000 /* oawk oddity: [m-a] means [m] */ +#define REG_SEPRANGE 0x0010000 /* disallow [a-m-z] style ranges */ +#define REG_BKTQUOTE 0x0020000 /* allow \ in []s to quote \, -, ^ or ] */ +#define REG_BKTEMPTY 0x0040000 /* allow empty []s (w/BKTQUOTE, BKTESCAPE) */ +#define REG_ANGLES 0x0080000 /* enable \<, \> operators */ +#define REG_ESCNL 0x0100000 /* take \n as newline character */ +#define REG_NLALT 0x0200000 /* take newline as alternation */ +#define REG_ESCSEQ 0x0400000 /* otherwise, take \ as start of C escapes */ +#define REG_BKTESCAPE 0x0800000 /* allow \ in []s to quote next anything */ +#define REG_NOBRACES 0x1000000 /* disable {n,m} */ +#define REG_ADDITIVE 0x2000000 /* a+*b means + and * additive, ^+ is valid */ +#define REG_NOI18N 0x4000000 /* disable I18N features ([::] etc.) */ +#define REG_OLDESC 0x8000000 /* recognize \b \f \n \r \t \123 only */ +#define REG_AVOIDNULL 0x10000000/* avoid null subexpression matches */ +#define REG_OLDBRE (REG_BADRANGE | REG_ANGLES | REG_ESCNL) +#define REG_OLDERE (REG_OR | REG_PLUS | REG_QUEST | REG_NOBRACES | \ + REG_PARENS | REG_ANCHORS | REG_ODDRANGE | \ + REG_NOBACKREF | REG_ADDITIVE | REG_NOAUTOQUOTE) + + /* + * Error return values. 
+ */ +#define REG_ENOSYS (-1) /* unsupported */ +#define REG_NOMATCH 1 /* regexec() failed to match */ +#define REG_BADPAT 2 /* invalid regular expression */ +#define REG_ECOLLATE 3 /* invalid collating element construct */ +#define REG_ECTYPE 4 /* invalid character class construct */ +#define REG_EEQUIV 5 /* invalid equivalence class construct */ +#define REG_EBKTCHAR 6 /* invalid character in [] construct */ +#define REG_EESCAPE 7 /* trailing \ in pattern */ +#define REG_ESUBREG 8 /* number in \digit invalid or in error */ +#define REG_EBRACK 9 /* [] imbalance */ +#define REG_EMPTYSUBBKT 10 /* empty sub-bracket construct */ +#define REG_EMPTYPAREN 11 /* empty \(\) or () [REG_MTPARENBAD] */ +#define REG_NOPAT 12 /* no (empty) pattern */ +#define REG_EPAREN 13 /* \(\) or () imbalance */ +#define REG_EBRACE 14 /* \{\} or {} imbalance */ +#define REG_BADBR 15 /* contents of \{\} or {} invalid */ +#define REG_ERANGE 16 /* invalid endpoint in expression */ +#define REG_ESPACE 17 /* out of memory */ +#define REG_BADRPT 18 /* *,+,?,\{\} or {} not after r.e. */ +#define REG_BADESC 19 /* invalid escape sequence (e.g. \0) */ +#define REG_ILLSEQ 20 /* illegal byte sequence */ + +typedef struct +{ + size_t re_nsub; /* only advertised member */ + unsigned long re_flags; /* augmented regcomp() flags */ + struct re_dfa_ *re_dfa; /* DFA engine */ + struct re_nfa_ *re_nfa; /* NFA engine */ + struct re_coll_ *re_col; /* current collation info */ + int re_mb_cur_max; /* MB_CUR_MAX acceleration */ + void *re_more; /* just in case... 
*/ +} regex_t; + +typedef ssize_t regoff_t; + +typedef struct +{ + regoff_t rm_so; + regoff_t rm_eo; +} regmatch_t; + +#ifdef __cplusplus +extern "C" { +#endif + +int regcomp(regex_t *, const char *, int); +int regexec(const regex_t *, const char *, size_t, regmatch_t *, int); +size_t regerror(int, const regex_t *, char *, size_t); +void regfree(regex_t *); + +#ifdef __cplusplus +} +#endif + +#endif /* !LIBUXRE_REGEX_H */ diff --git a/libuxre/regexec.c b/libuxre/regexec.c @@ -0,0 +1,68 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regexec.c 1.7 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. 
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regexec = _regexec */ + +int +regexec(const regex_t *ep, const char *s, size_t n, regmatch_t *mp, int flg) +{ + Exec ex; + int ret; + + ex.flags = flg | (ep->re_flags & (REG_NEWLINE|REG_ICASE|REG_AVOIDNULL)); + ex.str = (const unsigned char *)s; + ex.match = mp; + ex.mb_cur_max = ep->re_mb_cur_max; + if ((ex.nmatch = n) != 0) /* impose limits from compile flags */ + { + if (ep->re_flags & REG_NOSUB) + n = ex.nmatch = 0; + else if (ep->re_flags & REG_ONESUB) + ex.nmatch = 1; + else if (n > ep->re_nsub + 1) + ex.nmatch = ep->re_nsub + 1; + } + if (ep->re_flags & REG_DFA && ex.nmatch <= 1) + ret = libuxre_regdfaexec(ep->re_dfa, &ex); + else + ret = libuxre_regnfaexec(ep->re_nfa, &ex); + /* + * Fill unused part of mp[]. + */ + if (ret != 0) + ex.nmatch = 0; + while (n > ex.nmatch) + { + n--; + mp[n].rm_so = -1; + mp[n].rm_eo = -1; + } + return ret; +} diff --git a/libuxre/regfree.c b/libuxre/regfree.c @@ -0,0 +1,42 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regfree.c 1.3 (gritter) 9/22/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "re.h" + +/* #pragma weak regfree = _regfree */ + +void +regfree(regex_t *ep) +{ + if (ep->re_flags & REG_DFA) + libuxre_regdeldfa(ep->re_dfa); + if (ep->re_flags & REG_NFA) + libuxre_regdelnfa(ep->re_nfa); + if (ep->re_col != 0) + (void)libuxre_lc_collate(ep->re_col); +} diff --git a/libuxre/regnfa.c b/libuxre/regnfa.c @@ -0,0 +1,1070 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <string.h> +#include <stdlib.h> +#include "re.h" +#include <stddef.h> +#include <ctype.h> + +typedef unsigned char Uchar; +typedef unsigned short Ushort; + +/* +* Nondeterministic Finite Automata. 
+*/
+typedef struct t_graph	Graph;
+struct t_graph	/* one node of the NFA */
+{
+	union
+	{
+		Graph	*ptr;	/* ROP_OR/ROP_MTOR: the other branch; also reused as a list link by deltolist() and nopskip() */
+		Info	info;	/* operator data set by mkgraph(): bkt, sub or num[] */
+	} alt;
+	Graph	*next;	/* following node; zero after the final one */
+	w_type	op;	/* ROP_* operator, or the character to match */
+};
+
+typedef struct t_stack	Stack;
+struct t_stack
+{
+	Stack		*link;	/* simplifies cleanup */
+	Stack		*prev;	/* covered states */
+	Graph		*wasgp;	/* node associated with this state */
+	const Uchar	*str;	/* saved position in the string */
+	Ushort		cnt;	/* ROP_BRACE: traversal count */
+};
+
+	/*
+	* A Context holds all the information needed for each
+	* potential path through the NFA graph.
+	*/
+typedef struct t_ctxt	Context;
+struct t_ctxt
+{
+	Context		*link;	/* simplifies cleanup */
+	Context		*next;	/* singly linked */
+	Stack		*sp;	/* nested counts */
+	Graph		*gp;	/* starting node */
+	Graph		*wasgp;	/* node associated with this state */
+	const Uchar	*str;	/* saved position in the string */
+	Ushort		cnt;	/* ROP_BRACE: traversal count */
+	size_t		nset;	/* length of rm[] that is currently set */
+	regmatch_t	rm[1];	/* enough to cover re_nsub+1 (np->rmlen) */
+};
+
+struct re_nfa_ /*Nfa*/
+{
+	Graph	*gp;	/* entire NFA */
+	Stack	*sp;	/* unused Stacks */
+	Stack	*allsp;	/* linked Stacks (for cleanup) */
+	Context	*allcp;	/* linked Contexts (for cleanup) */
+	Context	*cur;	/* Contexts to be continued now */
+	Context	*step;	/* Contexts waiting for a step of the NFA */
+	Context	*avail;	/* unused Contexts */
+	Context	**ecur;	/* ends cur list of Contexts */
+	Context	**estp;	/* ends step list of Contexts */
+	size_t	rmlen;	/* length of rm[] in each Context */
+	size_t	rmmin;	/* minimum length needed */
+	size_t	used;	/* length used for this libuxre_regnfaexec() */
+	w_type	beg;	/* nonzero for fixed char initial node NFAs */
+};
+
+#define ROP_MTOR	ROP_CAT	/* ROP_OR, except might be empty loop */
+
+	/*
+	* Depth first traversal.
+	* Make a singly linked list (in alt.ptr) of the graph's nodes.
+	* Must toss any ROP_BKTs, too, since "alt" is overwritten.
+	*
+	* A zero "next" doubles as the visited mark: it is cleared before
+	* descending, so a node reached a second time (through a loop or
+	* a join) is left alone.  The one node that starts out with a zero
+	* "next" is ROP_END; it is collected on its first visit and turned
+	* into a ROP_NOP so that a second visit ignores it as well.
+	*/
+static void
+deltolist(Graph *gp, Graph **list)
+{
+	Graph *ptr;
+
+	if ((ptr = gp->next) != 0)	/* first time */
+	{
+		gp->next = 0;
+		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+			deltolist(gp->alt.ptr, list);
+		deltolist(ptr, list);
+		if (gp->op == ROP_BKT)
+		{
+			libuxre_bktfree(gp->alt.info.bkt);
+			free(gp->alt.info.bkt);
+		}
+	}
+	else if (gp->op == ROP_END)
+		gp->op = ROP_NOP;
+	else
+		return;
+	gp->alt.ptr = *list;	/* push this node onto the list */
+	*list = gp;
+}
+
+	/*
+	* After the graph is turned into a linked list,
+	* walk that list freeing the nodes.
+	* The address of the local "end" terminates the list;
+	* it is never freed itself.
+	*/
+static void
+delgraph(Graph *gp)
+{
+	Graph *gp2, end;
+
+	gp2 = &end;
+	deltolist(gp, &gp2);
+	while ((gp = gp2) != &end)
+	{
+		gp2 = gp->alt.ptr;
+		free(gp);
+	}
+}
+
+	/*
+	* Depth first traversal.
+	* Look for ROP_NOPs and prune them from the graph.
+	* Chain them all together on *nop's list.
+	* Returns the node that should take gp's place in the graph:
+	* gp itself, or for a ROP_NOP whatever follows it.
+	* The caller must start *nop's list with a nonzero sentinel,
+	* because a nonzero alt.ptr is what marks a ROP_NOP as done.
+	*/
+static Graph *
+nopskip(Graph *gp, Graph **nop)
+{
+	Graph *ptr;
+
+	if ((ptr = gp->next) != 0)	/* might have yet to do this subgraph */
+	{
+		if (gp->op == ROP_NOP)
+		{
+			if (gp->alt.ptr != 0)	/* touched */
+				return gp->next;	/* already did it */
+			gp->alt.ptr = *nop;
+			*nop = gp;
+		}
+		gp->next = 0;	/* this subgraph's pending */
+		if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+			gp->alt.ptr = nopskip(gp->alt.ptr, nop);
+		gp->next = nopskip(ptr, nop);
+		if (gp->op == ROP_NOP)
+			return gp->next;	/* callers link past the ROP_NOP */
+	}
+	return gp;
+}
+
+	/*
+	* Postorder traversal of the parse tree.
+	* Build a graph using "Thompson's" algorithm.
+	* The only significant modification is the
+	* ROP_BRACE->ROP_MTOR construction.
+ * Returns 1 => graph might match empty + * 0 => graph cannot match empty + * -1 => error (in allocation) + */ +static int +mkgraph(Tree *tp, Graph **first, Graph **last) +{ + Graph *new = 0, *nop, *lf, *ll, *rf, *rl; + int lmt, rmt = 0; + + if (tp->op != ROP_CAT) + { + if ((new = malloc(sizeof(Graph))) == 0) + return 0; + new->op = tp->op; /* usually */ + } + switch (tp->op) + { + case ROP_REF: + new->alt.info.sub = tp->right.info.sub; + *first = new; + *last = new; + return 1; /* safe--can't really tell */ + case ROP_BKT: + tp->op = ROP_BKTCOPY; /* now graph owns clean up */ + /*FALLTHROUGH*/ + case ROP_BKTCOPY: + new->alt.info.bkt = tp->right.info.bkt; + /*FALLTHROUGH*/ + default: + *first = new; + *last = new; + return 0; + case ROP_EMPTY: + new->op = ROP_NOP; + new->alt.ptr = 0; /* untouched */ + *first = new; + *last = new; + return 1; + case ROP_OR: + case ROP_CAT: + lf = 0; /* in case of error */ + if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0) + goto err; + /*FALLTHROUGH*/ + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + case ROP_BRACE: + case ROP_LP: + if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0) + goto err; + break; + } + /* + * Note that ROP_NOP only serves as the node that reconnects + * the two choices of an incoming ROP_OR or ROP_QUEST. To + * prevent rewalking portions of the graph in nopskip(), + * this code marks all ROP_NOP nodes as currently untouched. 
+ */ + switch (tp->op) + { + case ROP_OR: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_NOP; + nop->alt.ptr = 0; /* untouched */ + ll->next = nop; + rl->next = nop; + new->next = lf; + new->alt.ptr = rf; + *first = new; + *last = nop; + return lmt | rmt; + case ROP_CAT: /* no "new" */ + ll->next = rf; + *first = lf; + *last = rl; + return lmt & rmt; + case ROP_QUEST: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_NOP; + nop->alt.ptr = 0; /* untouched */ + new->op = ROP_OR; + new->next = lf; + new->alt.ptr = nop; + ll->next = nop; + *first = new; + *last = nop; + return 1; + case ROP_STAR: + *first = new; + rmt = 1; + star:; + new->op = lmt ? ROP_MTOR : ROP_OR; + new->alt.ptr = lf; + ll->next = new; + *last = new; + return rmt; + case ROP_PLUS: + *first = lf; + rmt = lmt; + goto star; + case ROP_BRACE: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_MTOR; /* going to save state anyway... */ + nop->alt.ptr = lf; + ll->next = new; + new->next = nop; + new->alt.info.num[1] = tp->right.info.num[1]; + if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0) + { + lmt = 1; + *first = new; + } + else + { + new->alt.info.num[0]--; /* already done 1 */ + if (new->alt.info.num[1] != BRACE_INF) + new->alt.info.num[1]--; /* likewise */ + *first = lf; + } + *last = nop; + return lmt; + case ROP_LP: + if ((nop = malloc(sizeof(Graph))) == 0) + goto err; + nop->op = ROP_RP; + nop->alt.info.sub = tp->right.info.sub; + new->alt.info.sub = tp->right.info.sub; + new->next = lf; + ll->next = nop; + *first = new; + *last = nop; + return lmt; + } +err:; + if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0) + delgraph(rf); + if (lf != 0) + delgraph(lf); + if (tp->op != ROP_CAT) + free(new); + return -1; +} + + /* + * Semi-preorder traversal. + * Return zero if there's no simple first character + * (including the operation ROP_BOL) that must always + * be at the start of a matching string. 
+	* This code doesn't attempt to get an answer if the
+	* first of the tree may be empty.
+	*/
+static w_type
+firstop(Tree *tp)
+{
+	w_type op;
+
+	switch (tp->op)
+	{
+	case ROP_OR:	/* both branches must agree on the first op */
+		if ((op = firstop(tp->left.ptr)) == 0
+			|| op != firstop(tp->right.ptr))
+		{
+			return 0;
+		}
+		return op;
+	case ROP_BRACE:
+		if (tp->right.info.num[0] == 0)	/* zero repetitions allowed */
+			return 0;
+		/*FALLTHROUGH*/
+	case ROP_CAT:
+	case ROP_PLUS:
+	case ROP_LP:
+		return firstop(tp->left.ptr);
+	default:
+		if (tp->op < 0)	/* any other operator: no answer */
+			return 0;
+		/*FALLTHROUGH*/
+	case ROP_BOL:
+		return tp->op;
+	}
+}
+	/*
+	* Free an Nfa: its graph (if any), every Context and Stack
+	* reachable through the allcp and allsp "link" chains, and
+	* finally the Nfa itself.
+	*/
+void
+libuxre_regdelnfa(Nfa *np)
+{
+	Context *cp, *cpn;
+	Stack *sp, *spn;
+
+	if (np->gp != 0)
+		delgraph(np->gp);
+	for (cp = np->allcp; cp != 0; cp = cpn)
+	{
+		cpn = cp->link;	/* fetch before cp is freed */
+		free(cp);
+	}
+	for (sp = np->allsp; sp != 0; sp = spn)
+	{
+		spn = sp->link;	/* fetch before sp is freed */
+		free(sp);
+	}
+	free(np);
+}
+	/*
+	* Build the NFA for the parse tree tp and store it in ep->re_nfa.
+	* Each Context gets room for re_nsub+1 submatches (none with
+	* REG_NOSUB), but never fewer than lxp->maxref+1 when the
+	* pattern contains back references.
+	* Returns 0, or REG_ESPACE when an allocation fails.
+	*/
+LIBUXRE_STATIC int
+libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp)
+{
+	Graph *gp, end;
+	Nfa *np;
+
+	if ((np = malloc(sizeof(Nfa))) == 0)
+		goto err;
+	np->gp = 0;	/* in case of error */
+	if (mkgraph(tp, &np->gp, &gp) < 0)
+		goto err;
+	gp->next = 0;	/* nothing follows ROP_END */
+	np->rmlen = 0;
+	if ((ep->re_flags & REG_NOSUB) == 0)
+		np->rmlen = ep->re_nsub + 1;
+	np->rmmin = 0;
+	if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen)
+		np->rmlen = np->rmmin;
+	/*
+	* Delete all ROP_NOPs from the graph.
+	* nopskip() disconnects them from the graph and
+	* links them together through their alt.ptr's.
+	*/
+	gp = &end;	/* nonzero list terminator, never freed */
+	np->gp = nopskip(np->gp, &gp);
+	while (gp != &end)
+	{
+		Graph *gp2 = gp;
+
+		gp = gp->alt.ptr;
+		free(gp2);
+	}
+	np->sp = 0;
+	np->allsp = 0;
+	np->avail = 0;
+	np->allcp = 0;
+	ep->re_nfa = np;
+	np->beg = firstop(tp);
+	return 0;
+err:;
+	if (np != 0)
+	{
+		if (np->gp != 0)
+			delgraph(np->gp);
+		free(np);
+	}
+	return REG_ESPACE;
+}
+	/*
+	* Take a Stack from the np->sp free list.  An empty list is
+	* first refilled with up to four newly allocated entries, each
+	* of which is also recorded on np->allsp for cleanup.
+	* Returns zero only if not even one entry could be allocated.
+	*/
+static Stack *
+newstck(Nfa *np)
+{
+	Stack *sp, **spp;
+	int i;
+
+	if ((sp = np->sp) == 0)	/* get more */
+	{
+		spp = &np->sp;
+		i = 4;
+		while ((sp = malloc(sizeof(Stack))) != 0)
+		{
+			sp->link = np->allsp;
+			np->allsp = sp;
+			*spp = sp;	/* append to the free list */
+			spp = &sp->prev;
+			if (--i == 0)
+				break;
+		}
+		*spp = 0;
+		if ((sp = np->sp) == 0)	/* first malloc failed */
+			return 0;
+	}
+	np->sp = sp->prev;
+	return sp;
+}
+	/*
+	* With gp nonzero: push cp's current wasgp/str/cnt onto its
+	* stack (only if wasgp is set), then start over with gp as the
+	* tracked node and str/cnt cleared.
+	* With gp zero: load wasgp/str/cnt from the top entry of cp->sp
+	* and move cp->sp to the entry below it.  If nothing is below,
+	* the old top goes back on the np->sp free list; otherwise the
+	* old top and newly obtained entries are overwritten with a copy
+	* of the entries below.
+	* NOTE(review): the only gp == 0 caller visible here is
+	* newctxt(); which Context ends up owning which chain should be
+	* confirmed against that caller.
+	* Returns 0, or REG_ESPACE when no Stack could be obtained.
+	*/
+static int
+mkstck(Nfa *np, Context *cp, Graph *gp)
+{
+	Stack *new, *sp;
+
+	if (gp == 0)	/* copy existing stack tail */
+	{
+		/*
+		* Hoist up top of stack.
+		*/
+		new = cp->sp;
+		cp->wasgp = new->wasgp;
+		cp->str = new->str;
+		cp->cnt = new->cnt;
+		cp->sp = new->prev;
+		if ((sp = new->prev) == 0)	/* only one below */
+		{
+			new->prev = np->sp;
+			np->sp = new;
+			cp->sp = 0;
+			return 0;
+		}
+		for (;;)	/* copy the rest; reusing the old top */
+		{
+			new->wasgp = sp->wasgp;
+			new->str = sp->str;
+			new->cnt = sp->cnt;
+			if ((new->prev = sp->prev) == 0)
+				break;
+			if ((new->prev = newstck(np)) == 0)
+				return REG_ESPACE;
+			new = new->prev;
+			sp = sp->prev;
+		}
+		return 0;
+	}
+	if (cp->wasgp != 0)	/* push current down */
+	{
+		if ((new = newstck(np)) == 0)
+			return REG_ESPACE;
+		new->prev = cp->sp;
+		cp->sp = new;
+		new->wasgp = cp->wasgp;
+		new->str = cp->str;
+		new->cnt = cp->cnt;
+	}
+	cp->wasgp = gp;
+	cp->str = 0;
+	cp->cnt = 0;
+	return 0;
+}
+
+	/*
+	* Allocate a new Context (from np->avail)
+	* and add it to the end of the current list.
+	*
+	* The new Context is to continue at graph node gp.  When cp is
+	* nonzero its wasgp/str/cnt, its stack (by way of mkstck()) and
+	* the submatches it has set so far are carried over.
+	* Returns 0, or REG_ESPACE when memory runs out.
+	*/
+static int
+newctxt(Nfa *np, Context *cp, Graph *gp)
+{
+	Context *fresh;
+	size_t k;
+
+	if (np->avail == 0)	/* need more */
+	{
+		Context *ncp, **tail;
+		size_t bytes;
+		int batch;
+
+		/*
+		* Each Context is allocated on its own: with rm[] varying
+		* in length, carving several out of a single allocation
+		* would take nontrivial alignment arithmetic.
+		*/
+		bytes = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t);
+		tail = &np->avail;
+		for (batch = 4; batch > 0; batch--)
+		{
+			if ((ncp = malloc(bytes)) == 0)
+				break;
+			ncp->link = np->allcp;
+			np->allcp = ncp;
+			*tail = ncp;
+			tail = &ncp->next;
+		}
+		*tail = 0;
+		if (np->avail == 0)	/* first malloc failed */
+			return REG_ESPACE;
+	}
+	fresh = np->avail;
+	np->avail = fresh->next;
+	fresh->next = 0;
+	fresh->gp = gp;
+	fresh->sp = 0;
+	fresh->wasgp = 0;
+	fresh->nset = 0;
+	if (cp != 0)	/* copy existing context information */
+	{
+		if (cp->sp != 0)	/* copy tail of stack */
+		{
+			fresh->sp = cp->sp;
+			if (mkstck(np, fresh, 0) != 0)
+				return REG_ESPACE;
+		}
+		fresh->wasgp = cp->wasgp;
+		fresh->str = cp->str;
+		fresh->cnt = cp->cnt;
+		/*
+		* Bring along whatever subexpression matches the
+		* existing context has recorded.
+		*/
+		if (np->used != 0 && cp->nset != 0)
+		{
+			fresh->nset = cp->nset;
+			for (k = 0; k < cp->nset; k++)
+			{
+				fresh->rm[k].rm_so = cp->rm[k].rm_so;
+				fresh->rm[k].rm_eo = cp->rm[k].rm_eo;
+			}
+		}
+	}
+	/*
+	* Link it in as the last entry of the current Context list.
+	*/
+	*np->ecur = fresh;
+	np->ecur = &fresh->next;
+	return 0;
+}
+
+	/*
+	* Compare two byte string sequences for equality.
+	* If REG_ICASE, walk through the strings doing
+	* caseless comparisons of the wide characters.
+	*
+	* s is the text to test; it is compared with the n bytes of
+	* xp->str that start at offset i.
+	* Returns 1 if they match, 0 if not.
+	*/
+static int
+casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max)
+{
+	const Uchar *p = &xp->str[i];
+	const Uchar *end;
+	w_type wc1, wc2;
+	int k;
+
+	if (strncmp((char *)s, (char *)p, n) == 0)	/* try for exact match */
+		return 1;
+	if ((xp->flags & REG_ICASE) == 0)
+		return 0;
+	/*
+	* Walk through each testing for a match, ignoring case,
+	* of the resulting wide characters.
+	* Note that only "s" can run out of characters.
+	*/
+	end = &p[n];
+	do
+	{
+		if ((wc1 = *s++) == '\0')
+			return 0;
+		if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0)
+			s += k;
+		if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0)
+			p += k;
+		if (wc1 != wc2)
+		{
+			wc1 = to_lower(wc1);
+			wc2 = to_lower(wc2);
+			if (wc1 != wc2)
+				return 0;
+		}
+	} while (p < end);
+	return 1;
+}
+	/*
+	* Run the NFA np over the string xp->str.
+	* Returns 0 on a match, REG_NOMATCH when there is none, and
+	* REG_ESPACE when a Context or Stack could not be obtained.
+	* With xp->nmatch nonzero the left-most longest match is
+	* copied into xp->match[].
+	* Whatever the outcome, every Context and Stack is put back on
+	* its free list before returning.
+	*/
+LIBUXRE_STATIC int
+libuxre_regnfaexec(Nfa *np, Exec *xp)
+{
+	const Uchar *s, *s1, *s2;
+	Context *cp, *cpn;
+	Graph *gp, *brace;
+	Stack *sp, *spn;
+	ssize_t rmso, len;
+	int i, ret, mb_cur_max;
+	w_type wc;
+	size_t n;
+
+	ret = 0;	/* assume it matches */
+	rmso = -1;	/* but no match yet */
+	np->cur = 0;
+	np->step = 0;
+	np->ecur = &np->cur;
+	np->estp = &np->step;
+	if ((np->used = xp->nmatch) < np->rmmin)
+		np->used = np->rmmin;
+	s1 = 0;	/* one char back */
+	s = xp->str;	/* current high water in string */
+	mb_cur_max = xp->mb_cur_max;
+	for (;;)
+	{
+		/*
+		* Get next character from string.
+		* If the engine proper hasn't started and the engine
+		* requires a particular character to start and this
+		* character isn't it, try the next one.
+		*/
+		for (;;)
+		{
+			s2 = s1;	/* s2: start of the previous character, 0 at the beginning */
+			s1 = s;	/* s1: start of the character now in wc */
+			if (!ISONEBYTE(wc = *s++) &&
+				(i = libuxre_mb2wc(&wc, s)) > 0)
+				s += i;
+			if (np->cur != 0 || np->beg == wc || np->beg == 0)
+				break;
+			if (np->beg == ROP_BOL)
+			{
+				if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0)
+					break;
+				if ((xp->flags & REG_NEWLINE) == 0)
+					goto nomatch;
+				if (s2 != 0 && *s2 == '\n')
+					break;
+			}
+			if (wc == '\0')
+				goto nomatch;
+		}
+		/*
+		* Start the engine by inserting a fresh initial context
+		* if there's no known match as yet.  (Once some match
+		* has been found, the end is near.)
+		*/
+		if (rmso < 0 && newctxt(np, 0, np->gp) != 0)
+			goto err;
+		/*
+		* Walk the current Contexts list, trying each.
+		* "loop" is when a new Context is to be tried,
+		* "again" is when the same Context continues,
+		* but wc was not yet matched.
+		*/
+		cp = np->cur;
+	loop:;
+		gp = cp->gp;
+	again:;
+		switch (gp->op)
+		{
+		case ROP_BRACE:	/* gp->next->op == ROP_MTOR */
+			brace = gp;
+			gp = gp->next;
+			goto mtor;
+		case ROP_MTOR:
+			brace = 0;
+		mtor:;
+			if (cp->wasgp != gp)	/* first time */
+			{
+				if (mkstck(np, cp, gp) != 0)
+					goto err;
+			}
+			else if (cp->str == s)	/* spinning */
+				goto poptonext;
+			cp->str = s;
+			if (brace != 0)
+			{
+				if (cp->cnt >= brace->alt.info.num[1])	/* upper bound reached */
+					goto poptonext;
+				if (++cp->cnt <= brace->alt.info.num[0])	/* lower bound not yet met */
+				{
+					gp = gp->alt.ptr;
+					goto again;
+				}
+				if (cp->cnt > BRACE_MAX)
+					cp->cnt = BRACE_MAX;
+			}
+			if (newctxt(np, cp, gp->alt.ptr) != 0)
+				goto err;
+		poptonext:;
+			cp->wasgp = 0;
+			if ((sp = cp->sp) != 0)	/* pop stack */
+			{
+				cp->sp = sp->prev;
+				cp->wasgp = sp->wasgp;
+				cp->str = sp->str;
+				cp->cnt = sp->cnt;
+				sp->prev = np->sp;	/* entry goes back on the free list */
+				np->sp = sp;
+			}
+			/*FALLTHROUGH*/
+		case ROP_EMPTY:
+		tonext:;
+			gp = gp->next;
+			goto again;
+		case ROP_OR:
+			if (newctxt(np, cp, gp->alt.ptr) != 0)
+				goto err;
+			goto tonext;
+		case ROP_LP:
+			if ((n = gp->alt.info.sub) < np->used)
+			{
+				size_t k;
+
+				cp->rm[n].rm_so = s1 - xp->str;
+				cp->rm[n].rm_eo = -1;
+				/*
+				* Mark any skipped subexpressions as
+				* failing to participate in the match.
+				*/
+				if ((k = cp->nset) < n)
+				{
+					regmatch_t *rmp = &cp->rm[k];
+
+					for (;; rmp++)
+					{
+						rmp->rm_so = -1;
+						rmp->rm_eo = -1;
+						if (++k >= n)
+							break;
+					}
+				}
+				cp->nset = n + 1;
+			}
+			goto tonext;
+		case ROP_RP:
+			if ((n = gp->alt.info.sub) < np->used)
+				cp->rm[n].rm_eo = s1 - xp->str;
+			goto tonext;
+		case ROP_BOL:
+			if (s2 == 0)
+			{
+				if (xp->flags & REG_NOTBOL)
+					goto failed;
+			}
+			else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n')
+				goto failed;
+			goto tonext;
+		case ROP_EOL:
+			if (wc == '\0')
+			{
+				if (xp->flags & REG_NOTEOL)
+					goto failed;
+			}
+			else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n')
+				goto failed;
+			goto tonext;
+		default:	/* character match */
+			if (gp->op != wc)
+			{
+				if ((xp->flags & REG_ICASE) == 0
+					|| gp->op != to_lower(wc))
+				{
+					goto failed;
+				}
+			}
+		nextwc:;
+			cp->gp = gp->next;
+		tostep:;	/* move cp from the current list to the end of the step list */
+			cpn = cp->next;
+			cp->next = 0;
+			*np->estp = cp;
+			np->estp = &cp->next;
+			if ((cp = cpn) == 0)
+				break;
+			goto loop;
+		case ROP_NOTNL:
+			if (wc == '\n')
+				goto failed;
+			/*FALLTHROUGH*/
+		case ROP_ANYCH:
+			if (wc > '\0')
+				goto nextwc;
+			/*FALLTHROUGH*/
+		case ROP_NONE:
+		failed:;	/* cp is done with: put it back on the avail list */
+			cpn = cp->next;
+			cp->next = np->avail;
+			np->avail = cp;
+			if ((cp = cpn) == 0)
+				break;
+			goto loop;
+		case ROP_LT:
+			if (s2 == 0)
+			{
+				if (xp->flags & REG_NOTBOL)
+					goto failed;
+			}
+			else
+			{
+				w_type pwc;
+
+				if (wc != '_' &&
+					!iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
+					goto failed;
+				if (!ISONEBYTE(pwc = *s2))
+					libuxre_mb2wc(&pwc, &s2[1]);
+				if (pwc == '_' ||
+					iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc))
+					goto failed;
+			}
+			goto tonext;
+		case ROP_GT:
+			if (wc == '_' ||
+				iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
+				goto failed;
+			goto tonext;
+		case ROP_BKT:
+		case ROP_BKTCOPY:
+			if (cp->wasgp == gp)	/* rest of MCCE */
+			{
+			checkspin:;
+				if (s1 >= cp->str)	/* got it all */
+					goto poptonext;
+				goto tostep;
+			}
+			if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s,
+				mb_cur_max)) < 0)
+				goto failed;
+			if ((n = i) == 0)	/* only matched wc */
+				goto nextwc;
+		spin:;
+			if (mkstck(np, cp, gp) != 0)
+				goto err;
+			cp->gp = gp;	/* stay here until reach past s+n */
+			cp->str = s + n;
+			goto tostep;
+		case ROP_REF:
+			if (cp->wasgp == gp)	/* rest of matched string */
+				goto checkspin;
+			if ((n = gp->alt.info.sub) >= cp->nset)
+				goto failed;
+			if ((len = cp->rm[n].rm_eo) < 0)
+				goto failed;
+			if ((len -= n = cp->rm[n].rm_so) == 0)	/* n: start offset, len: length of the group's text */
+				goto tonext;
+			if (casecmp(s1, xp, n, len, mb_cur_max) == 0)
+				goto failed;
+			if ((n = s - s1) >= len)
+				goto nextwc;
+			n = len - n;
+			goto spin;
+		case ROP_END:	/* success! */
+			if (xp->flags & REG_NONEMPTY)
+			{
+				if (s2 == 0)
+					goto failed;
+			}
+			if (xp->nmatch == 0)
+				goto match;
+			/*
+			* Mark any skipped subexpressions as failing to match.
+			*/
+			if ((n = cp->nset) < xp->nmatch)
+			{
+				do
+				{
+					cp->rm[n].rm_so = -1;
+					cp->rm[n].rm_eo = -1;
+				} while (++n < xp->nmatch);
+			}
+			/*
+			* Note the left-most match that's longest.
+			*/
+			n = cp->rm[0].rm_so;
+			if (rmso < 0 || n < rmso)
+			{
+				rmso = n;
+			record:;
+				memcpy(xp->match, cp->rm,
+					xp->nmatch * sizeof(regmatch_t));
+				goto failed;
+			}
+			if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo)
+				goto failed;
+			if (xp->match[0].rm_eo < cp->rm[0].rm_eo)
+				goto record;
+#if 0	/* maximize the lengths of earlier LP...RPs */
+			/*
+			* If both are of the same length and start
+			* at the same point, choose the one with
+			* a "longest submatch from left to right"
+			* where an empty string wins over a nonmatch.
+			*/
+			for (n = 1; n < xp->nmatch; n++)
+			{
+				ssize_t nlen;
+
+				/*
+				* First, go with the choice that has any
+				* match for subexpr n.
+				*/
+				len = xp->match[n].rm_eo;
+				nlen = cp->rm[n].rm_eo;
+				if (nlen < 0)
+				{
+					if (len >= 0)
+						break;
+				}
+				else if (len < 0)
+					goto record;
+				/*
+				* Both have a match; go with the longer.
+				*/
+				len -= xp->match[n].rm_so;
+				nlen -= cp->rm[n].rm_so;
+				if (nlen < len)
+					break;
+				if (nlen > len)
+					goto record;
+			}
+#else /* take LP and RP as "fence posts" and maximize earlier gaps */
+			/*
+			* If both are of the same length and start
+			* at the same point, choose the one with
+			* the larger earlier subpatterns, in which
+			* each rm_so and rm_eo serves as a separator.
+			*/
+			for (n = 1; n < xp->nmatch; n++)
+			{
+				ssize_t nlen;
+				int use;
+
+				if (xp->flags & REG_AVOIDNULL) {
+					/*
+					* This is to satisfy POSIX.1-2001
+					* XBD pp. 172-173 ll. 6127-6129, whose
+					* translation is "do not match null
+					* expressions if there is a choice".
+					* See also POSIX.2 interpretation #43
+					* in which the question was raised.
+					*
+					* The first subexpression of "\(x*\)*"
+					* must thus match the string "xxx".
+					*/
+					use = cp->rm[n].rm_eo -
+						cp->rm[n].rm_so >=
+						xp->match[n].rm_eo -
+						xp->match[n].rm_so ||
+						xp->match[n].rm_so < 0;
+				} else
+					use = 1;
+				/*
+				* Choose the rightmost ROP_LP as that
+				* maximizes the gap from before.
+				*/
+				len = xp->match[n].rm_so;
+				nlen = cp->rm[n].rm_so;
+				if (len < nlen && use)
+					goto record;
+				if (len > nlen)
+					break;
+				/*
+				* The ROP_LPs are at the same point:
+				* Choose the rightmost ROP_RP.
+				*/
+				len = xp->match[n].rm_eo;
+				nlen = cp->rm[n].rm_eo;
+				if (len < nlen && use)
+					goto record;
+				if (len > nlen)
+					break;
+			}
+#endif
+			goto failed;
+		}
+		/*
+		* Finished the current Context list.  If the input string
+		* has been entirely scanned, we're done.  Otherwise, make
+		* the next step list current for the next character.
+		* If the next step list was empty and there's an existing
+		* match, that's the left-most longest.
+		*/
+		if (wc == '\0')
+		{
+			if (rmso >= 0)
+				goto match;
+			goto nomatch;
+		}
+		np->ecur = np->estp;
+		if ((np->cur = np->step) == 0)
+		{
+			if (rmso >= 0)
+				goto match;
+			np->ecur = &np->cur;	/* was pointing at step */
+		}
+		np->step = 0;
+		np->estp = &np->step;
+	}
+nomatch:;
+	ret = REG_NOMATCH;
+match:;	/* common exit: rebuild both free lists from the cleanup chains */
+	np->avail = 0;
+	for (cp = np->allcp; cp != 0; cp = cpn)
+	{
+		cpn = cp->link;
+		cp->next = np->avail;
+		np->avail = cp;
+	}
+	np->sp = 0;
+	for (sp = np->allsp; sp != 0; sp = spn)
+	{
+		spn = sp->link;
+		sp->prev = np->sp;
+		np->sp = sp;
+	}
+	return ret;
+err:;
+	ret = REG_ESPACE;
+	goto match;
+}
diff --git a/libuxre/regparse.c b/libuxre/regparse.c
@@ -0,0 +1,1091 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03
+ */
+/* UNIX(R) Regular Expression Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include <stdlib.h> +#include <ctype.h> +#include "re.h" + +LIBUXRE_STATIC void +libuxre_regdeltree(Tree *tp, int all) +{ + if (tp == 0) + return; + if (tp->op < 0) + { + switch (KIND_ROP(tp->op)) + { + case BINARY_ROP: + libuxre_regdeltree(tp->right.ptr, all); + /*FALLTHROUGH*/ + case UNARY_ROP: + libuxre_regdeltree(tp->left.ptr, all); + break; + default: + if (tp->op == ROP_BKT && all) + { + libuxre_bktfree(tp->right.info.bkt); + free(tp->right.info.bkt); + } + break; + } + } + free(tp); +} + +LIBUXRE_STATIC Tree * +libuxre_reg1tree(w_type op, Tree *lp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + if (lp != 0) + libuxre_regdeltree(lp, 1); + return 0; + } + tp->op = op; + tp->left.ptr = lp; + if (lp != 0) + lp->parent = tp; + return tp; +} + +LIBUXRE_STATIC Tree * +libuxre_reg2tree(w_type op, Tree *lp, Tree *rp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + libuxre_regdeltree(lp, 1); + libuxre_regdeltree(rp, 1); + return 0; + } + tp->op = op; + tp->left.ptr = lp; + lp->parent = tp; + tp->right.ptr = rp; + rp->parent = tp; + return tp; +} + +static int +lex(Lex *lxp) +{ + size_t num; + w_type wc; + int n, mb_cur_max; + + mb_cur_max = lxp->mb_cur_max; +nextc: switch (wc = *lxp->pat++) /* interesting ones are single bytes */ + { + case '\0': + lxp->pat--; /* continue to report ROP_END */ + wc = ROP_END; + break; + case '(': + if (lxp->flags & REG_PARENS) + { + leftparen:; + /* + * Must keep track of the closed and + * yet-to-be closed groups as a list. + * Consider (()a(()b(()c(()d... in which + * at each letter another even-numbered + * group is made available, but no + * odd-numbered ones are. 
+ */ + if ((lxp->flags & REG_NOBACKREF) == 0) + { + if (lxp->nleft >= lxp->nclist) /* grow it */ + { + unsigned char *p; + + lxp->nclist += 8; /* arbitrary */ + if ((p = realloc(lxp->clist, + lxp->nclist)) == 0) + { + lxp->err = REG_ESPACE; + return -1; + } + lxp->clist = p; + } + lxp->clist[lxp->nleft] = 0; /* unavailable */ + } + lxp->nleft++; + wc = ROP_LP; + } + break; + case ')': + /* + * For REG_PARENS, only take a right paren as a close + * if there is a matching left paren. + */ + if (lxp->flags & REG_PARENS && lxp->nright < lxp->nleft) + { + lxp->nright++; + rightparen:; + /* + * The group that is being closed is the highest + * numbered as-yet-unclosed group. + */ + if ((lxp->flags & REG_NOBACKREF) == 0) + { + num = lxp->nleft; + while (lxp->clist[--num] != 0) + ; + lxp->clist[num] = 1; + } + wc = ROP_RP; + } + break; + case '.': + wc = ROP_ANYCH; + if (lxp->flags & REG_NEWLINE) + wc = ROP_NOTNL; + break; + case '*': + if (lxp->flags & REG_ADDITIVE) + { + nxtstar: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtstar; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + lxp->pat++; + goto nxtstar; + } + } + wc = ROP_STAR; + break; + case '^': + /* + * Look "behind" to see if this is an anchor. + * Take it as an anchor if it follows an alternation + * operator. (lxp->tok is initially set to ROP_OR.) + */ + if (lxp->flags & REG_ANCHORS || lxp->tok == ROP_OR) { + if (lxp->flags & REG_ADDITIVE) + { + int optional = 0; + + nxtcar: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtcar; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + optional = 1; + lxp->pat++; + goto nxtcar; + } + if (optional) + goto nextc; + } + wc = ROP_BOL; + } + break; + case '$': + /* + * Look ahead to see if this is an anchor, + * unless any '$' is an anchor. 
+ * Take it as an anchor if it occurs just before + * the pattern end or an alternation operator. + */ + if (lxp->flags & REG_ANCHORS || *lxp->pat == '\0' + || (lxp->flags & REG_OR && *lxp->pat == '|') + || (lxp->flags & REG_NLALT && *lxp->pat == '\n')) + { + if (lxp->flags & REG_ADDITIVE) + { + int optional = 0; + + nxtdol: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + lxp->pat++; + goto nxtdol; + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + /*FALLTHRU*/ + case '*': + optional = 1; + lxp->pat++; + goto nxtdol; + } + if (optional) + goto nextc; + } + wc = ROP_EOL; + } + break; + case '+': + if (lxp->flags & REG_PLUS) + { + wc = ROP_PLUS; + if (lxp->flags & REG_ADDITIVE) + { + nxtplus: switch (*lxp->pat) + { + case '?': + if ((lxp->flags & REG_QUEST) == 0) + break; + case '*': + wc = ROP_STAR; + /*FALLTHRU*/ + case '+': + lxp->pat++; + goto nxtplus; + } + } + } + break; + case '?': + if (lxp->flags & REG_QUEST) + { + wc = ROP_QUEST; + if (lxp->flags & REG_ADDITIVE) + { + nxtquest: switch (*lxp->pat) + { + case '+': + if ((lxp->flags & REG_PLUS) == 0) + break; + case '*': + wc = ROP_STAR; + /*FALLTHRU*/ + case '?': + lxp->pat++; + goto nxtquest; + } + } + } + break; + case '\n': + if (lxp->flags & REG_NLALT) + { + /* + * Even when newline is an alternative separator, + * it doesn't permit parenthesized subexpressions + * to include it. 
+ */ + if (lxp->nleft != lxp->nright) + { + lxp->err = REG_EPAREN; + return -1; + } + wc = ROP_OR; + } + else if (lxp->flags & REG_NEWLINE) + lxp->flags |= REG_NFA; + break; + case '|': + if (lxp->flags & REG_OR) + wc = ROP_OR; + break; + case '[': + if ((lxp->info.bkt = malloc(sizeof(Bracket))) == 0) + { + lxp->err = REG_ESPACE; + return -1; + } + if ((lxp->flags & REG_GOTBKT) == 0) /* first time */ + { + struct lc_collate *col; + + lxp->flags |= REG_GOTBKT; + lxp->bktflags = 0; + if (lxp->flags & REG_ICASE) + lxp->bktflags |= BKT_ONECASE; + if (lxp->flags & REG_NEWLINE) + lxp->bktflags |= BKT_NOTNL; + if (lxp->flags & REG_BADRANGE) + lxp->bktflags |= BKT_BADRANGE; + if (lxp->flags & REG_ODDRANGE) + lxp->bktflags |= BKT_ODDRANGE; + if (lxp->flags & REG_SEPRANGE) + lxp->bktflags |= BKT_SEPRANGE; + if (lxp->flags & REG_BKTQUOTE) + lxp->bktflags |= BKT_QUOTE; + if (lxp->flags & REG_BKTEMPTY) + lxp->bktflags |= BKT_EMPTY; + if (lxp->flags & REG_ESCNL) + lxp->bktflags |= BKT_ESCNL; + if (lxp->flags & REG_NLALT) + lxp->bktflags |= BKT_NLBAD; + if (lxp->flags & REG_ESCSEQ) + lxp->bktflags |= BKT_ESCSEQ; + if (lxp->flags & REG_BKTESCAPE) + lxp->bktflags |= BKT_ESCAPE; + if (lxp->flags & REG_NOI18N) + lxp->bktflags |= BKT_NOI18N; + if (lxp->flags & REG_OLDESC) + lxp->bktflags |= BKT_OLDESC; + if ((col = libuxre_lc_collate(0)) != 0) + { + if (col->maintbl == 0 + || col->flags & CHF_ENCODED) + { + (void)libuxre_lc_collate(col); + col = 0; + } + else if (col->flags & CHF_MULTICH) + lxp->flags |= REG_NFA; + } + lxp->col = col; + } + n = lxp->bktflags; + if (*lxp->pat == '^') + { + n |= BKT_NEGATED; + lxp->pat++; + } + lxp->info.bkt->col = lxp->col; + if ((n = libuxre_bktmbcomp(lxp->info.bkt, lxp->pat, + n, mb_cur_max)) < 0) + { + free(lxp->info.bkt); + lxp->err = -n; /* convert to REG_* errors */ + return -1; + } + /* + * NFA forced if newline can be a match and REG_NEWLINE is set. 
+ */ + if ((lxp->flags & (REG_NFA | REG_NEWLINE)) == REG_NEWLINE + && lxp->pat[-1] == '[' /* i.e., not BKT_NEGATED */ + && libuxre_bktmbexec(lxp->info.bkt, '\n', 0, 1) == 0) + { + lxp->flags |= REG_NFA; + } + lxp->pat += n; + wc = ROP_BKT; + break; + case '{': + if (lxp->flags & REG_NOBRACES || (lxp->flags & REG_BRACES) == 0) + break; + interval:; + if (!isdigit(num = *lxp->pat)) + { + badbr:; + lxp->err = REG_BADBR; + if (*lxp->pat == '\0') + lxp->err = REG_EBRACE; /* more accurate */ + return -1; + } + num -= '0'; + while (isdigit(wc = *++lxp->pat)) + { + num *= 10; + if ((num += wc - '0') > BRACE_MAX) + goto badbr; + } + lxp->info.num[0] = num; + lxp->info.num[1] = num; + if (wc == ',') + { + lxp->info.num[1] = BRACE_INF; + if (isdigit(wc = *++lxp->pat)) + { + num = wc - '0'; + while (isdigit(wc = *++lxp->pat)) + { + num *= 10; + if ((num += wc - '0') > BRACE_MAX) + goto badbr; + } + if (num < lxp->info.num[0]) + goto badbr; + lxp->info.num[1] = num; + } + } + if ((lxp->flags & REG_BRACES) == 0) + { + if (wc != '\\') + goto badbr; + wc = *++lxp->pat; + } + if (wc != '}') + goto badbr; + lxp->pat++; + wc = ROP_BRACE; + /* + * Replace interval with simpler equivalents where possible, + * even when the operators are not otherwise available. + */ + if (lxp->info.num[1] <= 1) + { + if (lxp->info.num[0] == 1) + wc = ROP_NOP; /* {1,1} is noise */ + else if (lxp->info.num[1] == 0) + wc = ROP_EMPTY; /* {0,0} is empty string */ + else + wc = ROP_QUEST; /* {0,1} is ? 
*/ + } + else if (lxp->info.num[1] == BRACE_INF) + { + if (lxp->info.num[0] == 0) + wc = ROP_STAR; + else if (lxp->info.num[0] == 1) + wc = ROP_PLUS; + else if (lxp->info.num[0] > BRACE_DFAMAX) + lxp->flags |= REG_NFA; + } + else if (lxp->info.num[1] > BRACE_DFAMAX) + { + lxp->flags |= REG_NFA; + } + break; + case '\\': + switch (wc = *lxp->pat++) + { + case '\0': + lxp->err = REG_EESCAPE; + return -1; + case '<': + if (lxp->flags & REG_ANGLES) + { + lxp->flags |= REG_NFA; + wc = ROP_LT; + } + goto out; + case '>': + if (lxp->flags & REG_ANGLES) + { + lxp->flags |= REG_NFA; + wc = ROP_GT; + } + goto out; + case '(': + if ((lxp->flags & REG_PARENS) == 0) + goto leftparen; + goto out; + case ')': + if ((lxp->flags & REG_PARENS) == 0) + { + if (++lxp->nright > lxp->nleft) + { + lxp->err = REG_EPAREN; + return -1; + } + goto rightparen; + } + goto out; + case '{': + if (lxp->flags & (REG_BRACES|REG_NOBRACES)) + goto out; + goto interval; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + num = wc - '0'; + if ((lxp->flags & REG_NOBACKREF) == 0) + { + backref:; + if (num > lxp->nleft + || lxp->clist[num - 1] == 0) + { + lxp->err = REG_ESUBREG; + return -1; + } + lxp->info.sub = num; + if (lxp->maxref < num) + lxp->maxref = num; + lxp->flags |= REG_NFA; + wc = ROP_REF; + goto out; + } + /* + * For compatibility (w/awk), permit "octal" 8 and 9. + * Already have the value of the first digit in num. + * + * If REG_OLDESC, exactly three digits must be present. 
+ */ + tryoctal:; + if ((lxp->flags & REG_ESCSEQ) == 0) + goto out; + if ((wc = *lxp->pat) >= '0' && wc <= '9') + { + num <<= 3; + num += wc - '0'; + if ((wc = *++lxp->pat) >= '0' && wc <= '9') + { + num <<= 3; + num += wc - '0'; + lxp->pat++; + } + else if (lxp->flags & REG_OLDESC) + { + lxp->pat--; + wc = lxp->pat[-1]; + goto out; + } + } + else if (lxp->flags & REG_OLDESC) + { + wc = lxp->pat[-1]; + goto out; + } + if ((wc = num) <= 0) + { + lxp->err = REG_BADESC; + return -1; + } + goto out; + case '0': + if ((lxp->flags & REG_NOBACKREF) == 0 + && (num = *lxp->pat) >= '0' && num <= '9') + { + num -= '0'; + /* + * This loop ignores wraparounds. + * Keep track of number of digits in n. + */ + n = 1; + while ((wc = *++lxp->pat) >= '0' && wc <= '9') + { + num *= 10; + num += wc - '0'; + n++; + } + if (num != 0) + goto backref; + lxp->pat -= n; + } + num = 0; + goto tryoctal; + case 'a': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) + wc = '\a'; + goto out; + case 'b': + if (lxp->flags & REG_ESCSEQ) + wc = '\b'; + goto out; + case 'f': + if (lxp->flags & REG_ESCSEQ) + wc = '\f'; + goto out; + case 'n': + if (lxp->flags & (REG_ESCSEQ | REG_ESCNL)) + { + wc = '\n'; + if (lxp->flags & REG_NEWLINE) + lxp->flags |= REG_NFA; + } + goto out; + case 'r': + if (lxp->flags & REG_ESCSEQ) + wc = '\r'; + goto out; + case 't': + if (lxp->flags & REG_ESCSEQ) + wc = '\t'; + goto out; + case 'v': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ) + wc = '\v'; + goto out; + case 'x': + if ((lxp->flags&(REG_ESCSEQ|REG_OLDESC)) == REG_ESCSEQ + && isxdigit(num = *lxp->pat)) + { + wc = num; + num = 0; + /* + * Take as many hex digits as possible, + * ignoring overflows. + * If the result (squeezed into a w_type) + * is positive, it's okay. 
+ */ + do + { + if (isdigit(wc)) + wc -= '0'; + else if (isupper(wc)) + wc -= 'A' + 10; + else + wc -= 'a' + 10; + num <<= 4; + num |= wc; + } while (isxdigit(wc = *++lxp->pat)); + if ((wc = num) <= 0) + { + lxp->err = REG_BADESC; + return -1; + } + } + goto out; + } + /*FALLTHROUGH*/ + default: + if (!ISONEBYTE(wc)) + { + if ((n = libuxre_mb2wc(&wc, lxp->pat)) > 0) + lxp->pat += n; + else if (n < 0) + { + lxp->err = REG_ILLSEQ; + return -1; + } + } + if (lxp->flags & REG_ICASE) + wc = to_lower(wc); + break; + } +out:; + lxp->tok = wc; + return 0; +} + +static Tree *alt(Lex *); + +static Tree * +leaf(Lex *lxp) +{ + Tree *tp; + + if ((tp = malloc(sizeof(Tree))) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + switch (tp->op = lxp->tok) /* covers most cases */ + { + default: + if (tp->op < 0) + { + lxp->err = REG_BADPAT; + tp->right.ptr = 0; + goto badunary; + } + break; + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lxp->flags & REG_NOAUTOQUOTE) == 0 + && lxp->pat[-1] != '}') + { + tp->op = lxp->pat[-1]; + break; + } + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_EMPTY: /* was {0,0} ROP_BRACE */ + case ROP_NOP: /* was {1,1} ROP_BRACE */ + lxp->err = REG_BADRPT; + badunary:; + tp->left.ptr = 0; + goto err; + case ROP_ANYCH: + case ROP_NOTNL: + break; + case ROP_BOL: + case ROP_EOL: + case ROP_LT: + case ROP_GT: + /* + * Look ahead for what would have been taken to be + * postfix operators. 
+ */ + if (lex(lxp) != 0) + goto err; + switch (lxp->tok) + { + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lxp->flags & REG_NOAUTOQUOTE) == 0 + && lxp->pat[-1] != '}') + { + lxp->tok = lxp->pat[-1]; + break; + } + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_EMPTY: /* was {0,0} ROP_BRACE */ + case ROP_NOP: /* was {1,1} ROP_BRACE */ + lxp->err = REG_BADRPT; + goto err; + } + return tp; + case ROP_BKT: + tp->right.info.bkt = lxp->info.bkt; + break; + case ROP_REF: + tp->right.info.sub = lxp->info.sub; + break; + case ROP_LP: + tp->right.info.sub = lxp->nleft; + if (lex(lxp) != 0) + goto badunary; + if (lxp->tok == ROP_RP) /* empty parens; choice of meaning */ + { + if (lxp->flags & REG_MTPARENBAD) + { + lxp->err = REG_EMPTYPAREN; + goto badunary; + } + lxp->tok = ROP_EMPTY; + if (lxp->flags & REG_MTPARENFAIL) + lxp->tok = ROP_NONE; + if ((tp->left.ptr = libuxre_reg1tree(lxp->tok, 0)) == 0) + goto badunary; + } + else if ((tp->left.ptr = alt(lxp)) == 0) + { + if (lxp->err == REG_BADPAT) + goto parenerr; + goto badunary; + } + else if (lxp->tok != ROP_RP) + { + lxp->err = REG_BADPAT; + parenerr:; + if (lxp->nleft != lxp->nright) + lxp->err = REG_EPAREN; /* better choice */ + goto badunary; + } + tp->left.ptr->parent = tp; + break; + } + if (lex(lxp) != 0) + { + err:; + libuxre_regdeltree(tp, 1); + tp = 0; + } + return tp; +} + +static Tree * +post(Lex *lxp) +{ + Tree *lp; + + if ((lp = leaf(lxp)) == 0) + return 0; + switch (lxp->tok) + { + case ROP_EMPTY: /* this was {0,0} ROP_BRACE */ + libuxre_regdeltree(lp, 1); + lp = 0; + /*FALLTHROUGH*/ + case ROP_BRACE: + case ROP_STAR: + case ROP_PLUS: + case ROP_QUEST: + if ((lp = libuxre_reg1tree(lxp->tok, lp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + if (lxp->tok == ROP_BRACE) + lp->right.info = lxp->info; + /*FALLTHROUGH*/ + case ROP_NOP: /* this was {1,1} ROP_BRACE */ + if (lex(lxp) != 0) + { + libuxre_regdeltree(lp, 1); + return 0; + } + break; + } + return lp; +} + +static Tree * +cat(Lex *lxp) 
+{ + Tree *lp, *rp; + + if ((lp = post(lxp)) == 0) + return 0; + for (;;) + { + if (lxp->tok == ROP_OR || lxp->tok == ROP_RP + || lxp->tok == ROP_END) + { + return lp; + } + if ((rp = post(lxp)) == 0) + break; + if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + } + libuxre_regdeltree(lp, 1); + return 0; +} + +static Tree * +alt(Lex *lxp) +{ + Tree *lp, *rp; + + if ((lp = cat(lxp)) == 0) + return 0; + for (;;) + { + if (lxp->tok != ROP_OR) + return lp; + if (lex(lxp) != 0) + break; + if (lxp->tok == ROP_END) + return lp; /* ignore trailing '|' */ + if ((rp = cat(lxp)) == 0) + break; + if ((lp = libuxre_reg2tree(ROP_OR, lp, rp)) == 0) + { + lxp->err = REG_ESPACE; + return 0; + } + } + libuxre_regdeltree(lp, 1); + return 0; +} + +LIBUXRE_STATIC Tree * +libuxre_regparse(Lex *lxp, const unsigned char *pat, int flags) +{ + Tree *lp, *rp; + + lp = 0; /* in case of error */ + lxp->clist = 0; + lxp->col = 0; + lxp->err = 0; + lxp->maxref = 0; + lxp->nleft = 0; + lxp->nright = 0; + lxp->nclist = 0; + lxp->mb_cur_max = MB_CUR_MAX; + if (flags & REG_OR && *pat == '|') + pat++; /* skip initial OR like egrep did */ + lxp->pat = pat; + lxp->flags = flags; + lxp->tok = ROP_OR; /* enables ^ as anchor */ + /* + * Get initial token. 
+ */ + if (lex(lxp) != 0) + { + err:; + if (lp != 0) + { + libuxre_regdeltree(lp, 1); + lp = 0; + } + if (lxp->err == 0) + lxp->err = REG_ESPACE; + goto ret; + } + if (lxp->tok == ROP_END) + { + lxp->err = REG_NOPAT; + goto err; + } + if ((lp = alt(lxp)) == 0) /* parse entire RE */ + goto err; + if (lxp->maxref != 0 || (flags & REG_NOSUB) == 0) + { + if ((lp = libuxre_reg1tree(ROP_LP, lp)) == 0) + goto err; + lp->right.info.sub = 0; + } + if ((rp = libuxre_reg1tree(ROP_END, 0)) == 0) + goto err; + if ((lp = libuxre_reg2tree(ROP_CAT, lp, rp)) == 0) + goto err; + lp->parent = 0; +ret:; + if (lxp->clist != 0) + free(lxp->clist); + return lp; +} + +#ifdef REGDEBUG + +LIBUXRE_STATIC void +libuxre_regtree(Tree *tp, int n) +{ + const char *opstr; + char buf[32]; + int kind, next; + + if (n < 0) + next = -n + 2; + else + next = n + 2; + switch (tp->op) + { + case ROP_OR: + opstr = "|"; + kind = BINARY_ROP; + break; + case ROP_CAT: + opstr = "&"; + kind = BINARY_ROP; + break; + case ROP_STAR: + opstr = "*"; + kind = UNARY_ROP; + break; + case ROP_PLUS: + opstr = "+"; + kind = UNARY_ROP; + break; + case ROP_QUEST: + opstr = "?"; + kind = UNARY_ROP; + break; + case ROP_BRACE: + opstr = buf; + if (tp->right.info.num[1] == BRACE_INF) + { + sprintf(buf, "{%u,inf}", + (unsigned)tp->right.info.num[0]); + } + else + { + sprintf(buf, "{%u,%u}", + (unsigned)tp->right.info.num[0], + (unsigned)tp->right.info.num[1]); + } + kind = UNARY_ROP; + break; + case ROP_LP: + opstr = buf; + sprintf(buf, "%lu(", (unsigned long)tp->right.info.sub); + kind = UNARY_ROP; + break; + case ROP_RP: + opstr = buf; + sprintf(buf, ")%lu", (unsigned long)tp->right.info.sub); + kind = UNARY_ROP; + break; + case ROP_NOP: + opstr = "<NOP>"; + kind = LEAF_ROP; + break; + case ROP_BOL: + opstr = "<BOL>"; + kind = LEAF_ROP; + break; + case ROP_EOL: + opstr = "<EOL>"; + kind = LEAF_ROP; + break; + case ROP_ALL: + opstr = "<ALL>"; + kind = LEAF_ROP; + break; + case ROP_ANYCH: + opstr = "<ANYCH>"; + kind = LEAF_ROP; 
+ break; + case ROP_NOTNL: + opstr = "<NOTNL>"; + kind = LEAF_ROP; + break; + case ROP_EMPTY: + opstr = "<MT>"; + kind = LEAF_ROP; + break; + case ROP_NONE: + opstr = "<NONE>"; + kind = LEAF_ROP; + break; + case ROP_BKT: + opstr = buf; + sprintf(buf, "[%#lx]", (unsigned long)tp->right.info.bkt); + kind = LEAF_ROP; + break; + case ROP_BKTCOPY: + opstr = buf; + sprintf(buf, "[%#lx]CPY", (unsigned long)tp->right.info.bkt); + kind = LEAF_ROP; + break; + case ROP_LT: + opstr = "\\<"; + kind = LEAF_ROP; + break; + case ROP_GT: + opstr = "\\>"; + kind = LEAF_ROP; + break; + case ROP_REF: + opstr = buf; + sprintf(buf, "\\%lu", (unsigned long)tp->right.info.sub); + kind = LEAF_ROP; + break; + case ROP_END: + opstr = "<END>"; + kind = LEAF_ROP; + break; + default: + opstr = buf; + if (tp->op > UCHAR_MAX) + sprintf(buf, "W%#x", tp->op); + else if (tp->op <= 0) + sprintf(buf, "UNK=%u", tp->op); + else + sprintf(buf, "%c", tp->op); + kind = LEAF_ROP; + break; + } + if (kind == BINARY_ROP) + libuxre_regtree(tp->right.ptr, -next); + printf("%*c:%s\n", next - 1, n < 0 ? 'R' : n > 0 ? 'L' : 'T', opstr); + if (kind != LEAF_ROP) + libuxre_regtree(tp->left.ptr, next); +} + +#endif /*REGDEBUG*/ diff --git a/libuxre/stubs.c b/libuxre/stubs.c @@ -0,0 +1,82 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)stubs.c 1.27 (gritter) 6/26/05 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* stubbed-out routines needed to complete the RE libc code */ + +#include "colldata.h" + +struct lc_collate * +libuxre_lc_collate(struct lc_collate *cp) +{ + static struct lc_collate curinfo = {0}; /* means CHF_ENCODED */ + + return &curinfo; +} + +#include "wcharm.h" + +LIBUXRE_STATIC int +libuxre_mb2wc(w_type *wt, const unsigned char *s) +{ + wchar_t wc; + int len; + + if ((len = mbtowc(&wc, (const char *)&s[-1], MB_LEN_MAX)) > 0) + *wt = wc; + else if (len == 0) + *wt = '\0'; + else /*if (len < 0)*/ + *wt = (w_type)WEOF; + return len > 0 ? len - 1 : len; +} + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)libuxre.sl 1.27 (gritter) 6/26/05"; +/* SLIST */ +/* +_collelem.c: * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 +_collmult.c: * Sccsid @(#)_collmult.c 1.4 (gritter) 9/22/03 +bracket.c: * Sccsid @(#)bracket.c 1.14 (gritter) 10/18/03 +colldata.h: * Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04 +onefile.c: * Sccsid @(#)onefile.c 1.1 (gritter) 9/22/03 +re.h: * Sccsid @(#)re.h 1.15 (gritter) 2/6/05 +regcomp.c: * Sccsid @(#)regcomp.c 1.6 (gritter) 9/22/03 +regdfa.c: * Sccsid @(#)regdfa.c 1.9 (gritter) 9/22/03 +regdfa.h: * Sccsid @(#)regdfa.h 1.3 (gritter) 9/22/03 +regerror.c: * Sccsid @(#)regerror.c 1.4 (gritter) 3/29/03 +regex.h: * Sccsid @(#)regex.h 1.13 (gritter) 2/6/05 +regexec.c: * Sccsid @(#)regexec.c 1.7 (gritter) 2/6/05 +regfree.c: * Sccsid @(#)regfree.c 1.3 (gritter) 9/22/03 +regnfa.c: * Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05 +regparse.c: * Sccsid @(#)regparse.c 1.12 (gritter) 9/22/03 
+wcharm.h: * Sccsid @(#)wcharm.h 1.12 (gritter) 10/18/03 +*/ diff --git a/libuxre/wcharm.h b/libuxre/wcharm.h @@ -0,0 +1,63 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)wcharm.h 1.12 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* Stubbed-out wide character locale information */ + +#ifndef LIBUXRE_WCHARM_H +#define LIBUXRE_WCHARM_H + +#ifndef LIBUXRE_STATIC +#define LIBUXRE_STATIC +#endif + +#ifndef LIBUXRE_WUCHAR_T +#define LIBUXRE_WUCHAR_T +typedef unsigned int wuchar_type; +#endif + +#ifndef LIBUXRE_W_TYPE +#define LIBUXRE_W_TYPE +typedef int w_type; +#endif + +#include <wchar.h> +#include <wctype.h> +#include <stdlib.h> + +#ifdef notdef +#define ISONEBYTE(ch) ((ch), 1) + +#define libuxre_mb2wc(wp, cp) ((wp), (cp), 0) +#endif /* notdef */ + +#define ISONEBYTE(ch) (((ch) & 0200) == 0 || mb_cur_max == 1) + +#define to_lower(ch) (mb_cur_max > 1 ? towlower(ch) : tolower(ch)) +#define to_upper(ch) (mb_cur_max > 1 ? 
towupper(ch) : toupper(ch)) + +LIBUXRE_STATIC int libuxre_mb2wc(w_type *, const unsigned char *); + +#endif /* !LIBUXRE_WCHARM_H */ diff --git a/mk/libbio/NOTICE b/mk/libbio/NOTICE @@ -0,0 +1,34 @@ +This copyright NOTICE applies to all files in this directory and +subdirectories, unless another copyright notice appears in a given +file or subdirectory. If you take substantial code from this software to use in +other programs, you must somehow include with it an appropriate +copyright notice that includes the copyright notice and the other +notices below. It is fine (and often tidier) to do that in a separate +file such as NOTICE, LICENCE or COPYING. + + Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. + Revisions Copyright © 2000-2005 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libbio/README b/mk/libbio/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libbio/bbuffered.c b/mk/libbio/bbuffered.c @@ -0,0 +1,20 @@ +#include "lib9.h" +#include <bio.h> + +int +Bbuffered(Biobuf *bp) +{ + switch(bp->state) { + case Bracteof: + case Bractive: + return -bp->icount; + + case Bwactive: + return bp->bsize + bp->ocount; + + case Binactive: + return 0; + } + fprint(2, "Bbuffered: unknown state %d\n", bp->state); + return 0; +} diff --git a/mk/libbio/bcat.c b/mk/libbio/bcat.c @@ -0,0 +1,46 @@ +#include <fmt.h> +#include "bio.h" + +Biobuf bout; + +void +bcat(Biobuf *b, char *name) +{ + char buf[1000]; + int n; + + while((n = Bread(b, buf, sizeof buf)) > 0){ + if(Bwrite(&bout, buf, n) < 0) + fprint(2, "writing during %s: %r\n", name); + } + if(n < 0) + fprint(2, "reading %s: %r\n", name); +} + +int +main(int argc, char **argv) +{ + int i; + Biobuf b, *bp; + Fmt fmt; + + Binit(&bout, 1, O_WRONLY); + Bfmtinit(&fmt, &bout); + fmtprint(&fmt, "hello, world\n"); + Bfmtflush(&fmt); + + if(argc == 1){ + Binit(&b, 0, O_RDONLY); + bcat(&b, "<stdin>"); + }else{ + for(i=1; i<argc; i++){ + if((bp = Bopen(argv[i], O_RDONLY)) == 0){ + fprint(2, "Bopen %s: %r\n", argv[i]); + continue; + } + bcat(bp, argv[i]); + Bterm(bp); + } + } + exit(0); +} diff --git a/mk/libbio/bfildes.c b/mk/libbio/bfildes.c @@ -0,0 +1,9 @@ +#include "lib9.h" +#include <bio.h> + +int +Bfildes(Biobuf *bp) +{ + + return bp->fid; +} diff --git a/mk/libbio/bflush.c b/mk/libbio/bflush.c @@ -0,0 +1,33 @@ +#include "lib9.h" +#include <bio.h> + +int +Bflush(Biobuf *bp) +{ + int n, c; + + switch(bp->state) { + case Bwactive: + n = bp->bsize+bp->ocount; + if(n == 0) + return 0; + c = write(bp->fid, bp->bbuf, n); + if(n == c) { +
bp->offset += n; + bp->ocount = -bp->bsize; + return 0; + } + bp->state = Binactive; + bp->ocount = 0; + break; + + case Bracteof: + bp->state = Bractive; /* fall through: then reset the read buffer as for Bractive */ + + case Bractive: + bp->icount = 0; + bp->gbuf = bp->ebuf; + return 0; + } + return Beof; +} diff --git a/mk/libbio/bgetc.c b/mk/libbio/bgetc.c @@ -0,0 +1,53 @@ +#include "lib9.h" +#include <bio.h> + +int +Bgetc(Biobuf *bp) +{ + int i; + +loop: + i = bp->icount; + if(i != 0) { + bp->icount = i+1; + return bp->ebuf[i]; + } + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + return Beof; + } + /* + * get next buffer, try to keep Bungetsize + * characters pre-catenated from the previous + * buffer to allow that many ungets. + */ + memmove(bp->bbuf-Bungetsize, bp->ebuf-Bungetsize, Bungetsize); + i = read(bp->fid, bp->bbuf, bp->bsize); + bp->gbuf = bp->bbuf; + if(i <= 0) { + bp->state = Bracteof; + if(i < 0) + bp->state = Binactive; + return Beof; + } + if(i < bp->bsize) { + memmove(bp->ebuf-i-Bungetsize, bp->bbuf-Bungetsize, i+Bungetsize); + bp->gbuf = bp->ebuf-i; + } + bp->icount = -i; + bp->offset += i; + goto loop; +} + +int +Bungetc(Biobuf *bp) +{ + + if(bp->state == Bracteof) + bp->state = Bractive; + if(bp->state != Bractive) + return Beof; + bp->icount--; + return 1; +} diff --git a/mk/libbio/bgetd.c b/mk/libbio/bgetd.c @@ -0,0 +1,36 @@ +#include "lib9.h" +#include <bio.h> + +struct bgetd +{ + Biobuf* b; + int eof; +}; + +static int +Bgetdf(void *vp) +{ + int c; + struct bgetd *bg = vp; + + c = Bgetc(bg->b); + if(c == Beof) + bg->eof = 1; + return c; +} + +int +Bgetd(Biobuf *bp, double *dp) +{ + double d; + struct bgetd b; + + b.b = bp; + b.eof = 0; + d = fmtcharstod(Bgetdf, &b); + if(b.eof) + return -1; + Bungetc(bp); + *dp = d; + return 1; +} diff --git a/mk/libbio/bgetrune.c b/mk/libbio/bgetrune.c @@ -0,0 +1,47 @@ +#include "lib9.h" +#include <bio.h> +#include <utf.h> + +long +Bgetrune(Biobuf *bp) +{ + int c, i; + Rune rune; + char str[UTFmax]; + + c = Bgetc(bp); + if(c <
Runeself) { /* one char */ + bp->runesize = 1; + return c; + } + str[0] = c; + + for(i=1;;) { + c = Bgetc(bp); + if(c < 0) + return c; + str[i++] = c; + + if(fullrune(str, i)) { + bp->runesize = chartorune(&rune, str); + while(i > bp->runesize) { + Bungetc(bp); + i--; + } + return rune; + } + } +} + +int +Bungetrune(Biobuf *bp) +{ + + if(bp->state == Bracteof) + bp->state = Bractive; + if(bp->state != Bractive) + return Beof; + bp->icount -= bp->runesize; + bp->runesize = 0; + return 1; +} diff --git a/mk/libbio/binit.c b/mk/libbio/binit.c @@ -0,0 +1,153 @@ +#include "lib9.h" +#include <bio.h> + +enum +{ + MAXBUFS = 20 +}; + +static Biobuf* wbufs[MAXBUFS]; +static int atexitflag; + +static +void +batexit(void) +{ + Biobuf *bp; + int i; + + for(i=0; i<MAXBUFS; i++) { + bp = wbufs[i]; + if(bp != 0) { + wbufs[i] = 0; + Bflush(bp); + } + } +} + +static +void +deinstall(Biobuf *bp) +{ + int i; + + for(i=0; i<MAXBUFS; i++) + if(wbufs[i] == bp) + wbufs[i] = 0; +} + +static +void +install(Biobuf *bp) +{ + int i; + + deinstall(bp); + for(i=0; i<MAXBUFS; i++) + if(wbufs[i] == 0) { + wbufs[i] = bp; + break; + } + if(atexitflag == 0) { + atexitflag = 1; + atexit(batexit); + } +} + +int +Binits(Biobuf *bp, int f, int mode, unsigned char *p, int size) +{ + + p += Bungetsize; /* make room for Bungets */ + size -= Bungetsize; + + switch(mode&~(OCEXEC|ORCLOSE|OTRUNC)) { + default: + fprint(2, "Bopen: unknown mode %d\n", mode); + return Beof; + + case OREAD: + bp->state = Bractive; + bp->ocount = 0; + break; + + case OWRITE: + install(bp); + bp->state = Bwactive; + bp->ocount = -size; + break; + } + bp->bbuf = p; + bp->ebuf = p+size; + bp->bsize = size; + bp->icount = 0; + bp->gbuf = bp->ebuf; + bp->fid = f; + bp->flag = 0; + bp->rdline = 0; + bp->offset = 0; + bp->runesize = 0; + return 0; +} + + +int +Binit(Biobuf *bp, int f, int mode) +{ + return Binits(bp, f, mode, bp->b, sizeof(bp->b)); +} + +Biobuf* +Bfdopen(int f, int mode) /* returns 0 if malloc fails or Binits rejects mode */ +{ + Biobuf *bp; + + bp = malloc(sizeof(Biobuf)); +
if(bp == 0) + return 0; + if(Binits(bp, f, mode, bp->b, sizeof(bp->b)) != 0) { free(bp); return 0; } /* unknown mode: Binits returned before initializing *bp */ + bp->flag = Bmagic; + return bp; +} + +Biobuf* +Bopen(char *name, int mode) +{ + Biobuf *bp; + int f; + + switch(mode&~(OCEXEC|ORCLOSE|OTRUNC)) { + default: + fprint(2, "Bopen: unknown mode %d\n", mode); + return 0; + + case OREAD: + f = open(name, mode); + if(f < 0) + return 0; + break; + + case OWRITE: + f = create(name, mode, 0666); + if(f < 0) + return 0; + } + bp = Bfdopen(f, mode); + if(bp == 0) + close(f); + return bp; +} + +int +Bterm(Biobuf *bp) +{ + + deinstall(bp); + Bflush(bp); + if(bp->flag == Bmagic) { + bp->flag = 0; + close(bp->fid); + free(bp); + } + return 0; +} diff --git a/mk/libbio/bio.3 b/mk/libbio/bio.3 @@ -0,0 +1,371 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH BIO 3 +.SH NAME +Bopen, Bfdopen, Binit, Binits, Brdline, Brdstr, Bgetc, Bgetrune, Bgetd, Bungetc, Bungetrune, Bread, Bseek, Boffset, Bfildes, Blinelen, Bputc, Bputrune, Bprint, Bvprint, Bwrite, Bflush, Bterm, Bbuffered \- buffered input/output +.SH SYNOPSIS +.ta \w'\fLBiobuf* 'u +.B #include <utf.h> +.br +.B #include <fmt.h> +.br +.B #include <bio.h> +.PP +.B +Biobuf* Bopen(char *file, int mode) +.PP +.B +Biobuf* Bfdopen(int fd, int mode) +.PP +.B +int Binit(Biobuf *bp, int fd, int mode) +.PP +.B +int Binits(Biobufhdr *bp, int fd, int mode, uchar *buf, int size) +.PP +.B +int Bterm(Biobufhdr *bp) +.PP +.B +int Bprint(Biobufhdr *bp, char *format, ...)
+.PP +.B +int Bvprint(Biobufhdr *bp, char *format, va_list arglist); +.PP +.B +void* Brdline(Biobufhdr *bp, int delim) +.PP +.B +char* Brdstr(Biobufhdr *bp, int delim, int nulldelim) +.PP +.B +int Blinelen(Biobufhdr *bp) +.PP +.B +vlong Boffset(Biobufhdr *bp) +.PP +.B +int Bfildes(Biobufhdr *bp) +.PP +.B +int Bgetc(Biobufhdr *bp) +.PP +.B +long Bgetrune(Biobufhdr *bp) +.PP +.B +int Bgetd(Biobufhdr *bp, double *d) +.PP +.B +int Bungetc(Biobufhdr *bp) +.PP +.B +int Bungetrune(Biobufhdr *bp) +.PP +.B +vlong Bseek(Biobufhdr *bp, vlong n, int type) +.PP +.B +int Bputc(Biobufhdr *bp, int c) +.PP +.B +int Bputrune(Biobufhdr *bp, long c) +.PP +.B +long Bread(Biobufhdr *bp, void *addr, long nbytes) +.PP +.B +long Bwrite(Biobufhdr *bp, void *addr, long nbytes) +.PP +.B +int Bflush(Biobufhdr *bp) +.PP +.B +int Bbuffered(Biobufhdr *bp) +.PP +.SH DESCRIPTION +These routines implement fast buffered I/O. +I/O on different file descriptors is independent. +.PP +.I Bopen +opens +.I file +for mode +.B O_RDONLY +or creates for mode +.BR O_WRONLY . +It calls +.IR malloc (3) +to allocate a buffer. +.PP +.I Bfdopen +allocates a buffer for the already-open file descriptor +.I fd +for mode +.B O_RDONLY +or +.BR O_WRONLY . +It calls +.IR malloc (3) +to allocate a buffer. +.PP +.I Binit +initializes a standard size buffer, type +.IR Biobuf , +with the open file descriptor passed in +by the user. +.I Binits +initializes a non-standard size buffer, type +.IR Biobufhdr , +with the open file descriptor, +buffer area, and buffer size passed in +by the user. +.I Biobuf +and +.I Biobufhdr +are related by the declaration: +.IP +.EX +typedef struct Biobuf Biobuf; +struct Biobuf +{ + Biobufhdr; + uchar b[Bungetsize+Bsize]; +}; +.EE +.PP +Arguments +of types pointer to Biobuf and pointer to Biobufhdr +can be used interchangeably in the following routines. +.PP +.IR Bopen , +.IR Binit , +or +.I Binits +should be called before any of the +other routines on that buffer. 
+.I Bfildes +returns the integer file descriptor of the associated open file. +.PP +.I Bterm +flushes the buffer for +.IR bp . +If the buffer was allocated by +.IR Bopen , +the buffer is +.I freed +and the file is closed. +.PP +.I Brdline +reads a string from the file associated with +.I bp +up to and including the first +.I delim +character. +The delimiter character at the end of the line is +not altered. +.I Brdline +returns a pointer to the start of the line or +.L 0 +on end-of-file or read error. +.I Blinelen +returns the length (including the delimiter) +of the most recent string returned by +.IR Brdline . +.PP +.I Brdstr +returns a +.IR malloc (3)-allocated +buffer containing the next line of input delimited by +.IR delim , +terminated by a NUL (0) byte. +Unlike +.IR Brdline , +which returns when its buffer is full even if no delimiter has been found, +.I Brdstr +will return an arbitrarily long line in a single call. +If +.I nulldelim +is set, the terminal delimiter will be overwritten with a NUL. +After a successful call to +.IR Brdstr , +the return value of +.I Blinelen +will be the length of the returned buffer, excluding the NUL. +.PP +.I Bgetc +returns the next character from +.IR bp , +or a negative value +at end of file. +.I Bungetc +may be called immediately after +.I Bgetc +to allow the same character to be reread. +.PP +.I Bgetrune +calls +.I Bgetc +to read the bytes of the next +.SM UTF +sequence in the input stream and returns the value of the rune +represented by the sequence. +It returns a negative value +at end of file. +.I Bungetrune +may be called immediately after +.I Bgetrune +to allow the same +.SM UTF +sequence to be reread as either bytes or a rune. +.I Bungetc +and +.I Bungetrune +may back up a maximum of four bytes. +.PP +.I Bgetd +uses +.I fmtcharstod +(see +.IR fmtstrtod (3)) +and +.I Bgetc +to read the formatted +floating-point number in the input stream, +skipping initial blanks and tabs. +The value is stored in +.BR *d .
+.PP +.I Bread +reads +.I nbytes +of data from +.I bp +into memory starting at +.IR addr . +The number of bytes read is returned on success +and a negative value is returned if a read error occurred. +.PP +.I Bseek +applies +.IR lseek (2) +to +.IR bp . +It returns the new file offset. +.I Boffset +returns the file offset of the next character to be processed. +.PP +.I Bputc +outputs the low order 8 bits of +.I c +on +.IR bp . +If this causes a +.IR write +to occur and there is an error, +a negative value is returned. +Otherwise, a zero is returned. +.PP +.I Bputrune +calls +.I Bputc +to output the low order +16 bits of +.I c +as a rune +in +.SM UTF +format +on the output stream. +.PP +.I Bprint +is a buffered interface to +.IR print (3). +If this causes a +.IR write +to occur and there is an error, +a negative value +.RB ( Beof ) +is returned. +Otherwise, the number of bytes output is returned. +.I Bvprint +does the same except it takes as argument a +.B va_list +parameter, so it can be called within a variadic function. +.PP +.I Bwrite +outputs +.I nbytes +of data starting at +.I addr +to +.IR bp . +If this causes a +.IR write +to occur and there is an error, +a negative value is returned. +Otherwise, the number of bytes written is returned. +.PP +.I Bflush +causes any buffered output associated with +.I bp +to be written. +The return is as for +.IR Bputc . +.I Bflush +is called on +exit for every buffer still open +for writing. +.PP +.I Bbuffered +returns the number of bytes in the buffer. +When reading, this is the number of bytes still available from the last +read on the file; when writing, it is the number of bytes ready to be +written. +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR open (2), +.IR print (3), +.IR atexit (3), +.IR utf (7) +.SH DIAGNOSTICS +.I Bio +routines that return integers yield +.B Beof +if +.I bp +is not the descriptor of an open file. +.I Bopen +returns zero if the file cannot be opened in the given mode.
+All routines set +.I errstr +on error. +.SH BUGS +.I Brdline +returns an error on strings longer than the buffer associated +with the file +and also if the end-of-file is encountered +before a delimiter. +.I Blinelen +will tell how many characters are available +in these cases. +In the case of a true end-of-file, +.I Blinelen +will return zero. +At the cost of allocating a buffer, +.I Brdstr +sidesteps these issues. +.PP +The data returned by +.I Brdline +may be overwritten by calls to any other +.I bio +routine on the same +.IR bp. diff --git a/mk/libbio/bio.h b/mk/libbio/bio.h @@ -0,0 +1,91 @@ +#ifndef _BIO_H_ +#define _BIO_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +#ifdef AUTOLIB +AUTOLIB(bio) +#endif + +#include <fcntl.h> /* for O_RDONLY, O_WRONLY */ + +typedef struct Biobuf Biobuf; + +enum +{ + Bsize = 8*1024, + Bungetsize = 4, /* space for ungetc */ + Bmagic = 0x314159, + Beof = -1, + Bbad = -2, + + Binactive = 0, /* states */ + Bractive, + Bwactive, + Bracteof, + + Bend +}; + +struct Biobuf +{ + int icount; /* neg num of bytes at eob */ + int ocount; /* num of bytes at bob */ + int rdline; /* num of bytes after rdline */ + int runesize; /* num of bytes of last getrune */ + int state; /* r/w/inactive */ + int fid; /* open file */ + int flag; /* magic if malloc'ed */ + long long offset; /* offset of buffer in file */ + int bsize; /* size of buffer */ + unsigned char* bbuf; /* pointer to beginning of buffer */ + unsigned char* ebuf; /* pointer to end of buffer */ + unsigned char* gbuf; /* pointer to good data in buf */ + unsigned char b[Bungetsize+Bsize]; +}; + +#define BGETC(bp)\ + ((bp)->icount?(bp)->bbuf[(bp)->bsize+(bp)->icount++]:Bgetc((bp))) +#define BPUTC(bp,c)\ + ((bp)->ocount?(bp)->bbuf[(bp)->bsize+(bp)->ocount++]=(c),0:Bputc((bp),(c))) +#define BOFFSET(bp)\ + (((bp)->state==Bractive)?\ + (bp)->offset + (bp)->icount:\ + (((bp)->state==Bwactive)?\ + (bp)->offset + ((bp)->bsize + (bp)->ocount):\ + -1)) +#define BLINELEN(bp)\ + (bp)->rdline 
+#define BFILDES(bp)\ + (bp)->fid + +int Bbuffered(Biobuf*); +Biobuf* Bfdopen(int, int); +int Bfildes(Biobuf*); +int Bflush(Biobuf*); +int Bgetc(Biobuf*); +int Bgetd(Biobuf*, double*); +long Bgetrune(Biobuf*); +int Binit(Biobuf*, int, int); +int Binits(Biobuf*, int, int, unsigned char*, int); +int Blinelen(Biobuf*); +long long Boffset(Biobuf*); +Biobuf* Bopen(char*, int); +int Bprint(Biobuf*, char*, ...); +int Bputc(Biobuf*, int); +int Bputrune(Biobuf*, long); +void* Brdline(Biobuf*, int); +char* Brdstr(Biobuf*, int, int); +long Bread(Biobuf*, void*, long); +long long Bseek(Biobuf*, long long, int); +int Bterm(Biobuf*); +int Bungetc(Biobuf*); +int Bungetrune(Biobuf*); +long Bwrite(Biobuf*, void*, long); +int Bvprint(Biobuf*, char*, va_list); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libbio/boffset.c b/mk/libbio/boffset.c @@ -0,0 +1,25 @@ +#include "lib9.h" +#include <bio.h> + +vlong +Boffset(Biobuf *bp) +{ + vlong n; + + switch(bp->state) { + default: + fprint(2, "Boffset: unknown state %d\n", bp->state); + n = Beof; + break; + + case Bracteof: + case Bractive: + n = bp->offset + bp->icount; + break; + + case Bwactive: + n = bp->offset + (bp->bsize + bp->ocount); + break; + } + return n; +} diff --git a/mk/libbio/bprint.c b/mk/libbio/bprint.c @@ -0,0 +1,14 @@ +#include "lib9.h" +#include <bio.h> + +int +Bprint(Biobuf *bp, char *fmt, ...) 
+{ + int n; + va_list arg; + + va_start(arg, fmt); + n = Bvprint(bp, fmt, arg); + va_end(arg); + return n; +} diff --git a/mk/libbio/bputc.c b/mk/libbio/bputc.c @@ -0,0 +1,20 @@ +#include "lib9.h" +#include <bio.h> + +int +Bputc(Biobuf *bp, int c) +{ + int i; + + for(;;) { + i = bp->ocount; + if(i) { + bp->ebuf[i++] = c; + bp->ocount = i; + return 0; + } + if(Bflush(bp) == Beof) + break; + } + return Beof; +} diff --git a/mk/libbio/bputrune.c b/mk/libbio/bputrune.c @@ -0,0 +1,23 @@ +#include "lib9.h" +#include <bio.h> +#include <utf.h> + +int +Bputrune(Biobuf *bp, long c) +{ + Rune rune; + char str[UTFmax]; + int n; + + rune = c; + if(rune < Runeself) { + Bputc(bp, rune); + return 1; + } + n = runetochar(str, &rune); + if(n == 0) + return Bbad; + if(Bwrite(bp, str, n) != n) + return Beof; + return n; +} diff --git a/mk/libbio/brdline.c b/mk/libbio/brdline.c @@ -0,0 +1,94 @@ +#include "lib9.h" +#include <bio.h> + +void* +Brdline(Biobuf *bp, int delim) +{ + char *ip, *ep; + int i, j; + + i = -bp->icount; + if(i == 0) { + /* + * eof or other error + */ + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + bp->rdline = 0; + bp->gbuf = bp->ebuf; + return 0; + } + } + + /* + * first try in remainder of buffer (gbuf doesn't change) + */ + ip = (char*)bp->ebuf - i; + ep = memchr(ip, delim, i); + if(ep) { + j = (ep - ip) + 1; + bp->rdline = j; + bp->icount += j; + return ip; + } + + /* + * copy data to beginning of buffer + */ + if(i < bp->bsize) + memmove(bp->bbuf, ip, i); + bp->gbuf = bp->bbuf; + + /* + * append to buffer looking for the delim + */ + ip = (char*)bp->bbuf + i; + while(i < bp->bsize) { + j = read(bp->fid, ip, bp->bsize-i); + if(j <= 0) { + /* + * end of file with no delim + */ + memmove(bp->ebuf-i, bp->bbuf, i); + bp->rdline = i; + bp->icount = -i; + bp->gbuf = bp->ebuf-i; + return 0; + } + bp->offset += j; + i += j; + ep = memchr(ip, delim, j); + if(ep) { + /* + * found in new piece + * copy back up and reset everything + */ + 
ip = (char*)bp->ebuf - i; + if(i < bp->bsize){ + memmove(ip, bp->bbuf, i); + bp->gbuf = (unsigned char*)ip; + } + j = (ep - (char*)bp->bbuf) + 1; + bp->rdline = j; + bp->icount = j - i; + return ip; + } + ip += j; + } + + /* + * full buffer without finding + */ + bp->rdline = bp->bsize; + bp->icount = -bp->bsize; + bp->gbuf = bp->bbuf; + return 0; +} + +int +Blinelen(Biobuf *bp) +{ + + return bp->rdline; +} diff --git a/mk/libbio/brdstr.c b/mk/libbio/brdstr.c @@ -0,0 +1,111 @@ +#include "lib9.h" +#include <bio.h> + +static char* +badd(char *p, int *np, char *data, int ndata, int delim, int nulldelim) +{ + int n; + + n = *np; + p = realloc(p, n+ndata+1); + if(p){ + memmove(p+n, data, ndata); + n += ndata; + if(n>0 && nulldelim && p[n-1]==delim) + p[--n] = '\0'; + else + p[n] = '\0'; + *np = n; + } + return p; +} + +char* +Brdstr(Biobuf *bp, int delim, int nulldelim) +{ + char *ip, *ep, *p; + int i, j; + + i = -bp->icount; + bp->rdline = 0; + if(i == 0) { + /* + * eof or other error + */ + if(bp->state != Bractive) { + if(bp->state == Bracteof) + bp->state = Bractive; + bp->gbuf = bp->ebuf; + return nil; + } + } + + /* + * first try in remainder of buffer (gbuf doesn't change) + */ + ip = (char*)bp->ebuf - i; + ep = memchr(ip, delim, i); + if(ep) { + j = (ep - ip) + 1; + bp->icount += j; + return badd(nil, &bp->rdline, ip, j, delim, nulldelim); + } + + /* + * copy data to beginning of buffer + */ + if(i < bp->bsize) + memmove(bp->bbuf, ip, i); + bp->gbuf = bp->bbuf; + + /* + * append to buffer looking for the delim + */ + p = nil; + for(;;){ + ip = (char*)bp->bbuf + i; + while(i < bp->bsize) { + j = read(bp->fid, ip, bp->bsize-i); + if(j <= 0 && i == 0) + return p; + if(j <= 0 && i > 0){ + /* + * end of file but no delim. pretend we got a delim + * by making the delim \0 and smashing it with nulldelim. 
+ */ + j = 1; + ep = ip; + delim = '\0'; + nulldelim = 1; + *ep = delim; /* there will be room for this */ + }else{ + bp->offset += j; + ep = memchr(ip, delim, j); + } + i += j; + if(ep) { + /* + * found in new piece + * copy back up and reset everything + */ + ip = (char*)bp->ebuf - i; + if(i < bp->bsize){ + memmove(ip, bp->bbuf, i); + bp->gbuf = (unsigned char*)ip; + } + j = (ep - (char*)bp->bbuf) + 1; + bp->icount = j - i; + return badd(p, &bp->rdline, ip, j, delim, nulldelim); + } + ip += j; + } + + /* + * full buffer without finding; add to user string and continue + */ + p = badd(p, &bp->rdline, (char*)bp->bbuf, bp->bsize, 0, 0); + i = 0; + bp->icount = 0; + bp->gbuf = bp->ebuf; + } +} diff --git a/mk/libbio/bread.c b/mk/libbio/bread.c @@ -0,0 +1,45 @@ +#include "lib9.h" +#include <bio.h> + +long +Bread(Biobuf *bp, void *ap, long count) +{ + long c; + unsigned char *p; + int i, n, ic; + + p = ap; + c = count; + ic = bp->icount; + + while(c > 0) { + n = -ic; + if(n > c) + n = c; + if(n == 0) { + if(bp->state != Bractive) + break; + i = read(bp->fid, bp->bbuf, bp->bsize); + if(i <= 0) { + bp->state = Bracteof; + if(i < 0) + bp->state = Binactive; + break; + } + bp->gbuf = bp->bbuf; + bp->offset += i; + if(i < bp->bsize) { + memmove(bp->ebuf-i, bp->bbuf, i); + bp->gbuf = bp->ebuf-i; + } + ic = -i; + continue; + } + memmove(p, bp->ebuf+ic, n); + c -= n; + ic += n; + p += n; + } + bp->icount = ic; + return count-c; +} diff --git a/mk/libbio/bseek.c b/mk/libbio/bseek.c @@ -0,0 +1,60 @@ +#include "lib9.h" +#include <bio.h> + +long long +Bseek(Biobuf *bp, long long offset, int base) +{ + vlong n, d; + int bufsz; + + switch(bp->state) { + default: + fprint(2, "Bseek: unknown state %d\n", bp->state); + return Beof; + + case Bracteof: + bp->state = Bractive; + bp->icount = 0; + bp->gbuf = bp->ebuf; + + case Bractive: + n = offset; + if(base == 1) { + n += Boffset(bp); + base = 0; + } + + /* + * try to seek within buffer + */ + if(base == 0) { + d = n - Boffset(bp); + 
bufsz = bp->ebuf - bp->gbuf; + if(-bufsz <= d && d <= bufsz){ + bp->icount += d; + if(d >= 0) { + if(bp->icount <= 0) + return n; + } else { + if(bp->ebuf - bp->gbuf >= -bp->icount) + return n; + } + } + } + + /* + * reset the buffer + */ + n = lseek(bp->fid, n, base); + bp->icount = 0; + bp->gbuf = bp->ebuf; + break; + + case Bwactive: + Bflush(bp); + n = seek(bp->fid, offset, base); + break; + } + bp->offset = n; + return n; +} diff --git a/mk/libbio/bvprint.c b/mk/libbio/bvprint.c @@ -0,0 +1,38 @@ +#include "lib9.h" +#include <bio.h> + +static int +fmtBflush(Fmt *f) +{ + Biobuf *bp; + + bp = f->farg; + bp->ocount = (char*)f->to - (char*)f->stop; + if(Bflush(bp) < 0) + return 0; + f->stop = bp->ebuf; + f->to = (char*)f->stop + bp->ocount; + f->start = f->to; + return 1; +} + +int +Bvprint(Biobuf *bp, char *fmt, va_list arg) +{ + int n; + Fmt f; + + f.runes = 0; + f.stop = bp->ebuf; + f.start = (char*)f.stop + bp->ocount; + f.to = f.start; + f.flush = fmtBflush; + f.farg = bp; + f.nfmt = 0; + fmtlocaleinit(&f, nil, nil, nil); + n = fmtvprint(&f, fmt, arg); + bp->ocount = (char*)f.to - (char*)f.stop; + if(n == 0) + n = f.nfmt; + return n; +} diff --git a/mk/libbio/bwrite.c b/mk/libbio/bwrite.c @@ -0,0 +1,38 @@ +#include "lib9.h" +#include <bio.h> + +long +Bwrite(Biobuf *bp, void *ap, long count) +{ + long c; + unsigned char *p; + int i, n, oc; + + p = ap; + c = count; + oc = bp->ocount; + + while(c > 0) { + n = -oc; + if(n > c) + n = c; + if(n == 0) { + if(bp->state != Bwactive) + return Beof; + i = write(bp->fid, bp->bbuf, bp->bsize); + if(i != bp->bsize) { + bp->state = Binactive; + return Beof; + } + bp->offset += i; + oc = -bp->bsize; + continue; + } + memmove(bp->ebuf+oc, p, n); + oc += n; + c -= n; + p += n; + } + bp->ocount = oc; + return count-c; +} diff --git a/mk/libbio/depsinc.mk b/mk/libbio/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libbio_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libbio_DEPDIR -lbio diff --git a/mk/libbio/lib9.h 
b/mk/libbio/lib9.h @@ -0,0 +1,26 @@ +#define _FILE_OFFSET_BITS 64 +#define _LARGEFILE64_SOURCE + +#include <utf.h> +#include <fmt.h> + +#include <fcntl.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> + +#define OREAD O_RDONLY +#define OWRITE O_WRONLY + +#define OCEXEC 0 +#define ORCLOSE 0 +#define OTRUNC 0 + +#define nil ((void*)0) + +typedef long long vlong; +typedef unsigned long long uvlong; + +#define seek(fd, offset, whence) lseek(fd, offset, whence) +#define create(name, mode, perm) creat(name, perm) + diff --git a/mk/libbio/mkfile b/mk/libbio/mkfile @@ -0,0 +1,23 @@ +LIB = libbio.a +LOBJ = bbuffered.o \ + bfildes.o \ + bflush.o \ + bgetc.o \ + bgetd.o \ + bgetrune.o \ + binit.o \ + boffset.o \ + bprint.o \ + bvprint.o \ + bputc.o \ + bputrune.o \ + brdline.o \ + brdstr.o \ + bread.o \ + bseek.o bwrite.o + +LOCAL_CFLAGS = -I"$PREFIX"/include -I. +DEPS = libutf libfmt + +<$mkbuild/mk.default + diff --git a/mk/libfmt/NOTICE b/mk/libfmt/NOTICE @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@swtch.com>. 
+ + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libfmt/README b/mk/libfmt/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libfmt/charstod.c b/mk/libfmt/charstod.c @@ -0,0 +1,73 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Reads a floating-point number by interpreting successive characters + * returned by (*f)(vp). The last call it makes to f terminates the + * scan, so is not a character in the number. It may therefore be + * necessary to back up the input stream up one byte after calling charstod. + */ + +double +fmtcharstod(int(*f)(void*), void *vp) +{ + double num, dem; + int neg, eneg, dig, exp, c; + + num = 0; + neg = 0; + dig = 0; + exp = 0; + eneg = 0; + + c = (*f)(vp); + while(c == ' ' || c == '\t') + c = (*f)(vp); + if(c == '-' || c == '+'){ + if(c == '-') + neg = 1; + c = (*f)(vp); + } + while(c >= '0' && c <= '9'){ + num = num*10 + c-'0'; + c = (*f)(vp); + } + if(c == '.') + c = (*f)(vp); + while(c >= '0' && c <= '9'){ + num = num*10 + c-'0'; + dig++; + c = (*f)(vp); + } + if(c == 'e' || c == 'E'){ + c = (*f)(vp); + if(c == '-' || c == '+'){ + if(c == '-'){ + dig = -dig; + eneg = 1; + } + c = (*f)(vp); + } + while(c >= '0' && c <= '9'){ + exp = exp*10 + c-'0'; + c = (*f)(vp); + } + } + exp -= dig; + if(exp < 0){ + exp = -exp; + eneg = !eneg; + } + dem = __fmtpow10(exp); + if(eneg) + num /= dem; + else + num *= dem; + if(neg) + return -num; + return num; +} diff --git a/mk/libfmt/depsinc.mk b/mk/libfmt/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libfmt_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libfmt_DEPDIR -lfmt diff --git a/mk/libfmt/dofmt.c b/mk/libfmt/dofmt.c @@ -0,0 +1,617 
@@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* format the output into f->to and return the number of characters fmted */ +int +dofmt(Fmt *f, char *fmt) +{ + Rune rune, *rt, *rs; + int r; + char *t, *s; + int n, nfmt; + + nfmt = f->nfmt; + for(;;){ + if(f->runes){ + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + while((r = *(uchar*)fmt) && r != '%'){ + if(r < Runeself) + fmt++; + else{ + fmt += chartorune(&rune, fmt); + r = rune; + } + FMTRCHAR(f, rt, rs, r); + } + fmt++; + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(!r) + return f->nfmt - nfmt; + f->stop = rs; + }else{ + t = (char*)f->to; + s = (char*)f->stop; + while((r = *(uchar*)fmt) && r != '%'){ + if(r < Runeself){ + FMTCHAR(f, t, s, r); + fmt++; + }else{ + n = chartorune(&rune, fmt); + if(t + n > s){ + t = (char*)__fmtflush(f, t, n); + if(t != nil) + s = (char*)f->stop; + else + return -1; + } + while(n--) + *t++ = *fmt++; + } + } + fmt++; + f->nfmt += t - (char *)f->to; + f->to = t; + if(!r) + return f->nfmt - nfmt; + f->stop = s; + } + + fmt = (char*)__fmtdispatch(f, fmt, 0); + if(fmt == nil) + return -1; + } +} + +void * +__fmtflush(Fmt *f, void *t, int len) +{ + if(f->runes) + f->nfmt += (Rune*)t - (Rune*)f->to; + else + f->nfmt += (char*)t - (char *)f->to; + f->to = t; + if(f->flush == 0 || (*f->flush)(f) == 0 || (char*)f->to + len > (char*)f->stop){ + f->stop = f->to; + return nil; + } + return f->to; +} + +/* + * put a formatted block of memory sz bytes long of n runes into the output buffer, + * left/right justified in a field of at least f->width characters (if FmtWidth is set) + */ +int +__fmtpad(Fmt *f, int n) +{ + char *t, *s; + int i; + + t = (char*)f->to; + s = (char*)f->stop; + for(i = 0; i < n; i++) + FMTCHAR(f, t, s, ' '); + f->nfmt += t - (char *)f->to; + f->to = t; + return 0; +} + +int +__rfmtpad(Fmt *f, int 
n) +{ + Rune *t, *s; + int i; + + t = (Rune*)f->to; + s = (Rune*)f->stop; + for(i = 0; i < n; i++) + FMTRCHAR(f, t, s, ' '); + f->nfmt += t - (Rune *)f->to; + f->to = t; + return 0; +} + +int +__fmtcpy(Fmt *f, const void *vm, int n, int sz) +{ + Rune *rt, *rs, r; + char *t, *s, *m, *me; + ulong fl; + int nc, w; + + m = (char*)vm; + me = m + sz; + fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; + if((fl & FmtPrec) && n > f->prec) + n = f->prec; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - n) < 0) + return -1; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + for(nc = n; nc > 0; nc--){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + FMTRCHAR(f, rt, rs, r); + } + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - n) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - n) < 0) + return -1; + t = (char*)f->to; + s = (char*)f->stop; + for(nc = n; nc > 0; nc--){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + FMTRUNE(f, t, s, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - n) < 0) + return -1; + } + return 0; +} + +int +__fmtrcpy(Fmt *f, const void *vm, int n) +{ + Rune r, *m, *me, *rt, *rs; + char *t, *s; + ulong fl; + int w; + + m = (Rune*)vm; + fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; + if((fl & FmtPrec) && n > f->prec) + n = f->prec; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - n) < 0) + return -1; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + for(me = m + n; m < me; m++) + FMTRCHAR(f, rt, rs, *m); + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - n) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - n) < 0) + return -1; + t = (char*)f->to; + s = (char*)f->stop; + for(me = m + n; m < me; m++){ + r = *m; 
+ FMTRUNE(f, t, s, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - n) < 0) + return -1; + } + return 0; +} + +/* fmt out one character */ +int +__charfmt(Fmt *f) +{ + char x[1]; + + x[0] = va_arg(f->args, int); + f->prec = 1; + return __fmtcpy(f, (const char*)x, 1, 1); +} + +/* fmt out one rune */ +int +__runefmt(Fmt *f) +{ + Rune x[1]; + + x[0] = va_arg(f->args, int); + return __fmtrcpy(f, (const void*)x, 1); +} + +/* public helper routine: fmt out a null terminated string already in hand */ +int +fmtstrcpy(Fmt *f, char *s) +{ + int i, j; + + if(!s) + return __fmtcpy(f, "<nil>", 5, 5); + /* if precision is specified, make sure we don't wander off the end */ + if(f->flags & FmtPrec){ +#ifdef PLAN9PORT + Rune r; + i = 0; + for(j=0; j<f->prec && s[i]; j++) + i += chartorune(&r, s+i); +#else + /* ANSI requires precision in bytes, not Runes */ + for(i=0; i<f->prec; i++) + if(s[i] == 0) + break; + j = utfnlen(s, i); /* won't print partial at end */ +#endif + return __fmtcpy(f, s, j, i); + } + return __fmtcpy(f, s, utflen(s), strlen(s)); +} + +/* fmt out a null terminated utf string */ +int +__strfmt(Fmt *f) +{ + char *s; + + s = va_arg(f->args, char *); + return fmtstrcpy(f, s); +} + +/* public helper routine: fmt out a null terminated rune string already in hand */ +int +fmtrunestrcpy(Fmt *f, Rune *s) +{ + Rune *e; + int n, p; + + if(!s) + return __fmtcpy(f, "<nil>", 5, 5); + /* if precision is specified, make sure we don't wander off the end */ + if(f->flags & FmtPrec){ + p = f->prec; + for(n = 0; n < p; n++) + if(s[n] == 0) + break; + }else{ + for(e = s; *e; e++) + ; + n = e - s; + } + return __fmtrcpy(f, s, n); +} + +/* fmt out a null terminated rune string */ +int +__runesfmt(Fmt *f) +{ + Rune *s; + + s = va_arg(f->args, Rune *); + return fmtrunestrcpy(f, s); +} + +/* fmt a % */ +int +__percentfmt(Fmt *f) +{ + Rune x[1]; + + x[0] = f->r; + f->prec = 1; + return __fmtrcpy(f, (const void*)x, 1); +} + +/* fmt an integer */ 
+int +__ifmt(Fmt *f) +{ + char buf[140], *p, *conv; + /* 140: for 64 bits of binary + 3-byte sep every 4 digits */ + uvlong vu; + ulong u; + int neg, base, i, n, fl, w, isv; + int ndig, len, excess, bytelen; + char *grouping; + char *thousands; + + neg = 0; + fl = f->flags; + isv = 0; + vu = 0; + u = 0; +#ifndef PLAN9PORT + /* + * Unsigned verbs for ANSI C + */ + switch(f->r){ + case 'o': + case 'p': + case 'u': + case 'x': + case 'X': + fl |= FmtUnsigned; + fl &= ~(FmtSign|FmtSpace); + break; + } +#endif + if(f->r == 'p'){ + u = (ulong)va_arg(f->args, void*); + f->r = 'x'; + fl |= FmtUnsigned; + }else if(fl & FmtVLong){ + isv = 1; + if(fl & FmtUnsigned) + vu = va_arg(f->args, uvlong); + else + vu = va_arg(f->args, vlong); + }else if(fl & FmtLong){ + if(fl & FmtUnsigned) + u = va_arg(f->args, ulong); + else + u = va_arg(f->args, long); + }else if(fl & FmtByte){ + if(fl & FmtUnsigned) + u = (uchar)va_arg(f->args, int); + else + u = (char)va_arg(f->args, int); + }else if(fl & FmtShort){ + if(fl & FmtUnsigned) + u = (ushort)va_arg(f->args, int); + else + u = (short)va_arg(f->args, int); + }else{ + if(fl & FmtUnsigned) + u = va_arg(f->args, uint); + else + u = va_arg(f->args, int); + } + conv = "0123456789abcdef"; + grouping = "\4"; /* for hex, octal etc. 
(undefined by spec but nice) */ + thousands = f->thousands; + switch(f->r){ + case 'd': + case 'i': + case 'u': + base = 10; + grouping = f->grouping; + break; + case 'X': + conv = "0123456789ABCDEF"; + /* fall through */ + case 'x': + base = 16; + thousands = ":"; + break; + case 'b': + base = 2; + thousands = ":"; + break; + case 'o': + base = 8; + break; + default: + return -1; + } + if(!(fl & FmtUnsigned)){ + if(isv && (vlong)vu < 0){ + vu = -(vlong)vu; + neg = 1; + }else if(!isv && (long)u < 0){ + u = -(long)u; + neg = 1; + } + } + p = buf + sizeof buf - 1; + n = 0; /* in runes */ + excess = 0; /* number of bytes > number runes */ + ndig = 0; + len = utflen(thousands); + bytelen = strlen(thousands); + if(isv){ + while(vu){ + i = vu % base; + vu /= base; + if((fl & FmtComma) && n % 4 == 3){ + *p-- = ','; + n++; + } + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = conv[i]; + n++; + } + }else{ + while(u){ + i = u % base; + u /= base; + if((fl & FmtComma) && n % 4 == 3){ + *p-- = ','; + n++; + } + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = conv[i]; + n++; + } + } + if(n == 0){ + /* + * "The result of converting a zero value with + * a precision of zero is no characters." - ANSI + * + * "For o conversion, # increases the precision, if and only if + * necessary, to force the first digit of the result to be a zero + * (if the value and precision are both 0, a single 0 is printed)." - ANSI + */ + if(!(fl & FmtPrec) || f->prec != 0 || (f->r == 'o' && (fl & FmtSharp))){ + *p-- = '0'; + n = 1; + if(fl & FmtApost) + __needsep(&ndig, &grouping); + } + + /* + * Zero values don't get 0x. 
+ */ + if(f->r == 'x' || f->r == 'X') + fl &= ~FmtSharp; + } + for(w = f->prec; n < w && p > buf+3; n++){ + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = '0'; + } + if(neg || (fl & (FmtSign|FmtSpace))) + n++; + if(fl & FmtSharp){ + if(base == 16) + n += 2; + else if(base == 8){ + if(p[1] == '0') + fl &= ~FmtSharp; + else + n++; + } + } + if((fl & FmtZero) && !(fl & (FmtLeft|FmtPrec))){ + w = 0; + if(fl & FmtWidth) + w = f->width; + for(; n < w && p > buf+3; n++){ + if((fl & FmtApost) && __needsep(&ndig, &grouping)){ + n += len; + excess += bytelen - len; + p -= bytelen; + memmove(p+1, thousands, bytelen); + } + *p-- = '0'; + } + f->flags &= ~FmtWidth; + } + if(fl & FmtSharp){ + if(base == 16) + *p-- = f->r; + if(base == 16 || base == 8) + *p-- = '0'; + } + if(neg) + *p-- = '-'; + else if(fl & FmtSign) + *p-- = '+'; + else if(fl & FmtSpace) + *p-- = ' '; + f->flags &= ~FmtPrec; + return __fmtcpy(f, p + 1, n, n + excess); +} + +int +__countfmt(Fmt *f) +{ + void *p; + ulong fl; + + fl = f->flags; + p = va_arg(f->args, void*); + if(fl & FmtVLong){ + *(vlong*)p = f->nfmt; + }else if(fl & FmtLong){ + *(long*)p = f->nfmt; + }else if(fl & FmtByte){ + *(char*)p = f->nfmt; + }else if(fl & FmtShort){ + *(short*)p = f->nfmt; + }else{ + *(int*)p = f->nfmt; + } + return 0; +} + +int +__flagfmt(Fmt *f) +{ + switch(f->r){ + case ',': + f->flags |= FmtComma; + break; + case '-': + f->flags |= FmtLeft; + break; + case '+': + f->flags |= FmtSign; + break; + case '#': + f->flags |= FmtSharp; + break; + case '\'': + f->flags |= FmtApost; + break; + case ' ': + f->flags |= FmtSpace; + break; + case 'u': + f->flags |= FmtUnsigned; + break; + case 'h': + if(f->flags & FmtShort) + f->flags |= FmtByte; + f->flags |= FmtShort; + break; + case 'L': + f->flags |= FmtLDouble; + break; + case 'l': + if(f->flags & FmtLong) + f->flags |= FmtVLong; + f->flags |= FmtLong; + break; + } + 
return 1; +} + +/* default error format */ +int +__badfmt(Fmt *f) +{ + char x[2+UTFmax]; + int n; + + x[0] = '%'; + n = 1 + runetochar(x+1, &f->r); + x[n++] = '%'; + f->prec = n; + __fmtcpy(f, (const void*)x, n, n); + return 0; +} diff --git a/mk/libfmt/dorfmt.c b/mk/libfmt/dorfmt.c @@ -0,0 +1,50 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* format the output into f->to and return the number of characters fmted */ + +/* BUG: THIS FILE IS NOT UPDATED TO THE NEW SPEC */ +int +dorfmt(Fmt *f, const Rune *fmt) +{ + Rune *rt, *rs; + int r; + char *t, *s; + int nfmt; + + nfmt = f->nfmt; + for(;;){ + if(f->runes){ + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + while((r = *fmt++) && r != '%'){ + FMTRCHAR(f, rt, rs, r); + } + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(!r) + return f->nfmt - nfmt; + f->stop = rs; + }else{ + t = (char*)f->to; + s = (char*)f->stop; + while((r = *fmt++) && r != '%'){ + FMTRUNE(f, t, f->stop, r); + } + f->nfmt += t - (char *)f->to; + f->to = t; + if(!r) + return f->nfmt - nfmt; + f->stop = s; + } + + fmt = (Rune*)__fmtdispatch(f, (Rune*)fmt, 1); + if(fmt == nil) + return -1; + } + return 0; /* not reached */ +} diff --git a/mk/libfmt/errfmt.c b/mk/libfmt/errfmt.c @@ -0,0 +1,16 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <errno.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +__errfmt(Fmt *f) +{ + char *s; + + s = strerror(errno); + return fmtstrcpy(f, s); +} diff --git a/mk/libfmt/fltfmt.c b/mk/libfmt/fltfmt.c @@ -0,0 +1,668 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdio.h> +#include <math.h> +#include <float.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <stdarg.h> +#include <fmt.h> +#include <assert.h> +#include "plan9.h" +#include "fmt.h" +#include 
"fmtdef.h" +#include "nan.h" + +enum +{ + FDIGIT = 30, + FDEFLT = 6, + NSIGNIF = 17 +}; + +/* + * first few powers of 10, enough for about 1/2 of the + * total space for doubles. + */ +static double pows10[] = +{ + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22, 1e23, 1e24, 1e25, 1e26, 1e27, 1e28, 1e29, + 1e30, 1e31, 1e32, 1e33, 1e34, 1e35, 1e36, 1e37, 1e38, 1e39, + 1e40, 1e41, 1e42, 1e43, 1e44, 1e45, 1e46, 1e47, 1e48, 1e49, + 1e50, 1e51, 1e52, 1e53, 1e54, 1e55, 1e56, 1e57, 1e58, 1e59, + 1e60, 1e61, 1e62, 1e63, 1e64, 1e65, 1e66, 1e67, 1e68, 1e69, + 1e70, 1e71, 1e72, 1e73, 1e74, 1e75, 1e76, 1e77, 1e78, 1e79, + 1e80, 1e81, 1e82, 1e83, 1e84, 1e85, 1e86, 1e87, 1e88, 1e89, + 1e90, 1e91, 1e92, 1e93, 1e94, 1e95, 1e96, 1e97, 1e98, 1e99, + 1e100, 1e101, 1e102, 1e103, 1e104, 1e105, 1e106, 1e107, 1e108, 1e109, + 1e110, 1e111, 1e112, 1e113, 1e114, 1e115, 1e116, 1e117, 1e118, 1e119, + 1e120, 1e121, 1e122, 1e123, 1e124, 1e125, 1e126, 1e127, 1e128, 1e129, + 1e130, 1e131, 1e132, 1e133, 1e134, 1e135, 1e136, 1e137, 1e138, 1e139, + 1e140, 1e141, 1e142, 1e143, 1e144, 1e145, 1e146, 1e147, 1e148, 1e149, + 1e150, 1e151, 1e152, 1e153, 1e154, 1e155, 1e156, 1e157, 1e158, 1e159, +}; +#define npows10 ((int)(sizeof(pows10)/sizeof(pows10[0]))) +#define pow10(x) fmtpow10(x) + +static double +pow10(int n) +{ + double d; + int neg; + + neg = 0; + if(n < 0){ + neg = 1; + n = -n; + } + + if(n < npows10) + d = pows10[n]; + else{ + d = pows10[npows10-1]; + for(;;){ + n -= npows10 - 1; + if(n < npows10){ + d *= pows10[n]; + break; + } + d *= pows10[npows10 - 1]; + } + } + if(neg) + return 1./d; + return d; +} + +/* + * add 1 to the decimal integer string a of length n. + * if 99999 overflows into 10000, return 1 to tell caller + * to move the virtual decimal point. 
+ */ +static int +xadd1(char *a, int n) +{ + char *b; + int c; + + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--) { + c = *b + 1; + if(c <= '9') { + *b = c; + return 0; + } + *b = '0'; + } + /* + * need to overflow adding digit. + * shift number down and insert 1 at beginning. + * decimal is known to be 0s or we wouldn't + * have gotten this far. (e.g., 99999+1 => 00000) + */ + a[0] = '1'; + return 1; +} + +/* + * subtract 1 from the decimal integer string a. + * if 10000 underflows into 09999, make it 99999 + * and return 1 to tell caller to move the virtual + * decimal point. this way, xsub1 is inverse of xadd1. + */ +static int +xsub1(char *a, int n) +{ + char *b; + int c; + + if(n < 0 || n > NSIGNIF) + return 0; + for(b = a+n-1; b >= a; b--) { + c = *b - 1; + if(c >= '0') { + if(c == '0' && b == a) { + /* + * just zeroed the top digit; shift everyone up. + * decimal is known to be 9s or we wouldn't + * have gotten this far. (e.g., 10000-1 => 09999) + */ + *b = '9'; + return 1; + } + *b = c; + return 0; + } + *b = '9'; + } + /* + * can't get here. the number a is always normalized + * so that it has a nonzero first digit. + */ + abort(); +} + +/* + * format exponent like sprintf(p, "e%+02d", e) + */ +static void +xfmtexp(char *p, int e, int ucase) +{ + char se[9]; + int i; + + *p++ = ucase ? 'E' : 'e'; + if(e < 0) { + *p++ = '-'; + e = -e; + } else + *p++ = '+'; + i = 0; + while(e) { + se[i++] = e % 10 + '0'; + e /= 10; + } + while(i < 2) + se[i++] = '0'; + while(i > 0) + *p++ = se[--i]; + *p++ = '\0'; +} + +/* + * compute decimal integer m, exp such that: + * f = m*10^exp + * m is as short as possible with losing exactness + * assumes special cases (NaN, +Inf, -Inf) have been handled. + */ +static void +xdtoa(double f, char *s, int *exp, int *neg, int *ns) +{ + int c, d, e2, e, ee, i, ndigit, oerrno; + char tmp[NSIGNIF+10]; + double g; + + oerrno = errno; /* in case strtod smashes errno */ + + /* + * make f non-negative. 
+ */ + *neg = 0; + if(f < 0) { + f = -f; + *neg = 1; + } + + /* + * must handle zero specially. + */ + if(f == 0){ + *exp = 0; + s[0] = '0'; + s[1] = '\0'; + *ns = 1; + return; + } + + /* + * find g,e such that f = g*10^e. + * guess 10-exponent using 2-exponent, then fine tune. + */ + frexp(f, &e2); + e = (int)(e2 * .301029995664); + g = f * pow10(-e); + while(g < 1) { + e--; + g = f * pow10(-e); + } + while(g >= 10) { + e++; + g = f * pow10(-e); + } + + /* + * convert NSIGNIF digits as a first approximation. + */ + for(i=0; i<NSIGNIF; i++) { + d = (int)g; + s[i] = d+'0'; + g = (g-d) * 10; + } + s[i] = 0; + + /* + * adjust e because s is 314159... not 3.14159... + */ + e -= NSIGNIF-1; + xfmtexp(s+NSIGNIF, e, 0); + + /* + * adjust conversion until strtod(s) == f exactly. + */ + for(i=0; i<10; i++) { + g = fmtstrtod(s, nil); + if(f > g) { + if(xadd1(s, NSIGNIF)) { + /* gained a digit */ + e--; + xfmtexp(s+NSIGNIF, e, 0); + } + continue; + } + if(f < g) { + if(xsub1(s, NSIGNIF)) { + /* lost a digit */ + e++; + xfmtexp(s+NSIGNIF, e, 0); + } + continue; + } + break; + } + + /* + * play with the decimal to try to simplify. + */ + + /* + * bump last few digits up to 9 if we can + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) { + c = s[i]; + if(c != '9') { + s[i] = '9'; + g = fmtstrtod(s, nil); + if(g != f) { + s[i] = c; + break; + } + } + } + + /* + * add 1 in hopes of turning 9s to 0s + */ + if(s[NSIGNIF-1] == '9') { + strcpy(tmp, s); + ee = e; + if(xadd1(tmp, NSIGNIF)) { + ee--; + xfmtexp(tmp+NSIGNIF, ee, 0); + } + g = fmtstrtod(tmp, nil); + if(g == f) { + strcpy(s, tmp); + e = ee; + } + } + + /* + * bump last few digits down to 0 as we can. + */ + for(i=NSIGNIF-1; i>=NSIGNIF-3; i--) { + c = s[i]; + if(c != '0') { + s[i] = '0'; + g = fmtstrtod(s, nil); + if(g != f) { + s[i] = c; + break; + } + } + } + + /* + * remove trailing zeros. 
+ */ + ndigit = NSIGNIF; + while(ndigit > 1 && s[ndigit-1] == '0'){ + e++; + --ndigit; + } + s[ndigit] = 0; + *exp = e; + *ns = ndigit; + errno = oerrno; +} + +#ifdef PLAN9PORT +static char *special[] = { "NaN", "NaN", "+Inf", "+Inf", "-Inf", "-Inf" }; +#else +static char *special[] = { "nan", "NAN", "inf", "INF", "-inf", "-INF" }; +#endif + +int +__efgfmt(Fmt *fmt) +{ + char buf[NSIGNIF+10], *dot, *digits, *p, *s, suf[10], *t; + double f; + int c, chr, dotwid, e, exp, fl, ndigits, neg, newndigits; + int pad, point, prec, realchr, sign, sufwid, ucase, wid, z1, z2; + Rune r, *rs, *rt; + + if(fmt->flags&FmtLong) + f = va_arg(fmt->args, long double); + else + f = va_arg(fmt->args, double); + + /* + * extract formatting flags + */ + fl = fmt->flags; + fmt->flags = 0; + prec = FDEFLT; + if(fl & FmtPrec) + prec = fmt->prec; + chr = fmt->r; + ucase = 0; + switch(chr) { + case 'A': + case 'E': + case 'F': + case 'G': + chr += 'a'-'A'; + ucase = 1; + break; + } + + /* + * pick off special numbers. + */ + if(__isNaN(f)) { + s = special[0+ucase]; + special: + fmt->flags = fl & (FmtWidth|FmtLeft); + return __fmtcpy(fmt, s, strlen(s), strlen(s)); + } + if(__isInf(f, 1)) { + s = special[2+ucase]; + goto special; + } + if(__isInf(f, -1)) { + s = special[4+ucase]; + goto special; + } + + /* + * get exact representation. + */ + digits = buf; + xdtoa(f, digits, &exp, &neg, &ndigits); + + /* + * get locale's decimal point. + */ + dot = fmt->decimal; + if(dot == nil) + dot = "."; + dotwid = utflen(dot); + + /* + * now the formatting fun begins. + * compute parameters for actual fmt: + * + * pad: number of spaces to insert before/after field. + * z1: number of zeros to insert before digits + * z2: number of zeros to insert after digits + * point: number of digits to print before decimal point + * ndigits: number of digits to use from digits[] + * suf: trailing suffix, like "e-5" + */ + realchr = chr; + switch(chr){ + case 'g': + /* + * convert to at most prec significant digits. 
(prec=0 means 1) + */ + if(prec == 0) + prec = 1; + if(ndigits > prec) { + if(digits[prec] >= '5' && xadd1(digits, prec)) + exp++; + exp += ndigits-prec; + ndigits = prec; + } + + /* + * extra rules for %g (implemented below): + * trailing zeros removed after decimal unless FmtSharp. + * decimal point only if digit follows. + */ + + /* fall through to %e */ + default: + case 'e': + /* + * one significant digit before decimal, no leading zeros. + */ + point = 1; + z1 = 0; + + /* + * decimal point is after ndigits digits right now. + * slide to be after first. + */ + e = exp + (ndigits-1); + + /* + * if this is %g, check exponent and convert prec + */ + if(realchr == 'g') { + if(-4 <= e && e < prec) + goto casef; + prec--; /* one digit before decimal; rest after */ + } + + /* + * compute trailing zero padding or truncate digits. + */ + if(1+prec >= ndigits) + z2 = 1+prec - ndigits; + else { + /* + * truncate digits + */ + assert(realchr != 'g'); + newndigits = 1+prec; + if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) { + /* + * had 999e4, now have 100e5 + */ + e++; + } + ndigits = newndigits; + z2 = 0; + } + xfmtexp(suf, e, ucase); + sufwid = strlen(suf); + break; + + casef: + case 'f': + /* + * determine where digits go with respect to decimal point + */ + if(ndigits+exp > 0) { + point = ndigits+exp; + z1 = 0; + } else { + point = 1; + z1 = 1 + -(ndigits+exp); + } + + /* + * %g specifies prec = number of significant digits + * convert to number of digits after decimal point + */ + if(realchr == 'g') + prec += z1 - point; + + /* + * compute trailing zero padding or truncate digits. 
+ */ + if(point+prec >= z1+ndigits) + z2 = point+prec - (z1+ndigits); + else { + /* + * truncate digits + */ + assert(realchr != 'g'); + newndigits = point+prec - z1; + if(newndigits < 0) { + z1 += newndigits; + newndigits = 0; + } else if(newndigits == 0) { + /* perhaps round up */ + if(digits[0] >= '5'){ + digits[0] = '1'; + newndigits = 1; + goto newdigit; + } + } else if(digits[newndigits] >= '5' && xadd1(digits, newndigits)) { + /* + * digits was 999, is now 100; make it 1000 + */ + digits[newndigits++] = '0'; + newdigit: + /* + * account for new digit + */ + if(z1) /* 0.099 => 0.100 or 0.99 => 1.00*/ + z1--; + else /* 9.99 => 10.00 */ + point++; + } + z2 = 0; + ndigits = newndigits; + } + sufwid = 0; + break; + } + + /* + * if %g is given without FmtSharp, remove trailing zeros. + * must do after truncation, so that e.g. print %.3g 1.001 + * produces 1, not 1.00. sorry, but them's the rules. + */ + if(realchr == 'g' && !(fl & FmtSharp)) { + if(z1+ndigits+z2 >= point) { + if(z1+ndigits < point) + z2 = point - (z1+ndigits); + else{ + z2 = 0; + while(z1+ndigits > point && digits[ndigits-1] == '0') + ndigits--; + } + } + } + + /* + * compute width of all digits and decimal point and suffix if any + */ + wid = z1+ndigits+z2; + if(wid > point) + wid += dotwid; + else if(wid == point){ + if(fl & FmtSharp) + wid += dotwid; + else + point++; /* do not print any decimal point */ + } + wid += sufwid; + + /* + * determine sign + */ + sign = 0; + if(neg) + sign = '-'; + else if(fl & FmtSign) + sign = '+'; + else if(fl & FmtSpace) + sign = ' '; + if(sign) + wid++; + + /* + * compute padding + */ + pad = 0; + if((fl & FmtWidth) && fmt->width > wid) + pad = fmt->width - wid; + if(pad && !(fl & FmtLeft) && (fl & FmtZero)){ + z1 += pad; + point += pad; + pad = 0; + } + + /* + * format the actual field. too bad about doing this twice. 
+ */ + if(fmt->runes){ + if(pad && !(fl & FmtLeft) && __rfmtpad(fmt, pad) < 0) + return -1; + rt = (Rune*)fmt->to; + rs = (Rune*)fmt->stop; + if(sign) + FMTRCHAR(fmt, rt, rs, sign); + while(z1>0 || ndigits>0 || z2>0) { + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + FMTRCHAR(fmt, rt, rs, c); + if(--point == 0) { + for(p = dot; *p; ){ + p += chartorune(&r, p); + FMTRCHAR(fmt, rt, rs, r); + } + } + } + fmt->nfmt += rt - (Rune*)fmt->to; + fmt->to = rt; + if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0) + return -1; + if(pad && (fl & FmtLeft) && __rfmtpad(fmt, pad) < 0) + return -1; + }else{ + if(pad && !(fl & FmtLeft) && __fmtpad(fmt, pad) < 0) + return -1; + t = (char*)fmt->to; + s = (char*)fmt->stop; + if(sign) + FMTCHAR(fmt, t, s, sign); + while(z1>0 || ndigits>0 || z2>0) { + if(z1 > 0){ + z1--; + c = '0'; + }else if(ndigits > 0){ + ndigits--; + c = *digits++; + }else{ + z2--; + c = '0'; + } + FMTCHAR(fmt, t, s, c); + if(--point == 0) + for(p=dot; *p; p++) + FMTCHAR(fmt, t, s, *p); + } + fmt->nfmt += t - (char*)fmt->to; + fmt->to = t; + if(sufwid && __fmtcpy(fmt, suf, sufwid, sufwid) < 0) + return -1; + if(pad && (fl & FmtLeft) && __fmtpad(fmt, pad) < 0) + return -1; + } + return 0; +} + diff --git a/mk/libfmt/fmt.c b/mk/libfmt/fmt.c @@ -0,0 +1,220 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +enum +{ + Maxfmt = 64 +}; + +typedef struct Convfmt Convfmt; +struct Convfmt +{ + int c; + volatile Fmts fmt; /* for spin lock in fmtfmt; avoids race due to write order */ +}; + +static struct +{ + /* lock by calling __fmtlock, __fmtunlock */ + int nfmt; + Convfmt fmt[Maxfmt]; +} fmtalloc; + +static Convfmt knownfmt[] = { + ' ', __flagfmt, + '#', __flagfmt, + '%', __percentfmt, + '\'', __flagfmt, + '+', __flagfmt, + ',', __flagfmt, + '-', __flagfmt, + 'C', __runefmt, /* Plan 9 
addition */ + 'E', __efgfmt, +#ifndef PLAN9PORT + 'F', __efgfmt, /* ANSI only */ +#endif + 'G', __efgfmt, +#ifndef PLAN9PORT + 'L', __flagfmt, /* ANSI only */ +#endif + 'S', __runesfmt, /* Plan 9 addition */ + 'X', __ifmt, + 'b', __ifmt, /* Plan 9 addition */ + 'c', __charfmt, + 'd', __ifmt, + 'e', __efgfmt, + 'f', __efgfmt, + 'g', __efgfmt, + 'h', __flagfmt, +#ifndef PLAN9PORT + 'i', __ifmt, /* ANSI only */ +#endif + 'l', __flagfmt, + 'n', __countfmt, + 'o', __ifmt, + 'p', __ifmt, + 'r', __errfmt, + 's', __strfmt, +#ifdef PLAN9PORT + 'u', __flagfmt, +#else + 'u', __ifmt, +#endif + 'x', __ifmt, + 0, nil, +}; + + +int (*fmtdoquote)(int); + +/* + * __fmtlock() must be set + */ +static int +__fmtinstall(int c, Fmts f) +{ + Convfmt *p, *ep; + + if(c<=0 || c>=65536) + return -1; + if(!f) + f = __badfmt; + + ep = &fmtalloc.fmt[fmtalloc.nfmt]; + for(p=fmtalloc.fmt; p<ep; p++) + if(p->c == c) + break; + + if(p == &fmtalloc.fmt[Maxfmt]) + return -1; + + p->fmt = f; + if(p == ep){ /* installing a new format character */ + fmtalloc.nfmt++; + p->c = c; + } + + return 0; +} + +int +fmtinstall(int c, int (*f)(Fmt*)) +{ + int ret; + + __fmtlock(); + ret = __fmtinstall(c, f); + __fmtunlock(); + return ret; +} + +static Fmts +fmtfmt(int c) +{ + Convfmt *p, *ep; + + ep = &fmtalloc.fmt[fmtalloc.nfmt]; + for(p=fmtalloc.fmt; p<ep; p++) + if(p->c == c){ + while(p->fmt == nil) /* loop until value is updated */ + ; + return p->fmt; + } + + /* is this a predefined format char? 
*/ + __fmtlock(); + for(p=knownfmt; p->c; p++) + if(p->c == c){ + __fmtinstall(p->c, p->fmt); + __fmtunlock(); + return p->fmt; + } + __fmtunlock(); + + return __badfmt; +} + +void* +__fmtdispatch(Fmt *f, void *fmt, int isrunes) +{ + Rune rune, r; + int i, n; + + f->flags = 0; + f->width = f->prec = 0; + + for(;;){ + if(isrunes){ + r = *(Rune*)fmt; + fmt = (Rune*)fmt + 1; + }else{ + fmt = (char*)fmt + chartorune(&rune, (char*)fmt); + r = rune; + } + f->r = r; + switch(r){ + case '\0': + return nil; + case '.': + f->flags |= FmtWidth|FmtPrec; + continue; + case '0': + if(!(f->flags & FmtWidth)){ + f->flags |= FmtZero; + continue; + } + /* fall through */ + case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + i = 0; + while(r >= '0' && r <= '9'){ + i = i * 10 + r - '0'; + if(isrunes){ + r = *(Rune*)fmt; + fmt = (Rune*)fmt + 1; + }else{ + r = *(char*)fmt; + fmt = (char*)fmt + 1; + } + } + if(isrunes) + fmt = (Rune*)fmt - 1; + else + fmt = (char*)fmt - 1; + numflag: + if(f->flags & FmtWidth){ + f->flags |= FmtPrec; + f->prec = i; + }else{ + f->flags |= FmtWidth; + f->width = i; + } + continue; + case '*': + i = va_arg(f->args, int); + if(i < 0){ + /* + * negative precision => + * ignore the precision. + */ + if(f->flags & FmtPrec){ + f->flags &= ~FmtPrec; + f->prec = 0; + continue; + } + i = -i; + f->flags |= FmtLeft; + } + goto numflag; + } + n = (*fmtfmt(r))(f); + if(n < 0) + return nil; + if(n == 0) + return fmt; + } +} diff --git a/mk/libfmt/fmt.h b/mk/libfmt/fmt.h @@ -0,0 +1,116 @@ +#ifndef _FMT_H_ +#define _FMT_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ + +#include <stdarg.h> +#include <utf.h> + +typedef struct Fmt Fmt; +struct Fmt{ + unsigned char runes; /* output buffer is runes or chars? */ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt *); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + Rune r; /* % format Rune */ + int width; + int prec; + unsigned long flags; + char *decimal; /* decimal point; cannot be "" */ + + /* For %'d */ + char *thousands; /* separator for thousands */ + + /* + * Each char is an integer indicating #digits before next separator. 
Values: + * \xFF: no more grouping (or \x7F; defined to be CHAR_MAX in POSIX) + * \x00: repeat previous indefinitely + * \x**: count that many + */ + char *grouping; /* descriptor of separator placement */ +}; + +enum{ + FmtWidth = 1, + FmtLeft = FmtWidth << 1, + FmtPrec = FmtLeft << 1, + FmtSharp = FmtPrec << 1, + FmtSpace = FmtSharp << 1, + FmtSign = FmtSpace << 1, + FmtApost = FmtSign << 1, + FmtZero = FmtApost << 1, + FmtUnsigned = FmtZero << 1, + FmtShort = FmtUnsigned << 1, + FmtLong = FmtShort << 1, + FmtVLong = FmtLong << 1, + FmtComma = FmtVLong << 1, + FmtByte = FmtComma << 1, + FmtLDouble = FmtByte << 1, + + FmtFlag = FmtLDouble << 1 +}; + +extern int (*fmtdoquote)(int); + +/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/fmt/?*.c | grep -v static |grep -v __ */ +int dofmt(Fmt *f, char *fmt); +int dorfmt(Fmt *f, const Rune *fmt); +double fmtcharstod(int(*f)(void*), void *vp); +int fmtfdflush(Fmt *f); +int fmtfdinit(Fmt *f, int fd, char *buf, int size); +int fmtinstall(int c, int (*f)(Fmt*)); +int fmtnullinit(Fmt*); +void fmtlocaleinit(Fmt*, char*, char*, char*); +int fmtprint(Fmt *f, char *fmt, ...); +int fmtrune(Fmt *f, int r); +int fmtrunestrcpy(Fmt *f, Rune *s); +int fmtstrcpy(Fmt *f, char *s); +char* fmtstrflush(Fmt *f); +int fmtstrinit(Fmt *f); +double fmtstrtod(const char *as, char **aas); +int fmtvprint(Fmt *f, char *fmt, va_list args); +int fprint(int fd, char *fmt, ...); +int print(char *fmt, ...); +void quotefmtinstall(void); +int quoterunestrfmt(Fmt *f); +int quotestrfmt(Fmt *f); +Rune* runefmtstrflush(Fmt *f); +int runefmtstrinit(Fmt *f); +Rune* runeseprint(Rune *buf, Rune *e, char *fmt, ...); +Rune* runesmprint(char *fmt, ...); +int runesnprint(Rune *buf, int len, char *fmt, ...); +int runesprint(Rune *buf, char *fmt, ...); +Rune* runevseprint(Rune *buf, Rune *e, char *fmt, va_list args); +Rune* runevsmprint(char *fmt, va_list args); +int runevsnprint(Rune *buf, int len, char *fmt, va_list args); +char* seprint(char *buf, char *e, char *fmt, ...); 
+char* smprint(char *fmt, ...); +int snprint(char *buf, int len, char *fmt, ...); +int sprint(char *buf, char *fmt, ...); +int vfprint(int fd, char *fmt, va_list args); +char* vseprint(char *buf, char *e, char *fmt, va_list args); +char* vsmprint(char *fmt, va_list args); +int vsnprint(char *buf, int len, char *fmt, va_list args); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libfmt/fmtdef.h b/mk/libfmt/fmtdef.h @@ -0,0 +1,105 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ + +/* + * dofmt -- format to a buffer + * the number of characters formatted is returned, + * or -1 if there was an error. + * if the buffer is ever filled, flush is called. + * it should reset the buffer and return whether formatting should continue. + */ + +typedef int (*Fmts)(Fmt*); + +typedef struct Quoteinfo Quoteinfo; +struct Quoteinfo +{ + int quoted; /* if set, string must be quoted */ + int nrunesin; /* number of input runes that can be accepted */ + int nbytesin; /* number of input bytes that can be accepted */ + int nrunesout; /* number of runes that will be generated */ + int nbytesout; /* number of bytes that will be generated */ +}; + +/* Edit .+1,/^$/ |cfn |grep -v static | grep __ */ +double __Inf(int sign); +double __NaN(void); +int __badfmt(Fmt *f); +int __charfmt(Fmt *f); +int __countfmt(Fmt *f); +int __efgfmt(Fmt *fmt); +int __errfmt(Fmt *f); +int __flagfmt(Fmt *f); +int __fmtFdFlush(Fmt *f); +int __fmtcpy(Fmt *f, const void *vm, int n, int sz); +void* __fmtdispatch(Fmt *f, void *fmt, int isrunes); +void * __fmtflush(Fmt *f, void *t, int len); +void __fmtlock(void); +int __fmtpad(Fmt *f, int n); +double __fmtpow10(int n); +int __fmtrcpy(Fmt *f, const void *vm, int n); +void __fmtunlock(void); +int __ifmt(Fmt *f); +int __isInf(double d, int sign); +int __isNaN(double d); +int __needsep(int*, char**); +int __needsquotes(char *s, int *quotelenp); +int __percentfmt(Fmt *f); +void __quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, 
int sharp, int runesout); +int __quotestrfmt(int runesin, Fmt *f); +int __rfmtpad(Fmt *f, int n); +int __runefmt(Fmt *f); +int __runeneedsquotes(Rune *r, int *quotelenp); +int __runesfmt(Fmt *f); +int __strfmt(Fmt *f); + +#define FMTCHAR(f, t, s, c)\ + do{\ + if(t + 1 > (char*)s){\ + t = (char*)__fmtflush(f, t, 1);\ + if(t != nil)\ + s = (char*)f->stop;\ + else\ + return -1;\ + }\ + *t++ = c;\ + }while(0) + +#define FMTRCHAR(f, t, s, c)\ + do{\ + if(t + 1 > (Rune*)s){\ + t = (Rune*)__fmtflush(f, t, sizeof(Rune));\ + if(t != nil)\ + s = (Rune*)f->stop;\ + else\ + return -1;\ + }\ + *t++ = c;\ + }while(0) + +#define FMTRUNE(f, t, s, r)\ + do{\ + Rune _rune;\ + int _runelen;\ + if(t + UTFmax > (char*)s && t + (_runelen = runelen(r)) > (char*)s){\ + t = (char*)__fmtflush(f, t, _runelen);\ + if(t != nil)\ + s = (char*)f->stop;\ + else\ + return -1;\ + }\ + if(r < Runeself)\ + *t++ = r;\ + else{\ + _rune = r;\ + t += runetochar(t, &_rune);\ + }\ + }while(0) + +#ifdef va_copy +# define VA_COPY(a,b) va_copy(a,b) +# define VA_END(a) va_end(a) +#else +# define VA_COPY(a,b) (a) = (b) +# define VA_END(a) +#endif + diff --git a/mk/libfmt/fmtfd.c b/mk/libfmt/fmtfd.c @@ -0,0 +1,36 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * public routine for final flush of a formatting buffer + * to a file descriptor; returns total char count. 
+ */ +int +fmtfdflush(Fmt *f) +{ + if(__fmtFdFlush(f) <= 0) + return -1; + return f->nfmt; +} + +/* + * initialize an output buffer for buffered printing + */ +int +fmtfdinit(Fmt *f, int fd, char *buf, int size) +{ + f->runes = 0; + f->start = buf; + f->to = buf; + f->stop = buf + size; + f->flush = __fmtFdFlush; + f->farg = (void*)(uintptr_t)fd; + f->flags = 0; + f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} diff --git a/mk/libfmt/fmtfdflush.c b/mk/libfmt/fmtfdflush.c @@ -0,0 +1,22 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <unistd.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * generic routine for flushing a formatting buffer + * to a file descriptor + */ +int +__fmtFdFlush(Fmt *f) +{ + int n; + + n = (char*)f->to - (char*)f->start; + if(n && write((uintptr)f->farg, f->start, n) != n) + return 0; + f->to = f->start; + return 1; +} diff --git a/mk/libfmt/fmtinstall.3 b/mk/libfmt/fmtinstall.3 @@ -0,0 +1,379 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH FMTINSTALL 3 +.SH NAME +fmtinstall, dofmt, dorfmt, fmtprint, fmtvprint, fmtrune, fmtstrcpy, fmtrunestrcpy, fmtfdinit, fmtfdflush, fmtstrinit, fmtstrflush, runefmtstrinit, runefmtstrflush, errfmt \- support for user-defined print formats and output routines +.SH SYNOPSIS +.B #include <utf.h> +.br +.B #include <fmt.h> +.PP +.ft L +.nf +.ta \w' 'u +\w' 'u +\w' 'u +\w' 'u +\w' 'u +typedef struct Fmt Fmt; +struct Fmt{ + uchar runes; /* output buffer is runes or chars? 
*/ + void *start; /* of buffer */ + void *to; /* current place in the buffer */ + void *stop; /* end of the buffer; overwritten if flush fails */ + int (*flush)(Fmt*); /* called when to == stop */ + void *farg; /* to make flush a closure */ + int nfmt; /* num chars formatted so far */ + va_list args; /* args passed to dofmt */ + int r; /* % format Rune */ + int width; + int prec; + ulong flags; +}; + +enum{ + FmtWidth = 1, + FmtLeft = FmtWidth << 1, + FmtPrec = FmtLeft << 1, + FmtSharp = FmtPrec << 1, + FmtSpace = FmtSharp << 1, + FmtSign = FmtSpace << 1, + FmtZero = FmtSign << 1, + FmtUnsigned = FmtZero << 1, + FmtShort = FmtUnsigned << 1, + FmtLong = FmtShort << 1, + FmtVLong = FmtLong << 1, + FmtComma = FmtVLong << 1, + + FmtFlag = FmtComma << 1 +}; +.fi +.PP +.B +.ta \w'\fLchar* 'u + +.PP +.B +int fmtfdinit(Fmt *f, int fd, char *buf, int nbuf); +.PP +.B +int fmtfdflush(Fmt *f); +.PP +.B +int fmtstrinit(Fmt *f); +.PP +.B +char* fmtstrflush(Fmt *f); +.PP +.B +int runefmtstrinit(Fmt *f); +.PP +.B +Rune* runefmtstrflush(Fmt *f); + +.PP +.B +int fmtinstall(int c, int (*fn)(Fmt*)); +.PP +.B +int dofmt(Fmt *f, char *fmt); +.PP +.B +int dorfmt(Fmt*, Rune *fmt); +.PP +.B +int fmtprint(Fmt *f, char *fmt, ...); +.PP +.B +int fmtvprint(Fmt *f, char *fmt, va_list v); +.PP +.B +int fmtrune(Fmt *f, int r); +.PP +.B +int fmtstrcpy(Fmt *f, char *s); +.PP +.B +int fmtrunestrcpy(Fmt *f, Rune *s); +.PP +.B +int errfmt(Fmt *f); +.SH DESCRIPTION +The interface described here allows the construction of custom +.IR print (3) +verbs and output routines. +In essence, they provide access to the workings of the formatted print code. +.PP +The +.IR print (3) +suite maintains its state with a data structure called +.BR Fmt . +A typical call to +.IR print (3) +or its relatives initializes a +.B Fmt +structure, passes it to subsidiary routines to process the output, +and finishes by emitting any saved state recorded in the +.BR Fmt . 
+The details of the +.B Fmt +are unimportant to outside users, except insofar as the general +design influences the interface. +The +.B Fmt +records whether the output is in runes or bytes, +the verb being processed, its precision and width, +and buffering parameters. +Most important, it also records a +.I flush +routine that the library will call if a buffer overflows. +When printing to a file descriptor, the flush routine will +emit saved characters and reset the buffer; when printing +to an allocated string, it will resize the string to receive more output. +The flush routine is nil when printing to fixed-size buffers. +User code need never provide a flush routine; this is done internally +by the library. +.SS Custom output routines +To write a custom output routine, such as an error handler that +formats and prints custom error messages, the output sequence can be run +from outside the library using the routines described here. +There are two main cases: output to an open file descriptor +and output to a string. +.PP +To write to a file descriptor, call +.I fmtfdinit +to initialize the local +.B Fmt +structure +.IR f , +giving the file descriptor +.IR fd , +the buffer +.IR buf , +and its size +.IR nbuf . +Then call +.IR fmtprint +or +.IR fmtvprint +to generate the output. +These behave like +.B fprint +(see +.IR print (3)) +or +.B vfprint +except that the characters are buffered until +.I fmtfdflush +is called and the return value is either 0 or \-1. +A typical example of this sequence appears in the Examples section. +.PP +The same basic sequence applies when outputting to an allocated string: +call +.I fmtstrinit +to initialize the +.BR Fmt , +then call +.I fmtprint +and +.I fmtvprint +to generate the output. +Finally, +.I fmtstrflush +will return the allocated string, which should be freed after use. +To output to a rune string, use +.I runefmtstrinit +and +.IR runefmtstrflush . 
+Regardless of the output style or type, +.I fmtprint +or +.I fmtvprint +generates the characters. +.SS Custom format verbs +.I Fmtinstall +is used to install custom verbs and flags labeled by character +.IR c , +which may be any non-zero Unicode character. +.I Fn +should be declared as +.IP +.EX +int fn(Fmt*) +.EE +.PP +.IB Fp ->r +is the flag or verb character to cause +.I fn +to be called. +In +.IR fn , +.IB fp ->width , +.IB fp ->prec +are the width and precision, and +.IB fp ->flags +the decoded flags for the verb (see +.IR print (3) +for a description of these items). +The standard flag values are: +.B FmtSign +.RB ( + ), +.B FmtLeft +.RB ( - ), +.B FmtSpace +.RB ( '\ ' ), +.B FmtSharp +.RB ( # ), +.B FmtComma +.RB ( , ), +.B FmtLong +.RB ( l ), +.B FmtShort +.RB ( h ), +.B FmtUnsigned +.RB ( u ), +and +.B FmtVLong +.RB ( ll ). +The flag bits +.B FmtWidth +and +.B FmtPrec +identify whether a width and precision were specified. +.PP +.I Fn +is passed a pointer to the +.B Fmt +structure recording the state of the output. +If +.IB fp ->r +is a verb (rather than a flag), +.I fn +should use +.IB fp ->args +to fetch its argument from the list, +then format it, and return zero. +If +.IB fp ->r +is a flag, +.I fn +should return one. +All interpretation of +.IB fp ->width\f1, +.IB fp ->prec\f1, +and +.IB fp ->flags +is left up to the conversion routine. +.I Fmtinstall +returns 0 if the installation succeeds, \-1 if it fails. +.PP +.IR Fmtprint +and +.IR fmtvprint +may be called to +help prepare output in custom conversion routines. +However, these functions clear the width, precision, and flags. +Both functions return 0 for success and \-1 for failure. +.PP +The functions +.I dofmt +and +.I dorfmt +are the underlying formatters; they +use the existing contents of +.B Fmt +and should be called only by sophisticated conversion routines. +These routines return the number of characters (bytes of UTF or runes) +produced. 
+.PP +Some internal functions may be useful to format primitive types. +They honor the width, precision and flags as described in +.IR print (3). +.I Fmtrune +formats a single character +.BR r . +.I Fmtstrcpy +formats a string +.BR s ; +.I fmtrunestrcpy +formats a rune string +.BR s . +.I Errfmt +formats the system error string. +All these routines return zero for successful execution. +Conversion routines that call these functions will work properly +regardless of whether the output is bytes or runes. +.\" .PP +.\" .IR 2c (1) +.\" describes the C directive +.\" .B #pragma +.\" .B varargck +.\" that can be used to provide type-checking for custom print verbs and output routines. +.SH EXAMPLES +This function prints an error message with a variable +number of arguments and then quits. +Compared to the corresponding example in +.IR print (3), +this version uses a smaller buffer, will never truncate +the output message, but might generate multiple +.B write +system calls to produce its output. +.IP +.EX +.ta 6n +6n +6n +6n +6n +6n +6n +6n +6n +#pragma varargck argpos fatal 1 + +void fatal(char *fmt, ...) +{ + Fmt f; + char buf[64]; + va_list arg; + + fmtfdinit(&f, 1, buf, sizeof buf); + fmtprint(&f, "fatal: "); + va_start(arg, fmt); + fmtvprint(&f, fmt, arg); + va_end(arg); + fmtprint(&f, "\en"); + fmtfdflush(&f); + exits("fatal error"); +} +.EE +.PP +This example adds a verb to print complex numbers. +.IP +.EX +typedef +struct { + double r, i; +} Complex; + +#pragma varargck type "X" Complex + +int +Xfmt(Fmt *f) +{ + Complex c; + + c = va_arg(f->args, Complex); + return fmtprint(f, "(%g,%g)", c.r, c.i); +} + +main(...) +{ + Complex x = (Complex){ 1.5, -2.3 }; + + fmtinstall('X', Xfmt); + print("x = %X\en", x); +} +.EE +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR print (3), +.IR utf (7) +.SH DIAGNOSTICS +These routines return negative numbers or nil for errors and set +.IR errstr . 
diff --git a/mk/libfmt/fmtlocale.c b/mk/libfmt/fmtlocale.c @@ -0,0 +1,55 @@ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Fill in the internationalization stuff in the State structure. + * For nil arguments, provide the sensible defaults: + * decimal is a period + * thousands separator is a comma + * thousands are marked every three digits + */ +void +fmtlocaleinit(Fmt *f, char *decimal, char *thousands, char *grouping) +{ + if(decimal == nil || decimal[0] == '\0') + decimal = "."; + if(thousands == nil) + thousands = ","; + if(grouping == nil) + grouping = "\3"; + f->decimal = decimal; + f->thousands = thousands; + f->grouping = grouping; +} + +/* + * We are about to emit a digit in e.g. %'d. If that digit would + * overflow a thousands (e.g.) grouping, tell the caller to emit + * the thousands separator. Always advance the digit counter + * and pointer into the grouping descriptor. + */ +int +__needsep(int *ndig, char **grouping) +{ + int group; + + (*ndig)++; + group = *(unsigned char*)*grouping; + /* CHAR_MAX means no further grouping. 
\0 means we got the empty string */ + if(group == 0xFF || group == 0x7f || group == 0x00) + return 0; + if(*ndig > group){ + /* if we're at end of string, continue with this grouping; else advance */ + if((*grouping)[1] != '\0') + (*grouping)++; + *ndig = 1; + return 1; + } + return 0; +} + diff --git a/mk/libfmt/fmtlock.c b/mk/libfmt/fmtlock.c @@ -0,0 +1,15 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +void +__fmtlock(void) +{ +} + +void +__fmtunlock(void) +{ +} diff --git a/mk/libfmt/fmtnull.c b/mk/libfmt/fmtnull.c @@ -0,0 +1,33 @@ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * Absorb output without using resources. + */ +static Rune nullbuf[32]; + +static int +__fmtnullflush(Fmt *f) +{ + f->to = nullbuf; + f->nfmt = 0; + return 0; +} + +int +fmtnullinit(Fmt *f) +{ + memset(f, 0, sizeof *f); + f->runes = 1; + f->start = nullbuf; + f->to = nullbuf; + f->stop = nullbuf+nelem(nullbuf); + f->flush = __fmtnullflush; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} + diff --git a/mk/libfmt/fmtprint.c b/mk/libfmt/fmtprint.c @@ -0,0 +1,36 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * format a string into the output buffer + * designed for formats which themselves call fmt, + * but ignore any width flags + */ +int +fmtprint(Fmt *f, char *fmt, ...) 
+{ + va_list va; + int n; + + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_COPY(va, f->args); + VA_END(f->args); + va_start(f->args, fmt); + n = dofmt(f, fmt); + va_end(f->args); + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_COPY(f->args,va); + VA_END(va); + if(n >= 0) + return 0; + return n; +} + diff --git a/mk/libfmt/fmtquote.c b/mk/libfmt/fmtquote.c @@ -0,0 +1,259 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * How many bytes of output UTF will be produced by quoting (if necessary) this string? + * How many runes? How much of the input will be consumed? + * The parameter q is filled in by __quotesetup. + * The string may be UTF or Runes (s or r). + * Return count does not include NUL. + * Terminate the scan at the first of: + * NUL in input + * count exceeded in input + * count exceeded on output + * *ninp is set to number of input bytes accepted. + * nin may be <0 initially, to avoid checking input by count. 
+ */ +void +__quotesetup(char *s, Rune *r, int nin, int nout, Quoteinfo *q, int sharp, int runesout) +{ + int w; + Rune c; + + q->quoted = 0; + q->nbytesout = 0; + q->nrunesout = 0; + q->nbytesin = 0; + q->nrunesin = 0; + if(sharp || nin==0 || (s && *s=='\0') || (r && *r=='\0')){ + if(nout < 2) + return; + q->quoted = 1; + q->nbytesout = 2; + q->nrunesout = 2; + } + for(; nin!=0; nin--){ + if(s) + w = chartorune(&c, s); + else{ + c = *r; + w = runelen(c); + } + + if(c == '\0') + break; + if(runesout){ + if(q->nrunesout+1 > nout) + break; + }else{ + if(q->nbytesout+w > nout) + break; + } + + if((c <= L' ') || (c == L'\'') || (fmtdoquote!=nil && fmtdoquote(c))){ + if(!q->quoted){ + if(runesout){ + if(1+q->nrunesout+1+1 > nout) /* no room for quotes */ + break; + }else{ + if(1+q->nbytesout+w+1 > nout) /* no room for quotes */ + break; + } + q->nrunesout += 2; /* include quotes */ + q->nbytesout += 2; /* include quotes */ + q->quoted = 1; + } + if(c == '\'') { + if(runesout){ + if(1+q->nrunesout+1 > nout) /* no room for quotes */ + break; + }else{ + if(1+q->nbytesout+w > nout) /* no room for quotes */ + break; + } + q->nbytesout++; + q->nrunesout++; /* quotes reproduce as two characters */ + } + } + + /* advance input */ + if(s) + s += w; + else + r++; + q->nbytesin += w; + q->nrunesin++; + + /* advance output */ + q->nbytesout += w; + q->nrunesout++; + +#ifndef PLAN9PORT + /* ANSI requires precision in bytes, not Runes. 
*/ + nin-= w-1; /* and then n-- in the loop */ +#endif + } +} + +/* + * Emit the quoted form of the input measured by __quotesetup into q: + * an opening ', the q->nrunesin input characters with every embedded ' + * written twice, and a closing '. + * The input is the UTF string sin when sin is non-nil, otherwise the + * Rune string rin. Output is written as Runes when f->runes is set, + * otherwise as UTF. + * When FmtWidth is set the field is padded to f->width: before the + * text unless FmtLeft is set, after it otherwise. + * Returns 0 on success, -1 if padding fails. + */ +static int +qstrfmt(char *sin, Rune *rin, Quoteinfo *q, Fmt *f) +{ + Rune r, *rm, *rme; + char *t, *s, *m, *me; + Rune *rt, *rs; + ulong fl; + int nc, w; + + m = sin; + me = m + q->nbytesin; + rm = rin; + rme = rm + q->nrunesin; + + fl = f->flags; + w = 0; + if(fl & FmtWidth) + w = f->width; + if(f->runes){ + if(!(fl & FmtLeft) && __rfmtpad(f, w - q->nrunesout) < 0) + return -1; + }else{ + if(!(fl & FmtLeft) && __fmtpad(f, w - q->nbytesout) < 0) + return -1; + } + t = (char*)f->to; + s = (char*)f->stop; + rt = (Rune*)f->to; + rs = (Rune*)f->stop; + if(f->runes) + FMTRCHAR(f, rt, rs, '\''); + else + FMTRUNE(f, t, s, '\''); + for(nc = q->nrunesin; nc > 0; nc--){ + if(sin){ + r = *(uchar*)m; + if(r < Runeself) + m++; + else if((me - m) >= UTFmax || fullrune(m, me-m)) + m += chartorune(&r, m); + else + break; + }else{ + if(rm >= rme) + break; + /* fetch the whole Rune; reading through a uchar* would keep only one byte of it */ + r = *rm++; + } + if(f->runes){ + FMTRCHAR(f, rt, rs, r); + if(r == '\'') + FMTRCHAR(f, rt, rs, r); + }else{ + FMTRUNE(f, t, s, r); + if(r == '\'') + FMTRUNE(f, t, s, r); + } + } + + if(f->runes){ + FMTRCHAR(f, rt, rs, '\''); + USED(rs); + f->nfmt += rt - (Rune *)f->to; + f->to = rt; + if(fl & FmtLeft && __rfmtpad(f, w - q->nrunesout) < 0) + return -1; + }else{ + FMTRUNE(f, t, s, '\''); + USED(s); + f->nfmt += t - (char *)f->to; + f->to = t; + if(fl & FmtLeft && __fmtpad(f, w - q->nbytesout) < 0) + return -1; + } + return 0; +} + +int +__quotestrfmt(int runesin, Fmt *f) +{ + int nin, outlen; + Rune *r; + char *s; + Quoteinfo q; + + nin = -1; + if(f->flags&FmtPrec) + nin = f->prec; + if(runesin){ + r = va_arg(f->args, Rune *); + s = nil; + }else{ + s = va_arg(f->args, char *); + r = nil; + } + if(!s && !r) + return __fmtcpy(f, (void*)"<nil>", 5, 5); + + if(f->flush) + outlen = 0x7FFFFFFF; /* if we can flush, no output limit */ + else if(f->runes) + outlen = (Rune*)f->stop - (Rune*)f->to; + else + outlen = (char*)f->stop - (char*)f->to; + + __quotesetup(s, r, 
nin, outlen, &q, f->flags&FmtSharp, f->runes); +/*print("bytes in %d bytes out %d runes in %d runesout %d\n", q.nbytesin, q.nbytesout, q.nrunesin, q.nrunesout); */ + + if(runesin){ + if(!q.quoted) + return __fmtrcpy(f, r, q.nrunesin); + return qstrfmt(nil, r, &q, f); + } + + if(!q.quoted) + return __fmtcpy(f, s, q.nrunesin, q.nbytesin); + return qstrfmt(s, nil, &q, f); +} + +int +quotestrfmt(Fmt *f) +{ + return __quotestrfmt(0, f); +} + +int +quoterunestrfmt(Fmt *f) +{ + return __quotestrfmt(1, f); +} + +void +quotefmtinstall(void) +{ + fmtinstall('q', quotestrfmt); + fmtinstall('Q', quoterunestrfmt); +} + +int +__needsquotes(char *s, int *quotelenp) +{ + Quoteinfo q; + + __quotesetup(s, nil, -1, 0x7FFFFFFF, &q, 0, 0); + *quotelenp = q.nbytesout; + + return q.quoted; +} + +int +__runeneedsquotes(Rune *r, int *quotelenp) +{ + Quoteinfo q; + + __quotesetup(nil, r, -1, 0x7FFFFFFF, &q, 0, 0); + *quotelenp = q.nrunesout; + + return q.quoted; +} diff --git a/mk/libfmt/fmtrune.c b/mk/libfmt/fmtrune.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +fmtrune(Fmt *f, int r) +{ + Rune *rt; + char *t; + int n; + + if(f->runes){ + rt = (Rune*)f->to; + FMTRCHAR(f, rt, f->stop, r); + f->to = rt; + n = 1; + }else{ + t = (char*)f->to; + FMTRUNE(f, t, f->stop, r); + n = t - (char*)f->to; + f->to = t; + } + f->nfmt += n; + return 0; +} diff --git a/mk/libfmt/fmtstr.c b/mk/libfmt/fmtstr.c @@ -0,0 +1,16 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdlib.h> +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +fmtstrflush(Fmt *f) +{ + if(f->start == nil) + return nil; + *(char*)f->to = '\0'; + f->to = f->start; + return (char*)f->start; +} diff --git a/mk/libfmt/fmtvprint.c b/mk/libfmt/fmtvprint.c @@ -0,0 +1,37 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ 
+#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + + +/* + * format a string into the output buffer + * designed for formats which themselves call fmt, + * but ignore any width flags + */ +int +fmtvprint(Fmt *f, char *fmt, va_list args) +{ + va_list va; + int n; + + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_COPY(va,f->args); + VA_END(f->args); + VA_COPY(f->args,args); + n = dofmt(f, fmt); + f->flags = 0; + f->width = 0; + f->prec = 0; + VA_END(f->args); + VA_COPY(f->args,va); + VA_END(va); + if(n >= 0) + return 0; + return n; +} + diff --git a/mk/libfmt/fprint.c b/mk/libfmt/fprint.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +fprint(int fd, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = vfprint(fd, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/mkfile b/mk/libfmt/mkfile @@ -0,0 +1,49 @@ +LIB = libfmt.a +LOBJ = \ + dofmt.o\ + dorfmt.o\ + errfmt.o\ + fltfmt.o\ + fmt.o\ + fmtfd.o\ + fmtfdflush.o\ + fmtlocale.o\ + fmtlock.o\ + fmtnull.o\ + fmtprint.o\ + fmtquote.o\ + fmtrune.o\ + fmtstr.o\ + fmtvprint.o\ + fprint.o\ + print.o\ + runefmtstr.o\ + runeseprint.o\ + runesmprint.o\ + runesnprint.o\ + runesprint.o\ + runevseprint.o\ + runevsmprint.o\ + runevsnprint.o\ + seprint.o\ + smprint.o\ + snprint.o\ + sprint.o\ + strtod.o\ + vfprint.o\ + vseprint.o\ + vsmprint.o\ + vsnprint.o\ + charstod.o\ + pow10.o\ + nan64.o + +LOCAL_CFLAGS = -I"$PREFIX"/include -I. 
+CLEAN_FILES = test test.o +DEPS = libutf + +<$mkbuild/mk.default + +test: deps $LIB test.o + $CC -o test test.o $LIB $CFLAGS $LDFLAGS $LOCAL_CFLAGS -L"$PREFIX"/lib -L../libutf -lutf + diff --git a/mk/libfmt/nan.h b/mk/libfmt/nan.h @@ -0,0 +1,4 @@ +extern double __NaN(void); +extern double __Inf(int); +extern int __isNaN(double); +extern int __isInf(double, int); diff --git a/mk/libfmt/nan64.c b/mk/libfmt/nan64.c @@ -0,0 +1,78 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ + +/* + * 64-bit IEEE not-a-number routines. + * This is big/little-endian portable assuming that + * the 64-bit doubles and 64-bit integers have the + * same byte ordering. + */ + +#include "plan9.h" +#include <assert.h> +#include "fmt.h" +#include "fmtdef.h" + +static uvlong uvnan = ((uvlong)0x7FF00000<<32)|0x00000001; +static uvlong uvinf = ((uvlong)0x7FF00000<<32)|0x00000000; +static uvlong uvneginf = ((uvlong)0xFFF00000<<32)|0x00000000; + +/* gcc sees through the obvious casts. */ +static uvlong +d2u(double d) +{ + union { + uvlong v; + double d; + } u; + assert(sizeof(u.d) == sizeof(u.v)); + u.d = d; + return u.v; +} + +static double +u2d(uvlong v) +{ + union { + uvlong v; + double d; + } u; + assert(sizeof(u.d) == sizeof(u.v)); + u.v = v; + return u.d; +} + +double +__NaN(void) +{ + return u2d(uvnan); +} + +int +__isNaN(double d) +{ + uvlong x; + + x = d2u(d); + /* IEEE 754: exponent bits 0x7FF and non-zero mantissa */ + return (x&uvinf) == uvinf && (x&~uvneginf) != 0; +} + +double +__Inf(int sign) +{ + return u2d(sign < 0 ? 
uvneginf : uvinf); +} + +int +__isInf(double d, int sign) +{ + uvlong x; + + x = d2u(d); + if(sign == 0) + return x==uvinf || x==uvneginf; + else if(sign > 0) + return x==uvinf; + else + return x==uvneginf; +} diff --git a/mk/libfmt/plan9.h b/mk/libfmt/plan9.h @@ -0,0 +1,38 @@ +#include <inttypes.h> + +/* + * compiler directive on Plan 9 + */ +#ifndef USED +#define USED(x) if(x);else +#endif + +/* + * easiest way to make sure these are defined + */ +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong +#define uintptr _fmtuintptr + +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long uvlong; +typedef long long vlong; +typedef uintptr_t uintptr; + +/* + * nil cannot be ((void*)0) on ANSI C, + * because it is used for function pointers + */ +#undef nil +#define nil 0 + +#undef nelem +#define nelem(x) (sizeof (x)/sizeof (x)[0]) + diff --git a/mk/libfmt/pow10.c b/mk/libfmt/pow10.c @@ -0,0 +1,45 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +/* + * this table might overflow 127-bit exponent representations. + * in that case, truncate it after 1.0e38. + * it is important to get all one can from this + * routine since it is used in atof to scale numbers. + * the presumption is that C converts fp numbers better + * than multiplication of lower powers of 10. 
+ */ + +static +double tab[] = +{ + 1.0e0, 1.0e1, 1.0e2, 1.0e3, 1.0e4, 1.0e5, 1.0e6, 1.0e7, 1.0e8, 1.0e9, + 1.0e10,1.0e11,1.0e12,1.0e13,1.0e14,1.0e15,1.0e16,1.0e17,1.0e18,1.0e19, + 1.0e20,1.0e21,1.0e22,1.0e23,1.0e24,1.0e25,1.0e26,1.0e27,1.0e28,1.0e29, + 1.0e30,1.0e31,1.0e32,1.0e33,1.0e34,1.0e35,1.0e36,1.0e37,1.0e38,1.0e39, + 1.0e40,1.0e41,1.0e42,1.0e43,1.0e44,1.0e45,1.0e46,1.0e47,1.0e48,1.0e49, + 1.0e50,1.0e51,1.0e52,1.0e53,1.0e54,1.0e55,1.0e56,1.0e57,1.0e58,1.0e59, + 1.0e60,1.0e61,1.0e62,1.0e63,1.0e64,1.0e65,1.0e66,1.0e67,1.0e68,1.0e69, +}; + +double +__fmtpow10(int n) +{ + int m; + + if(n < 0) { + n = -n; + if(n < (int)(sizeof(tab)/sizeof(tab[0]))) + return 1/tab[n]; + m = n/2; + return __fmtpow10(-m) * __fmtpow10(m-n); + } + if(n < (int)(sizeof(tab)/sizeof(tab[0]))) + return tab[n]; + m = n/2; + return __fmtpow10(m) * __fmtpow10(n-m); +} diff --git a/mk/libfmt/print.3 b/mk/libfmt/print.3 @@ -0,0 +1,482 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.\" diffs from /usr/local/plan9/man/man3/print.3: +.\" +.\" - include different headers +.\" - drop reference to bio(3) +.\" - change exits to exit +.\" - text about unsigned verbs +.\" - source pointer +.\" +.TH PRINT 3 +.SH NAME +print, fprint, sprint, snprint, seprint, smprint, runesprint, runesnprint, runeseprint, runesmprint, vfprint, vsnprint, vseprint, vsmprint, runevsnprint, runevseprint, runevsmprint \- print formatted output +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B #include <fmt.h> +.PP +.ta \w'\fLchar* 'u +.B +int print(char *format, ...) +.PP +.B +int fprint(int fd, char *format, ...) +.PP +.B +int sprint(char *s, char *format, ...) +.PP +.B +int snprint(char *s, int len, char *format, ...) +.PP +.B +char* seprint(char *s, char *e, char *format, ...) +.PP +.B +char* smprint(char *format, ...) +.PP +.B +int runesprint(Rune *s, char *format, ...) +.PP +.B +int runesnprint(Rune *s, int len, char *format, ...) +.PP +.B +Rune* runeseprint(Rune *s, Rune *e, char *format, ...) 
+.PP +.B +Rune* runesmprint(char *format, ...) +.PP +.B +int vfprint(int fd, char *format, va_list v) +.PP +.B +int vsnprint(char *s, int len, char *format, va_list v) +.PP +.B +char* vseprint(char *s, char *e, char *format, va_list v) +.PP +.B +char* vsmprint(char *format, va_list v) +.PP +.B +int runevsnprint(Rune *s, int len, char *format, va_list v) +.PP +.B +Rune* runevseprint(Rune *s, Rune *e, char *format, va_list v) +.PP +.B +Rune* runevsmprint(char *format, va_list v) +.PP +.B +.SH DESCRIPTION +.I Print +writes text to the standard output. +.I Fprint +writes to the named output +file descriptor: +a buffered form +is described in +.IR bio (3). +.I Sprint +places text +followed by the NUL character +.RB ( \e0 ) +in consecutive bytes starting at +.IR s ; +it is the user's responsibility to ensure that +enough storage is available. +Each function returns the number of bytes +transmitted (not including the NUL +in the case of +.IR sprint ), +or +a negative value if an output error was encountered. +.PP +.I Snprint +is like +.IR sprint , +but will not place more than +.I len +bytes in +.IR s . +Its result is always NUL-terminated and holds the maximal +number of complete UTF-8 characters that can fit. +.I Seprint +is like +.IR snprint , +except that the end is indicated by a pointer +.I e +rather than a count and the return value points to the terminating NUL of the +resulting string. +.I Smprint +is like +.IR sprint , +except that it prints into and returns a string of the required length, which is +allocated by +.IR malloc (3). +.PP +The routines +.IR runesprint , +.IR runesnprint , +.IR runeseprint , +and +.I runesmprint +are the same as +.IR sprint , +.IR snprint , +.IR seprint +and +.I smprint +except that their output is rune strings instead of byte strings. 
+.PP +Finally, the routines +.IR vfprint , +.IR vsnprint , +.IR vseprint , +.IR vsmprint , +.IR runevsnprint , +.IR runevseprint , +and +.I runevsmprint +are like their +.BR v-less +relatives except they take as arguments a +.B va_list +parameter, so they can be called within a variadic function. +The Example section shows a representative usage. +.PP +Each of these functions +converts, formats, and prints its +trailing arguments +under control of a +.IR format +string. +The +format +contains two types of objects: +plain characters, which are simply copied to the +output stream, +and conversion specifications, +each of which results in fetching of +zero or more +arguments. +The results are undefined if there are arguments of the +wrong type or too few +arguments for the format. +If the format is exhausted while +arguments remain, the excess +is ignored. +.PP +Each conversion specification has the following format: +.IP +.B "% [flags] verb +.PP +The verb is a single character and each flag is a single character or a +(decimal) numeric string. +Up to two numeric strings may be used; +the first is called +.IR width , +the second +.IR precision . +A period can be used to separate them, and if the period is +present then +.I width +and +.I precision +are taken to be zero if missing, otherwise they are `omitted'. +Either or both of the numbers may be replaced with the character +.BR * , +meaning that the actual number will be obtained from the argument list +as an integer. +The flags and numbers are arguments to +the +.I verb +described below. +.PP +The numeric verbs +.BR d , +.BR i , +.BR u , +.BR o , +.BR b , +.BR x , +and +.B X +format their arguments in decimal, decimal, +unsigned decimal, octal, binary, hexadecimal, and upper case hexadecimal. 
+Each interprets the flags +.BR 0 , +.BR h , +.BR hh , +.BR l , +.BR + , +.BR - , +.BR , , +and +.B # +to mean pad with zeros, +short, byte, long, always print a sign, left justified, commas every three digits, +and alternate format. +Also, a space character in the flag +position is like +.BR + , +but prints a space instead of a plus sign for non-negative values. +If neither +short nor long is specified, +then the argument is an +.BR int . +If an unsigned verb is specified, +then the argument is interpreted as a +positive number and no sign is output; +space and +.B + +flags are ignored for unsigned verbs. +If two +.B l +flags are given, +then the argument is interpreted as a +.B vlong +(usually an 8-byte, sometimes a 4-byte integer). +If +.I precision +is not omitted, the number is padded on the left with zeros +until at least +.I precision +digits appear. +If +.I precision +is explicitly 0, and the number is 0, +no digits are generated, and alternate formatting +does not apply. +Then, if alternate format is specified, +for +.B o +conversion, the number is preceded by a +.B 0 +if it doesn't already begin with one. +For non-zero numbers and +.B x +conversion, the number is preceded by +.BR 0x ; +for +.B X +conversion, the number is preceded by +.BR 0X . +Finally, if +.I width +is not omitted, the number is padded on the left (or right, if +left justification is specified) with enough blanks to +make the field at least +.I width +characters long. +.PP +The floating point verbs +.BR f , +.BR e , +.BR E , +.BR g , +and +.B G +take a +.B double +argument. +Each interprets the flags +.BR 0 , +.BR L , +.BR + , +.BR - , +and +.B # +to mean pad with zeros, +long double argument, +always print a sign, +left justified, +and +alternate format. +.I Width +is the minimum field width and, +if the converted value takes up less than +.I width +characters, it is padded on the left (or right, if `left justified') +with spaces. 
+.I Precision +is the number of digits that are converted after the decimal place for +.BR e , +.BR E , +and +.B f +conversions, +and +.I precision +is the maximum number of significant digits for +.B g +and +.B G +conversions. +The +.B f +verb produces output of the form +.RB [ - ] digits [ .digits\fR]. +.B E +conversion appends an exponent +.BR E [ - ] digits , +and +.B e +conversion appends an exponent +.BR e [ - ] digits . +The +.B g +verb will output the argument in either +.B e +or +.B f +with the goal of producing the smallest output. +Also, trailing zeros are omitted from the fraction part of +the output, and a trailing decimal point appears only if it is followed +by a digit. +The +.B G +verb is similar, but uses +.B E +format instead of +.BR e . +When alternate format is specified, the result will always contain a decimal point, +and for +.B g +and +.B G +conversions, trailing zeros are not removed. +.PP +The +.B s +verb copies a string +(pointer to +.BR char ) +to the output. +The number of characters copied +.RI ( n ) +is the minimum +of the size of the string and +.IR precision . +These +.I n +characters are justified within a field of +.I width +characters as described above. +If a +.I precision +is given, it is safe for the string not to be nul-terminated +as long as it is at least +.I precision +characters (not bytes!) long. +The +.B S +verb is similar, but it interprets its pointer as an array +of runes (see +.IR utf (7)); +the runes are converted to +.SM UTF +before output. +.PP +The +.B c +verb copies a single +.B char +(promoted to +.BR int ) +justified within a field of +.I width +characters as described above. +The +.B C +verb is similar, but works on runes. +.PP +The +.B p +verb formats a pointer value. +At the moment, it is a synonym for +.BR x , +but that will change if pointers and integers are different sizes. 
+.PP +The +.B r +verb takes no arguments; it copies the error string returned by a call to +.IR strerror (3) +with an argument of +.IR errno. +.PP +Custom verbs may be installed using +.IR fmtinstall (3). +.SH EXAMPLE +This function prints an error message with a variable +number of arguments and then quits. +.IP +.EX +.ta 6n +6n +6n +void fatal(char *msg, ...) +{ + char buf[1024], *out; + va_list arg; + + out = seprint(buf, buf+sizeof buf, "Fatal error: "); + va_start(arg, msg); + out = vseprint(out, buf+sizeof buf, msg, arg); + va_end(arg); + write(2, buf, out-buf); + exit(1); +} +.EE +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR fmtinstall (3), +.IR fprintf (3), +.IR utf (7) +.SH DIAGNOSTICS +Routines that write to a file descriptor or call +.IR malloc +set +.IR errstr . +.SH BUGS +The formatting is close to that specified for ANSI +.IR fprintf (3); +the main difference is that +.B b +and +.B r +are not in ANSI and some +.B C9X +verbs and syntax are missing. +Also, and distinctly not a bug, +.I print +and friends generate +.SM UTF +rather than +.SM ASCII. +.PP +There is no +.IR runeprint , +.IR runefprint , +etc. because runes are byte-order dependent and should not be written directly to a file; use the +UTF output of +.I print +or +.I fprint +instead. +Also, +.I sprint +is deprecated for safety reasons; use +.IR snprint , +.IR seprint , +or +.I smprint +instead. +Safety also precludes the existence of +.IR runesprint . diff --git a/mk/libfmt/print.c b/mk/libfmt/print.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +print(char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = vfprint(1, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/runefmtstr.c b/mk/libfmt/runefmtstr.c @@ -0,0 +1,16 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <stdlib.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runefmtstrflush(Fmt *f) +{ + if(f->start == nil) + return nil; + *(Rune*)f->to = '\0'; + f->to = f->start; + return f->start; +} diff --git a/mk/libfmt/runeseprint.c b/mk/libfmt/runeseprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runeseprint(Rune *buf, Rune *e, char *fmt, ...) +{ + Rune *p; + va_list args; + + va_start(args, fmt); + p = runevseprint(buf, e, fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/runesmprint.c b/mk/libfmt/runesmprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runesmprint(char *fmt, ...) +{ + va_list args; + Rune *p; + + va_start(args, fmt); + p = runevsmprint(fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/runesnprint.c b/mk/libfmt/runesnprint.c @@ -0,0 +1,19 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +runesnprint(Rune *buf, int len, char *fmt, ...) 
+{ + int n; + va_list args; + + va_start(args, fmt); + n = runevsnprint(buf, len, fmt, args); + va_end(args); + return n; +} + diff --git a/mk/libfmt/runesprint.c b/mk/libfmt/runesprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +runesprint(Rune *buf, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = runevsnprint(buf, 256, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/runevseprint.c b/mk/libfmt/runevseprint.c @@ -0,0 +1,29 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +Rune* +runevseprint(Rune *buf, Rune *e, char *fmt, va_list args) +{ + Fmt f; + + if(e <= buf) + return nil; + f.runes = 1; + f.start = buf; + f.to = buf; + f.stop = e - 1; + f.flush = nil; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(Rune*)f.to = '\0'; + return (Rune*)f.to; +} + diff --git a/mk/libfmt/runevsmprint.c b/mk/libfmt/runevsmprint.c @@ -0,0 +1,86 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +/* + * Plan 9 port version must include libc.h in order to + * get Plan 9 debugging malloc, which sometimes returns + * different pointers than the standard malloc. 
+ */ +#ifdef PLAN9PORT +#include <u.h> +#include <libc.h> +#include "fmtdef.h" +#else +#include <stdlib.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" +#endif + +static int +runeFmtStrFlush(Fmt *f) +{ + Rune *s; + int n; + + if(f->start == nil) + return 0; + n = (uintptr)f->farg; + n *= 2; + s = (Rune*)f->start; + f->start = realloc(s, sizeof(Rune)*n); + if(f->start == nil){ + f->farg = nil; + f->to = nil; + f->stop = nil; + free(s); + return 0; + } + f->farg = (void*)(uintptr)n; + f->to = (Rune*)f->start + ((Rune*)f->to - s); + f->stop = (Rune*)f->start + n - 1; + return 1; +} + +int +runefmtstrinit(Fmt *f) +{ + int n; + + memset(f, 0, sizeof *f); + f->runes = 1; + n = 32; + f->start = malloc(sizeof(Rune)*n); + if(f->start == nil) + return -1; + f->to = f->start; + f->stop = (Rune*)f->start + n - 1; + f->flush = runeFmtStrFlush; + f->farg = (void*)(uintptr)n; + f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} + +/* + * print into an allocated string buffer + */ +Rune* +runevsmprint(char *fmt, va_list args) +{ + Fmt f; + int n; + + if(runefmtstrinit(&f) < 0) + return nil; + VA_COPY(f.args,args); + n = dofmt(&f, fmt); + VA_END(f.args); + if(f.start == nil) + return nil; + if(n < 0){ + free(f.start); + return nil; + } + *(Rune*)f.to = '\0'; + return (Rune*)f.start; +} diff --git a/mk/libfmt/runevsnprint.c b/mk/libfmt/runevsnprint.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +runevsnprint(Rune *buf, int len, char *fmt, va_list args) +{ + Fmt f; + + if(len <= 0) + return -1; + f.runes = 1; + f.start = buf; + f.to = buf; + f.stop = buf + len - 1; + f.flush = nil; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(Rune*)f.to = '\0'; + return (Rune*)f.to - buf; +} diff --git a/mk/libfmt/seprint.c 
b/mk/libfmt/seprint.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +seprint(char *buf, char *e, char *fmt, ...) +{ + char *p; + va_list args; + + va_start(args, fmt); + p = vseprint(buf, e, fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/smprint.c b/mk/libfmt/smprint.c @@ -0,0 +1,17 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +smprint(char *fmt, ...) +{ + va_list args; + char *p; + + va_start(args, fmt); + p = vsmprint(fmt, args); + va_end(args); + return p; +} diff --git a/mk/libfmt/snprint.c b/mk/libfmt/snprint.c @@ -0,0 +1,18 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +snprint(char *buf, int len, char *fmt, ...) +{ + int n; + va_list args; + + va_start(args, fmt); + n = vsnprint(buf, len, fmt, args); + va_end(args); + return n; +} + diff --git a/mk/libfmt/sprint.c b/mk/libfmt/sprint.c @@ -0,0 +1,30 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include <fmt.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +sprint(char *buf, char *fmt, ...) +{ + int n; + uint len; + va_list args; + + len = 1<<30; /* big number, but sprint is deprecated anyway */ + /* + * on PowerPC, the stack is near the top of memory, so + * we must be sure not to overflow a 32-bit pointer. + * + * careful! gcc-4.2 assumes buf+len < buf can never be true and + * optimizes the test away. casting to uintptr works around this bug. 
+ */ + if((uintptr)buf+len < (uintptr)buf) + len = -(uintptr)buf-1; + + va_start(args, fmt); + n = vsnprint(buf, len, fmt, args); + va_end(args); + return n; +} diff --git a/mk/libfmt/strtod.c b/mk/libfmt/strtod.c @@ -0,0 +1,520 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdlib.h> +#include <math.h> +#include <ctype.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +static ulong +umuldiv(ulong a, ulong b, ulong c) +{ + double d; + + d = ((double)a * (double)b) / (double)c; + if(d >= 4294967295.) + d = 4294967295.; + return (ulong)d; +} + +/* + * This routine will convert to arbitrary precision + * floating point entirely in multi-precision fixed. + * The answer is the closest floating point number to + * the given decimal number. Exactly half way are + * rounded ala ieee rules. + * Method is to scale input decimal between .500 and .999... + * with external power of 2, then binary search for the + * closest mantissa to this decimal number. + * Nmant is the required precision. (53 for ieee dp) + * Nbits is the max number of bits/word. (must be <= 28) + * Prec is calculated - the number of words of fixed mantissa. + */ +enum +{ + Nbits = 28, /* bits safely represented in a ulong */ + Nmant = 53, /* bits of precision required */ + Prec = (Nmant+Nbits+1)/Nbits, /* words of Nbits each to represent mantissa */ + Sigbit = 1<<(Prec*Nbits-Nmant), /* first significant bit of Prec-th word */ + Ndig = 1500, + One = (ulong)(1<<Nbits), + Half = (ulong)(One>>1), + Maxe = 310, + + Fsign = 1<<0, /* found - */ + Fesign = 1<<1, /* found e- */ + Fdpoint = 1<<2, /* found . */ + + S0 = 0, /* _ _S0 +S1 #S2 .S3 */ + S1, /* _+ #S2 .S3 */ + S2, /* _+# #S2 .S4 eS5 */ + S3, /* _+. 
#S4 */ + S4, /* _+#.# #S4 eS5 */ + S5, /* _+#.#e +S6 #S7 */ + S6, /* _+#.#e+ #S7 */ + S7 /* _+#.#e+# #S7 */ +}; + +static int xcmp(char*, char*); +static int fpcmp(char*, ulong*); +static void frnorm(ulong*); +static void divascii(char*, int*, int*, int*); +static void mulascii(char*, int*, int*, int*); + +typedef struct Tab Tab; +struct Tab +{ + int bp; + int siz; + char* cmp; +}; + +double +fmtstrtod(const char *as, char **aas) /* parse decimal text at as; if aas != nil, store the end-of-number pointer there */ +{ + int na, ex, dp, bp, c, i, flag, state; + ulong low[Prec], hig[Prec], mid[Prec]; + double d; + char *s, a[Ndig]; + + flag = 0; /* Fsign, Fesign, Fdpoint */ + na = 0; /* number of digits of a[] */ + dp = 0; /* na of decimal point */ + ex = 0; /* exponent */ + + state = S0; + for(s=(char*)as;; s++) { + c = *s; + if(c >= '0' && c <= '9') { + switch(state) { + case S0: + case S1: + case S2: + state = S2; + break; + case S3: + case S4: + state = S4; + break; + + case S5: + case S6: + case S7: + state = S7; + ex = ex*10 + (c-'0'); + continue; + } + if(na == 0 && c == '0') { + dp--; + continue; + } + if(na < Ndig-50) + a[na++] = c; + continue; + } + switch(c) { + case '\t': + case '\n': + case '\v': + case '\f': + case '\r': + case ' ': + if(state == S0) + continue; + break; + case '-': + if(state == S0) + flag |= Fsign; + else if(state == S5) /* only a '-' directly after 'e' negates the exponent */ + flag |= Fesign; /* fall through to the shared sign transitions */ + case '+': + if(state == S0) + state = S1; + else + if(state == S5) + state = S6; + else + break; /* syntax */ + continue; + case '.': + if(state != S0 && state != S1 && state != S2) + break; /* syntax: a rejected '.' must not move dp or set Fdpoint */ + flag |= Fdpoint; + dp = na; + if(state == S2) { + state = S4; + continue; + } + /* S0 or S1: no digit seen before the point */ + state = S3; + continue; + case 'e': + case 'E': + if(state == S2 || state == S4) { + state = S5; + continue; + } + break; + } + break; + } + + /* + * clean up return char-pointer + */ + switch(state) { + case S0: + if(xcmp(s, "nan") == 0) { + if(aas != nil) + *aas = s+3; + goto retnan; + } + case S1: + if(xcmp(s, "infinity") == 0) { + if(aas != nil) + *aas = s+8; + goto retinf; + } + if(xcmp(s, "inf") == 0) { + if(aas != nil) + *aas = s+3; + 
goto retinf; + } + case S3: + if(aas != nil) + *aas = (char*)as; + goto ret0; /* no digits found */ + case S6: + s--; /* back over +- */ + case S5: + s--; /* back over e */ + break; + } + if(aas != nil) + *aas = s; + + if(flag & Fdpoint) + while(na > 0 && a[na-1] == '0') + na--; + if(na == 0) + goto ret0; /* zero */ + a[na] = 0; + if(!(flag & Fdpoint)) + dp = na; + if(flag & Fesign) + ex = -ex; + dp += ex; + if(dp < -Maxe){ + errno = ERANGE; + goto ret0; /* underflow by exp */ + } else + if(dp > +Maxe) + goto retinf; /* overflow by exp */ + + /* + * normalize the decimal ascii number + * to range .[5-9][0-9]* e0 + */ + bp = 0; /* binary exponent */ + while(dp > 0) + divascii(a, &na, &dp, &bp); + while(dp < 0 || a[0] < '5') + mulascii(a, &na, &dp, &bp); + + /* close approx by naive conversion */ + mid[0] = 0; + mid[1] = 1; + for(i=0; (c=a[i]) != '\0'; i++) { + mid[0] = mid[0]*10 + (c-'0'); + mid[1] = mid[1]*10; + if(i >= 8) + break; + } + low[0] = umuldiv(mid[0], One, mid[1]); + hig[0] = umuldiv(mid[0]+1, One, mid[1]); + for(i=1; i<Prec; i++) { + low[i] = 0; + hig[i] = One-1; + } + + /* binary search for closest mantissa */ + for(;;) { + /* mid = (hig + low) / 2 */ + c = 0; + for(i=0; i<Prec; i++) { + mid[i] = hig[i] + low[i]; + if(c) + mid[i] += One; + c = mid[i] & 1; + mid[i] >>= 1; + } + frnorm(mid); + + /* compare */ + c = fpcmp(a, mid); + if(c > 0) { + c = 1; + for(i=0; i<Prec; i++) + if(low[i] != mid[i]) { + c = 0; + low[i] = mid[i]; + } + if(c) + break; /* between mid and hig */ + continue; + } + if(c < 0) { + for(i=0; i<Prec; i++) + hig[i] = mid[i]; + continue; + } + + /* only hard part is if even/odd roundings wants to go up */ + c = mid[Prec-1] & (Sigbit-1); + if(c == Sigbit/2 && (mid[Prec-1]&Sigbit) == 0) + mid[Prec-1] -= c; + break; /* exactly mid */ + } + + /* normal rounding applies */ + c = mid[Prec-1] & (Sigbit-1); + mid[Prec-1] -= c; + if(c >= Sigbit/2) { + mid[Prec-1] += Sigbit; + frnorm(mid); + } + goto out; + +ret0: + return 0; + +retnan: + 
return __NaN(); + +retinf: + /* + * Unix strtod requires these. Plan 9 would return Inf(0) or Inf(-1). */ + errno = ERANGE; + if(flag & Fsign) + return -HUGE_VAL; + return HUGE_VAL; + +out: + d = 0; + for(i=0; i<Prec; i++) + d = d*One + mid[i]; + if(flag & Fsign) + d = -d; + d = ldexp(d, bp - Prec*Nbits); + if(d == 0){ /* underflow */ + errno = ERANGE; + } + return d; +} + +static void +frnorm(ulong *f) +{ + int i, c; + + c = 0; + for(i=Prec-1; i>0; i--) { + f[i] += c; + c = f[i] >> Nbits; + f[i] &= One-1; + } + f[0] += c; +} + +static int +fpcmp(char *a, ulong* f) +{ + ulong tf[Prec]; + int i, d, c; + + for(i=0; i<Prec; i++) + tf[i] = f[i]; + + for(;;) { + /* tf *= 10 */ + for(i=0; i<Prec; i++) + tf[i] = tf[i]*10; + frnorm(tf); + d = (tf[0] >> Nbits) + '0'; + tf[0] &= One-1; + + /* compare next digit */ + c = *a; + if(c == 0) { + if('0' < d) + return -1; + if(tf[0] != 0) + goto cont; + for(i=1; i<Prec; i++) + if(tf[i] != 0) + goto cont; + return 0; + } + if(c > d) + return +1; + if(c < d) + return -1; + a++; + cont:; + } +} + +static void +divby(char *a, int *na, int b) +{ + int n, c; + char *p; + + p = a; + n = 0; + while(n>>b == 0) { + c = *a++; + if(c == 0) { + while(n) { + c = n*10; + if(c>>b) + break; + n = c; + } + goto xx; + } + n = n*10 + c-'0'; + (*na)--; + } + for(;;) { + c = n>>b; + n -= c<<b; + *p++ = c + '0'; + c = *a++; + if(c == 0) + break; + n = n*10 + c-'0'; + } + (*na)++; +xx: + while(n) { + n = n*10; + c = n>>b; + n -= c<<b; + *p++ = c + '0'; + (*na)++; + } + *p = 0; +} + +static Tab tab1[] = +{ + 1, 0, "", + 3, 1, "7", + 6, 2, "63", + 9, 3, "511", + 13, 4, "8191", + 16, 5, "65535", + 19, 6, "524287", + 23, 7, "8388607", + 26, 8, "67108863", + 27, 9, "134217727", +}; + +static void +divascii(char *a, int *na, int *dp, int *bp) +{ + int b, d; + Tab *t; + + d = *dp; + if(d >= (int)(nelem(tab1))) + d = (int)(nelem(tab1))-1; + t = tab1 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) > 0) + d--; + *dp -= d; + *bp += b; + divby(a, na, b); +} + 
+static void +mulby(char *a, char *p, char *q, int b) +{ + int n, c; + + n = 0; + *p = 0; + for(;;) { + q--; + if(q < a) + break; + c = *q - '0'; + c = (c<<b) + n; + n = c/10; + c -= n*10; + p--; + *p = c + '0'; + } + while(n) { + c = n; + n = c/10; + c -= n*10; + p--; + *p = c + '0'; + } +} + +static Tab tab2[] = +{ + 1, 1, "", /* dp = 0-0 */ + 3, 3, "125", + 6, 5, "15625", + 9, 7, "1953125", + 13, 10, "1220703125", + 16, 12, "152587890625", + 19, 14, "19073486328125", + 23, 17, "11920928955078125", + 26, 19, "1490116119384765625", + 27, 19, "7450580596923828125", /* dp 8-9 */ +}; + +static void +mulascii(char *a, int *na, int *dp, int *bp) +{ + char *p; + int d, b; + Tab *t; + + d = -*dp; + if(d >= (int)(nelem(tab2))) + d = (int)(nelem(tab2))-1; + t = tab2 + d; + b = t->bp; + if(memcmp(a, t->cmp, t->siz) < 0) + d--; + p = a + *na; + *bp -= b; + *dp += d; + *na += d; + mulby(a, p+d, p, b); +} + +static int +xcmp(char *a, char *b) /* 0 if a begins with b once upper-case letters in a are lowered, else 1 */ +{ + int c1, c2; + + while((c1 = *b++) != '\0') { + c2 = (unsigned char)*a++; /* ctype functions need an unsigned char value; a negative char is undefined */ + if(isupper(c2)) + c2 = tolower(c2); + if(c1 != c2) + return 1; + } + return 0; +} diff --git a/mk/libfmt/test.c b/mk/libfmt/test.c @@ -0,0 +1,53 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +/* Copyright (c) 2004 Google Inc.; see LICENSE */ + +#include <stdio.h> +#include <stdarg.h> +#include <utf.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +main(int argc, char *argv[]) +{ + quotefmtinstall(); + print("hello world\n"); + print("x: %x\n", 0x87654321); + print("u: %u\n", 0x87654321); + print("d: %d\n", 0x87654321); + print("s: %s\n", "hi there"); + print("q: %q\n", "hi i'm here"); + print("c: %c\n", '!'); + print("g: %g %g %g\n", 3.14159, 3.14159e10, 3.14159e-10); + print("e: %e %e %e\n", 3.14159, 3.14159e10, 3.14159e-10); + print("f: %f %f %f\n", 3.14159, 3.14159e10, 3.14159e-10); + print("smiley: %C\n", (Rune)0x263a); + print("%g %.18g\n", 2e25, 2e25); + print("%2.18g\n", 1.0); + print("%2.18f\n", 1.0); + print("%f\n", 
3.1415927/4); + print("%d\n", 23); + print("%i\n", 23); + print("%0.10d\n", 12345); + + /* test %4$d formats */ + print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + print("%3$d %4$06d %2$d %1$d\n", 444, 333, 111, 222); + print("%3$d %4$*5$06d %2$d %1$d\n", 444, 333, 111, 222, 20); + print("%3$hd %4$*5$06d %2$d %1$d\n", 444, 333, (short)111, 222, 20); + print("%3$lld %4$*5$06d %2$d %1$d\n", 444, 333, 111LL, 222, 20); + + /* test %'d formats */ + print("%'d %'d %'d\n", 1, 2222, 33333333); + print("%'019d\n", 0); + print("%08d %08d %08d\n", 1, 2222, 33333333); + print("%'08d %'08d %'08d\n", 1, 2222, 33333333); + print("%'x %'X %'b\n", 0x11111111, 0xabcd1234, 12345); + print("%'lld %'lld %'lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%019lld %019lld %019lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'019lld %'019lld %'019lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'020lld %'020lld %'020lld\n", 1LL, 222222222LL, 3333333333333LL); + print("%'llx %'llX %'llb\n", 0x111111111111LL, 0xabcd12345678LL, 112342345LL); + return 0; +} diff --git a/mk/libfmt/test2.c b/mk/libfmt/test2.c @@ -0,0 +1,9 @@ +#include <stdarg.h> +#include <utf.h> +#include <fmt.h> + +int +main(int argc, char **argv) +{ + print("%020.10d\n", 100); +} diff --git a/mk/libfmt/test3.c b/mk/libfmt/test3.c @@ -0,0 +1,52 @@ +#include <u.h> +#include <libc.h> +#include <stdio.h> + +void +test(char *fmt, ...) 
+{ + va_list arg; + char fmtbuf[100], stdbuf[100]; + + va_start(arg, fmt); + vsnprint(fmtbuf, sizeof fmtbuf, fmt, arg); + va_end(arg); + + va_start(arg, fmt); + vsnprint(stdbuf, sizeof stdbuf, fmt, arg); + va_end(arg); + + if(strcmp(fmtbuf, stdbuf) != 0) + print("fmt %s: fmt=\"%s\" std=\"%s\"\n", fmt, fmtbuf, stdbuf); + + print("fmt %s: %s\n", fmt, fmtbuf); +} + + +int +main(int argc, char *argv[]) +{ + test("%f", 3.14159); + test("%f", 3.14159e10); + test("%f", 3.14159e-10); + + test("%e", 3.14159); + test("%e", 3.14159e10); + test("%e", 3.14159e-10); + + test("%g", 3.14159); + test("%g", 3.14159e10); + test("%g", 3.14159e-10); + + test("%g", 2e25); + test("%.18g", 2e25); + + test("%2.18g", 1.0); + test("%2.18f", 1.0); + test("%f", 3.1415927/4); + + test("%20.10d", 12345); + test("%0.10d", 12345); + + return 0; +} diff --git a/mk/libfmt/vfprint.c b/mk/libfmt/vfprint.c @@ -0,0 +1,21 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +vfprint(int fd, char *fmt, va_list args) +{ + Fmt f; + char buf[256]; + int n; + + fmtfdinit(&f, fd, buf, sizeof(buf)); + VA_COPY(f.args,args); + n = dofmt(&f, fmt); + VA_END(f.args); + if(n > 0 && __fmtFdFlush(&f) == 0) + return -1; + return n; +} diff --git a/mk/libfmt/vseprint.c b/mk/libfmt/vseprint.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +char* +vseprint(char *buf, char *e, char *fmt, va_list args) +{ + Fmt f; + + if(e <= buf) + return nil; + f.runes = 0; + f.start = buf; + f.to = buf; + f.stop = e - 1; + f.flush = 0; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(char*)f.to = '\0'; + return (char*)f.to; +} + diff --git a/mk/libfmt/vsmprint.c b/mk/libfmt/vsmprint.c @@ -0,0 +1,83 @@ +/* Copyright (c) 2002-2006 Lucent 
Technologies; see LICENSE */ +/* + * Plan 9 port version must include libc.h in order to + * get Plan 9 debugging malloc, which sometimes returns + * different pointers than the standard malloc. + */ +#ifdef PLAN9PORT +#include <u.h> +#include <libc.h> +#include "fmtdef.h" +#else +#include <stdlib.h> +#include <string.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" +#endif + +static int +fmtStrFlush(Fmt *f) +{ + char *s; + int n; + + if(f->start == nil) + return 0; + n = (uintptr)f->farg; + n *= 2; + s = (char*)f->start; + f->start = realloc(s, n); + if(f->start == nil){ + f->farg = nil; + f->to = nil; + f->stop = nil; + free(s); + return 0; + } + f->farg = (void*)(uintptr)n; + f->to = (char*)f->start + ((char*)f->to - s); + f->stop = (char*)f->start + n - 1; + return 1; +} + +int +fmtstrinit(Fmt *f) +{ + int n; + + memset(f, 0, sizeof *f); + f->runes = 0; + n = 32; + f->start = malloc(n); + if(f->start == nil) + return -1; + f->to = f->start; + f->stop = (char*)f->start + n - 1; + f->flush = fmtStrFlush; + f->farg = (void*)(uintptr)n; + f->nfmt = 0; + fmtlocaleinit(f, nil, nil, nil); + return 0; +} + +/* + * print into an allocated string buffer + */ +char* +vsmprint(char *fmt, va_list args) +{ + Fmt f; + int n; + + if(fmtstrinit(&f) < 0) + return nil; + VA_COPY(f.args,args); + n = dofmt(&f, fmt); + VA_END(f.args); + if(n < 0){ + free(f.start); + return nil; + } + return fmtstrflush(&f); +} diff --git a/mk/libfmt/vsnprint.c b/mk/libfmt/vsnprint.c @@ -0,0 +1,28 @@ +/* Copyright (c) 2002-2006 Lucent Technologies; see LICENSE */ +#include <stdlib.h> +#include <stdarg.h> +#include "plan9.h" +#include "fmt.h" +#include "fmtdef.h" + +int +vsnprint(char *buf, int len, char *fmt, va_list args) +{ + Fmt f; + + if(len <= 0) + return -1; + f.runes = 0; + f.start = buf; + f.to = buf; + f.stop = buf + len - 1; + f.flush = 0; + f.farg = nil; + f.nfmt = 0; + VA_COPY(f.args,args); + fmtlocaleinit(&f, nil, nil, nil); + dofmt(&f, fmt); + VA_END(f.args); + *(char*)f.to 
= '\0'; + return (char*)f.to - buf; +} diff --git a/mk/libregexp/NOTICE b/mk/libregexp/NOTICE @@ -0,0 +1,25 @@ +/* + * The authors of this software is Rob Pike. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 regular expression library. + +Please send comments about the packaging +to Russ Cox <rsc@swtch.com>. + + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libregexp/README b/mk/libregexp/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libregexp/depsinc.mk b/mk/libregexp/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libregexp_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libregexp_DEPDIR -lregexp9 diff --git a/mk/libregexp/lib9.h b/mk/libregexp/lib9.h @@ -0,0 +1,10 @@ +#include <fmt.h> +#include <setjmp.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#define exits(x) exit(x && *x ? 
1 : 0) + +#define nil 0 + diff --git a/mk/libregexp/mkfile b/mk/libregexp/mkfile @@ -0,0 +1,15 @@ +LIB = libregexp9.a +LOBJ = \ + regcomp.o\ + regerror.o\ + regexec.o\ + regsub.o\ + regaux.o\ + rregexec.o\ + rregsub.o\ + +LOCAL_CFLAGS = -I"$PREFIX"/include +DEPS = libfmt libutf + +<$mkbuild/mk.default + diff --git a/mk/libregexp/regaux.c b/mk/libregexp/regaux.c @@ -0,0 +1,112 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + + +/* + * save a new match in mp + */ +extern void +_renewmatch(Resub *mp, int ms, Resublist *sp) +{ + int i; + + if(mp==0 || ms<=0) + return; + if(mp[0].s.sp==0 || sp->m[0].s.sp<mp[0].s.sp || + (sp->m[0].s.sp==mp[0].s.sp && sp->m[0].e.ep>mp[0].e.ep)){ + for(i=0; i<ms && i<NSUBEXP; i++) + mp[i] = sp->m[i]; + for(; i<ms; i++) + mp[i].s.sp = mp[i].e.ep = 0; + } +} + +/* + * Note optimization in _renewthread: + * *lp must be pending when _renewthread called; if *l has been looked + * at already, the optimization is a bug. + */ +extern Relist* +_renewthread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + Resublist *sep) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sep->m[0].s.sp < p->se.m[0].s.sp){ + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + p->se = *sep; + else + p->se.m[0] = sep->m[0]; + (++p)->inst = 0; + return p; +} + +/* + * same as renewthread, but called with + * initial empty start pointer. 
+ */ +extern Relist* +_renewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + char *sp) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(sp < p->se.m[0].s.sp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.sp = sp; + (++p)->inst = 0; + return p; +} + +extern Relist* +_rrenewemptythread(Relist *lp, /* _relist to add to */ + Reinst *ip, /* instruction to add */ + int ms, + Rune *rsp) /* pointers to subexpressions */ +{ + Relist *p; + + for(p=lp; p->inst; p++){ + if(p->inst == ip){ + if(rsp < p->se.m[0].s.rsp) { + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + } + return 0; + } + } + p->inst = ip; + if(ms > 1) + memset(&p->se, 0, sizeof(p->se)); + p->se.m[0].s.rsp = rsp; + (++p)->inst = 0; + return p; +} diff --git a/mk/libregexp/regcomp.c b/mk/libregexp/regcomp.c @@ -0,0 +1,555 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + +#define TRUE 1 +#define FALSE 0 + +/* + * Parser Information + */ +typedef +struct Node +{ + Reinst* first; + Reinst* last; +}Node; + +#define NSTACK 20 +static Node andstack[NSTACK]; +static Node *andp; +static int atorstack[NSTACK]; +static int* atorp; +static int cursubid; /* id of current subexpression */ +static int subidstack[NSTACK]; /* parallel to atorstack */ +static int* subidp; +static int lastwasand; /* Last token was operand */ +static int nbra; +static char* exprp; /* pointer to next character in source expression */ +static int lexdone; +static int nclass; +static Reclass*classp; +static Reinst* freep; +static int errors; +static Rune yyrune; /* last lex'd rune */ +static Reclass*yyclassp; /* last lex'd class */ + +/* predeclared crap */ +static void operator(int); +static void pushand(Reinst*, Reinst*); +static void pushator(int); +static void 
evaluntil(int); +static int bldcclass(void); + +static jmp_buf regkaboom; + +static void +rcerror(char *s) +{ + errors++; + regerror(s); + longjmp(regkaboom, 1); +} + +static Reinst* +newinst(int t) +{ + freep->type = t; + freep->u2.left = 0; + freep->u1.right = 0; + return freep++; +} + +static void +operand(int t) +{ + Reinst *i; + + if(lastwasand) + operator(CAT); /* catenate is implicit */ + i = newinst(t); + + if(t == CCLASS || t == NCCLASS) + i->u1.cp = yyclassp; + if(t == RUNE) + i->u1.r = yyrune; + + pushand(i, i); + lastwasand = TRUE; +} + +static void +operator(int t) +{ + if(t==RBRA && --nbra<0) + rcerror("unmatched right paren"); + if(t==LBRA){ + if(++cursubid >= NSUBEXP) + rcerror ("too many subexpressions"); + nbra++; + if(lastwasand) + operator(CAT); + } else + evaluntil(t); + if(t != RBRA) + pushator(t); + lastwasand = FALSE; + if(t==STAR || t==QUEST || t==PLUS || t==RBRA) + lastwasand = TRUE; /* these look like operands */ +} + +static void +regerr2(char *s, int c) +{ + char buf[100]; + char *cp = buf; + while(*s) + *cp++ = *s++; + *cp++ = c; + *cp = '\0'; + rcerror(buf); +} + +static void +cant(char *s) +{ + char buf[100]; + strcpy(buf, "can't happen: "); + strcat(buf, s); + rcerror(buf); +} + +static void +pushand(Reinst *f, Reinst *l) +{ + if(andp >= &andstack[NSTACK]) + cant("operand stack overflow"); + andp->first = f; + andp->last = l; + andp++; +} + +static void +pushator(int t) +{ + if(atorp >= &atorstack[NSTACK]) + cant("operator stack overflow"); + *atorp++ = t; + *subidp++ = cursubid; +} + +static Node* +popand(int op) +{ + Reinst *inst; + + if(andp <= &andstack[0]){ + regerr2("missing operand for ", op); + inst = newinst(NOP); + pushand(inst,inst); + } + return --andp; +} + +static int +popator(void) +{ + if(atorp <= &atorstack[0]) + cant("operator stack underflow"); + --subidp; + return *--atorp; +} + +static void +evaluntil(int pri) +{ + Node *op1, *op2; + Reinst *inst1, *inst2; + + while(pri==RBRA || atorp[-1]>=pri){ + 
switch(popator()){ + default: + rcerror("unknown operator in evaluntil"); + break; + case LBRA: /* must have been RBRA */ + op1 = popand('('); + inst2 = newinst(RBRA); + inst2->u1.subid = *subidp; + op1->last->u2.next = inst2; + inst1 = newinst(LBRA); + inst1->u1.subid = *subidp; + inst1->u2.next = op1->first; + pushand(inst1, inst2); + return; + case OR: + op2 = popand('|'); + op1 = popand('|'); + inst2 = newinst(NOP); + op2->last->u2.next = inst2; + op1->last->u2.next = inst2; + inst1 = newinst(OR); + inst1->u1.right = op1->first; + inst1->u2.left = op2->first; + pushand(inst1, inst2); + break; + case CAT: + op2 = popand(0); + op1 = popand(0); + op1->last->u2.next = op2->first; + pushand(op1->first, op2->last); + break; + case STAR: + op2 = popand('*'); + inst1 = newinst(OR); + op2->last->u2.next = inst1; + inst1->u1.right = op2->first; + pushand(inst1, inst1); + break; + case PLUS: + op2 = popand('+'); + inst1 = newinst(OR); + op2->last->u2.next = inst1; + inst1->u1.right = op2->first; + pushand(op2->first, inst1); + break; + case QUEST: + op2 = popand('?'); + inst1 = newinst(OR); + inst2 = newinst(NOP); + inst1->u2.left = inst2; + inst1->u1.right = op2->first; + op2->last->u2.next = inst2; + pushand(inst1, inst2); + break; + } + } +} + +static Reprog* +optimize(Reprog *pp) +{ + Reinst *inst, *target; + int size; + Reprog *npp; + Reclass *cl; + int diff; + + /* + * get rid of NOOP chains + */ + for(inst=pp->firstinst; inst->type!=END; inst++){ + target = inst->u2.next; + while(target->type == NOP) + target = target->u2.next; + inst->u2.next = target; + } + + /* + * The original allocation is for an area larger than + * necessary. Reallocate to the actual space used + * and then relocate the code. 
+ */ + size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst); + npp = realloc(pp, size); + if(npp==0 || npp==pp) + return pp; + diff = (char *)npp - (char *)pp; + freep = (Reinst *)((char *)freep + diff); + for(inst=npp->firstinst; inst<freep; inst++){ + switch(inst->type){ + case OR: + case STAR: + case PLUS: + case QUEST: + inst->u1.right = (void*)((char*)inst->u1.right + diff); + break; + case CCLASS: + case NCCLASS: + inst->u1.right = (void*)((char*)inst->u1.right + diff); + cl = inst->u1.cp; + cl->end = (void*)((char*)cl->end + diff); + break; + } + inst->u2.left = (void*)((char*)inst->u2.left + diff); + } + npp->startinst = (void*)((char*)npp->startinst + diff); + return npp; +} + +#ifdef DEBUG +static void +dumpstack(void){ + Node *stk; + int *ip; + + print("operators\n"); + for(ip=atorstack; ip<atorp; ip++) + print("0%o\n", *ip); + print("operands\n"); + for(stk=andstack; stk<andp; stk++) + print("0%o\t0%o\n", stk->first->type, stk->last->type); +} + +static void +dump(Reprog *pp) +{ + Reinst *l; + Rune *p; + + l = pp->firstinst; + do{ + print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type, + l->u2.left-pp->firstinst, l->u1.right-pp->firstinst); + if(l->type == RUNE) + print("\t%C\n", l->u1.r); + else if(l->type == CCLASS || l->type == NCCLASS){ + print("\t["); + if(l->type == NCCLASS) + print("^"); + for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2) + if(p[0] == p[1]) + print("%C", p[0]); + else + print("%C-%C", p[0], p[1]); + print("]\n"); + } else + print("\n"); + }while(l++->type); +} +#endif + +static Reclass* +newclass(void) +{ + if(nclass >= NCLASS) + regerr2("too many character classes; limit", NCLASS+'0'); + return &(classp[nclass++]); +} + +static int +nextc(Rune *rp) +{ + if(lexdone){ + *rp = 0; + return 1; + } + exprp += chartorune(rp, exprp); + if(*rp == '\\'){ + exprp += chartorune(rp, exprp); + return 1; + } + if(*rp == 0) + lexdone = 1; + return 0; +} + +static int +lex(int literal, int dot_type) +{ + int quoted; + + quoted = 
nextc(&yyrune); + if(literal || quoted){ + if(yyrune == 0) + return END; + return RUNE; + } + + switch(yyrune){ + case 0: + return END; + case '*': + return STAR; + case '?': + return QUEST; + case '+': + return PLUS; + case '|': + return OR; + case '.': + return dot_type; + case '(': + return LBRA; + case ')': + return RBRA; + case '^': + return BOL; + case '$': + return EOL; + case '[': + return bldcclass(); + } + return RUNE; +} + +static int +bldcclass(void) +{ + int type; + Rune r[NCCRUNE]; + Rune *p, *ep, *np; + Rune rune; + int quoted; + + /* we have already seen the '[' */ + type = CCLASS; + yyclassp = newclass(); + + /* look ahead for negation */ + /* SPECIAL CASE!!! negated classes don't match \n */ + ep = r; + quoted = nextc(&rune); + if(!quoted && rune == '^'){ + type = NCCLASS; + quoted = nextc(&rune); + *ep++ = '\n'; + *ep++ = '\n'; + } + + /* parse class into a set of spans */ + for(; ep<&r[NCCRUNE];){ + if(rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + if(!quoted && rune == ']') + break; + if(!quoted && rune == '-'){ + if(ep == r){ + rcerror("malformed '[]'"); + return 0; + } + quoted = nextc(&rune); + if((!quoted && rune == ']') || rune == 0){ + rcerror("malformed '[]'"); + return 0; + } + *(ep-1) = rune; + } else { + *ep++ = rune; + *ep++ = rune; + } + quoted = nextc(&rune); + } + + /* sort on span start */ + for(p = r; p < ep; p += 2){ + for(np = p; np < ep; np += 2) + if(*np < *p){ + rune = np[0]; + np[0] = p[0]; + p[0] = rune; + rune = np[1]; + np[1] = p[1]; + p[1] = rune; + } + } + + /* merge spans */ + np = yyclassp->spans; + p = r; + if(r == ep) + yyclassp->end = np; + else { + np[0] = *p++; + np[1] = *p++; + for(; p < ep; p += 2) + if(p[0] <= np[1]){ + if(p[1] > np[1]) + np[1] = p[1]; + } else { + np += 2; + np[0] = p[0]; + np[1] = p[1]; + } + yyclassp->end = np+2; + } + + return type; +} + +static Reprog* +regcomp1(char *s, int literal, int dot_type) +{ + int token; + Reprog *volatile pp; + + /* get memory for the program */ + 
pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s)); + if(pp == 0){ + regerror("out of memory"); + return 0; + } + freep = pp->firstinst; + classp = pp->class; + errors = 0; + + if(setjmp(regkaboom)) + goto out; + + /* go compile the sucker */ + lexdone = 0; + exprp = s; + nclass = 0; + nbra = 0; + atorp = atorstack; + andp = andstack; + subidp = subidstack; + lastwasand = FALSE; + cursubid = 0; + + /* Start with a low priority operator to prime parser */ + pushator(START-1); + while((token = lex(literal, dot_type)) != END){ + if((token&0300) == OPERATOR) + operator(token); + else + operand(token); + } + + /* Close with a low priority operator */ + evaluntil(START); + + /* Force END */ + operand(END); + evaluntil(START); +#ifdef DEBUG + dumpstack(); +#endif + if(nbra) + rcerror("unmatched left paren"); + --andp; /* points to first and only operand */ + pp->startinst = andp->first; +#ifdef DEBUG + dump(pp); +#endif + pp = optimize(pp); +#ifdef DEBUG + print("start: %d\n", andp->first-pp->firstinst); + dump(pp); +#endif +out: + if(errors){ + free(pp); + pp = 0; + } + return pp; +} + +extern Reprog* +regcomp(char *s) +{ + return regcomp1(s, 0, ANY); +} + +extern Reprog* +regcomplit(char *s) +{ + return regcomp1(s, 1, ANY); +} + +extern Reprog* +regcompnl(char *s) +{ + return regcomp1(s, 0, ANYNL); +} diff --git a/mk/libregexp/regcomp.h b/mk/libregexp/regcomp.h @@ -0,0 +1,74 @@ +/* + * substitution list + */ +#define uchar __reuchar +typedef unsigned char uchar; +#define nelem(x) (sizeof(x)/sizeof((x)[0])) + +#define NSUBEXP 32 +typedef struct Resublist Resublist; +struct Resublist +{ + Resub m[NSUBEXP]; +}; + +/* max character classes per program */ +extern Reprog RePrOg; +#define NCLASS (sizeof(RePrOg.class)/sizeof(Reclass)) + +/* max rune ranges per character class */ +#define NCCRUNE (sizeof(Reclass)/sizeof(Rune)) + +/* + * Actions and Tokens (Reinst types) + * + * 02xx are operators, value == precedence + * 03xx are tokens, i.e. 
operands for operators + */ +#define RUNE 0177 +#define OPERATOR 0200 /* Bitmask of all operators */ +#define START 0200 /* Start, used for marker on stack */ +#define RBRA 0201 /* Right bracket, ) */ +#define LBRA 0202 /* Left bracket, ( */ +#define OR 0203 /* Alternation, | */ +#define CAT 0204 /* Concatenation, implicit operator */ +#define STAR 0205 /* Closure, * */ +#define PLUS 0206 /* a+ == aa* */ +#define QUEST 0207 /* a? == a|nothing, i.e. 0 or 1 a's */ +#define ANY 0300 /* Any character except newline, . */ +#define ANYNL 0301 /* Any character including newline, . */ +#define NOP 0302 /* No operation, internal use only */ +#define BOL 0303 /* Beginning of line, ^ */ +#define EOL 0304 /* End of line, $ */ +#define CCLASS 0305 /* Character class, [] */ +#define NCCLASS 0306 /* Negated character class, [] */ +#define END 0377 /* Terminate: match found */ + +/* + * regexec execution lists + */ +#define LISTSIZE 10 +#define BIGLISTSIZE (10*LISTSIZE) +typedef struct Relist Relist; +struct Relist +{ + Reinst* inst; /* Reinstruction of the thread */ + Resublist se; /* matched subexpressions in this thread */ +}; +typedef struct Reljunk Reljunk; +struct Reljunk +{ + Relist* relist[2]; + Relist* reliste[2]; + int starttype; + Rune startchar; + char* starts; + char* eol; + Rune* rstarts; + Rune* reol; +}; + +extern Relist* _renewthread(Relist*, Reinst*, int, Resublist*); +extern void _renewmatch(Resub*, int, Resublist*); +extern Relist* _renewemptythread(Relist*, Reinst*, int, char*); +extern Relist* _rrenewemptythread(Relist*, Reinst*, int, Rune*); diff --git a/mk/libregexp/regerror.c b/mk/libregexp/regerror.c @@ -0,0 +1,14 @@ +#include "lib9.h" +#include "regexp9.h" + +void +regerror(char *s) /* write "regerror: " + s + newline to fd 2, then exits("regerr") */ +{ + char buf[132]; + + strcpy(buf, "regerror: "); + strncat(buf, s, sizeof(buf)-strlen(buf)-2); /* truncate long messages; leaves room for the newline and NUL */ + strcat(buf, "\n"); + write(2, buf, strlen(buf)); + exits("regerr"); +} diff --git a/mk/libregexp/regexec.c b/mk/libregexp/regexec.c @@ -0,0 +1,231 @@ +#include "lib9.h" +#include "regexp9.h" +#include 
"regcomp.h" + + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +regexec1(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + int flag=0; + Reinst *inst; + Relist *tlp; + char *s; + int i, checkstart; + Rune r, *rp, *ep; + int n; + Relist* tl; /* This list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + char *p; + + match = 0; + checkstart = j->starttype; + if(mp) + for(i=0; i<ms; i++) { + mp[i].s.sp = 0; + mp[i].e.ep = 0; + } + j->relist[0][0].inst = 0; + j->relist[1][0].inst = 0; + + /* Execute machine once for each character, including terminal NUL */ + s = j->starts; + do{ + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = utfrune(s, j->startchar); + if(p == 0 || s == j->eol) + return match; + s = p; + break; + case BOL: + if(s == bol) + break; + p = utfrune(s, '\n'); + if(p == 0 || s == j->eol) + return match; + s = p+1; + break; + } + } + r = *(uchar*)s; + if(r < Runeself) + n = 1; + else + n = chartorune(&r, s); + + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; + + /* Add first instruction to current list */ + if(match == 0) + _renewemptythread(tl, progp->startinst, ms, s); + + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ /* assignment = */ + for(inst = tlp->inst; ; inst = inst->u2.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->u1.r == r){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + } + break; + case LBRA: + tlp->se.m[inst->u1.subid].s.sp = s; + continue; + case RBRA: + tlp->se.m[inst->u1.subid].e.ep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->u2.next, ms, 
&tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->eol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.ep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->eol) + break; + checkstart = j->starttype && nl->inst==0; + s += n; + }while(r); + return match; +} + +static int +regexec2(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + int rv; + Relist *relist0, *relist1; + + /* mark space */ + relist0 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist0 == nil) + return -1; + relist1 = malloc(BIGLISTSIZE*sizeof(Relist)); + if(relist1 == nil){ + free(relist0); /* relist1 is nil here; release the list that was allocated */ + return -1; + } + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + BIGLISTSIZE - 2; + j->reliste[1] = relist1 + BIGLISTSIZE - 2; + + rv = regexec1(progp, bol, mp, ms, j); + free(relist0); + free(relist1); + return rv; +} + +extern int +regexec(Reprog *progp, /* program to run */ + char *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of 
elements at mp */ +{ + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + j.starts = bol; + j.eol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.starts = mp->s.sp; + if(mp->e.ep) + j.eol = mp->e.ep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->u1.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; + + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; + + rv = regexec1(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + rv = regexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; +} diff --git a/mk/libregexp/regexp9.3 b/mk/libregexp/regexp9.3 @@ -0,0 +1,220 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH REGEXP9 3 +.SH NAME +regcomp, regcomplit, regcompnl, regexec, regsub, rregexec, rregsub, regerror \- regular expression +.SH SYNOPSIS +.B #include <utf.h> +.br +.B #include <fmt.h> +.br +.B #include <regexp9.h> +.PP +.ta \w'\fLRegprog 'u +.B +Reprog *regcomp(char *exp) +.PP +.B +Reprog *regcomplit(char *exp) +.PP +.B +Reprog *regcompnl(char *exp) +.PP +.nf +.B +int regexec(Reprog *prog, char *string, Resub *match, int msize) +.PP +.nf +.B +void regsub(char *source, char *dest, int dlen, Resub *match, int msize) +.PP +.nf +.B +int rregexec(Reprog *prog, Rune *string, Resub *match, int msize) +.PP +.nf +.B +void rregsub(Rune *source, Rune *dest, int dlen, Resub *match, int msize) +.PP +.B +void regerror(char *msg) +.SH DESCRIPTION +.I Regcomp +compiles a +regular expression and returns +a pointer to the generated description. +The space is allocated by +.IR malloc (3) +and may be released by +.IR free . +Regular expressions are exactly as in +.IR regexp9 (7). 
+.PP +.I Regcomplit +is like +.I regcomp +except that all characters are treated literally. +.I Regcompnl +is like +.I regcomp +except that the +.B . +metacharacter matches all characters, including newlines. +.PP +.I Regexec +matches a null-terminated +.I string +against the compiled regular expression in +.IR prog . +If it matches, +.I regexec +returns +.B 1 +and fills in the array +.I match +with character pointers to the substrings of +.I string +that correspond to the +parenthesized subexpressions of +.IR exp : +.BI match[ i ].s.sp +points to the beginning and +.BI match[ i ].e.ep +points just beyond +the end of the +.IR i th +substring. +(Subexpression +.I i +begins at the +.IR i th +left parenthesis, counting from 1.) +Pointers in +.B match[0] +pick out the substring that corresponds to +the whole regular expression. +Unused elements of +.I match +are filled with zeros. +Matches involving +.LR * , +.LR + , +and +.L ? +are extended as far as possible. +The number of array elements in +.I match +is given by +.IR msize . +The structure of elements of +.I match +is: +.IP +.EX +typedef struct { + union { + char *sp; + Rune *rsp; + } s; + union { + char *ep; + Rune *rep; + } e; +} Resub; +.EE +.LP +If +.B match[0].s.sp +is nonzero on entry, +.I regexec +starts matching at that point within +.IR string . +If +.B match[0].e.ep +is nonzero on entry, +the last character matched is the one +preceding that point. +.PP +.I Regsub +places in +.I dest +a substitution instance of +.I source +in the context of the last +.I regexec +performed using +.IR match . +Each instance of +.BI \e n\f1, +where +.I n +is a digit, is replaced by the +string delimited by +.BI match[ n ].s.sp +and +.BI match[ n ].e.ep\f1. +Each instance of +.L & +is replaced by the string delimited by +.B match[0].s.sp +and +.BR match[0].e.ep . +The substitution will always be null terminated and +trimmed to fit into dlen bytes. 
+.PP +.IR Regerror , +called whenever an error is detected in +.IR regcomp , +writes the string +.I msg +on the standard error file and exits. +.I Regerror +can be replaced to perform +special error processing. +If the user supplied +.I regerror +returns rather than exits, +.I regcomp +will return 0. +.PP +.I Rregexec +and +.I rregsub +are variants of +.I regexec +and +.I regsub +that use strings of +.B Runes +instead of strings of +.BR chars . +With these routines, the +.I rsp +and +.I rep +fields of the +.I match +array elements should be used. +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH "SEE ALSO" +.IR grep (1) +.SH DIAGNOSTICS +.I Regcomp +returns +.B 0 +for an illegal expression +or other failure. +.I Regexec +returns 0 +if +.I string +is not matched. +.SH BUGS +There is no way to specify or match a NUL character; NULs terminate patterns and strings. diff --git a/mk/libregexp/regexp9.7 b/mk/libregexp/regexp9.7 @@ -0,0 +1,141 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH REGEXP9 7 +.SH NAME +regexp \- Plan 9 regular expression notation +.SH DESCRIPTION +This manual page describes the regular expression +syntax used by the Plan 9 regular expression library +.IR regexp9 (3). +It is the form used by +.IR egrep (1) +before +.I egrep +got complicated. +.PP +A +.I "regular expression" +specifies +a set of strings of characters. +A member of this set of strings is said to be +.I matched +by the regular expression. In many applications +a delimiter character, commonly +.LR / , +bounds a regular expression. +In the following specification for regular expressions +the word `character' means any character (rune) but newline. +.PP +The syntax for a regular expression +.B e0 +is +.IP +.EX +e3: literal | charclass | '.' | '^' | '$' | '(' e0 ')' + +e2: e3 + | e2 REP + +REP: '*' | '+' | '?' 
+ +e1: e2 + | e1 e2 + +e0: e1 + | e0 '|' e1 +.EE +.PP +A +.B literal +is any non-metacharacter, or a metacharacter +(one of +.BR .*+?[]()|\e^$ ), +or the delimiter +preceded by +.LR \e . +.PP +A +.B charclass +is a nonempty string +.I s +bracketed +.BI [ \|s\| ] +(or +.BI [^ s\| ]\fR); +it matches any character in (or not in) +.IR s . +A negated character class never +matches newline. +A substring +.IB a - b\f1, +with +.I a +and +.I b +in ascending +order, stands for the inclusive +range of +characters between +.I a +and +.IR b . +In +.IR s , +the metacharacters +.LR - , +.LR ] , +an initial +.LR ^ , +and the regular expression delimiter +must be preceded by a +.LR \e ; +other metacharacters +have no special meaning and +may appear unescaped. +.PP +A +.L . +matches any character. +.PP +A +.L ^ +matches the beginning of a line; +.L $ +matches the end of the line. +.PP +The +.B REP +operators match zero or more +.RB ( * ), +one or more +.RB ( + ), +zero or one +.RB ( ? ), +instances respectively of the preceding regular expression +.BR e2 . +.PP +A concatenated regular expression, +.BR "e1\|e2" , +matches a match to +.B e1 +followed by a match to +.BR e2 . +.PP +An alternative regular expression, +.BR "e0\||\|e1" , +matches either a match to +.B e0 +or a match to +.BR e1 . +.PP +A match to any part of a regular expression +extends as far as possible without preventing +a match to the remainder of the regular expression. 
+.SH "SEE ALSO +.IR regexp9 (3) diff --git a/mk/libregexp/regexp9.h b/mk/libregexp/regexp9.h @@ -0,0 +1,96 @@ +#ifndef _REGEXP9_H_ +#define _REGEXP9_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +#ifdef AUTOLIB +AUTOLIB(regexp9) +#endif + +#include <utf.h> + +typedef struct Resub Resub; +typedef struct Reclass Reclass; +typedef struct Reinst Reinst; +typedef struct Reprog Reprog; + +/* + * Sub expression matches + */ +struct Resub{ + union + { + char *sp; + Rune *rsp; + }s; + union + { + char *ep; + Rune *rep; + }e; +}; + +/* + * character class, each pair of rune's defines a range + */ +struct Reclass{ + Rune *end; + Rune spans[64]; +}; + +/* + * Machine instructions + */ +struct Reinst{ + int type; + union { + Reclass *cp; /* class pointer */ + Rune r; /* character */ + int subid; /* sub-expression id for RBRA and LBRA */ + Reinst *right; /* right child of OR */ + }u1; + union { /* regexp relies on these two being in the same union */ + Reinst *left; /* left child of OR */ + Reinst *next; /* next instruction for CAT & LBRA */ + }u2; +}; + +/* + * Reprogram definition + */ +struct Reprog{ + Reinst *startinst; /* start pc */ + Reclass class[16]; /* .data */ + Reinst firstinst[5]; /* .text */ +}; + +extern Reprog *regcomp9(char*); +extern Reprog *regcomplit9(char*); +extern Reprog *regcompnl9(char*); +extern void regerror9(char*); +extern int regexec9(Reprog*, char*, Resub*, int); +extern void regsub9(char*, char*, int, Resub*, int); + +extern int rregexec9(Reprog*, Rune*, Resub*, int); +extern void rregsub9(Rune*, Rune*, int, Resub*, int); + +/* + * Darwin simply cannot handle having routines that + * override other library routines. 
+ */ +#ifndef NOPLAN9DEFINES +#define regcomp regcomp9 +#define regcomplit regcomplit9 +#define regcompnl regcompnl9 +#define regerror regerror9 +#define regexec regexec9 +#define regsub regsub9 +#define rregexec rregexec9 +#define rregsub rregsub9 +#endif + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libregexp/regsub.c b/mk/libregexp/regsub.c @@ -0,0 +1,63 @@ +#include "lib9.h" +#include "regexp9.h" + +/* substitute into one string using the matches from the last regexec() */ +extern void +regsub(char *sp, /* source string */ + char *dp, /* destination string */ + int dlen, + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ +{ + char *ssp, *ep; + int i; + + ep = dp+dlen-1; + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + if(mp!=0 && ms>i && mp[i].s.sp != 0) /* validate mp and ms before indexing mp[i] */ + for(ssp = mp[i].s.sp; + ssp < mp[i].e.ep; + ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && ms>0) /* validate mp and ms before reading mp[0] */ + if(mp[0].s.sp != 0) + for(ssp = mp[0].s.sp; + ssp < mp[0].e.ep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } + sp++; + } + *dp = '\0'; +} diff --git a/mk/libregexp/rregexec.c b/mk/libregexp/rregexec.c @@ -0,0 +1,212 @@ +#include "lib9.h" +#include "regexp9.h" +#include "regcomp.h" + +/* + * return 0 if no match + * >0 if a match + * <0 if we ran out of _relist space + */ +static int +rregexec1(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j) +{ + int flag=0; + Reinst *inst; + Relist *tlp; + Rune *s; + int i, checkstart; + Rune r, *rp, *ep; + Relist* tl; /* This 
list, next list */ + Relist* nl; + Relist* tle; /* ends of this and next list */ + Relist* nle; + int match; + Rune *p; + + match = 0; + checkstart = j->startchar; + if(mp) + for(i=0; i<ms; i++) { + mp[i].s.rsp = 0; + mp[i].e.rep = 0; + } + j->relist[0][0].inst = 0; + j->relist[1][0].inst = 0; + + /* Execute machine once for each character, including terminal NUL */ + s = j->rstarts; + do{ + + /* fast check for first char */ + if(checkstart) { + switch(j->starttype) { + case RUNE: + p = runestrchr(s, j->startchar); + if(p == 0 || p == j->reol) + return match; + s = p; + break; + case BOL: + if(s == bol) + break; + p = runestrchr(s, '\n'); + if(p == 0 || s == j->reol) + return match; + s = p+1; + break; + } + } + + r = *s; + + /* switch run lists */ + tl = j->relist[flag]; + tle = j->reliste[flag]; + nl = j->relist[flag^=1]; + nle = j->reliste[flag]; + nl->inst = 0; + + /* Add first instruction to current list */ + _rrenewemptythread(tl, progp->startinst, ms, s); + + /* Execute machine until current list is empty */ + for(tlp=tl; tlp->inst; tlp++){ + for(inst=tlp->inst; ; inst = inst->u2.next){ + switch(inst->type){ + case RUNE: /* regular character */ + if(inst->u1.r == r) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case LBRA: + tlp->se.m[inst->u1.subid].s.rsp = s; + continue; + case RBRA: + tlp->se.m[inst->u1.subid].e.rep = s; + continue; + case ANY: + if(r != '\n') + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case ANYNL: + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case BOL: + if(s == bol || *(s-1) == '\n') + continue; + break; + case EOL: + if(s == j->reol || r == 0 || r == '\n') + continue; + break; + case CCLASS: + ep = inst->u1.cp->end; + for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]){ + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + } + break; + case NCCLASS: + ep = inst->u1.cp->end; + 
for(rp = inst->u1.cp->spans; rp < ep; rp += 2) + if(r >= rp[0] && r <= rp[1]) + break; + if(rp == ep) + if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle) + return -1; + break; + case OR: + /* evaluate right choice later */ + if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle) + return -1; + /* efficiency: advance and re-evaluate */ + continue; + case END: /* Match! */ + match = 1; + tlp->se.m[0].e.rep = s; + if(mp != 0) + _renewmatch(mp, ms, &tlp->se); + break; + } + break; + } + } + if(s == j->reol) + break; + checkstart = j->startchar && nl->inst==0; + s++; + }while(r); + return match; +} + +static int +rregexec2(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms, /* number of elements at mp */ + Reljunk *j +) +{ + Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE]; + + /* mark space */ + j->relist[0] = relist0; + j->relist[1] = relist1; + j->reliste[0] = relist0 + nelem(relist0) - 2; + j->reliste[1] = relist1 + nelem(relist1) - 2; + + return rregexec1(progp, bol, mp, ms, j); +} + +extern int +rregexec(Reprog *progp, /* program to run */ + Rune *bol, /* string to run machine on */ + Resub *mp, /* subexpression elements */ + int ms) /* number of elements at mp */ +{ + Reljunk j; + Relist relist0[LISTSIZE], relist1[LISTSIZE]; + int rv; + + /* + * use user-specified starting/ending location if specified + */ + j.rstarts = bol; + j.reol = 0; + if(mp && ms>0){ + if(mp->s.sp) + j.rstarts = mp->s.rsp; + if(mp->e.ep) + j.reol = mp->e.rep; + } + j.starttype = 0; + j.startchar = 0; + if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) { + j.starttype = RUNE; + j.startchar = progp->startinst->u1.r; + } + if(progp->startinst->type == BOL) + j.starttype = BOL; + + /* mark space */ + j.relist[0] = relist0; + j.relist[1] = relist1; + j.reliste[0] = relist0 + nelem(relist0) - 2; + j.reliste[1] = relist1 + nelem(relist1) - 2; + + rv = rregexec1(progp, bol, mp, ms, &j); + 
if(rv >= 0) + return rv; + rv = rregexec2(progp, bol, mp, ms, &j); + if(rv >= 0) + return rv; + return -1; +} diff --git a/mk/libregexp/rregsub.c b/mk/libregexp/rregsub.c @@ -0,0 +1,63 @@ +#include "lib9.h" +#include "regexp9.h" + +/* substitute into one Rune string using the matches from the last rregexec() */ +extern void +rregsub(Rune *sp, /* source string */ + Rune *dp, /* destination string */ + int dlen, + Resub *mp, /* subexpression elements */ + int ms) /* number of elements pointed to by mp */ +{ + Rune *ssp, *ep; + int i; + + ep = dp+(dlen/sizeof(Rune))-1; + while(*sp != '\0'){ + if(*sp == '\\'){ + switch(*++sp){ + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + i = *sp-'0'; + if(mp!=0 && ms>i && mp[i].s.rsp != 0) /* validate mp and ms before indexing mp[i] */ + for(ssp = mp[i].s.rsp; + ssp < mp[i].e.rep; + ssp++) + if(dp < ep) + *dp++ = *ssp; + break; + case '\\': + if(dp < ep) + *dp++ = '\\'; + break; + case '\0': + sp--; + break; + default: + if(dp < ep) + *dp++ = *sp; + break; + } + }else if(*sp == '&'){ + if(mp!=0 && ms>0) /* validate mp and ms before reading mp[0] */ + if(mp[0].s.rsp != 0) + for(ssp = mp[0].s.rsp; + ssp < mp[0].e.rep; ssp++) + if(dp < ep) + *dp++ = *ssp; + }else{ + if(dp < ep) + *dp++ = *sp; + } + sp++; + } + *dp = '\0'; +} diff --git a/mk/libregexp/test.c b/mk/libregexp/test.c @@ -0,0 +1,46 @@ +#include "lib9.h" +#include <regexp9.h> + +struct x +{ + char *re; + char *s; + Reprog *p; +}; + +struct x t[] = { + { "^[^!@]+$", "/bin/upas/aliasmail '&'", 0 }, + { "^local!(.*)$", "/mail/box/\\1/mbox", 0 }, + { "^plan9!(.*)$", "\\1", 0 }, + { "^helix!(.*)$", "\\1", 0 }, + { "^([^!]+)@([^!@]+)$", "\\2!\\1", 0 }, + { "^(uk\\.[^!]*)(!.*)$", "/bin/upas/uk2uk '\\1' '\\2'", 0 }, + { "^[^!]*\\.[^!]*!.*$", "inet!&", 0 }, + { "^\xE2\x98\xBA$", "smiley", 0 }, + { "^(coma|research|pipe|pyxis|inet|hunny|gauss)!(.*)$", "/mail/lib/qmail '\\s' 'net!\\1' '\\2'", 0 }, + { "^.*$", "/mail/lib/qmail '\\s' 'net!research' '&'", 0 }, + { 0, 0, 0 }, +}; + +main(int ac, 
char **av) +{ + Resub rs[10]; + char dst[128]; + int n; + struct x *tp; + + for(tp = t; tp->re; tp++) + tp->p = regcomp(tp->re); + + + for(tp = t; tp->re; tp++){ + print("%s VIA %s", av[1], tp->re); + memset(rs, 0, sizeof rs); + if(regexec(tp->p, av[1], rs, 10)){ + regsub(tp->s, dst, sizeof dst, rs, 10); + print(" sub %s -> %s", tp->s, dst); + } + print("\n"); + } + exit(0); +} diff --git a/mk/libregexp/test2.c b/mk/libregexp/test2.c @@ -0,0 +1,20 @@ +#include "lib9.h" +#include <regexp9.h> + +main(int ac, char **av) +{ + Resub rs[10]; + Reprog *p; + char *s; + + p = regcomp("[^a-z]"); + s = "\n"; + memset(rs, 0, sizeof rs); /* regexec reads rs[0] as start/end hints on entry */ + if(regexec(p, s, rs, 10)) + print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); + s = "0"; + memset(rs, 0, sizeof rs); /* drop pointers left over from the previous string */ + if(regexec(p, s, rs, 10)) + print("%s %lux %lux %lux\n", s, s, rs[0].s.sp, rs[0].e.ep); + exit(0); +} diff --git a/mk/libutf/NOTICE b/mk/libutf/NOTICE @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. +*/ + +This is a Unix port of the Plan 9 formatted I/O package. + +Please send comments about the packaging +to Russ Cox <rsc@swtch.com>. 
+ + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/libutf/README b/mk/libutf/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. + +http://swtch.com/plan9port/unix + diff --git a/mk/libutf/depsinc.mk b/mk/libutf/depsinc.mk @@ -0,0 +1,2 @@ +DEPS_CFLAGS = $DEPS_CFLAGS -I$libutf_DEPDIR +DEPS_LDFLAGS = $DEPS_LDFLAGS -L$libutf_DEPDIR -lutf diff --git a/mk/libutf/isalpharune.3 b/mk/libutf/isalpharune.3 @@ -0,0 +1,57 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH ISALPHARUNE 3 +.SH NAME +isalpharune, islowerrune, isspacerune, istitlerune, isupperrune, tolowerrune, totitlerune, toupperrune \- Unicode character classes and cases +.SH SYNOPSIS +.B #include <utf.h> +.PP +.B +int isalpharune(Rune c) +.PP +.B +int islowerrune(Rune c) +.PP +.B +int isspacerune(Rune c) +.PP +.B +int istitlerune(Rune c) +.PP +.B +int isupperrune(Rune c) +.PP +.B +Rune tolowerrune(Rune c) +.PP +.B +Rune totitlerune(Rune c) +.PP +.B +Rune toupperrune(Rune c) +.SH DESCRIPTION +These routines examine and operate on Unicode characters, +in particular a subset of their properties as defined in the Unicode standard. +Unicode defines some characters as alphabetic and specifies three cases: +upper, lower, and title. +Analogously to +.IR isalpha (3) +for +.SM ASCII\c +, +these routines +test types and modify cases for Unicode characters. +The names are self-explanatory. +.PP +The case-conversion routines return the character unchanged if it has no case. +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH "SEE ALSO +.IR isalpha (3) , +.IR "The Unicode Standard" . 
diff --git a/mk/libutf/mkfile b/mk/libutf/mkfile @@ -0,0 +1,27 @@ +LIB = libutf.a +LOBJ = \ + rune.o\ + runestrcat.o\ + runestrchr.o\ + runestrcmp.o\ + runestrcpy.o\ + runestrdup.o\ + runestrlen.o\ + runestrecpy.o\ + runestrncat.o\ + runestrncmp.o\ + runestrncpy.o\ + runestrrchr.o\ + runestrstr.o\ + runetype.o\ + utfecpy.o\ + utflen.o\ + utfnlen.o\ + utfrrune.o\ + utfrune.o\ + utfutf.o\ + +LOCAL_CFLAGS = -I"$PREFIX"/include + +<$mkbuild/mk.default + diff --git a/mk/libutf/plan9.h b/mk/libutf/plan9.h @@ -0,0 +1,29 @@ +/* + * compiler directive on Plan 9 + */ +#ifndef USED +#define USED(x) if(x);else +#endif + +/* + * easiest way to make sure these are defined + */ +#define uchar _utfuchar +#define ushort _utfushort +#define uint _utfuint +#define ulong _utfulong +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; + +/* + * nil cannot be ((void*)0) on ANSI C, + * because it is used for function pointers + */ +#undef nil +#define nil 0 + +#undef nelem +#define nelem(x) (sizeof (x)/sizeof (x)[0]) + diff --git a/mk/libutf/rune.3 b/mk/libutf/rune.3 @@ -0,0 +1,194 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH RUNE 3 +.SH NAME +runetochar, chartorune, runelen, runenlen, fullrune, utfecpy, utflen, utfnlen, utfrune, utfrrune, utfutf \- rune/UTF conversion +.SH SYNOPSIS +.ta \w'\fLchar*xx'u +.B #include <utf.h> +.PP +.B +int runetochar(char *s, Rune *r) +.PP +.B +int chartorune(Rune *r, char *s) +.PP +.B +int runelen(long r) +.PP +.B +int runenlen(Rune *r, int n) +.PP +.B +int fullrune(char *s, int n) +.PP +.B +char* utfecpy(char *s1, char *es1, char *s2) +.PP +.B +int utflen(char *s) +.PP +.B +int utfnlen(char *s, long n) +.PP +.B +char* utfrune(char *s, long c) +.PP +.B +char* utfrrune(char *s, long c) +.PP +.B +char* utfutf(char *s1, char *s2) +.SH DESCRIPTION +These routines convert to and from a +.SM UTF +byte stream and runes. 
+.PP +.I Runetochar +copies one rune at +.I r +to at most +.B UTFmax +bytes starting at +.I s +and returns the number of bytes copied. +.BR UTFmax , +defined as +.B 3 +in +.BR <libc.h> , +is the maximum number of bytes required to represent a rune. +.PP +.I Chartorune +copies at most +.B UTFmax +bytes starting at +.I s +to one rune at +.I r +and returns the number of bytes copied. +If the input is not exactly in +.SM UTF +format, +.I chartorune +will convert to 0x80 and return 1. +.PP +.I Runelen +returns the number of bytes +required to convert +.I r +into +.SM UTF. +.PP +.I Runenlen +returns the number of bytes +required to convert the +.I n +runes pointed to by +.I r +into +.SM UTF. +.PP +.I Fullrune +returns 1 if the string +.I s +of length +.I n +is long enough to be decoded by +.I chartorune +and 0 otherwise. +This does not guarantee that the string +contains a legal +.SM UTF +encoding. +This routine is used by programs that +obtain input a byte at +a time and need to know when a full rune +has arrived. +.PP +The following routines are analogous to the +corresponding string routines with +.B utf +substituted for +.B str +and +.B rune +substituted for +.BR chr . +.PP +.I Utfecpy +copies UTF sequences until a null sequence has been copied, but writes no +sequences beyond +.IR es1 . +If any sequences are copied, +.I s1 +is terminated by a null sequence, and a pointer to that sequence is returned. +Otherwise, the original +.I s1 +is returned. +.PP +.I Utflen +returns the number of runes that +are represented by the +.SM UTF +string +.IR s . +.PP +.I Utfnlen +returns the number of complete runes that +are represented by the first +.I n +bytes of +.SM UTF +string +.IR s . +If the last few bytes of the string contain an incompletely coded rune, +.I utfnlen +will not count them; in this way, it differs from +.IR utflen , +which includes every byte of the string. 
+.PP +.I Utfrune +.RI ( utfrrune ) +returns a pointer to the first (last) +occurrence of rune +.I c +in the +.SM UTF +string +.IR s , +or 0 if +.I c +does not occur in the string. +The NUL byte terminating a string is considered to +be part of the string +.IR s . +.PP +.I Utfutf +returns a pointer to the first occurrence of +the +.SM UTF +string +.I s2 +as a +.SM UTF +substring of +.IR s1 , +or 0 if there is none. +If +.I s2 +is the null string, +.I utfutf +returns +.IR s1 . +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR utf (7), +.IR tcs (1) diff --git a/mk/libutf/rune.c b/mk/libutf/rune.c @@ -0,0 +1,217 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +enum +{ + Bit1 = 7, + Bitx = 6, + Bit2 = 5, + Bit3 = 4, + Bit4 = 3, + Bit5 = 2, + + T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ + Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ + T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ + T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ + T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ + T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ + + Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ + Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ + Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ + Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ + + Maskx = (1<<Bitx)-1, /* 0011 1111 */ + Testx = Maskx ^ 0xFF, /* 1100 0000 */ + + Bad = Runeerror +}; + +int +chartorune(Rune *rune, char *str) +{ + int c, c1, c2, c3; + long l; + + /* + * one character sequence + * 00000-0007F => T1 + */ + c = *(uchar*)str; + if(c < Tx) { + *rune = c; + return 1; + } + + /* + * two character sequence + * 0080-07FF => T2 Tx + */ + c1 = *(uchar*)(str+1) ^ Tx; + if(c1 & Testx) + goto bad; + if(c < T3) { + if(c < T2) + goto bad; + l = ((c << Bitx) | c1) & Rune2; + if(l <= Rune1) + goto bad; + *rune = l; + return 2; + } + + /* + * three character sequence + * 0800-FFFF => T3 Tx Tx + */ + c2 = *(uchar*)(str+2) ^ Tx; + if(c2 & Testx) + goto bad; + if(c < T4) { + l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; + if(l <= Rune2) + goto bad; + *rune = l; + return 3; + } + + /* + * four character sequence + * 10000-10FFFF => T4 Tx Tx Tx + */ + if(UTFmax >= 4) { + c3 = *(uchar*)(str+3) ^ Tx; + if(c3 & Testx) + goto bad; + if(c < T5) { + l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; + if(l <= Rune3) + goto bad; + if(l > Runemax) + goto bad; + *rune = l; + return 4; + } + } + + /* + * bad decoding + */ +bad: + *rune = Bad; + return 1; +} + +int +runetochar(char *str, Rune *rune) +{ + long c; + + /* 
+ * one character sequence + * 00000-0007F => 00-7F + */ + c = *rune; + if(c <= Rune1) { + str[0] = c; + return 1; + } + + /* + * two character sequence + * 00080-007FF => T2 Tx + */ + if(c <= Rune2) { + str[0] = T2 | (c >> 1*Bitx); + str[1] = Tx | (c & Maskx); + return 2; + } + + /* + * three character sequence + * 00800-0FFFF => T3 Tx Tx + */ + if(c > Runemax) + c = Runeerror; + if(c <= Rune3) { + str[0] = T3 | (c >> 2*Bitx); + str[1] = Tx | ((c >> 1*Bitx) & Maskx); + str[2] = Tx | (c & Maskx); + return 3; + } + + /* + * four character sequence + * 010000-1FFFFF => T4 Tx Tx Tx + */ + str[0] = T4 | (c >> 3*Bitx); + str[1] = Tx | ((c >> 2*Bitx) & Maskx); + str[2] = Tx | ((c >> 1*Bitx) & Maskx); + str[3] = Tx | (c & Maskx); + return 4; +} + +int +runelen(long c) +{ + Rune rune; + char str[10]; + + rune = c; + return runetochar(str, &rune); +} + +int +runenlen(Rune *r, int nrune) +{ + int nb, c; + + nb = 0; + while(nrune--) { + c = *r++; + if(c <= Rune1) + nb++; + else + if(c <= Rune2) + nb += 2; + else + if(c <= Rune3 || c > Runemax) + nb += 3; + else + nb += 4; + } + return nb; +} + +int +fullrune(char *str, int n) +{ + int c; + + if(n <= 0) + return 0; + c = *(uchar*)str; + if(c < Tx) + return 1; + if(c < T3) + return n >= 2; + if(UTFmax == 3 || c < T4) + return n >= 3; + return n >= 4; +} diff --git a/mk/libutf/runestrcat.3 b/mk/libutf/runestrcat.3 @@ -0,0 +1,74 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. 
+.TH RUNESTRCAT 3 +.SH NAME +runestrcat, +runestrncat, +runestrcmp, +runestrncmp, +runestrcpy, +runestrncpy, +runestrecpy, +runestrlen, +runestrchr, +runestrrchr, +runestrdup, +runestrstr \- rune string operations +.SH SYNOPSIS +.B #include <u.h> +.br +.B #include <libc.h> +.PP +.ta \w'\fLRune* \fP'u +.B +Rune* runestrcat(Rune *s1, Rune *s2) +.PP +.B +Rune* runestrncat(Rune *s1, Rune *s2, long n) +.PP +.B +int runestrcmp(Rune *s1, Rune *s2) +.PP +.B +int runestrncmp(Rune *s1, Rune *s2, long n) +.PP +.B +Rune* runestrcpy(Rune *s1, Rune *s2) +.PP +.B +Rune* runestrncpy(Rune *s1, Rune *s2, long n) +.PP +.B +Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2) +.PP +.B +long runestrlen(Rune *s) +.PP +.B +Rune* runestrchr(Rune *s, Rune c) +.PP +.B +Rune* runestrrchr(Rune *s, Rune c) +.PP +.B +Rune* runestrdup(Rune *s) +.PP +.B +Rune* runestrstr(Rune *s1, Rune *s2) +.SH DESCRIPTION +These functions are rune string analogues of +the corresponding functions in +.IR strcat (3). +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR rune (3), +.IR strcat (3) +.SH BUGS +The outcome of overlapping moves varies among implementations. diff --git a/mk/libutf/runestrcat.c b/mk/libutf/runestrcat.c @@ -0,0 +1,25 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrcat: appends the zero-terminated rune string s2 to the end of s1 and returns s1. The end of s1 is found with runestrchr(s1, 0) and the copy is done by runestrcpy; the capacity of s1 is not checked. */ +Rune* +runestrcat(Rune *s1, Rune *s2) +{ + + runestrcpy(runestrchr(s1, 0), s2); + return s1; +} diff --git a/mk/libutf/runestrchr.c b/mk/libutf/runestrchr.c @@ -0,0 +1,35 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrchr: returns a pointer to the first rune equal to c in the zero-terminated string s, or 0 if c does not occur. Searching for c == 0 returns a pointer to the terminator itself. */ +Rune* +runestrchr(Rune *s, Rune c) +{ + Rune c0 = c; + Rune c1; + /* the terminator is handled separately because the scan loop below stops before comparing it */ + if(c == 0) { + while(*s++) + ; + return s-1; + } + + while(c1 = *s++) + if(c1 == c0) + return s-1; + return 0; +} diff --git a/mk/libutf/runestrcmp.c b/mk/libutf/runestrcmp.c @@ -0,0 +1,35 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 
IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrcmp: compares the zero-terminated rune strings s1 and s2 rune by rune. Returns 0 when both end without a difference, 1 when the first differing rune of s1 is greater than that of s2, and -1 otherwise. */ +int +runestrcmp(Rune *s1, Rune *s2) +{ + Rune c1, c2; + + for(;;) { + c1 = *s1++; + c2 = *s2++; + if(c1 != c2) { + if(c1 > c2) + return 1; + return -1; + } + if(c1 == 0) + return 0; + } +} diff --git a/mk/libutf/runestrcpy.c b/mk/libutf/runestrcpy.c @@ -0,0 +1,28 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrcpy: copies s2, including its terminating zero rune, into s1 and returns the original s1. The size of the destination is not checked. */ +Rune* +runestrcpy(Rune *s1, Rune *s2) +{ + Rune *os1; + + os1 = s1; + while(*s1++ = *s2++) + ; + return os1; +} diff --git a/mk/libutf/runestrdup.c b/mk/libutf/runestrdup.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include <stdlib.h> +#include "plan9.h" +#include "utf.h" +/* runestrdup: returns a copy of the zero-terminated rune string s in memory obtained from malloc, sized for runestrlen(s) runes plus the terminator, or 0 if malloc fails. The buffer is not released here; freeing it is left to the caller. */ +Rune* +runestrdup(Rune *s) +{ + Rune *ns; + + ns = malloc(sizeof(Rune)*(runestrlen(s) + 1)); + if(ns == 0) + return 0; + /* runestrcpy returns its destination, so this yields ns */ + return runestrcpy(ns, s); +} diff --git a/mk/libutf/runestrecpy.c b/mk/libutf/runestrecpy.c @@ -0,0 +1,32 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrecpy: copies s2 into s1 without writing at or beyond es1. If s1 >= es1 nothing is written and s1 is returned. If the copy reaches es1 before s2 ends, the last slot is overwritten with a zero rune and a pointer to that slot (es1-1) is returned. NOTE(review): when s2 fits, the loop exits after the post-increment, so the returned pointer is one past the copied terminator rather than at it; confirm that callers expect this. */ +Rune* +runestrecpy(Rune *s1, Rune *es1, Rune *s2) +{ + if(s1 >= es1) + return s1; + + while(*s1++ = *s2++){ + if(s1 == es1){ + *--s1 = '\0'; + break; + } + } + return s1; +} diff --git a/mk/libutf/runestrlen.c b/mk/libutf/runestrlen.c @@ -0,0 +1,24 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrlen: number of runes in s before its terminating zero rune, computed as the distance from s to the terminator located by runestrchr(s, 0). */ +long +runestrlen(Rune *s) +{ + + return runestrchr(s, 0) - s; +} diff --git a/mk/libutf/runestrncat.c b/mk/libutf/runestrncat.c @@ -0,0 +1,32 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. 
IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrncat: appends at most n runes of s2 to the end of s1, always leaves the result zero-terminated, and returns s1. Each rune is stored before the count is tested, so the rune that exhausts n is overwritten with the terminator; s1 therefore needs room for its current length plus n runes plus the terminator. */ +Rune* +runestrncat(Rune *s1, Rune *s2, long n) +{ + Rune *os1; + + os1 = s1; + s1 = runestrchr(s1, 0); + while(*s1++ = *s2++) + if(--n < 0) { + s1[-1] = 0; + break; + } + return os1; +} diff --git a/mk/libutf/runestrncmp.c b/mk/libutf/runestrncmp.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrncmp: compares at most n runes of s1 and s2, stopping early at a zero rune present in both. Returns 0 if no difference was found (including when n <= 0), 1 if the first differing rune of s1 is greater, and -1 otherwise. */ +int +runestrncmp(Rune *s1, Rune *s2, long n) +{ + Rune c1, c2; + + while(n > 0) { + c1 = *s1++; + c2 = *s2++; + n--; + if(c1 != c2) { + if(c1 > c2) + return 1; + return -1; + } + if(c1 == 0) + break; + } + return 0; +} diff --git a/mk/libutf/runestrncpy.c b/mk/libutf/runestrncpy.c @@ -0,0 +1,33 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrncpy: copies up to n runes from s2 into s1 and returns the original s1. If s2 ends within the first n runes, the remaining slots up to n are filled with zero runes; otherwise exactly n runes are copied and no terminator is written. The index has the same type as n (long) so that it cannot overflow before reaching n. */ +Rune* +runestrncpy(Rune *s1, Rune *s2, long n) +{ + long i; + Rune *os1; + + os1 = s1; + for(i = 0; i < n; i++) + if((*s1++ = *s2++) == 0) { + while(++i < n) + *s1++ = 0; + return os1; + } + return os1; +} diff --git a/mk/libutf/runestrrchr.c b/mk/libutf/runestrrchr.c @@ -0,0 +1,30 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" +/* runestrrchr: returns a pointer to the last rune equal to c in the zero-terminated string s, or 0 if c does not occur. It calls runestrchr repeatedly, remembering the most recent hit and resuming one rune after it; c == 0 yields the terminator. */ +Rune* +runestrrchr(Rune *s, Rune c) +{ + Rune *r; + + if(c == 0) + return runestrchr(s, 0); + r = 0; + while(s = runestrchr(s, c)) + r = s++; + return r; +} diff --git a/mk/libutf/runestrstr.c b/mk/libutf/runestrstr.c @@ -0,0 +1,44 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +/* + * Return pointer to first occurrence of s2 in s1, + * 0 if none; an empty s2 matches at s1 itself + */ +Rune* +runestrstr(Rune *s1, Rune *s2) +{ + Rune *p, *pa, *pb; + int c0, c; + /* c0 is the first rune of s2; s2 is then advanced so the inner loop compares only the remainder against the runes after each candidate p */ + c0 = *s2; + if(c0 == 0) + return s1; + s2++; + for(p=runestrchr(s1, c0); p; p=runestrchr(p+1, c0)) { + pa = p; + for(pb=s2;; pb++) { + c = *pb; + if(c == 0) + return p; + if(c != *++pa) + break; + } + } + return 0; +} diff --git a/mk/libutf/runetype.c b/mk/libutf/runetype.c @@ -0,0 +1,1151 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +/* + * alpha ranges - + * only covers ranges not in lower||upper + */ +static +Rune __alpha2[] = +{ + 0x00d8, 0x00f6, /* Ø - ö */ + 0x00f8, 0x01f5, /* ø - ǵ */ + 0x0250, 0x02a8, /* ɐ - ʨ */ + 0x038e, 0x03a1, /* Ύ - Ρ */ + 0x03a3, 0x03ce, /* Σ - ώ */ + 0x03d0, 0x03d6, /* ϐ - ϖ */ + 0x03e2, 0x03f3, /* Ϣ - ϳ */ + 0x0490, 0x04c4, /* Ґ - ӄ */ + 0x0561, 0x0587, /* ա - և */ + 0x05d0, 0x05ea, /* א - ת */ + 0x05f0, 0x05f2, /* װ - ײ */ + 0x0621, 0x063a, /* ء - غ */ + 0x0640, 0x064a, /* ـ - ي */ + 0x0671, 0x06b7, /* ٱ - ڷ */ + 0x06ba, 0x06be, /* ں - ھ */ + 0x06c0, 0x06ce, /* ۀ - ێ */ + 0x06d0, 0x06d3, /* ې - ۓ */ + 0x0905, 0x0939, /* अ - ह */ + 0x0958, 0x0961, /* क़ - ॡ */ + 0x0985, 0x098c, /* অ - ঌ */ + 0x098f, 0x0990, /* এ - ঐ */ + 0x0993, 0x09a8, /* ও - ন */ + 0x09aa, 0x09b0, /* প - র */ + 0x09b6, 0x09b9, /* শ - হ */ + 0x09dc, 0x09dd, /* ড় - ঢ় */ + 0x09df, 0x09e1, /* য় - ৡ */ + 0x09f0, 0x09f1, /* ৰ - ৱ */ + 0x0a05, 0x0a0a, /* ਅ - ਊ */ + 0x0a0f, 0x0a10, /* ਏ - ਐ */ + 0x0a13, 0x0a28, /* ਓ - ਨ */ + 0x0a2a, 0x0a30, /* ਪ - ਰ */ + 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ + 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ + 0x0a38, 0x0a39, /* ਸ - ਹ */ + 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ + 0x0a85, 0x0a8b, /* અ - ઋ */ + 0x0a8f, 0x0a91, /* એ - ઑ */ + 0x0a93, 0x0aa8, /* ઓ - ન */ + 0x0aaa, 0x0ab0, /* પ - ર */ + 
0x0ab2, 0x0ab3, /* લ - ળ */ + 0x0ab5, 0x0ab9, /* વ - હ */ + 0x0b05, 0x0b0c, /* ଅ - ଌ */ + 0x0b0f, 0x0b10, /* ଏ - ଐ */ + 0x0b13, 0x0b28, /* ଓ - ନ */ + 0x0b2a, 0x0b30, /* ପ - ର */ + 0x0b32, 0x0b33, /* ଲ - ଳ */ + 0x0b36, 0x0b39, /* ଶ - ହ */ + 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ + 0x0b5f, 0x0b61, /* ୟ - ୡ */ + 0x0b85, 0x0b8a, /* அ - ஊ */ + 0x0b8e, 0x0b90, /* எ - ஐ */ + 0x0b92, 0x0b95, /* ஒ - க */ + 0x0b99, 0x0b9a, /* ங - ச */ + 0x0b9e, 0x0b9f, /* ஞ - ட */ + 0x0ba3, 0x0ba4, /* ண - த */ + 0x0ba8, 0x0baa, /* ந - ப */ + 0x0bae, 0x0bb5, /* ம - வ */ + 0x0bb7, 0x0bb9, /* ஷ - ஹ */ + 0x0c05, 0x0c0c, /* అ - ఌ */ + 0x0c0e, 0x0c10, /* ఎ - ఐ */ + 0x0c12, 0x0c28, /* ఒ - న */ + 0x0c2a, 0x0c33, /* ప - ళ */ + 0x0c35, 0x0c39, /* వ - హ */ + 0x0c60, 0x0c61, /* ౠ - ౡ */ + 0x0c85, 0x0c8c, /* ಅ - ಌ */ + 0x0c8e, 0x0c90, /* ಎ - ಐ */ + 0x0c92, 0x0ca8, /* ಒ - ನ */ + 0x0caa, 0x0cb3, /* ಪ - ಳ */ + 0x0cb5, 0x0cb9, /* ವ - ಹ */ + 0x0ce0, 0x0ce1, /* ೠ - ೡ */ + 0x0d05, 0x0d0c, /* അ - ഌ */ + 0x0d0e, 0x0d10, /* എ - ഐ */ + 0x0d12, 0x0d28, /* ഒ - ന */ + 0x0d2a, 0x0d39, /* പ - ഹ */ + 0x0d60, 0x0d61, /* ൠ - ൡ */ + 0x0e01, 0x0e30, /* ก - ะ */ + 0x0e32, 0x0e33, /* า - ำ */ + 0x0e40, 0x0e46, /* เ - ๆ */ + 0x0e5a, 0x0e5b, /* ๚ - ๛ */ + 0x0e81, 0x0e82, /* ກ - ຂ */ + 0x0e87, 0x0e88, /* ງ - ຈ */ + 0x0e94, 0x0e97, /* ດ - ທ */ + 0x0e99, 0x0e9f, /* ນ - ຟ */ + 0x0ea1, 0x0ea3, /* ມ - ຣ */ + 0x0eaa, 0x0eab, /* ສ - ຫ */ + 0x0ead, 0x0eae, /* ອ - ຮ */ + 0x0eb2, 0x0eb3, /* າ - ຳ */ + 0x0ec0, 0x0ec4, /* ເ - ໄ */ + 0x0edc, 0x0edd, /* ໜ - ໝ */ + 0x0f18, 0x0f19, /* ༘ - ༙ */ + 0x0f40, 0x0f47, /* ཀ - ཇ */ + 0x0f49, 0x0f69, /* ཉ - ཀྵ */ + 0x10d0, 0x10f6, /* ა - ჶ */ + 0x1100, 0x1159, /* ᄀ - ᅙ */ + 0x115f, 0x11a2, /* ᅟ - ᆢ */ + 0x11a8, 0x11f9, /* ᆨ - ᇹ */ + 0x1e00, 0x1e9b, /* Ḁ - ẛ */ + 0x1f50, 0x1f57, /* ὐ - ὗ */ + 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ + 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ + 0x1fc2, 0x1fc4, /* ῂ - ῄ */ + 0x1fc6, 0x1fcc, /* ῆ - ῌ */ + 0x1fd0, 0x1fd3, /* ῐ - ΐ */ + 0x1fd6, 0x1fdb, /* ῖ - Ί */ + 0x1fe0, 0x1fec, /* ῠ - Ῥ */ + 0x1ff2, 0x1ff4, 
/* ῲ - ῴ */ + 0x1ff6, 0x1ffc, /* ῶ - ῼ */ + 0x210a, 0x2113, /* ℊ - ℓ */ + 0x2115, 0x211d, /* ℕ - ℝ */ + 0x2120, 0x2122, /* ℠ - ™ */ + 0x212a, 0x2131, /* K - ℱ */ + 0x2133, 0x2138, /* ℳ - ℸ */ + 0x3041, 0x3094, /* ぁ - ゔ */ + 0x30a1, 0x30fa, /* ァ - ヺ */ + 0x3105, 0x312c, /* ㄅ - ㄬ */ + 0x3131, 0x318e, /* ㄱ - ㆎ */ + 0x3192, 0x319f, /* ㆒ - ㆟ */ + 0x3260, 0x327b, /* ㉠ - ㉻ */ + 0x328a, 0x32b0, /* ㊊ - ㊰ */ + 0x32d0, 0x32fe, /* ㋐ - ㋾ */ + 0x3300, 0x3357, /* ㌀ - ㍗ */ + 0x3371, 0x3376, /* ㍱ - ㍶ */ + 0x337b, 0x3394, /* ㍻ - ㎔ */ + 0x3399, 0x339e, /* ㎙ - ㎞ */ + 0x33a9, 0x33ad, /* ㎩ - ㎭ */ + 0x33b0, 0x33c1, /* ㎰ - ㏁ */ + 0x33c3, 0x33c5, /* ㏃ - ㏅ */ + 0x33c7, 0x33d7, /* ㏇ - ㏗ */ + 0x33d9, 0x33dd, /* ㏙ - ㏝ */ + 0x4e00, 0x9fff, /* 一 - 鿿 */ + 0xac00, 0xd7a3, /* 가 - 힣 */ + 0xf900, 0xfb06, /* 豈 - st */ + 0xfb13, 0xfb17, /* ﬓ - ﬗ */ + 0xfb1f, 0xfb28, /* ײַ - ﬨ */ + 0xfb2a, 0xfb36, /* שׁ - זּ */ + 0xfb38, 0xfb3c, /* טּ - לּ */ + 0xfb40, 0xfb41, /* נּ - סּ */ + 0xfb43, 0xfb44, /* ףּ - פּ */ + 0xfb46, 0xfbb1, /* צּ - ﮱ */ + 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ + 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ + 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ + 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ + 0xfe70, 0xfe72, /* ﹰ - ﹲ */ + 0xfe76, 0xfefc, /* ﹶ - ﻼ */ + 0xff66, 0xff6f, /* ヲ - ッ */ + 0xff71, 0xff9d, /* ア - ン */ + 0xffa0, 0xffbe, /* ᅠ - ᄒ */ + 0xffc2, 0xffc7, /* ᅡ - ᅦ */ + 0xffca, 0xffcf, /* ᅧ - ᅬ */ + 0xffd2, 0xffd7, /* ᅭ - ᅲ */ + 0xffda, 0xffdc, /* ᅳ - ᅵ */ +}; + +/* + * alpha singlets - + * only covers ranges not in lower||upper + */ +static +Rune __alpha1[] = +{ + 0x00aa, /* ª */ + 0x00b5, /* µ */ + 0x00ba, /* º */ + 0x03da, /* Ϛ */ + 0x03dc, /* Ϝ */ + 0x03de, /* Ϟ */ + 0x03e0, /* Ϡ */ + 0x06d5, /* ە */ + 0x09b2, /* ল */ + 0x0a5e, /* ਫ਼ */ + 0x0a8d, /* ઍ */ + 0x0ae0, /* ૠ */ + 0x0b9c, /* ஜ */ + 0x0cde, /* ೞ */ + 0x0e4f, /* ๏ */ + 0x0e84, /* ຄ */ + 0x0e8a, /* ຊ */ + 0x0e8d, /* ຍ */ + 0x0ea5, /* ລ */ + 0x0ea7, /* ວ */ + 0x0eb0, /* ະ */ + 0x0ebd, /* ຽ */ + 0x1fbe, /* ι */ + 0x207f, /* ⁿ */ + 0x20a8, /* ₨ */ + 0x2102, /* ℂ */ + 0x2107, /* ℇ */ 
+ 0x2124, /* ℤ */ + 0x2126, /* Ω */ + 0x2128, /* ℨ */ + 0xfb3e, /* מּ */ + 0xfe74, /* ﹴ */ +}; + +/* + * space ranges + */ +static +Rune __space2[] = +{ + 0x0009, 0x000a, /* tab and newline */ + 0x0020, 0x0020, /* space */ + 0x00a0, 0x00a0, /*   */ + 0x2000, 0x200b, /*   - ​ */ + 0x2028, 0x2029, /* 
 - 
 */ + 0x3000, 0x3000, /*   */ + 0xfeff, 0xfeff, /*  */ +}; + +/* + * lower case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __toupper2[] = +{ + 0x0061, 0x007a, 468, /* a-z A-Z */ + 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ + 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ + 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ + 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ + 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ + 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ + 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ + 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ + 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ + 0x0430, 0x044f, 468, /* а-я А-Я */ + 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ + 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ + 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ + 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ + 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ + 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ + 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ + 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ + 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ + 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ + 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ + 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ + 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ + 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ + 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ + 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ + 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ + 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ + 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ + 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ + 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ + 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ + 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ + 0xff41, 0xff5a, 468, /* a-z A-Z */ +}; + +/* + * lower case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __toupper1[] = +{ + 0x00ff, 621, /* ÿ Ÿ */ + 0x0101, 499, /* ā Ā */ + 0x0103, 499, /* ă Ă */ + 0x0105, 499, /* ą Ą */ + 0x0107, 499, /* ć Ć */ + 0x0109, 499, /* ĉ Ĉ */ + 0x010b, 499, /* ċ Ċ */ + 0x010d, 499, /* č Č */ + 0x010f, 499, /* ď Ď */ + 0x0111, 499, /* đ Đ */ + 0x0113, 499, /* ē Ē */ + 0x0115, 499, /* ĕ Ĕ */ + 0x0117, 499, /* ė Ė */ + 0x0119, 499, /* ę Ę */ + 0x011b, 499, /* ě Ě */ + 0x011d, 499, /* ĝ Ĝ */ + 0x011f, 499, /* ğ Ğ */ + 0x0121, 
499, /* ġ Ġ */ + 0x0123, 499, /* ģ Ģ */ + 0x0125, 499, /* ĥ Ĥ */ + 0x0127, 499, /* ħ Ħ */ + 0x0129, 499, /* ĩ Ĩ */ + 0x012b, 499, /* ī Ī */ + 0x012d, 499, /* ĭ Ĭ */ + 0x012f, 499, /* į Į */ + 0x0131, 268, /* ı I */ + 0x0133, 499, /* ij IJ */ + 0x0135, 499, /* ĵ Ĵ */ + 0x0137, 499, /* ķ Ķ */ + 0x013a, 499, /* ĺ Ĺ */ + 0x013c, 499, /* ļ Ļ */ + 0x013e, 499, /* ľ Ľ */ + 0x0140, 499, /* ŀ Ŀ */ + 0x0142, 499, /* ł Ł */ + 0x0144, 499, /* ń Ń */ + 0x0146, 499, /* ņ Ņ */ + 0x0148, 499, /* ň Ň */ + 0x014b, 499, /* ŋ Ŋ */ + 0x014d, 499, /* ō Ō */ + 0x014f, 499, /* ŏ Ŏ */ + 0x0151, 499, /* ő Ő */ + 0x0153, 499, /* œ Œ */ + 0x0155, 499, /* ŕ Ŕ */ + 0x0157, 499, /* ŗ Ŗ */ + 0x0159, 499, /* ř Ř */ + 0x015b, 499, /* ś Ś */ + 0x015d, 499, /* ŝ Ŝ */ + 0x015f, 499, /* ş Ş */ + 0x0161, 499, /* š Š */ + 0x0163, 499, /* ţ Ţ */ + 0x0165, 499, /* ť Ť */ + 0x0167, 499, /* ŧ Ŧ */ + 0x0169, 499, /* ũ Ũ */ + 0x016b, 499, /* ū Ū */ + 0x016d, 499, /* ŭ Ŭ */ + 0x016f, 499, /* ů Ů */ + 0x0171, 499, /* ű Ű */ + 0x0173, 499, /* ų Ų */ + 0x0175, 499, /* ŵ Ŵ */ + 0x0177, 499, /* ŷ Ŷ */ + 0x017a, 499, /* ź Ź */ + 0x017c, 499, /* ż Ż */ + 0x017e, 499, /* ž Ž */ + 0x017f, 200, /* ſ S */ + 0x0183, 499, /* ƃ Ƃ */ + 0x0185, 499, /* ƅ Ƅ */ + 0x0188, 499, /* ƈ Ƈ */ + 0x018c, 499, /* ƌ Ƌ */ + 0x0192, 499, /* ƒ Ƒ */ + 0x0199, 499, /* ƙ Ƙ */ + 0x01a1, 499, /* ơ Ơ */ + 0x01a3, 499, /* ƣ Ƣ */ + 0x01a5, 499, /* ƥ Ƥ */ + 0x01a8, 499, /* ƨ Ƨ */ + 0x01ad, 499, /* ƭ Ƭ */ + 0x01b0, 499, /* ư Ư */ + 0x01b4, 499, /* ƴ Ƴ */ + 0x01b6, 499, /* ƶ Ƶ */ + 0x01b9, 499, /* ƹ Ƹ */ + 0x01bd, 499, /* ƽ Ƽ */ + 0x01c5, 499, /* Dž DŽ */ + 0x01c6, 498, /* dž DŽ */ + 0x01c8, 499, /* Lj LJ */ + 0x01c9, 498, /* lj LJ */ + 0x01cb, 499, /* Nj NJ */ + 0x01cc, 498, /* nj NJ */ + 0x01ce, 499, /* ǎ Ǎ */ + 0x01d0, 499, /* ǐ Ǐ */ + 0x01d2, 499, /* ǒ Ǒ */ + 0x01d4, 499, /* ǔ Ǔ */ + 0x01d6, 499, /* ǖ Ǖ */ + 0x01d8, 499, /* ǘ Ǘ */ + 0x01da, 499, /* ǚ Ǚ */ + 0x01dc, 499, /* ǜ Ǜ */ + 0x01df, 499, /* ǟ Ǟ */ + 0x01e1, 499, /* ǡ Ǡ */ + 0x01e3, 499, /* ǣ 
Ǣ */ + 0x01e5, 499, /* ǥ Ǥ */ + 0x01e7, 499, /* ǧ Ǧ */ + 0x01e9, 499, /* ǩ Ǩ */ + 0x01eb, 499, /* ǫ Ǫ */ + 0x01ed, 499, /* ǭ Ǭ */ + 0x01ef, 499, /* ǯ Ǯ */ + 0x01f2, 499, /* Dz DZ */ + 0x01f3, 498, /* dz DZ */ + 0x01f5, 499, /* ǵ Ǵ */ + 0x01fb, 499, /* ǻ Ǻ */ + 0x01fd, 499, /* ǽ Ǽ */ + 0x01ff, 499, /* ǿ Ǿ */ + 0x0201, 499, /* ȁ Ȁ */ + 0x0203, 499, /* ȃ Ȃ */ + 0x0205, 499, /* ȅ Ȅ */ + 0x0207, 499, /* ȇ Ȇ */ + 0x0209, 499, /* ȉ Ȉ */ + 0x020b, 499, /* ȋ Ȋ */ + 0x020d, 499, /* ȍ Ȍ */ + 0x020f, 499, /* ȏ Ȏ */ + 0x0211, 499, /* ȑ Ȑ */ + 0x0213, 499, /* ȓ Ȓ */ + 0x0215, 499, /* ȕ Ȕ */ + 0x0217, 499, /* ȗ Ȗ */ + 0x0253, 290, /* ɓ Ɓ */ + 0x0254, 294, /* ɔ Ɔ */ + 0x025b, 297, /* ɛ Ɛ */ + 0x0260, 295, /* ɠ Ɠ */ + 0x0263, 293, /* ɣ Ɣ */ + 0x0268, 291, /* ɨ Ɨ */ + 0x0269, 289, /* ɩ Ɩ */ + 0x026f, 289, /* ɯ Ɯ */ + 0x0272, 287, /* ɲ Ɲ */ + 0x0283, 282, /* ʃ Ʃ */ + 0x0288, 282, /* ʈ Ʈ */ + 0x0292, 281, /* ʒ Ʒ */ + 0x03ac, 462, /* ά Ά */ + 0x03cc, 436, /* ό Ό */ + 0x03d0, 438, /* ϐ Β */ + 0x03d1, 443, /* ϑ Θ */ + 0x03d5, 453, /* ϕ Φ */ + 0x03d6, 446, /* ϖ Π */ + 0x03e3, 499, /* ϣ Ϣ */ + 0x03e5, 499, /* ϥ Ϥ */ + 0x03e7, 499, /* ϧ Ϧ */ + 0x03e9, 499, /* ϩ Ϩ */ + 0x03eb, 499, /* ϫ Ϫ */ + 0x03ed, 499, /* ϭ Ϭ */ + 0x03ef, 499, /* ϯ Ϯ */ + 0x03f0, 414, /* ϰ Κ */ + 0x03f1, 420, /* ϱ Ρ */ + 0x0461, 499, /* ѡ Ѡ */ + 0x0463, 499, /* ѣ Ѣ */ + 0x0465, 499, /* ѥ Ѥ */ + 0x0467, 499, /* ѧ Ѧ */ + 0x0469, 499, /* ѩ Ѩ */ + 0x046b, 499, /* ѫ Ѫ */ + 0x046d, 499, /* ѭ Ѭ */ + 0x046f, 499, /* ѯ Ѯ */ + 0x0471, 499, /* ѱ Ѱ */ + 0x0473, 499, /* ѳ Ѳ */ + 0x0475, 499, /* ѵ Ѵ */ + 0x0477, 499, /* ѷ Ѷ */ + 0x0479, 499, /* ѹ Ѹ */ + 0x047b, 499, /* ѻ Ѻ */ + 0x047d, 499, /* ѽ Ѽ */ + 0x047f, 499, /* ѿ Ѿ */ + 0x0481, 499, /* ҁ Ҁ */ + 0x0491, 499, /* ґ Ґ */ + 0x0493, 499, /* ғ Ғ */ + 0x0495, 499, /* ҕ Ҕ */ + 0x0497, 499, /* җ Җ */ + 0x0499, 499, /* ҙ Ҙ */ + 0x049b, 499, /* қ Қ */ + 0x049d, 499, /* ҝ Ҝ */ + 0x049f, 499, /* ҟ Ҟ */ + 0x04a1, 499, /* ҡ Ҡ */ + 0x04a3, 499, /* ң Ң */ + 0x04a5, 499, /* ҥ Ҥ */ + 0x04a7, 499, 
/* ҧ Ҧ */ + 0x04a9, 499, /* ҩ Ҩ */ + 0x04ab, 499, /* ҫ Ҫ */ + 0x04ad, 499, /* ҭ Ҭ */ + 0x04af, 499, /* ү Ү */ + 0x04b1, 499, /* ұ Ұ */ + 0x04b3, 499, /* ҳ Ҳ */ + 0x04b5, 499, /* ҵ Ҵ */ + 0x04b7, 499, /* ҷ Ҷ */ + 0x04b9, 499, /* ҹ Ҹ */ + 0x04bb, 499, /* һ Һ */ + 0x04bd, 499, /* ҽ Ҽ */ + 0x04bf, 499, /* ҿ Ҿ */ + 0x04c2, 499, /* ӂ Ӂ */ + 0x04c4, 499, /* ӄ Ӄ */ + 0x04c8, 499, /* ӈ Ӈ */ + 0x04cc, 499, /* ӌ Ӌ */ + 0x04d1, 499, /* ӑ Ӑ */ + 0x04d3, 499, /* ӓ Ӓ */ + 0x04d5, 499, /* ӕ Ӕ */ + 0x04d7, 499, /* ӗ Ӗ */ + 0x04d9, 499, /* ә Ә */ + 0x04db, 499, /* ӛ Ӛ */ + 0x04dd, 499, /* ӝ Ӝ */ + 0x04df, 499, /* ӟ Ӟ */ + 0x04e1, 499, /* ӡ Ӡ */ + 0x04e3, 499, /* ӣ Ӣ */ + 0x04e5, 499, /* ӥ Ӥ */ + 0x04e7, 499, /* ӧ Ӧ */ + 0x04e9, 499, /* ө Ө */ + 0x04eb, 499, /* ӫ Ӫ */ + 0x04ef, 499, /* ӯ Ӯ */ + 0x04f1, 499, /* ӱ Ӱ */ + 0x04f3, 499, /* ӳ Ӳ */ + 0x04f5, 499, /* ӵ Ӵ */ + 0x04f9, 499, /* ӹ Ӹ */ + 0x1e01, 499, /* ḁ Ḁ */ + 0x1e03, 499, /* ḃ Ḃ */ + 0x1e05, 499, /* ḅ Ḅ */ + 0x1e07, 499, /* ḇ Ḇ */ + 0x1e09, 499, /* ḉ Ḉ */ + 0x1e0b, 499, /* ḋ Ḋ */ + 0x1e0d, 499, /* ḍ Ḍ */ + 0x1e0f, 499, /* ḏ Ḏ */ + 0x1e11, 499, /* ḑ Ḑ */ + 0x1e13, 499, /* ḓ Ḓ */ + 0x1e15, 499, /* ḕ Ḕ */ + 0x1e17, 499, /* ḗ Ḗ */ + 0x1e19, 499, /* ḙ Ḙ */ + 0x1e1b, 499, /* ḛ Ḛ */ + 0x1e1d, 499, /* ḝ Ḝ */ + 0x1e1f, 499, /* ḟ Ḟ */ + 0x1e21, 499, /* ḡ Ḡ */ + 0x1e23, 499, /* ḣ Ḣ */ + 0x1e25, 499, /* ḥ Ḥ */ + 0x1e27, 499, /* ḧ Ḧ */ + 0x1e29, 499, /* ḩ Ḩ */ + 0x1e2b, 499, /* ḫ Ḫ */ + 0x1e2d, 499, /* ḭ Ḭ */ + 0x1e2f, 499, /* ḯ Ḯ */ + 0x1e31, 499, /* ḱ Ḱ */ + 0x1e33, 499, /* ḳ Ḳ */ + 0x1e35, 499, /* ḵ Ḵ */ + 0x1e37, 499, /* ḷ Ḷ */ + 0x1e39, 499, /* ḹ Ḹ */ + 0x1e3b, 499, /* ḻ Ḻ */ + 0x1e3d, 499, /* ḽ Ḽ */ + 0x1e3f, 499, /* ḿ Ḿ */ + 0x1e41, 499, /* ṁ Ṁ */ + 0x1e43, 499, /* ṃ Ṃ */ + 0x1e45, 499, /* ṅ Ṅ */ + 0x1e47, 499, /* ṇ Ṇ */ + 0x1e49, 499, /* ṉ Ṉ */ + 0x1e4b, 499, /* ṋ Ṋ */ + 0x1e4d, 499, /* ṍ Ṍ */ + 0x1e4f, 499, /* ṏ Ṏ */ + 0x1e51, 499, /* ṑ Ṑ */ + 0x1e53, 499, /* ṓ Ṓ */ + 0x1e55, 499, /* ṕ Ṕ */ + 0x1e57, 499, /* ṗ Ṗ */ + 0x1e59, 499, 
/* ṙ Ṙ */ + 0x1e5b, 499, /* ṛ Ṛ */ + 0x1e5d, 499, /* ṝ Ṝ */ + 0x1e5f, 499, /* ṟ Ṟ */ + 0x1e61, 499, /* ṡ Ṡ */ + 0x1e63, 499, /* ṣ Ṣ */ + 0x1e65, 499, /* ṥ Ṥ */ + 0x1e67, 499, /* ṧ Ṧ */ + 0x1e69, 499, /* ṩ Ṩ */ + 0x1e6b, 499, /* ṫ Ṫ */ + 0x1e6d, 499, /* ṭ Ṭ */ + 0x1e6f, 499, /* ṯ Ṯ */ + 0x1e71, 499, /* ṱ Ṱ */ + 0x1e73, 499, /* ṳ Ṳ */ + 0x1e75, 499, /* ṵ Ṵ */ + 0x1e77, 499, /* ṷ Ṷ */ + 0x1e79, 499, /* ṹ Ṹ */ + 0x1e7b, 499, /* ṻ Ṻ */ + 0x1e7d, 499, /* ṽ Ṽ */ + 0x1e7f, 499, /* ṿ Ṿ */ + 0x1e81, 499, /* ẁ Ẁ */ + 0x1e83, 499, /* ẃ Ẃ */ + 0x1e85, 499, /* ẅ Ẅ */ + 0x1e87, 499, /* ẇ Ẇ */ + 0x1e89, 499, /* ẉ Ẉ */ + 0x1e8b, 499, /* ẋ Ẋ */ + 0x1e8d, 499, /* ẍ Ẍ */ + 0x1e8f, 499, /* ẏ Ẏ */ + 0x1e91, 499, /* ẑ Ẑ */ + 0x1e93, 499, /* ẓ Ẓ */ + 0x1e95, 499, /* ẕ Ẕ */ + 0x1ea1, 499, /* ạ Ạ */ + 0x1ea3, 499, /* ả Ả */ + 0x1ea5, 499, /* ấ Ấ */ + 0x1ea7, 499, /* ầ Ầ */ + 0x1ea9, 499, /* ẩ Ẩ */ + 0x1eab, 499, /* ẫ Ẫ */ + 0x1ead, 499, /* ậ Ậ */ + 0x1eaf, 499, /* ắ Ắ */ + 0x1eb1, 499, /* ằ Ằ */ + 0x1eb3, 499, /* ẳ Ẳ */ + 0x1eb5, 499, /* ẵ Ẵ */ + 0x1eb7, 499, /* ặ Ặ */ + 0x1eb9, 499, /* ẹ Ẹ */ + 0x1ebb, 499, /* ẻ Ẻ */ + 0x1ebd, 499, /* ẽ Ẽ */ + 0x1ebf, 499, /* ế Ế */ + 0x1ec1, 499, /* ề Ề */ + 0x1ec3, 499, /* ể Ể */ + 0x1ec5, 499, /* ễ Ễ */ + 0x1ec7, 499, /* ệ Ệ */ + 0x1ec9, 499, /* ỉ Ỉ */ + 0x1ecb, 499, /* ị Ị */ + 0x1ecd, 499, /* ọ Ọ */ + 0x1ecf, 499, /* ỏ Ỏ */ + 0x1ed1, 499, /* ố Ố */ + 0x1ed3, 499, /* ồ Ồ */ + 0x1ed5, 499, /* ổ Ổ */ + 0x1ed7, 499, /* ỗ Ỗ */ + 0x1ed9, 499, /* ộ Ộ */ + 0x1edb, 499, /* ớ Ớ */ + 0x1edd, 499, /* ờ Ờ */ + 0x1edf, 499, /* ở Ở */ + 0x1ee1, 499, /* ỡ Ỡ */ + 0x1ee3, 499, /* ợ Ợ */ + 0x1ee5, 499, /* ụ Ụ */ + 0x1ee7, 499, /* ủ Ủ */ + 0x1ee9, 499, /* ứ Ứ */ + 0x1eeb, 499, /* ừ Ừ */ + 0x1eed, 499, /* ử Ử */ + 0x1eef, 499, /* ữ Ữ */ + 0x1ef1, 499, /* ự Ự */ + 0x1ef3, 499, /* ỳ Ỳ */ + 0x1ef5, 499, /* ỵ Ỵ */ + 0x1ef7, 499, /* ỷ Ỷ */ + 0x1ef9, 499, /* ỹ Ỹ */ + 0x1f51, 508, /* ὑ Ὑ */ + 0x1f53, 508, /* ὓ Ὓ */ + 0x1f55, 508, /* ὕ Ὕ */ + 0x1f57, 508, /* ὗ Ὗ */ + 0x1fb3, 509, 
/* ᾳ ᾼ */ + 0x1fc3, 509, /* ῃ ῌ */ + 0x1fe5, 507, /* ῥ Ῥ */ + 0x1ff3, 509, /* ῳ ῼ */ +}; + +/* + * upper case ranges + * 3rd col is conversion excess 500 + */ +static +Rune __tolower2[] = +{ + 0x0041, 0x005a, 532, /* A-Z a-z */ + 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ + 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ + 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ + 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ + 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ + 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ + 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ + 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ + 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ + 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ + 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ + 0x0410, 0x042f, 532, /* А-Я а-я */ + 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ + 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ + 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ + 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ + 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ + 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ + 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ + 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ + 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ + 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ + 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ + 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ + 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ + 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ + 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ + 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ + 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ + 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ + 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ + 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ + 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ + 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ + 0xff21, 0xff3a, 532, /* A-Z a-z */ +}; + +/* + * upper case singlets + * 2nd col is conversion excess 500 + */ +static +Rune __tolower1[] = +{ + 0x0100, 501, /* Ā ā */ + 0x0102, 501, /* Ă ă */ + 0x0104, 501, /* Ą ą */ + 0x0106, 501, /* Ć ć */ + 0x0108, 501, /* Ĉ ĉ */ + 0x010a, 501, /* Ċ ċ */ + 0x010c, 501, /* Č č */ + 0x010e, 501, /* Ď ď */ + 0x0110, 501, /* Đ đ */ + 0x0112, 501, /* Ē ē */ + 0x0114, 501, /* Ĕ ĕ */ + 0x0116, 501, /* Ė ė */ + 0x0118, 501, /* Ę ę */ + 0x011a, 501, /* Ě ě */ + 0x011c, 501, /* 
Ĝ ĝ */ + 0x011e, 501, /* Ğ ğ */ + 0x0120, 501, /* Ġ ġ */ + 0x0122, 501, /* Ģ ģ */ + 0x0124, 501, /* Ĥ ĥ */ + 0x0126, 501, /* Ħ ħ */ + 0x0128, 501, /* Ĩ ĩ */ + 0x012a, 501, /* Ī ī */ + 0x012c, 501, /* Ĭ ĭ */ + 0x012e, 501, /* Į į */ + 0x0130, 301, /* İ i */ + 0x0132, 501, /* IJ ij */ + 0x0134, 501, /* Ĵ ĵ */ + 0x0136, 501, /* Ķ ķ */ + 0x0139, 501, /* Ĺ ĺ */ + 0x013b, 501, /* Ļ ļ */ + 0x013d, 501, /* Ľ ľ */ + 0x013f, 501, /* Ŀ ŀ */ + 0x0141, 501, /* Ł ł */ + 0x0143, 501, /* Ń ń */ + 0x0145, 501, /* Ņ ņ */ + 0x0147, 501, /* Ň ň */ + 0x014a, 501, /* Ŋ ŋ */ + 0x014c, 501, /* Ō ō */ + 0x014e, 501, /* Ŏ ŏ */ + 0x0150, 501, /* Ő ő */ + 0x0152, 501, /* Œ œ */ + 0x0154, 501, /* Ŕ ŕ */ + 0x0156, 501, /* Ŗ ŗ */ + 0x0158, 501, /* Ř ř */ + 0x015a, 501, /* Ś ś */ + 0x015c, 501, /* Ŝ ŝ */ + 0x015e, 501, /* Ş ş */ + 0x0160, 501, /* Š š */ + 0x0162, 501, /* Ţ ţ */ + 0x0164, 501, /* Ť ť */ + 0x0166, 501, /* Ŧ ŧ */ + 0x0168, 501, /* Ũ ũ */ + 0x016a, 501, /* Ū ū */ + 0x016c, 501, /* Ŭ ŭ */ + 0x016e, 501, /* Ů ů */ + 0x0170, 501, /* Ű ű */ + 0x0172, 501, /* Ų ų */ + 0x0174, 501, /* Ŵ ŵ */ + 0x0176, 501, /* Ŷ ŷ */ + 0x0178, 379, /* Ÿ ÿ */ + 0x0179, 501, /* Ź ź */ + 0x017b, 501, /* Ż ż */ + 0x017d, 501, /* Ž ž */ + 0x0181, 710, /* Ɓ ɓ */ + 0x0182, 501, /* Ƃ ƃ */ + 0x0184, 501, /* Ƅ ƅ */ + 0x0186, 706, /* Ɔ ɔ */ + 0x0187, 501, /* Ƈ ƈ */ + 0x018b, 501, /* Ƌ ƌ */ + 0x0190, 703, /* Ɛ ɛ */ + 0x0191, 501, /* Ƒ ƒ */ + 0x0193, 705, /* Ɠ ɠ */ + 0x0194, 707, /* Ɣ ɣ */ + 0x0196, 711, /* Ɩ ɩ */ + 0x0197, 709, /* Ɨ ɨ */ + 0x0198, 501, /* Ƙ ƙ */ + 0x019c, 711, /* Ɯ ɯ */ + 0x019d, 713, /* Ɲ ɲ */ + 0x01a0, 501, /* Ơ ơ */ + 0x01a2, 501, /* Ƣ ƣ */ + 0x01a4, 501, /* Ƥ ƥ */ + 0x01a7, 501, /* Ƨ ƨ */ + 0x01a9, 718, /* Ʃ ʃ */ + 0x01ac, 501, /* Ƭ ƭ */ + 0x01ae, 718, /* Ʈ ʈ */ + 0x01af, 501, /* Ư ư */ + 0x01b3, 501, /* Ƴ ƴ */ + 0x01b5, 501, /* Ƶ ƶ */ + 0x01b7, 719, /* Ʒ ʒ */ + 0x01b8, 501, /* Ƹ ƹ */ + 0x01bc, 501, /* Ƽ ƽ */ + 0x01c4, 502, /* DŽ dž */ + 0x01c5, 501, /* Dž dž */ + 0x01c7, 502, /* LJ lj */ + 0x01c8, 
501, /* Lj lj */ + 0x01ca, 502, /* NJ nj */ + 0x01cb, 501, /* Nj nj */ + 0x01cd, 501, /* Ǎ ǎ */ + 0x01cf, 501, /* Ǐ ǐ */ + 0x01d1, 501, /* Ǒ ǒ */ + 0x01d3, 501, /* Ǔ ǔ */ + 0x01d5, 501, /* Ǖ ǖ */ + 0x01d7, 501, /* Ǘ ǘ */ + 0x01d9, 501, /* Ǚ ǚ */ + 0x01db, 501, /* Ǜ ǜ */ + 0x01de, 501, /* Ǟ ǟ */ + 0x01e0, 501, /* Ǡ ǡ */ + 0x01e2, 501, /* Ǣ ǣ */ + 0x01e4, 501, /* Ǥ ǥ */ + 0x01e6, 501, /* Ǧ ǧ */ + 0x01e8, 501, /* Ǩ ǩ */ + 0x01ea, 501, /* Ǫ ǫ */ + 0x01ec, 501, /* Ǭ ǭ */ + 0x01ee, 501, /* Ǯ ǯ */ + 0x01f1, 502, /* DZ dz */ + 0x01f2, 501, /* Dz dz */ + 0x01f4, 501, /* Ǵ ǵ */ + 0x01fa, 501, /* Ǻ ǻ */ + 0x01fc, 501, /* Ǽ ǽ */ + 0x01fe, 501, /* Ǿ ǿ */ + 0x0200, 501, /* Ȁ ȁ */ + 0x0202, 501, /* Ȃ ȃ */ + 0x0204, 501, /* Ȅ ȅ */ + 0x0206, 501, /* Ȇ ȇ */ + 0x0208, 501, /* Ȉ ȉ */ + 0x020a, 501, /* Ȋ ȋ */ + 0x020c, 501, /* Ȍ ȍ */ + 0x020e, 501, /* Ȏ ȏ */ + 0x0210, 501, /* Ȑ ȑ */ + 0x0212, 501, /* Ȓ ȓ */ + 0x0214, 501, /* Ȕ ȕ */ + 0x0216, 501, /* Ȗ ȗ */ + 0x0386, 538, /* Ά ά */ + 0x038c, 564, /* Ό ό */ + 0x03e2, 501, /* Ϣ ϣ */ + 0x03e4, 501, /* Ϥ ϥ */ + 0x03e6, 501, /* Ϧ ϧ */ + 0x03e8, 501, /* Ϩ ϩ */ + 0x03ea, 501, /* Ϫ ϫ */ + 0x03ec, 501, /* Ϭ ϭ */ + 0x03ee, 501, /* Ϯ ϯ */ + 0x0460, 501, /* Ѡ ѡ */ + 0x0462, 501, /* Ѣ ѣ */ + 0x0464, 501, /* Ѥ ѥ */ + 0x0466, 501, /* Ѧ ѧ */ + 0x0468, 501, /* Ѩ ѩ */ + 0x046a, 501, /* Ѫ ѫ */ + 0x046c, 501, /* Ѭ ѭ */ + 0x046e, 501, /* Ѯ ѯ */ + 0x0470, 501, /* Ѱ ѱ */ + 0x0472, 501, /* Ѳ ѳ */ + 0x0474, 501, /* Ѵ ѵ */ + 0x0476, 501, /* Ѷ ѷ */ + 0x0478, 501, /* Ѹ ѹ */ + 0x047a, 501, /* Ѻ ѻ */ + 0x047c, 501, /* Ѽ ѽ */ + 0x047e, 501, /* Ѿ ѿ */ + 0x0480, 501, /* Ҁ ҁ */ + 0x0490, 501, /* Ґ ґ */ + 0x0492, 501, /* Ғ ғ */ + 0x0494, 501, /* Ҕ ҕ */ + 0x0496, 501, /* Җ җ */ + 0x0498, 501, /* Ҙ ҙ */ + 0x049a, 501, /* Қ қ */ + 0x049c, 501, /* Ҝ ҝ */ + 0x049e, 501, /* Ҟ ҟ */ + 0x04a0, 501, /* Ҡ ҡ */ + 0x04a2, 501, /* Ң ң */ + 0x04a4, 501, /* Ҥ ҥ */ + 0x04a6, 501, /* Ҧ ҧ */ + 0x04a8, 501, /* Ҩ ҩ */ + 0x04aa, 501, /* Ҫ ҫ */ + 0x04ac, 501, /* Ҭ ҭ */ + 0x04ae, 501, /* Ү ү */ 
+ 0x04b0, 501, /* Ұ ұ */ + 0x04b2, 501, /* Ҳ ҳ */ + 0x04b4, 501, /* Ҵ ҵ */ + 0x04b6, 501, /* Ҷ ҷ */ + 0x04b8, 501, /* Ҹ ҹ */ + 0x04ba, 501, /* Һ һ */ + 0x04bc, 501, /* Ҽ ҽ */ + 0x04be, 501, /* Ҿ ҿ */ + 0x04c1, 501, /* Ӂ ӂ */ + 0x04c3, 501, /* Ӄ ӄ */ + 0x04c7, 501, /* Ӈ ӈ */ + 0x04cb, 501, /* Ӌ ӌ */ + 0x04d0, 501, /* Ӑ ӑ */ + 0x04d2, 501, /* Ӓ ӓ */ + 0x04d4, 501, /* Ӕ ӕ */ + 0x04d6, 501, /* Ӗ ӗ */ + 0x04d8, 501, /* Ә ә */ + 0x04da, 501, /* Ӛ ӛ */ + 0x04dc, 501, /* Ӝ ӝ */ + 0x04de, 501, /* Ӟ ӟ */ + 0x04e0, 501, /* Ӡ ӡ */ + 0x04e2, 501, /* Ӣ ӣ */ + 0x04e4, 501, /* Ӥ ӥ */ + 0x04e6, 501, /* Ӧ ӧ */ + 0x04e8, 501, /* Ө ө */ + 0x04ea, 501, /* Ӫ ӫ */ + 0x04ee, 501, /* Ӯ ӯ */ + 0x04f0, 501, /* Ӱ ӱ */ + 0x04f2, 501, /* Ӳ ӳ */ + 0x04f4, 501, /* Ӵ ӵ */ + 0x04f8, 501, /* Ӹ ӹ */ + 0x1e00, 501, /* Ḁ ḁ */ + 0x1e02, 501, /* Ḃ ḃ */ + 0x1e04, 501, /* Ḅ ḅ */ + 0x1e06, 501, /* Ḇ ḇ */ + 0x1e08, 501, /* Ḉ ḉ */ + 0x1e0a, 501, /* Ḋ ḋ */ + 0x1e0c, 501, /* Ḍ ḍ */ + 0x1e0e, 501, /* Ḏ ḏ */ + 0x1e10, 501, /* Ḑ ḑ */ + 0x1e12, 501, /* Ḓ ḓ */ + 0x1e14, 501, /* Ḕ ḕ */ + 0x1e16, 501, /* Ḗ ḗ */ + 0x1e18, 501, /* Ḙ ḙ */ + 0x1e1a, 501, /* Ḛ ḛ */ + 0x1e1c, 501, /* Ḝ ḝ */ + 0x1e1e, 501, /* Ḟ ḟ */ + 0x1e20, 501, /* Ḡ ḡ */ + 0x1e22, 501, /* Ḣ ḣ */ + 0x1e24, 501, /* Ḥ ḥ */ + 0x1e26, 501, /* Ḧ ḧ */ + 0x1e28, 501, /* Ḩ ḩ */ + 0x1e2a, 501, /* Ḫ ḫ */ + 0x1e2c, 501, /* Ḭ ḭ */ + 0x1e2e, 501, /* Ḯ ḯ */ + 0x1e30, 501, /* Ḱ ḱ */ + 0x1e32, 501, /* Ḳ ḳ */ + 0x1e34, 501, /* Ḵ ḵ */ + 0x1e36, 501, /* Ḷ ḷ */ + 0x1e38, 501, /* Ḹ ḹ */ + 0x1e3a, 501, /* Ḻ ḻ */ + 0x1e3c, 501, /* Ḽ ḽ */ + 0x1e3e, 501, /* Ḿ ḿ */ + 0x1e40, 501, /* Ṁ ṁ */ + 0x1e42, 501, /* Ṃ ṃ */ + 0x1e44, 501, /* Ṅ ṅ */ + 0x1e46, 501, /* Ṇ ṇ */ + 0x1e48, 501, /* Ṉ ṉ */ + 0x1e4a, 501, /* Ṋ ṋ */ + 0x1e4c, 501, /* Ṍ ṍ */ + 0x1e4e, 501, /* Ṏ ṏ */ + 0x1e50, 501, /* Ṑ ṑ */ + 0x1e52, 501, /* Ṓ ṓ */ + 0x1e54, 501, /* Ṕ ṕ */ + 0x1e56, 501, /* Ṗ ṗ */ + 0x1e58, 501, /* Ṙ ṙ */ + 0x1e5a, 501, /* Ṛ ṛ */ + 0x1e5c, 501, /* Ṝ ṝ */ + 0x1e5e, 501, /* Ṟ ṟ */ + 0x1e60, 501, /* Ṡ ṡ */ 
+ 0x1e62, 501, /* Ṣ ṣ */ + 0x1e64, 501, /* Ṥ ṥ */ + 0x1e66, 501, /* Ṧ ṧ */ + 0x1e68, 501, /* Ṩ ṩ */ + 0x1e6a, 501, /* Ṫ ṫ */ + 0x1e6c, 501, /* Ṭ ṭ */ + 0x1e6e, 501, /* Ṯ ṯ */ + 0x1e70, 501, /* Ṱ ṱ */ + 0x1e72, 501, /* Ṳ ṳ */ + 0x1e74, 501, /* Ṵ ṵ */ + 0x1e76, 501, /* Ṷ ṷ */ + 0x1e78, 501, /* Ṹ ṹ */ + 0x1e7a, 501, /* Ṻ ṻ */ + 0x1e7c, 501, /* Ṽ ṽ */ + 0x1e7e, 501, /* Ṿ ṿ */ + 0x1e80, 501, /* Ẁ ẁ */ + 0x1e82, 501, /* Ẃ ẃ */ + 0x1e84, 501, /* Ẅ ẅ */ + 0x1e86, 501, /* Ẇ ẇ */ + 0x1e88, 501, /* Ẉ ẉ */ + 0x1e8a, 501, /* Ẋ ẋ */ + 0x1e8c, 501, /* Ẍ ẍ */ + 0x1e8e, 501, /* Ẏ ẏ */ + 0x1e90, 501, /* Ẑ ẑ */ + 0x1e92, 501, /* Ẓ ẓ */ + 0x1e94, 501, /* Ẕ ẕ */ + 0x1ea0, 501, /* Ạ ạ */ + 0x1ea2, 501, /* Ả ả */ + 0x1ea4, 501, /* Ấ ấ */ + 0x1ea6, 501, /* Ầ ầ */ + 0x1ea8, 501, /* Ẩ ẩ */ + 0x1eaa, 501, /* Ẫ ẫ */ + 0x1eac, 501, /* Ậ ậ */ + 0x1eae, 501, /* Ắ ắ */ + 0x1eb0, 501, /* Ằ ằ */ + 0x1eb2, 501, /* Ẳ ẳ */ + 0x1eb4, 501, /* Ẵ ẵ */ + 0x1eb6, 501, /* Ặ ặ */ + 0x1eb8, 501, /* Ẹ ẹ */ + 0x1eba, 501, /* Ẻ ẻ */ + 0x1ebc, 501, /* Ẽ ẽ */ + 0x1ebe, 501, /* Ế ế */ + 0x1ec0, 501, /* Ề ề */ + 0x1ec2, 501, /* Ể ể */ + 0x1ec4, 501, /* Ễ ễ */ + 0x1ec6, 501, /* Ệ ệ */ + 0x1ec8, 501, /* Ỉ ỉ */ + 0x1eca, 501, /* Ị ị */ + 0x1ecc, 501, /* Ọ ọ */ + 0x1ece, 501, /* Ỏ ỏ */ + 0x1ed0, 501, /* Ố ố */ + 0x1ed2, 501, /* Ồ ồ */ + 0x1ed4, 501, /* Ổ ổ */ + 0x1ed6, 501, /* Ỗ ỗ */ + 0x1ed8, 501, /* Ộ ộ */ + 0x1eda, 501, /* Ớ ớ */ + 0x1edc, 501, /* Ờ ờ */ + 0x1ede, 501, /* Ở ở */ + 0x1ee0, 501, /* Ỡ ỡ */ + 0x1ee2, 501, /* Ợ ợ */ + 0x1ee4, 501, /* Ụ ụ */ + 0x1ee6, 501, /* Ủ ủ */ + 0x1ee8, 501, /* Ứ ứ */ + 0x1eea, 501, /* Ừ ừ */ + 0x1eec, 501, /* Ử ử */ + 0x1eee, 501, /* Ữ ữ */ + 0x1ef0, 501, /* Ự ự */ + 0x1ef2, 501, /* Ỳ ỳ */ + 0x1ef4, 501, /* Ỵ ỵ */ + 0x1ef6, 501, /* Ỷ ỷ */ + 0x1ef8, 501, /* Ỹ ỹ */ + 0x1f59, 492, /* Ὑ ὑ */ + 0x1f5b, 492, /* Ὓ ὓ */ + 0x1f5d, 492, /* Ὕ ὕ */ + 0x1f5f, 492, /* Ὗ ὗ */ + 0x1fbc, 491, /* ᾼ ᾳ */ + 0x1fcc, 491, /* ῌ ῃ */ + 0x1fec, 493, /* Ῥ ῥ */ + 0x1ffc, 491, /* ῼ ῳ */ +}; + +/* + * title 
characters are those between + * upper and lower case. ie DZ Dz dz + */ +static +Rune __totitle1[] = +{ + 0x01c4, 501, /* DŽ Dž */ + 0x01c6, 499, /* dž Dž */ + 0x01c7, 501, /* LJ Lj */ + 0x01c9, 499, /* lj Lj */ + 0x01ca, 501, /* NJ Nj */ + 0x01cc, 499, /* nj Nj */ + 0x01f1, 501, /* DZ Dz */ + 0x01f3, 499, /* dz Dz */ +}; + +static Rune* +bsearch(Rune c, Rune *t, int n, int ne) +{ + Rune *p; + int m; + + while(n > 1) { + m = n/2; + p = t + m*ne; + if(c >= p[0]) { + t = p; + n = n-m; + } else + n = m; + } + if(n && c >= t[0]) + return t; + return 0; +} + +Rune +tolowerrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +toupperrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return c + p[2] - 500; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +Rune +totitlerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2); + if(p && c == p[0]) + return c + p[1] - 500; + return c; +} + +int +islowerrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isupperrune(Rune c) +{ + Rune *p; + + p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); + if(p && c == p[0]) + return 1; + return 0; +} + +int +isalpharune(Rune c) +{ + Rune *p; + + if(isupperrune(c) || islowerrune(c)) + return 1; + p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + p = bsearch(c, __alpha1, nelem(__alpha1), 1); + 
if(p && c == p[0]) + return 1; + return 0; +} + +int +istitlerune(Rune c) +{ + return isupperrune(c) && islowerrune(c); +} + +int +isspacerune(Rune c) +{ + Rune *p; + + p = bsearch(c, __space2, nelem(__space2)/2, 2); + if(p && c >= p[0] && c <= p[1]) + return 1; + return 0; +} diff --git a/mk/libutf/utf.7 b/mk/libutf/utf.7 @@ -0,0 +1,99 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH UTF 7 +.SH NAME +UTF, Unicode, ASCII, rune \- character set and format +.SH DESCRIPTION +The Plan 9 character set and representation are +based on the Unicode Standard and on the ISO multibyte +.SM UTF-8 +encoding (Universal Character +Set Transformation Format, 8 bits wide). +The Unicode Standard represents its characters in 16 +bits; +.SM UTF-8 +represents such +values in an 8-bit byte stream. +Throughout this manual, +.SM UTF-8 +is shortened to +.SM UTF. +.PP +In Plan 9, a +.I rune +is a 16-bit quantity representing a Unicode character. +Internally, programs may store characters as runes. +However, any external manifestation of textual information, +in files or at the interface between programs, uses a +machine-independent, byte-stream encoding called +.SM UTF. +.PP +.SM UTF +is designed so the 7-bit +.SM ASCII +set (values hexadecimal 00 to 7F), +appear only as themselves +in the encoding. +Runes with values above 7F appear as sequences of two or more +bytes with values only from 80 to FF. +.PP +The +.SM UTF +encoding of the Unicode Standard is backward compatible with +.SM ASCII\c +: +programs presented only with +.SM ASCII +work on Plan 9 +even if not written to deal with +.SM UTF, +as do +programs that deal with uninterpreted byte streams. +However, programs that perform semantic processing on +.SM ASCII +graphic +characters must convert from +.SM UTF +to runes +in order to work properly with non-\c +.SM ASCII +input. +See +.IR rune (3). +.PP +Letting numbers be binary, +a rune x is converted to a multibyte +.SM UTF +sequence +as follows: +.PP +01. 
x in [00000000.0bbbbbbb] → 0bbbbbbb +.br +10. x in [00000bbb.bbbbbbbb] → 110bbbbb, 10bbbbbb +.br +11. x in [bbbbbbbb.bbbbbbbb] → 1110bbbb, 10bbbbbb, 10bbbbbb +.br +.PP +Conversion 01 provides a one-byte sequence that spans the +.SM ASCII +character set in a compatible way. +Conversions 10 and 11 represent higher-valued characters +as sequences of two or three bytes with the high bit set. +Plan 9 does not support the 4, 5, and 6 byte sequences proposed by X-Open. +When there are multiple ways to encode a value, for example rune 0, +the shortest encoding is used. +.PP +In the inverse mapping, +any sequence except those described above +is incorrect and is converted to rune hexadecimal 0080. +.SH "SEE ALSO" +.IR ascii (1), +.IR tcs (1), +.IR rune (3), +.IR "The Unicode Standard" . diff --git a/mk/libutf/utf.h b/mk/libutf/utf.h @@ -0,0 +1,54 @@ +#ifndef _UTF_H_ +#define _UTF_H_ 1 +#if defined(__cplusplus) +extern "C" { +#endif + +typedef unsigned int Rune; /* 32 bits */ + +enum +{ + UTFmax = 4, /* maximum bytes per rune */ + Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ + Runeself = 0x80, /* rune and UTF sequences are the same (<) */ + Runeerror = 0xFFFD, /* decoding error in UTF */ + Runemax = 0x10FFFF /* maximum rune value */ +}; + +/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ +int chartorune(Rune *rune, char *str); +int fullrune(char *str, int n); +int isalpharune(Rune c); +int islowerrune(Rune c); +int isspacerune(Rune c); +int istitlerune(Rune c); +int isupperrune(Rune c); +int runelen(long c); +int runenlen(Rune *r, int nrune); +Rune* runestrcat(Rune *s1, Rune *s2); +Rune* runestrchr(Rune *s, Rune c); +int runestrcmp(Rune *s1, Rune *s2); +Rune* runestrcpy(Rune *s1, Rune *s2); +Rune* runestrdup(Rune *s) ; +Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2); +long runestrlen(Rune *s); +Rune* runestrncat(Rune *s1, Rune *s2, long n); +int runestrncmp(Rune *s1, Rune *s2, long n); +Rune* runestrncpy(Rune *s1, Rune 
*s2, long n); +Rune* runestrrchr(Rune *s, Rune c); +Rune* runestrstr(Rune *s1, Rune *s2); +int runetochar(char *str, Rune *rune); +Rune tolowerrune(Rune c); +Rune totitlerune(Rune c); +Rune toupperrune(Rune c); +char* utfecpy(char *to, char *e, char *from); +int utflen(char *s); +int utfnlen(char *s, long m); +char* utfrrune(char *s, long c); +char* utfrune(char *s, long c); +char* utfutf(char *s1, char *s2); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/mk/libutf/utfdef.h b/mk/libutf/utfdef.h @@ -0,0 +1,33 @@ +/* + * compiler directive on Plan 9 + */ +#ifndef USED +#define USED(x) if(x);else +#endif + +/* + * easiest way to make sure these are defined + */ +#define uchar _fmtuchar +#define ushort _fmtushort +#define uint _fmtuint +#define ulong _fmtulong +#define vlong _fmtvlong +#define uvlong _fmtuvlong +typedef unsigned char uchar; +typedef unsigned short ushort; +typedef unsigned int uint; +typedef unsigned long ulong; +typedef unsigned long long uvlong; +typedef long long vlong; + +/* + * nil cannot be ((void*)0) on ANSI C, + * because it is used for function pointers + */ +#undef nil +#define nil 0 + +#undef nelem +#define nelem(x) (sizeof(x)/sizeof((x)[0])) /* element count of array x; x must be a true array, not a pointer */ + diff --git a/mk/libutf/utfecpy.c b/mk/libutf/utfecpy.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#define _BSD_SOURCE 1 /* memccpy */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +char* +utfecpy(char *to, char *e, char *from) +{ + char *end; + + if(to >= e) + return to; + end = memccpy(to, from, '\0', e - to); + if(end == nil){ + end = e-1; + while(end>to && (*--end&0xC0)==0x80) + ; + *end = '\0'; + }else{ + end--; + } + return end; +} diff --git a/mk/libutf/utflen.c b/mk/libutf/utflen.c @@ -0,0 +1,37 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +int +utflen(char *s) +{ + int c; + long n; + Rune rune; + + n = 0; + for(;;) { + c = *(uchar*)s; + if(c < Runeself) { + if(c == 0) + return n; + s++; + } else + s += chartorune(&rune, s); + n++; + } +} diff --git a/mk/libutf/utfnlen.c b/mk/libutf/utfnlen.c @@ -0,0 +1,41 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +int +utfnlen(char *s, long m) +{ + int c; + long n; + Rune rune; + char *es; + + es = s + m; + for(n = 0; s < es; n++) { + c = *(uchar*)s; + if(c < Runeself){ + if(c == '\0') + break; + s++; + continue; + } + if(!fullrune(s, es-s)) + break; + s += chartorune(&rune, s); + } + return n; +} diff --git a/mk/libutf/utfrrune.c b/mk/libutf/utfrrune.c @@ -0,0 +1,45 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. 
+ */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +char* +utfrrune(char *s, long c) +{ + long c1; + Rune r; + char *s1; + + if(c < Runesync) /* not part of utf sequence */ + return strrchr(s, c); + + s1 = 0; + for(;;) { + c1 = *(uchar*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return s1; + if(c1 == c) + s1 = s; + s++; + continue; + } + c1 = chartorune(&r, s); + if(r == c) + s1 = s; + s += c1; + } +} diff --git a/mk/libutf/utfrune.c b/mk/libutf/utfrune.c @@ -0,0 +1,44 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + +char* +utfrune(char *s, long c) +{ + long c1; + Rune r; + int n; + + if(c < Runesync) /* not part of utf sequence */ + return strchr(s, c); + + for(;;) { + c1 = *(uchar*)s; + if(c1 < Runeself) { /* one byte rune */ + if(c1 == 0) + return 0; + if(c1 == c) + return s; + s++; + continue; + } + n = chartorune(&r, s); + if(r == c) + return s; + s += n; + } +} diff --git a/mk/libutf/utfutf.c b/mk/libutf/utfutf.c @@ -0,0 +1,41 @@ +/* + * The authors of this software are Rob Pike and Ken Thompson. + * Copyright (c) 2002 by Lucent Technologies. 
+ * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE + * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + */ +#include <stdarg.h> +#include <string.h> +#include "plan9.h" +#include "utf.h" + + +/* + * Return pointer to first occurrence of s2 in s1, + * 0 if none + */ +char* +utfutf(char *s1, char *s2) +{ + char *p; + long f, n1, n2; + Rune r; + + n1 = chartorune(&r, s2); + f = r; + if(f <= Runesync) /* represents self */ + return strstr(s1, s2); + + n2 = strlen(s2); + for(p=s1; p=utfrune(p, f); p+=n1) + if(strncmp(p, s2, n2) == 0) + return p; + return 0; +} diff --git a/mk/mk/NOTICE b/mk/mk/NOTICE @@ -0,0 +1,34 @@ +This copyright NOTICE applies to all files in this directory and +subdirectories, unless another copyright notice appears in a given +file or subdirectory. If you take substantial code from this software to use in +other programs, you must somehow include with it an appropriate +copyright notice that includes the copyright notice and the other +notices below. It is fine (and often tidier) to do that in a separate +file such as NOTICE, LICENCE or COPYING. + + Copyright © 1994-1999 Lucent Technologies Inc. All rights reserved. + Revisions Copyright © 2000-2003 Vita Nuova Holdings Limited (www.vitanuova.com). All rights reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +---- + +This software is also made available under the Lucent Public License +version 1.02; see http://plan9.bell-labs.com/plan9dist/license.html + diff --git a/mk/mk/README b/mk/mk/README @@ -0,0 +1,5 @@ +This software was packaged for Unix by Russ Cox. +Please send comments to rsc@swtch.com. 
+ +http://swtch.com/plan9port/unix + diff --git a/mk/mk/arc.c b/mk/mk/arc.c @@ -0,0 +1,52 @@ +#include "mk.h" + +Arc * +newarc(Node *n, Rule *r, char *stem, Resub *match) +{ + Arc *a; + + a = (Arc *)Malloc(sizeof(Arc)); + a->n = n; + a->r = r; + a->stem = strdup(stem); + rcopy(a->match, match, NREGEXP); + a->next = 0; + a->flag = 0; + a->prog = r->prog; + return(a); +} + +void +dumpa(char *s, Arc *a) +{ + char buf[1024]; + + Bprint(&bout, "%sArc@%p: n=%p r=%p flag=0x%x stem='%s'", + s, a, a->n, a->r, a->flag, a->stem); + if(a->prog) + Bprint(&bout, " prog='%s'", a->prog); + Bprint(&bout, "\n"); + + if(a->n){ + snprint(buf, sizeof(buf), "%s ", (*s == ' ')? s:""); + dumpn(buf, a->n); + } +} + +void +nrep(void) +{ + Symtab *sym; + Word *w; + + sym = symlook("NREP", S_VAR, 0); + if(sym){ + w = sym->u.ptr; + if (w && w->s && *w->s) + nreps = atoi(w->s); + } + if(nreps < 1) + nreps = 1; + if(DEBUG(D_GRAPH)) + Bprint(&bout, "nreps = %d\n", nreps); +} diff --git a/mk/mk/archive.c b/mk/mk/archive.c @@ -0,0 +1,253 @@ +#include "mk.h" +#define ARMAG "!<arch>\n" +#define SARMAG 8 + +#define ARFMAG "`\n" +#define SARNAME 16 + +struct ar_hdr +{ + char name[SARNAME]; + char date[12]; + char uid[6]; + char gid[6]; + char mode[8]; + char size[10]; + char fmag[2]; +}; +#define SAR_HDR (SARNAME+44) + +static int dolong = 1; + +static void atimes(char *); +static char *split(char*, char**); + +long +readn(int f, void *av, long n) +{ + char *a; + long m, t; + + a = av; + t = 0; + while(t < n){ + m = read(f, a+t, n-t); + if(m <= 0){ + if(t == 0) + return m; + break; + } + t += m; + } + return t; +} +long +atimeof(int force, char *name) +{ + Symtab *sym; + long t; + char *archive, *member, buf[512]; + + archive = split(name, &member); + if(archive == 0) + Exit(); + + t = mtime(archive); + sym = symlook(archive, S_AGG, 0); + if(sym){ + if(force || (t > sym->u.value)){ + atimes(archive); + sym->u.value = t; + } + } + else{ + atimes(archive); + /* mark the aggegate as having been done */ + 
symlook(strdup(archive), S_AGG, "")->u.value = t; + } + /* truncate long member name to sizeof of name field in archive header */ + if(dolong) + snprint(buf, sizeof(buf), "%s(%s)", archive, member); + else + snprint(buf, sizeof(buf), "%s(%.*s)", archive, SARNAME, member); + sym = symlook(buf, S_TIME, 0); + if (sym) + return sym->u.value; + return 0; +} + +void +atouch(char *name) +{ + char *archive, *member; + int fd, i; + struct ar_hdr h; + long t; + + archive = split(name, &member); + if(archive == 0) + Exit(); + + fd = open(archive, ORDWR); + if(fd < 0){ + fd = create(archive, OWRITE, 0666); + if(fd < 0){ + fprint(2, "create %s: %r\n", archive); + Exit(); + } + write(fd, ARMAG, SARMAG); + } + if(symlook(name, S_TIME, 0)){ + /* hoon off and change it in situ */ + LSEEK(fd, SARMAG, 0); + while(read(fd, (char *)&h, sizeof(h)) == sizeof(h)){ + for(i = SARNAME-1; i > 0 && h.name[i] == ' '; i--) + ; + h.name[i+1]=0; + if(strcmp(member, h.name) == 0){ + t = SARNAME-sizeof(h); /* ughgghh */ + LSEEK(fd, t, 1); + fprint(fd, "%-12ld", time(0)); + break; + } + t = atol(h.size); + if(t&01) t++; + LSEEK(fd, t, 1); + } + } + close(fd); +} + +static void +atimes(char *ar) +{ + struct ar_hdr h; + long t; + int fd, i, namelen; + char buf[2048], *p, *strings; + char name[1024]; + Symtab *sym; + + strings = nil; + fd = open(ar, OREAD); + if(fd < 0) + return; + + if(read(fd, buf, SARMAG) != SARMAG){ + close(fd); + return; + } + while(readn(fd, (char *)&h, sizeof(h)) == sizeof(h)){ + t = atol(h.date); + if(t == 0) /* as it sometimes happens; thanks ken */ + t = 1; + namelen = 0; + if(memcmp(h.name, "#1/", 3) == 0){ /* BSD */ + namelen = atoi(h.name+3); + if(namelen >= sizeof name){ + namelen = 0; + goto skip; + } + if(readn(fd, name, namelen) != namelen) + break; + name[namelen] = 0; + }else if(memcmp(h.name, "// ", 2) == 0){ /* GNU */ + /* date, uid, gid, mode all ' ' */ + for(i=2; i<16+12+6+6+8; i++) + if(h.name[i] != ' ') + goto skip; + t = atol(h.size); + if(t&01) + t++; + 
free(strings); + strings = malloc(t+1); + if(strings){ + if(readn(fd, strings, t) != t){ + free(strings); + strings = nil; + break; + } + strings[t] = 0; + continue; + } + goto skip; + }else if(strings && h.name[0]=='/' && isdigit((uchar)h.name[1])){ + i = strtol(h.name+1, &p, 10); + if(*p != ' ' || i >= strlen(strings)) + goto skip; + p = strings+i; + for(; *p && *p != '/'; p++) + ; + namelen = p-(strings+i); + if(namelen >= sizeof name){ + namelen = 0; + goto skip; + } + memmove(name, strings+i, namelen); + name[namelen] = 0; + namelen = 0; + }else{ + strncpy(name, h.name, sizeof(h.name)); + for(i = sizeof(h.name)-1; i > 0 && name[i] == ' '; i--) + ; + if(name[i] == '/') /* system V bug */ + i--; + name[i+1]=0; + } + snprint(buf, sizeof buf, "%s(%s)", ar, name); + sym = symlook(strdup(buf), S_TIME, (void *)t); + sym->u.value = t; + skip: + t = atol(h.size); + if(t&01) t++; + t -= namelen; + LSEEK(fd, t, 1); + } + close(fd); + free(strings); +} + +static int +type(char *file) +{ + int fd; + char buf[SARMAG]; + + fd = open(file, OREAD); + if(fd < 0){ + if(symlook(file, S_BITCH, 0) == 0){ + if(strlen(file) < 2 || strcmp(file+strlen(file)-2, ".a") != 0) + Bprint(&bout, "%s doesn't exist: assuming it will be an archive\n", file); + symlook(file, S_BITCH, (void *)file); + } + return 1; + } + if(read(fd, buf, SARMAG) != SARMAG){ + close(fd); + return 0; + } + close(fd); + return !strncmp(ARMAG, buf, SARMAG); +} + +static char* +split(char *name, char **member) +{ + char *p, *q; + + p = strdup(name); + q = utfrune(p, '('); + if(q){ + *q++ = 0; + if(member) + *member = q; + q = utfrune(q, ')'); + if (q) + *q = 0; + if(type(p)) + return p; + free(p); + fprint(2, "mk: '%s' is not an archive\n", name); + } + return 0; +} diff --git a/mk/mk/bufblock.c b/mk/mk/bufblock.c @@ -0,0 +1,88 @@ +#include "mk.h" + +static Bufblock *freelist; +#define QUANTA 4096 + +Bufblock * +newbuf(void) +{ + Bufblock *p; + + if (freelist) { + p = freelist; + freelist = freelist->next; + } else { + 
p = (Bufblock *) Malloc(sizeof(Bufblock)); + p->start = Malloc(QUANTA*sizeof(*p->start)); + p->end = p->start+QUANTA; + } + p->current = p->start; + *p->start = 0; + p->next = 0; + return p; +} + +void +freebuf(Bufblock *p) +{ + p->next = freelist; + freelist = p; +} + +void +growbuf(Bufblock *p) +{ + int n; + Bufblock *f; + char *cp; + + n = p->end-p->start+QUANTA; + /* search the free list for a big buffer */ + for (f = freelist; f; f = f->next) { + if (f->end-f->start >= n) { + memcpy(f->start, p->start, p->end-p->start); + cp = f->start; + f->start = p->start; + p->start = cp; + cp = f->end; + f->end = p->end; + p->end = cp; + f->current = f->start; + break; + } + } + if (!f) { /* not found - grow it */ + p->start = Realloc(p->start, n); + p->end = p->start+n; + } + p->current = p->start+n-QUANTA; +} + +void +bufcpy(Bufblock *buf, char *cp, int n) +{ + + while (n--) + insert(buf, *cp++); +} + +void +insert(Bufblock *buf, int c) +{ + + if (buf->current >= buf->end) + growbuf(buf); + *buf->current++ = c; +} + +void +rinsert(Bufblock *buf, Rune r) +{ + int n; + + n = runelen(r); + if (buf->current+n > buf->end) + growbuf(buf); + runetochar(buf->current, &r); + buf->current += n; +} diff --git a/mk/mk/env.c b/mk/mk/env.c @@ -0,0 +1,149 @@ +#include "mk.h" + +enum { + ENVQUANTA=10 +}; + +Envy *envy; +static int nextv; + +static char *myenv[] = +{ + "target", + "stem", + "prereq", + "pid", + "nproc", + "newprereq", + "alltarget", + "newmember", + "stem0", /* must be in order from here */ + "stem1", + "stem2", + "stem3", + "stem4", + "stem5", + "stem6", + "stem7", + "stem8", + "stem9", + 0 +}; + +void +initenv(void) +{ + char **p; + + for(p = myenv; *p; p++) + symlook(*p, S_INTERNAL, (void *)""); + readenv(); /* o.s. 
dependent */ +} + +static void +envinsert(char *name, Word *value) +{ + static int envsize; + + if (nextv >= envsize) { + envsize += ENVQUANTA; + envy = (Envy *) Realloc((char *) envy, envsize*sizeof(Envy)); + } + envy[nextv].name = name; + envy[nextv++].values = value; +} + +static void +envupd(char *name, Word *value) +{ + Envy *e; + + for(e = envy; e->name; e++) + if(strcmp(name, e->name) == 0){ + delword(e->values); + e->values = value; + return; + } + e->name = name; + e->values = value; + envinsert(0,0); +} + +static void +ecopy(Symtab *s) +{ + char **p; + + if(symlook(s->name, S_NOEXPORT, 0)) + return; + for(p = myenv; *p; p++) + if(strcmp(*p, s->name) == 0) + return; + envinsert(s->name, s->u.ptr); +} + +void +execinit(void) +{ + char **p; + + nextv = 0; + for(p = myenv; *p; p++) + envinsert(*p, stow("")); + + symtraverse(S_VAR, ecopy); + envinsert(0, 0); +} + +Envy* +buildenv(Job *j, int slot) +{ + char **p, *cp, *qp; + Word *w, *v, **l; + int i; + char buf[256]; + + envupd("target", wdup(j->t)); + if(j->r->attr&REGEXP) + envupd("stem",newword("")); + else + envupd("stem", newword(j->stem)); + envupd("prereq", wdup(j->p)); + sprint(buf, "%d", getpid()); + envupd("pid", newword(buf)); + sprint(buf, "%d", slot); + envupd("nproc", newword(buf)); + envupd("newprereq", wdup(j->np)); + envupd("alltarget", wdup(j->at)); + l = &v; + v = w = wdup(j->np); + while(w){ + cp = strchr(w->s, '('); + if(cp){ + qp = strchr(cp+1, ')'); + if(qp){ + *qp = 0; + strcpy(w->s, cp+1); + l = &w->next; + w = w->next; + continue; + } + } + *l = w->next; + free(w->s); + free(w); + w = *l; + } + envupd("newmember", v); + /* update stem0 -> stem9 */ + for(p = myenv; *p; p++) + if(strcmp(*p, "stem0") == 0) + break; + for(i = 0; *p; i++, p++){ + if((j->r->attr&REGEXP) && j->match[i]) + envupd(*p, newword(j->match[i])); + else + envupd(*p, newword("")); + } + return envy; +} diff --git a/mk/mk/file.c b/mk/mk/file.c @@ -0,0 +1,90 @@ +#include "mk.h" + +/* table-driven version in bootes dump 
of 12/31/96 */ + +long +mtime(char *name) +{ + return mkmtime(name); +} + +long +timeof(char *name, int force) +{ + Symtab *sym; + long t; + + if(utfrune(name, '(')) + return atimeof(force, name); /* archive */ + + if(force) + return mtime(name); + + + sym = symlook(name, S_TIME, 0); + if (sym) + return sym->u.value; + + t = mtime(name); + if(t == 0) + return 0; + + symlook(name, S_TIME, (void*)t); /* install time in cache */ + return t; +} + +void +touch(char *name) +{ + Bprint(&bout, "touch(%s)\n", name); + if(nflag) + return; + + if(utfrune(name, '(')) + atouch(name); /* archive */ + else if(chgtime(name) < 0) { + fprint(2, "%s: %r\n", name); + Exit(); + } +} + +void +delete(char *name) +{ + if(utfrune(name, '(') == 0) { /* file */ + if(remove(name) < 0) + fprint(2, "remove %s: %r\n", name); + } else + fprint(2, "hoon off; mk can'tdelete archive members\n"); +} + +void +timeinit(char *s) +{ + long t; + char *cp; + Rune r; + int c, n; + + t = time(0); + while (*s) { + cp = s; + do{ + n = chartorune(&r, s); + if (r == ' ' || r == ',' || r == '\n') + break; + s += n; + } while(*s); + c = *s; + *s = 0; + symlook(strdup(cp), S_TIME, (void *)t)->u.value = t; + if (c) + *s++ = c; + while(*s){ + n = chartorune(&r, s); + if(r != ' ' && r != ',' && r != '\n') + break; + s += n; + } + } +} diff --git a/mk/mk/fns.h b/mk/mk/fns.h @@ -0,0 +1,88 @@ +#undef waitfor +#define waitfor mkwaitfor + +void addrule(char*, Word*, char*, Word*, int, int, char*); +void addrules(Word*, Word*, char*, int, int, char*); +void addw(Word*, char*); +void assert(char*, int); +int assline(Biobuf *, Bufblock *); +long atimeof(int,char*); +void atouch(char*); +void bufcpy(Bufblock *, char *, int); +Envy *buildenv(Job*, int); +void catchnotes(void); +int chgtime(char*); +void clrmade(Node*); +void delete(char*); +void delword(Word*); +int dorecipe(Node*); +void dumpa(char*, Arc*); +void dumpj(char*, Job*, int); +void dumpn(char*, Node*); +void dumpr(char*, Rule*); +void dumpv(char*); +void 
dumpw(char*, Word*); +void execinit(void); +int execsh(char*, char*, Bufblock*, Envy*, Shell*, Word*); +void Exit(void); +void expunge(int, char*); +void freebuf(Bufblock*); +void front(char*); +Node *graph(char*); +void growbuf(Bufblock *); +void initenv(void); +void initshell(void); +void insert(Bufblock *, int); +void ipop(void); +void ipush(void); +void killchildren(char*); +void *Malloc(int); +char *maketmp(int*); +int match(char*, char*, char*, Shell*); +char *membername(char*, int, char*); +void mk(char*); +unsigned long mkmtime(char*); +long mtime(char*); +Arc *newarc(Node*, Rule*, char*, Resub*); +Bufblock *newbuf(void); +Job *newjob(Rule*, Node*, char*, char**, Word*, Word*, Word*, Word*); +Word *newword(char*); +int nextrune(Biobuf*, int); +int nextslot(void); +void nproc(void); +void nrep(void); +int outofdate(Node*, Arc*, int); +void parse(char*, int, int); +int pipecmd(char*, Envy*, int*, Shell*, Word*); +void popshell(void); +void prusage(void); +void pushshell(void); +void rcopy(char**, Resub*, int); +void readenv(void); +void *Realloc(void*, int); +void rinsert(Bufblock *, Rune); +char *rulecnt(void); +void run(Job*); +char *setshell(Word*); +void setvar(char*, void*); +int shargv(Word*, int, char***); +char *shname(char*); +void shprint(char*, Envy*, Bufblock*, Shell*); +Word *stow(char*); +void subst(char*, char*, char*); +void symdel(char*, int); +void syminit(void); +Symtab *symlook(char*, int, void*); +void symstat(void); +void symtraverse(int, void(*)(Symtab*)); +void timeinit(char*); +long timeof(char*, int); +void touch(char*); +void update(int, Node*); +void usage(void); +Word *varsub(char**); +int waitfor(char*); +int waitup(int, int*); +Word *wdup(Word*); +int work(Node*, Node*, Arc*); +char *wtos(Word*, int); diff --git a/mk/mk/graph.c b/mk/mk/graph.c @@ -0,0 +1,279 @@ +#include "mk.h" + +static Node *applyrules(char *, char *); +static void togo(Node *); +static int vacuous(Node *); +static Node *newnode(char *); +static void 
trace(char *, Arc *); +static void cyclechk(Node *); +static void ambiguous(Node *); +static void attribute(Node *); + +Node * +graph(char *target) +{ + Node *node; + char *cnt; + + cnt = rulecnt(); + node = applyrules(target, cnt); + free(cnt); + cyclechk(node); + node->flags |= PROBABLE; /* make sure it doesn't get deleted */ + vacuous(node); + ambiguous(node); + attribute(node); + return(node); +} + +static Node * +applyrules(char *target, char *cnt) +{ + Symtab *sym; + Node *node; + Rule *r; + Arc head, *a = &head; + Word *w; + char stem[NAMEBLOCK], buf[NAMEBLOCK]; + Resub rmatch[NREGEXP]; + +/* print("applyrules(%lux='%s')\n", target, target); */ + sym = symlook(target, S_NODE, 0); + if(sym) + return sym->u.ptr; + target = strdup(target); + node = newnode(target); + head.n = 0; + head.next = 0; + sym = symlook(target, S_TARGET, 0); + memset((char*)rmatch, 0, sizeof(rmatch)); + for(r = sym? sym->u.ptr:0; r; r = r->chain){ + if(r->attr&META) continue; + if(strcmp(target, r->target)) continue; + if((!r->recipe || !*r->recipe) && (!r->tail || !r->tail->s || !*r->tail->s)) continue; /* no effect; ignore */ + if(cnt[r->rule] >= nreps) continue; + cnt[r->rule]++; + node->flags |= PROBABLE; + +/* if(r->attr&VIR) + * node->flags |= VIRTUAL; + * if(r->attr&NOREC) + * node->flags |= NORECIPE; + * if(r->attr&DEL) + * node->flags |= DELETE; + */ + if(!r->tail || !r->tail->s || !*r->tail->s) { + a->next = newarc((Node *)0, r, "", rmatch); + a = a->next; + } else + for(w = r->tail; w; w = w->next){ + a->next = newarc(applyrules(w->s, cnt), r, "", rmatch); + a = a->next; + } + cnt[r->rule]--; + head.n = node; + } + for(r = metarules; r; r = r->next){ + if((!r->recipe || !*r->recipe) && (!r->tail || !r->tail->s || !*r->tail->s)) continue; /* no effect; ignore */ + if ((r->attr&NOVIRT) && a != &head && (a->r->attr&VIR)) + continue; + if(r->attr&REGEXP){ + stem[0] = 0; + patrule = r; + memset((char*)rmatch, 0, sizeof(rmatch)); + if(regexec(r->pat, node->name, rmatch, NREGEXP) == 
0) + continue; + } else { + if(!match(node->name, r->target, stem, r->shellt)) continue; + } + if(cnt[r->rule] >= nreps) continue; + cnt[r->rule]++; + +/* if(r->attr&VIR) + * node->flags |= VIRTUAL; + * if(r->attr&NOREC) + * node->flags |= NORECIPE; + * if(r->attr&DEL) + * node->flags |= DELETE; + */ + + if(!r->tail || !r->tail->s || !*r->tail->s) { + a->next = newarc((Node *)0, r, stem, rmatch); + a = a->next; + } else + for(w = r->tail; w; w = w->next){ + if(r->attr&REGEXP) + regsub(w->s, buf, sizeof buf, rmatch, NREGEXP); + else + subst(stem, w->s, buf); + a->next = newarc(applyrules(buf, cnt), r, stem, rmatch); + a = a->next; + } + cnt[r->rule]--; + } + a->next = node->prereqs; + node->prereqs = head.next; + return(node); +} + +static void +togo(Node *node) +{ + Arc *la, *a; + + /* delete them now */ + la = 0; + for(a = node->prereqs; a; la = a, a = a->next) + if(a->flag&TOGO){ + if(a == node->prereqs) + node->prereqs = a->next; + else + la->next = a->next, a = la; + } +} + +static int +vacuous(Node *node) +{ + Arc *la, *a; + int vac = !(node->flags&PROBABLE); + + if(node->flags&READY) + return(node->flags&VACUOUS); + node->flags |= READY; + for(a = node->prereqs; a; a = a->next) + if(a->n && vacuous(a->n) && (a->r->attr&META)) + a->flag |= TOGO; + else + vac = 0; + /* if a rule generated arcs that DON'T go; no others from that rule go */ + for(a = node->prereqs; a; a = a->next) + if((a->flag&TOGO) == 0) + for(la = node->prereqs; la; la = la->next) + if((la->flag&TOGO) && (la->r == a->r)){ + la->flag &= ~TOGO; + } + togo(node); + if(vac) + node->flags |= VACUOUS; + return(vac); +} + +static Node * +newnode(char *name) +{ + register Node *node; + + node = (Node *)Malloc(sizeof(Node)); + symlook(name, S_NODE, (void *)node); + node->name = name; + node->time = timeof(name, 0); + node->prereqs = 0; + node->flags = node->time? 
PROBABLE : 0; + node->next = 0; + return(node); +} + +void +dumpn(char *s, Node *n) +{ + char buf[1024]; + Arc *a; + + snprint(buf, sizeof buf, "%s ", (*s == ' ')? s:""); + Bprint(&bout, "%s%s@%ld: time=%ld flags=0x%x next=%ld\n", + s, n->name, n, n->time, n->flags, n->next); + for(a = n->prereqs; a; a = a->next) + dumpa(buf, a); +} + +static void +trace(char *s, Arc *a) +{ + fprint(2, "\t%s", s); + while(a){ + fprint(2, " <-(%s:%d)- %s", a->r->file, a->r->line, + a->n? a->n->name:""); + if(a->n){ + for(a = a->n->prereqs; a; a = a->next) + if(*a->r->recipe) break; + } else + a = 0; + } + fprint(2, "\n"); +} + +static void +cyclechk(Node *n) +{ + Arc *a; + + if((n->flags&CYCLE) && n->prereqs){ + fprint(2, "mk: cycle in graph detected at target %s\n", n->name); + Exit(); + } + n->flags |= CYCLE; + for(a = n->prereqs; a; a = a->next) + if(a->n) + cyclechk(a->n); + n->flags &= ~CYCLE; +} + +static void +ambiguous(Node *n) +{ + Arc *a; + Rule *r = 0; + Arc *la; + int bad = 0; + + la = 0; + for(a = n->prereqs; a; a = a->next){ + if(a->n) + ambiguous(a->n); + if(*a->r->recipe == 0) continue; + if(r == 0) + r = a->r, la = a; + else{ + if(r->recipe != a->r->recipe){ + if((r->attr&META) && !(a->r->attr&META)){ + la->flag |= TOGO; + r = a->r, la = a; + } else if(!(r->attr&META) && (a->r->attr&META)){ + a->flag |= TOGO; + continue; + } + } + if(r->recipe != a->r->recipe){ + if(bad == 0){ + fprint(2, "mk: ambiguous recipes for %s:\n", n->name); + bad = 1; + trace(n->name, la); + } + trace(n->name, a); + } + } + } + if(bad) + Exit(); + togo(n); +} + +static void +attribute(Node *n) +{ + register Arc *a; + + for(a = n->prereqs; a; a = a->next){ + if(a->r->attr&VIR) + n->flags |= VIRTUAL; + if(a->r->attr&NOREC) + n->flags |= NORECIPE; + if(a->r->attr&DEL) + n->flags |= DELETE; + if(a->n) + attribute(a->n); + } + if(n->flags&VIRTUAL) + n->time = 0; +} diff --git a/mk/mk/job.c b/mk/mk/job.c @@ -0,0 +1,33 @@ +#include "mk.h" + +Job * +newjob(Rule *r, Node *nlist, char *stem, char 
**match, Word *pre, Word *npre, Word *tar, Word *atar) +{ + register Job *j; + + j = (Job *)Malloc(sizeof(Job)); + j->r = r; + j->n = nlist; + j->stem = stem; + j->match = match; + j->p = pre; + j->np = npre; + j->t = tar; + j->at = atar; + j->nproc = -1; + j->next = 0; + return(j); +} + +void +dumpj(char *s, Job *j, int all) +{ + Bprint(&bout, "%s\n", s); + while(j){ + Bprint(&bout, "job@%ld: r=%ld n=%ld stem='%s' nproc=%d\n", + j, j->r, j->n, j->stem, j->nproc); + Bprint(&bout, "\ttarget='%s' alltarget='%s' prereq='%s' nprereq='%s'\n", + wtos(j->t, ' '), wtos(j->at, ' '), wtos(j->p, ' '), wtos(j->np, ' ')); + j = all? j->next : 0; + } +} diff --git a/mk/mk/lex.c b/mk/mk/lex.c @@ -0,0 +1,146 @@ +#include "mk.h" + +static int bquote(Biobuf*, Bufblock*); + +/* + * Assemble a line skipping blank lines, comments, and eliding + * escaped newlines + */ +int +assline(Biobuf *bp, Bufblock *buf) +{ + int c; + int lastc; + + buf->current=buf->start; + while ((c = nextrune(bp, 1)) >= 0){ + switch(c) + { + case '\r': /* consumes CRs for Win95 */ + continue; + case '\n': + if (buf->current != buf->start) { + insert(buf, 0); + return 1; + } + break; /* skip empty lines */ + case '\\': + case '\'': + case '"': + rinsert(buf, c); + if (shellt->escapetoken(bp, buf, 1, c) == 0) + Exit(); + break; + case '`': + if (bquote(bp, buf) == 0) + Exit(); + break; + case '#': + lastc = '#'; + while ((c = Bgetc(bp)) != '\n') { + if (c < 0) + goto eof; + if(c != '\r') + lastc = c; + } + mkinline++; + if (lastc == '\\') + break; /* propagate escaped newlines??*/ + if (buf->current != buf->start) { + insert(buf, 0); + return 1; + } + break; + default: + rinsert(buf, c); + break; + } + } +eof: + insert(buf, 0); + return *buf->start != 0; +} + +/* + * assemble a back-quoted shell command into a buffer + */ +static int +bquote(Biobuf *bp, Bufblock *buf) +{ + int c, line, term; + int start; + + line = mkinline; + while((c = Bgetrune(bp)) == ' ' || c == '\t') + ; + if(c == '{'){ + term = '}'; /* rc 
style */ + while((c = Bgetrune(bp)) == ' ' || c == '\t') + ; + } else + term = '`'; /* sh style */ + + start = buf->current-buf->start; + for(;c > 0; c = nextrune(bp, 0)){ + if(c == term){ + insert(buf, '\n'); + insert(buf,0); + buf->current = buf->start+start; + execinit(); + execsh(0, buf->current, buf, envy, shellt, shellcmd); + return 1; + } + if(c == '\n') + break; + if(c == '\'' || c == '"' || c == '\\'){ + insert(buf, c); + if(!shellt->escapetoken(bp, buf, 1, c)) + return 0; + continue; + } + rinsert(buf, c); + } + SYNERR(line); + fprint(2, "missing closing %c after `\n", term); + return 0; +} + +/* + * get next character stripping escaped newlines + * the flag specifies whether escaped newlines are to be elided or + * replaced with a blank. + */ +int +nextrune(Biobuf *bp, int elide) +{ + int c, c2; + static int savec; + + if(savec){ + c = savec; + savec = 0; + return c; + } + + for (;;) { + c = Bgetrune(bp); + if (c == '\\') { + c2 = Bgetrune(bp); + if(c2 == '\r'){ + savec = c2; + c2 = Bgetrune(bp); + } + if (c2 == '\n') { + savec = 0; + mkinline++; + if (elide) + continue; + return ' '; + } + Bungetrune(bp); + } + if (c == '\n') + mkinline++; + return c; + } +} diff --git a/mk/mk/main.c b/mk/mk/main.c @@ -0,0 +1,287 @@ +#include "mk.h" + +#define MKFILE "mkfile" + +int debug; +Rule *rules, *metarules; +int nflag = 0; +int tflag = 0; +int iflag = 0; +int kflag = 0; +int aflag = 0; +int uflag = 0; +char *explain = 0; +Word *target1; +int nreps = 1; +Job *jobs; +Biobuf bout; +Rule *patrule; +void badusage(void); +#ifdef PROF +short buf[10000]; +#endif + +int +main(int argc, char **argv) +{ + Word *w; + char *s, *temp; + char *files[256], **f = files, **ff; + int sflag = 0; + int i; + int tfd = -1; + Biobuf tb; + Bufblock *buf; + Bufblock *whatif; + + /* + * start with a copy of the current environment variables + * instead of sharing them + */ + + Binit(&bout, 1, OWRITE); + buf = newbuf(); + whatif = 0; + USED(argc); + for(argv++; *argv && (**argv == '-'); 
argv++) + { + bufcpy(buf, argv[0], strlen(argv[0])); + insert(buf, ' '); + switch(argv[0][1]) + { + case 'a': + aflag = 1; + break; + case 'd': + if(*(s = &argv[0][2])) + while(*s) switch(*s++) + { + case 'p': debug |= D_PARSE; break; + case 'g': debug |= D_GRAPH; break; + case 'e': debug |= D_EXEC; break; + } + else + debug = 0xFFFF; + break; + case 'e': + explain = &argv[0][2]; + break; + case 'f': + if(*++argv == 0) + badusage(); + *f++ = *argv; + bufcpy(buf, argv[0], strlen(argv[0])); + insert(buf, ' '); + break; + case 'i': + iflag = 1; + break; + case 'k': + kflag = 1; + break; + case 'n': + nflag = 1; + break; + case 's': + sflag = 1; + break; + case 't': + tflag = 1; + break; + case 'u': + uflag = 1; + break; + case 'w': + if(whatif == 0) + whatif = newbuf(); + else + insert(whatif, ' '); + if(argv[0][2]) + bufcpy(whatif, &argv[0][2], strlen(&argv[0][2])); + else { + if(*++argv == 0) + badusage(); + bufcpy(whatif, &argv[0][0], strlen(&argv[0][0])); + } + break; + default: + badusage(); + } + } +#ifdef PROF + { + extern etext(); + monitor(main, etext, buf, sizeof buf, 300); + } +#endif + + if(aflag) + iflag = 1; + usage(); + syminit(); + initshell(); + initenv(); + usage(); + + /* + assignment args become null strings + */ + temp = 0; + for(i = 0; argv[i]; i++) if(utfrune(argv[i], '=')){ + bufcpy(buf, argv[i], strlen(argv[i])); + insert(buf, ' '); + if(tfd < 0){ + temp = maketmp(&tfd); + if(temp == 0) { + fprint(2, "temp file: %r\n"); + Exit(); + } + Binit(&tb, tfd, OWRITE); + } + Bprint(&tb, "%s\n", argv[i]); + *argv[i] = 0; + } + if(tfd >= 0){ + Bflush(&tb); + LSEEK(tfd, 0L, 0); + parse("command line args", tfd, 1); + remove(temp); + } + + if (buf->current != buf->start) { + buf->current--; + insert(buf, 0); + } + symlook("MKFLAGS", S_VAR, (void *) stow(buf->start)); + buf->current = buf->start; + for(i = 0; argv[i]; i++){ + if(*argv[i] == 0) continue; + if(i) + insert(buf, ' '); + bufcpy(buf, argv[i], strlen(argv[i])); + } + insert(buf, 0); + 
symlook("MKARGS", S_VAR, (void *) stow(buf->start)); + freebuf(buf); + + if(f == files){ + if(access(MKFILE, 4) == 0) + parse(MKFILE, open(MKFILE, 0), 0); + } else + for(ff = files; ff < f; ff++) + parse(*ff, open(*ff, 0), 0); + if(DEBUG(D_PARSE)){ + dumpw("default targets", target1); + dumpr("rules", rules); + dumpr("metarules", metarules); + dumpv("variables"); + } + if(whatif){ + insert(whatif, 0); + timeinit(whatif->start); + freebuf(whatif); + } + execinit(); + /* skip assignment args */ + while(*argv && (**argv == 0)) + argv++; + + catchnotes(); + if(*argv == 0){ + if(target1) + for(w = target1; w; w = w->next) + mk(w->s); + else { + fprint(2, "mk: nothing to mk\n"); + Exit(); + } + } else { + if(sflag){ + for(; *argv; argv++) + if(**argv) + mk(*argv); + } else { + Word *head, *tail, *t; + + /* fake a new rule with all the args as prereqs */ + tail = 0; + t = 0; + for(; *argv; argv++) + if(**argv){ + if(tail == 0) + tail = t = newword(*argv); + else { + t->next = newword(*argv); + t = t->next; + } + } + if(tail->next == 0) + mk(tail->s); + else { + head = newword("command line arguments"); + addrules(head, tail, strdup(""), VIR, mkinline, 0); + mk(head->s); + } + } + } + if(uflag) + prusage(); + exits(0); + return 0; +} + +void +badusage(void) +{ + + fprint(2, "Usage: mk [-f file] [-n] [-a] [-e] [-t] [-k] [-i] [-d[egp]] [targets ...]\n"); + Exit(); +} + +void * +Malloc(int n) +{ + register void *s; + + s = malloc(n); + if(!s) { + fprint(2, "mk: cannot alloc %d bytes\n", n); + Exit(); + } + return(s); +} + +void * +Realloc(void *s, int n) +{ + if(s) + s = realloc(s, n); + else + s = malloc(n); + if(!s) { + fprint(2, "mk: cannot alloc %d bytes\n", n); + Exit(); + } + return(s); +} + +void +assert(char *s, int n) +{ + if(!n){ + fprint(2, "mk: Assertion ``%s'' failed.\n", s); + Exit(); + } +} + +void +regerror(char *s) +{ + if(patrule) + fprint(2, "mk: %s:%d: regular expression error; %s\n", + patrule->file, patrule->line, s); + else + fprint(2, "mk: %s:%d: 
regular expression error; %s\n", + infile, mkinline, s); + Exit(); +} diff --git a/mk/mk/match.c b/mk/mk/match.c @@ -0,0 +1,49 @@ +#include "mk.h" + +int +match(char *name, char *template, char *stem, Shell *sh) +{ + Rune r; + int n; + + while(*name && *template){ + n = chartorune(&r, template); + if (PERCENT(r)) + break; + while (n--) + if(*name++ != *template++) + return 0; + } + if(!PERCENT(*template)) + return 0; + n = strlen(name)-strlen(template+1); + if (n < 0) + return 0; + if (strcmp(template+1, name+n)) + return 0; + strncpy(stem, name, n); + stem[n] = 0; + if(*template == '&') + return !sh->charin(stem, "./"); + return 1; +} + +void +subst(char *stem, char *template, char *dest) +{ + Rune r; + char *s; + int n; + + while(*template){ + n = chartorune(&r, template); + if (PERCENT(r)) { + template += n; + for (s = stem; *s; s++) + *dest++ = *s; + } else + while (n--) + *dest++ = *template++; + } + *dest = 0; +} diff --git a/mk/mk/mk.1 b/mk/mk/mk.1 @@ -0,0 +1,693 @@ +.deEX +.ift .ft5 +.nf +.. +.deEE +.ft1 +.fi +.. +.TH MK 1 +.SH NAME +mk \- maintain (make) related files +.SH SYNOPSIS +.B mk +[ +.B -f +.I mkfile +] ... +[ +.I option ... +] +[ +.I target ... +] +.SH DESCRIPTION +.I Mk +uses the dependency rules specified in +.I mkfile +to control the update (usually by compilation) of +.I targets +(usually files) +from the source files upon which they depend. +The +.I mkfile +(default +.LR mkfile ) +contains a +.I rule +for each target that identifies the files and other +targets upon which it depends and an +.IR sh (1) +script, a +.IR recipe , +to update the target. +The script is run if the target does not exist +or if it is older than any of the files it depends on. +.I Mkfile +may also contain +.I meta-rules +that define actions for updating implicit targets. +If no +.I target +is specified, the target of the first rule (not meta-rule) in +.I mkfile +is updated. 
+.PP +The environment variable +.B $NPROC +determines how many targets may be updated simultaneously. +Some operating systems, e.g., Plan 9, set +.B $NPROC +automatically to the number of CPUs on the current machine. +.PP +Options are: +.TP \w'\fL-d[egp]\ 'u +.B -a +Assume all targets to be out of date. +Thus, everything is updated. +.PD 0 +.TP +.BR -d [ egp ] +Produce debugging output +.RB ( p +is for parsing, +.B g +for graph building, +.B e +for execution). +.TP +.B -e +Explain why each target is made. +.TP +.B -i +Force any missing intermediate targets to be made. +.TP +.B -k +Do as much work as possible in the face of errors. +.TP +.B -n +Print, but do not execute, the commands +needed to update the targets. +.TP +.B -s +Make the command line arguments sequentially rather than in parallel. +.TP +.B -t +Touch (update the modified date of) file targets, without +executing any recipes. +.TP +.BI -w target1 , target2,... +Pretend the modify time for each +.I target +is the current time; useful in conjunction with +.B -n +to learn what updates would be triggered by +modifying the +.IR targets . +.PD +.SS The \fLmkfile\fP +A +.I mkfile +consists of +.I assignments +(described under `Environment') and +.IR rules . +A rule contains +.I targets +and a +.IR tail . +A target is a literal string +and is normally a file name. +The tail contains zero or more +.I prerequisites +and an optional +.IR recipe , +which is a +.B shell +script. +Each line of the recipe must begin with white space. +A rule takes the form +.IP +.EX +target: prereq1 prereq2 + \f2recipe using\fP prereq1, prereq2 \f2to build\fP target +.EE +.PP +When the recipe is executed, +the first character on every line is elided. +.PP +After the colon on the target line, a rule may specify +.IR attributes , +described below. +.PP +A +.I meta-rule +has a target of the form +.IB A % B +where +.I A +and +.I B +are (possibly empty) strings. 
+A meta-rule acts as a rule for any potential target whose +name matches +.IB A % B +with +.B % +replaced by an arbitrary string, called the +.IR stem . +In interpreting a meta-rule, +the stem is substituted for all occurrences of +.B % +in the prerequisite names. +In the recipe of a meta-rule, the environment variable +.B $stem +contains the string matched by the +.BR % . +For example, a meta-rule to compile a C program +might be: +.IP +.EX +%: %.c + cc -c $stem.c + ld -o $stem $stem.o +.EE +.PP +Meta-rules may contain an ampersand +.B & +rather than a percent sign +.BR % . +A +.B % +matches a maximal length string of any characters; +an +.B & +matches a maximal length string of any characters except period +or slash. +.PP +The text of the +.I mkfile +is processed as follows. +Lines beginning with +.B < +followed by a file name are replaced by the contents of the named +file. +Lines beginning with +.B "<|" +followed by a file name are replaced by the output +of the execution of the named +file. +Blank lines and comments, which run from unquoted +.B # +characters to the following newline, are deleted. +The character sequence backslash-newline is deleted, +so long lines in +.I mkfile +may be folded. +Non-recipe lines are processed by substituting for +.BI `{ command } +the output of the +.I command +when run by +.IR sh . +References to variables are replaced by the variables' values. +Special characters may be quoted using single quotes +.BR \&'' +as in +.IR sh (1). +.PP +Assignments and rules are distinguished by +the first unquoted occurrence of +.B : +(rule) +or +.B = +(assignment). +.PP +A later rule may modify or override an existing rule under the +following conditions: +.TP +\- +If the targets of the rules exactly match and one rule +contains only a prerequisite clause and no recipe, the +clause is added to the prerequisites of the other rule. +If either or both targets are virtual, the recipe is +always executed. 
+.TP +\- +If the targets of the rules match exactly and the +prerequisites do not match and both rules +contain recipes, +.I mk +reports an ``ambiguous recipe'' error. +.TP +\- +If the target and prerequisites of both rules match exactly, +the second rule overrides the first. +.SS Environment +Rules may make use of +shell +environment variables. +A legal reference of the form +.B $OBJ +or +.B ${name} +is expanded as in +.IR sh (1). +A reference of the form +.BI ${name: A % B = C\fL%\fID\fL}\fR, +where +.I A, B, C, D +are (possibly empty) strings, +has the value formed by expanding +.B $name +and substituting +.I C +for +.I A +and +.I D +for +.I B +in each word in +.B $name +that matches pattern +.IB A % B\f1. +.PP +Variables can be set by +assignments of the form +.I + var\fL=\fR[\fIattr\fL=\fR]\fIvalue\fR +.br +Blanks in the +.I value +break it into words. +Such variables are exported +to the environment of +recipes as they are executed, unless +.BR U , +the only legal attribute +.IR attr , +is present. +The initial value of a variable is +taken from (in increasing order of precedence) +the default values below, +.I mk's +environment, the +.IR mkfiles , +and any command line assignment as an argument to +.IR mk . +A variable assignment argument overrides the first (but not any subsequent) +assignment to that variable. +.PP +The variable +.B MKFLAGS +contains all the option arguments (arguments starting with +.L - +or containing +.LR = ) +and +.B MKARGS +contains all the targets in the call to +.IR mk . +.PP +The variable +.B MKSHELL +contains the shell command line +.I mk +uses to run recipes. +If the first word of the command ends in +.B rc +or +.BR rcsh , +.I mk +uses +.IR rc (1)'s +quoting rules; otherwise it uses +.IR sh (1)'s. 
+The +.B MKSHELL +variable is consulted when the mkfile is read, not when it is executed, +so that different shells can be used within a single mkfile: +.IP +.EX +MKSHELL=$PLAN9/bin/rc +use-rc:V: + for(i in a b c) echo $i + +MKSHELL=sh +use-sh:V: + for i in a b c; do echo $i; done +.EE +.LP +Mkfiles included via +.B < +or +.B <| +.RI ( q.v. ) +see their own private copy of +.BR MKSHELL , +which always starts set to +.B sh . +.PP +Dynamic information may be included in the mkfile by using a line of the form +.IP +\fR<|\fIcommand\fR \fIargs\fR +.LP +This runs the command +.I command +with the given arguments +.I args +and pipes its standard output to +.I mk +to be included as part of the mkfile. For instance, the Inferno kernels +use this technique +to run a shell command with an awk script and a configuration +file as arguments in order for +the +.I awk +script to process the file and output a set of variables and their values. +.SS Execution +.PP +During execution, +.I mk +determines which targets must be updated, and in what order, +to build the +.I names +specified on the command line. +It then runs the associated recipes. +.PP +A target is considered up to date if it has no prerequisites or +if all its prerequisites are up to date and it is newer +than all its prerequisites. +Once the recipe for a target has executed, the target is +considered up to date. +.PP +The date stamp +used to determine if a target is up to date is computed +differently for different types of targets. +If a target is +.I virtual +(the target of a rule with the +.B V +attribute), +its date stamp is initially zero; when the target is +updated the date stamp is set to +the most recent date stamp of its prerequisites. +Otherwise, if a target does not exist as a file, +its date stamp is set to the most recent date stamp of its prerequisites, +or zero if it has no prerequisites. +Otherwise, the target is the name of a file and +the target's date stamp is always that file's modification date. 
+The date stamp is computed when the target is needed in +the execution of a rule; it is not a static value. +.PP +Nonexistent targets that have prerequisites +and are themselves prerequisites are treated specially. +Such a target +.I t +is given the date stamp of its most recent prerequisite +and if this causes all the targets which have +.I t +as a prerequisite to be up to date, +.I t +is considered up to date. +Otherwise, +.I t +is made in the normal fashion. +The +.B -i +flag overrides this special treatment. +.PP +Files may be made in any order that respects +the preceding restrictions. +.PP +A recipe is executed by supplying the recipe as standard input to +the command +.BR /bin/sh . +(Note that unlike +.IR make , +.I mk +feeds the entire recipe to the shell rather than running each line +of the recipe separately.) +The environment is augmented by the following variables: +.TP 14 +.B $alltarget +all the targets of this rule. +.TP +.B $newprereq +the prerequisites that caused this rule to execute. +.TP +.B $newmember +the prerequisites that are members of an aggregate +that caused this rule to execute. +When the prerequisites of a rule are members of an +aggregate, +.B $newprereq +contains the name of the aggregate and out of date +members, while +.B $newmember +contains only the name of the members. +.TP +.B $nproc +the process slot for this recipe. +It satisfies +.RB 0≤ $nproc < $NPROC . +.TP +.B $pid +the process id for the +.I mk +executing the recipe. +.TP +.B $prereq +all the prerequisites for this rule. +.TP +.B $stem +if this is a meta-rule, +.B $stem +is the string that matched +.B % +or +.BR & . +Otherwise, it is empty. +For regular expression meta-rules (see below), the variables +.LR stem0 ", ...," +.L stem9 +are set to the corresponding subexpressions. +.TP +.B $target +the targets for this rule that need to be remade. +.PP +These variables are available only during the execution of a recipe, +not while evaluating the +.IR mkfile . 
+.PP +Unless the rule has the +.B Q +attribute, +the recipe is printed prior to execution +with recognizable environment variables expanded. +Commands returning error status +cause +.I mk +to terminate. +.PP +Recipes and backquoted +.B rc +commands in places such as assignments +execute in a copy of +.I mk's +environment; changes they make to +environment variables are not visible from +.IR mk . +.PP +Variable substitution in a rule is done when +the rule is read; variable substitution in the recipe is done +when the recipe is executed. For example: +.IP +.EX +bar=a.c +foo: $bar + $CC -o foo $bar +bar=b.c +.EE +.PP +will compile +.B b.c +into +.BR foo , +if +.B a.c +is newer than +.BR foo . +.SS Aggregates +Names of the form +.IR a ( b ) +refer to member +.I b +of the aggregate +.IR a . +.SS Attributes +The colon separating the target from the prerequisites +may be +immediately followed by +.I attributes +and another colon. +The attributes are: +.TP +.B D +If the recipe exits with a non-null status, the target is deleted. +.TP +.B E +Continue execution if the recipe draws errors. +.TP +.B N +If there is no recipe, the target has its time updated. +.TP +.B n +The rule is a meta-rule that cannot be a target of a virtual rule. +Only files match the pattern in the target. +.TP +.B P +The characters after the +.B P +until the terminating +.B : +are taken as a program name. +It will be invoked as +.B "sh -c prog 'arg1' 'arg2'" +and should return a zero exit status +if and only if arg1 is up to date with respect to arg2. +Date stamps are still propagated in the normal way. +.TP +.B Q +The recipe is not printed prior to execution. +.TP +.B R +The rule is a meta-rule using regular expressions. +In the rule, +.B % +has no special meaning. +The target is interpreted as a regular expression as defined in +.IR regexp9 (7). +The prerequisites may contain references +to subexpressions in form +.BI \e n\f1, +as in the substitute command of +.IR sed (1). 
+.TP +.B U +The targets are considered to have been updated +even if the recipe did not do so. +.TP +.B V +The targets of this rule are marked as virtual. +They are distinct from files of the same name. +.PD +.SH EXAMPLES +A simple mkfile to compile a program: +.IP +.EX +.ta 8n +8n +8n +8n +8n +8n +8n +</$objtype/mkfile + +prog: a.$O b.$O c.$O + $LD $LDFLAGS -o $target $prereq + +%.$O: %.c + $CC $CFLAGS $stem.c +.EE +.PP +Override flag settings in the mkfile: +.IP +.EX +% mk target 'CFLAGS=-S -w' +.EE +.PP +Maintain a library: +.IP +.EX +libc.a(%.$O):N: %.$O +libc.a: libc.a(abs.$O) libc.a(access.$O) libc.a(alarm.$O) ... + ar r libc.a $newmember +.EE +.PP +String expression variables to derive names from a master list: +.IP +.EX +NAMES=alloc arc bquote builtins expand main match mk var word +OBJ=${NAMES:%=%.$O} +.EE +.PP +Regular expression meta-rules: +.IP +.EX +([^/]*)/(.*)\e.$O:R: \e1/\e2.c + cd $stem1; $CC $CFLAGS $stem2.c +.EE +.PP +A correct way to deal with +.IR yacc (1) +grammars. +The file +.B lex.c +includes the file +.B x.tab.h +rather than +.B y.tab.h +in order to reflect changes in content, not just modification time. +.IP +.EX +lex.$O: x.tab.h +x.tab.h: y.tab.h + cmp -s x.tab.h y.tab.h || cp y.tab.h x.tab.h +y.tab.c y.tab.h: gram.y + $YACC -d gram.y +.EE +.PP +The above example could also use the +.B P +attribute for the +.B x.tab.h +rule: +.IP +.EX +x.tab.h:Pcmp -s: y.tab.h + cp y.tab.h x.tab.h +.EE +.SH SOURCE +.B http://swtch.com/plan9port/unix +.SH SEE ALSO +.IR sh (1), +.IR regexp9 (7) +.PP +A. Hume, +``Mk: a Successor to Make'' +(Tenth Edition Research Unix Manuals). +.PP +Andrew G. Hume and Bob Flandrena, +``Maintaining Files on Plan 9 with Mk''. +.BR DOCPREFIX/doc/mk.pdf . +.SH HISTORY +Andrew Hume wrote +.I mk +for Tenth Edition Research Unix. +It was later ported to Plan 9. +This software is a port of the Plan 9 version back to Unix. +.SH BUGS +Identical recipes for regular expression meta-rules only have one target. 
+.PP +Seemingly appropriate input like +.B CFLAGS=-DHZ=60 +is parsed as an erroneous attribute; correct it by inserting +a space after the first +.LR = . +.PP +The recipes printed by +.I mk +before being passed to +the shell +for execution are sometimes erroneously expanded +for printing. Don't trust what's printed; rely +on what the shell +does. diff --git a/mk/mk/mk.c b/mk/mk/mk.c @@ -0,0 +1,234 @@ +#include "mk.h" + +int runerrs; + +void +mk(char *target) +{ + Node *node; + int did = 0; + + nproc(); /* it can be updated dynamically */ + nrep(); /* it can be updated dynamically */ + runerrs = 0; + node = graph(target); + if(DEBUG(D_GRAPH)){ + dumpn("new target\n", node); + Bflush(&bout); + } + clrmade(node); + while(node->flags&NOTMADE){ + if(work(node, (Node *)0, (Arc *)0)) + did = 1; /* found something to do */ + else { + if(waitup(1, (int *)0) > 0){ + if(node->flags&(NOTMADE|BEINGMADE)){ + assert("must be run errors", runerrs); + break; /* nothing more waiting */ + } + } + } + } + if(node->flags&BEINGMADE) + waitup(-1, (int *)0); + while(jobs) + waitup(-2, (int *)0); + assert("target didn't get done", runerrs || (node->flags&MADE)); + if(did == 0) + Bprint(&bout, "mk: '%s' is up to date\n", node->name); +} + +void +clrmade(Node *n) +{ + Arc *a; + + n->flags &= ~(CANPRETEND|PRETENDING); + if(strchr(n->name, '(') ==0 || n->time) + n->flags |= CANPRETEND; + MADESET(n, NOTMADE); + for(a = n->prereqs; a; a = a->next) + if(a->n) + clrmade(a->n); +} + +static void +unpretend(Node *n) +{ + MADESET(n, NOTMADE); + n->flags &= ~(CANPRETEND|PRETENDING); + n->time = 0; +} + +static char* +dir(void) +{ + static char buf[1024]; + + return getcwd(buf, sizeof buf); +} + +int +work(Node *node, Node *p, Arc *parc) +{ + Arc *a, *ra; + int weoutofdate; + int ready; + int did = 0; + + /*print("work(%s) flags=0x%x time=%ld\n", node->name, node->flags, node->time); */ + if(node->flags&BEINGMADE) + return(did); + if((node->flags&MADE) && (node->flags&PRETENDING) && p && outofdate(p, 
parc, 0)){ + if(explain) + fprint(1, "unpretending %s(%ld) because %s is out of date(%ld)\n", + node->name, node->time, p->name, p->time); + unpretend(node); + } + /* + have a look if we are pretending in case + someone has been unpretended out from underneath us + */ + if(node->flags&MADE){ + if(node->flags&PRETENDING){ + node->time = 0; + }else + return(did); + } + /* consider no prerequsite case */ + if(node->prereqs == 0){ + if(node->time == 0){ + fprint(2, "mk: don't know how to make '%s' in %s\n", node->name, dir()); + if(kflag){ + node->flags |= BEINGMADE; + runerrs++; + } else + Exit(); + } else + MADESET(node, MADE); + return(did); + } + /* + now see if we are out of date or what + */ + ready = 1; + weoutofdate = aflag; + ra = 0; + for(a = node->prereqs; a; a = a->next) + if(a->n){ + did = work(a->n, node, a) || did; + if(a->n->flags&(NOTMADE|BEINGMADE)) + ready = 0; + if(outofdate(node, a, 0)){ + weoutofdate = 1; + if((ra == 0) || (ra->n == 0) + || (ra->n->time < a->n->time)) + ra = a; + } + } else { + if(node->time == 0){ + if(ra == 0) + ra = a; + weoutofdate = 1; + } + } + if(ready == 0) /* can't do anything now */ + return(did); + if(weoutofdate == 0){ + MADESET(node, MADE); + return(did); + } + /* + can we pretend to be made? + */ + if((iflag == 0) && (node->time == 0) && (node->flags&(PRETENDING|CANPRETEND)) + && p && ra->n && !outofdate(p, ra, 0)){ + node->flags &= ~CANPRETEND; + MADESET(node, MADE); + if(explain && ((node->flags&PRETENDING) == 0)) + fprint(1, "pretending %s has time %ld\n", node->name, node->time); + node->flags |= PRETENDING; + return(did); + } + /* + node is out of date and we REALLY do have to do something. + quickly rescan for pretenders + */ + for(a = node->prereqs; a; a = a->next) + if(a->n && (a->n->flags&PRETENDING)){ + if(explain) + Bprint(&bout, "unpretending %s because of %s because of %s\n", + a->n->name, node->name, ra->n? 
ra->n->name : "rule with no prerequisites"); + + unpretend(a->n); + did = work(a->n, node, a) || did; + ready = 0; + } + if(ready == 0) /* try later unless nothing has happened for -k's sake */ + return(did || work(node, p, parc)); + did = dorecipe(node) || did; + return(did); +} + +void +update(int fake, Node *node) +{ + Arc *a; + + MADESET(node, fake? BEINGMADE : MADE); + if(((node->flags&VIRTUAL) == 0) && (access(node->name, 0) == 0)){ + node->time = timeof(node->name, 1); + node->flags &= ~(CANPRETEND|PRETENDING); + for(a = node->prereqs; a; a = a->next) + if(a->prog) + outofdate(node, a, 1); + } else { + node->time = 1; + for(a = node->prereqs; a; a = a->next) + if(a->n && outofdate(node, a, 1)) + node->time = a->n->time; + } +/* print("----node %s time=%ld flags=0x%x\n", node->name, node->time, node->flags);*/ +} + +static int +pcmp(char *prog, char *p, char *q, Shell *sh, Word *shcmd) +{ + char buf[3*NAMEBLOCK]; + int pid; + + Bflush(&bout); + snprint(buf, sizeof buf, "%s '%s' '%s'\n", prog, p, q); + pid = pipecmd(buf, 0, 0, sh, shcmd); + while(waitup(-3, &pid) >= 0) + ; + return(pid? 
2:1); +} + +int +outofdate(Node *node, Arc *arc, int eval) +{ + char buf[3*NAMEBLOCK], *str; + Symtab *sym; + int ret; + + str = 0; + if(arc->prog){ + snprint(buf, sizeof buf, "%s%c%s", node->name, 0377, arc->n->name); + sym = symlook(buf, S_OUTOFDATE, 0); + if(sym == 0 || eval){ + if(sym == 0) + str = strdup(buf); + ret = pcmp(arc->prog, node->name, arc->n->name, arc->r->shellt, arc->r->shellcmd); + if(sym) + sym->u.value = ret; + else + symlook(str, S_OUTOFDATE, (void *)(uintptr)ret); + } else + ret = sym->u.value; + return(ret-1); + } else if(strchr(arc->n->name, '(') && arc->n->time == 0) /* missing archive member */ + return 1; + else + return node->time <= arc->n->time; +} diff --git a/mk/mk/mk.h b/mk/mk/mk.h @@ -0,0 +1,185 @@ +#include "sys.h" + +#undef assert +#define assert mkassert +extern Biobuf bout; + +typedef struct Bufblock +{ + struct Bufblock *next; + char *start; + char *end; + char *current; +} Bufblock; + +typedef struct Word +{ + char *s; + struct Word *next; +} Word; + +typedef struct Envy +{ + char *name; + Word *values; +} Envy; + +extern Envy *envy; + +typedef struct Shell +{ + char *name; + char *termchars; /* used in parse.c to isolate assignment attribute */ + int iws; /* inter-word separator in environment */ + char *(*charin)(char*, char*); /* search for unescaped characters */ + char *(*expandquote)(char*, Rune, Bufblock*); /* extract escaped token */ + int (*escapetoken)(Biobuf*, Bufblock*, int, int); /* input escaped token */ + char *(*copyq)(char*, Rune, Bufblock*); /* check for quoted strings */ + int (*matchname)(char*); /* does name match */ +} Shell; + +typedef struct Rule +{ + char *target; /* one target */ + Word *tail; /* constituents of targets */ + char *recipe; /* do it ! 
*/ + short attr; /* attributes */ + short line; /* source line */ + char *file; /* source file */ + Word *alltargets; /* all the targets */ + int rule; /* rule number */ + Reprog *pat; /* reg exp goo */ + char *prog; /* to use in out of date */ + struct Rule *chain; /* hashed per target */ + struct Rule *next; + Shell *shellt; /* shell to use with this rule */ + Word *shellcmd; +} Rule; + +extern Rule *rules, *metarules, *patrule; + +/* Rule.attr */ +#define META 0x0001 +#define UNUSED 0x0002 +#define UPD 0x0004 +#define QUIET 0x0008 +#define VIR 0x0010 +#define REGEXP 0x0020 +#define NOREC 0x0040 +#define DEL 0x0080 +#define NOVIRT 0x0100 + +#define NREGEXP 10 + +typedef struct Arc +{ + short flag; + struct Node *n; + Rule *r; + char *stem; + char *prog; + char *match[NREGEXP]; + struct Arc *next; +} Arc; + + /* Arc.flag */ +#define TOGO 1 + +typedef struct Node +{ + char *name; + long time; + unsigned short flags; + Arc *prereqs; + struct Node *next; /* list for a rule */ +} Node; + + /* Node.flags */ +#define VIRTUAL 0x0001 +#define CYCLE 0x0002 +#define READY 0x0004 +#define CANPRETEND 0x0008 +#define PRETENDING 0x0010 +#define NOTMADE 0x0020 +#define BEINGMADE 0x0040 +#define MADE 0x0080 +#define MADESET(n,m) n->flags = (n->flags&~(NOTMADE|BEINGMADE|MADE))|(m) +#define PROBABLE 0x0100 +#define VACUOUS 0x0200 +#define NORECIPE 0x0400 +#define DELETE 0x0800 +#define NOMINUSE 0x1000 + +typedef struct Job +{ + Rule *r; /* master rule for job */ + Node *n; /* list of node targets */ + char *stem; + char **match; + Word *p; /* prerequistes */ + Word *np; /* new prerequistes */ + Word *t; /* targets */ + Word *at; /* all targets */ + int nproc; /* slot number */ + struct Job *next; +} Job; +extern Job *jobs; + +typedef struct Symtab +{ + short space; + char *name; + union { + void *ptr; + uintptr value; + } u; + struct Symtab *next; +} Symtab; + +enum { + S_VAR, /* variable -> value */ + S_TARGET, /* target -> rule */ + S_TIME, /* file -> time */ + S_PID, /* pid -> 
products */ + S_NODE, /* target name -> node */ + S_AGG, /* aggregate -> time */ + S_BITCH, /* bitched about aggregate not there */ + S_NOEXPORT, /* var -> noexport */ + S_OVERRIDE, /* can't override */ + S_OUTOFDATE, /* n1\377n2 -> 2(outofdate) or 1(not outofdate) */ + S_MAKEFILE, /* target -> node */ + S_MAKEVAR, /* dumpable mk variable */ + S_EXPORTED, /* var -> current exported value */ + S_WESET, /* variable; we set in the mkfile */ + S_INTERNAL /* an internal mk variable (e.g., stem, target) */ +}; + +extern int debug; +extern int nflag, tflag, iflag, kflag, aflag, mflag; +extern int mkinline; +extern char *infile; +extern int nreps; +extern char *explain; +extern Shell *shellt; +extern Word *shellcmd; + +extern Shell shshell, rcshell; + +#define SYNERR(l) (fprint(2, "mk: %s:%d: syntax error; ", infile, ((l)>=0)?(l):mkinline)) +#define RERR(r) (fprint(2, "mk: %s:%d: rule error; ", (r)->file, (r)->line)) +#define NAMEBLOCK 1000 +#define BIGBLOCK 20000 + +#define SEP(c) (((c)==' ')||((c)=='\t')||((c)=='\n')) +#define WORDCHR(r) ((r) > ' ' && !utfrune("!\"#$%&'()*+,-./:;<=>?@[\\]^`{|}~", (r))) + +#define DEBUG(x) (debug&(x)) +#define D_PARSE 0x01 +#define D_GRAPH 0x02 +#define D_EXEC 0x04 + +#define LSEEK(f,o,p) seek(f,o,p) + +#define PERCENT(ch) (((ch) == '%') || ((ch) == '&')) + +#include "fns.h" diff --git a/mk/mk/mkfile b/mk/mk/mkfile @@ -0,0 +1,35 @@ +BIN = mk +OBJ = \ + arc.o\ + archive.o\ + bufblock.o\ + env.o\ + file.o\ + graph.o\ + job.o\ + lex.o\ + main.o\ + match.o\ + mk.o\ + parse.o\ + recipe.o\ + rc.o\ + rule.o\ + run.o\ + sh.o\ + shell.o\ + shprint.o\ + symtab.o\ + var.o\ + varsub.o\ + word.o\ + unix.o\ + +LOCAL_CFLAGS = -I"$PREFIX"/include +LOCAL_LDFLAGS = -L"$PREFIX"/lib +INSTALL_BIN = mk +INSTALL_MAN1 = mk.1 +DEPS = libbio libfmt libutf libregexp + +<$mkbuild/mk.default + diff --git a/mk/mk/parse.c b/mk/mk/parse.c @@ -0,0 +1,318 @@ +#include "mk.h" + +char *infile; +int mkinline; +static int rhead(char *, Word **, Word **, int *, char **); 
+static char *rbody(Biobuf*); +extern Word *target1; + +void +parse(char *f, int fd, int varoverride) +{ + int hline; + char *body; + Word *head, *tail; + int attr, set, pid; + char *prog, *p; + int newfd; + Biobuf in; + Bufblock *buf; + char *err; + + if(fd < 0){ + fprint(2, "open %s: %r\n", f); + Exit(); + } + pushshell(); + ipush(); + infile = strdup(f); + mkinline = 1; + Binit(&in, fd, OREAD); + buf = newbuf(); + while(assline(&in, buf)){ + hline = mkinline; + switch(rhead(buf->start, &head, &tail, &attr, &prog)) + { + case '<': + p = wtos(tail, ' '); + if(*p == 0){ + SYNERR(-1); + fprint(2, "missing include file name\n"); + Exit(); + } + newfd = open(p, OREAD); + if(newfd < 0){ + fprint(2, "warning: skipping missing include file %s: %r\n", p); + } else + parse(p, newfd, 0); + break; + case '|': + p = wtos(tail, ' '); + if(*p == 0){ + SYNERR(-1); + fprint(2, "missing include program name\n"); + Exit(); + } + execinit(); + pid=pipecmd(p, envy, &newfd, shellt, shellcmd); + if(newfd < 0){ + fprint(2, "warning: skipping missing program file %s: %r\n", p); + } else + parse(p, newfd, 0); + while(waitup(-3, &pid) >= 0) + ; + if(pid != 0){ + fprint(2, "bad include program status\n"); + Exit(); + } + break; + case ':': + body = rbody(&in); + addrules(head, tail, body, attr, hline, prog); + break; + case '=': + if(head->next){ + SYNERR(-1); + fprint(2, "multiple vars on left side of assignment\n"); + Exit(); + } + if(symlook(head->s, S_OVERRIDE, 0)){ + set = varoverride; + } else { + set = 1; + if(varoverride) + symlook(head->s, S_OVERRIDE, (void *)""); + } + if(set){ +/* +char *cp; +dumpw("tail", tail); +cp = wtos(tail, ' '); print("assign %s to %s\n", head->s, cp); free(cp); +*/ + setvar(head->s, (void *) tail); + symlook(head->s, S_WESET, (void *)""); + if(strcmp(head->s, "MKSHELL") == 0){ + if((err = setshell(tail)) != nil){ + SYNERR(hline); + fprint(2, "%s\n", err); + Exit(); + break; + } + } + } + if(attr) + symlook(head->s, S_NOEXPORT, (void *)""); + break; + 
default: + SYNERR(hline); + fprint(2, "expected one of :<=\n"); + Exit(); + break; + } + } + close(fd); + freebuf(buf); + ipop(); + popshell(); +} + +void +addrules(Word *head, Word *tail, char *body, int attr, int hline, char *prog) +{ + Word *w; + + assert("addrules args", head && body); + /* tuck away first non-meta rule as default target*/ + if(target1 == 0 && !(attr&REGEXP)){ + for(w = head; w; w = w->next) + if(shellt->charin(w->s, "%&")) + break; + if(w == 0) + target1 = wdup(head); + } + for(w = head; w; w = w->next) + addrule(w->s, tail, body, head, attr, hline, prog); +} + +static int +rhead(char *line, Word **h, Word **t, int *attr, char **prog) +{ + char *p; + char *pp; + int sep; + Rune r; + int n; + Word *w; + + p = shellt->charin(line,":=<"); + if(p == 0) + return('?'); + sep = *p; + *p++ = 0; + if(sep == '<' && *p == '|'){ + sep = '|'; + p++; + } + *attr = 0; + *prog = 0; + if(sep == '='){ + pp = shellt->charin(p, shellt->termchars); /* termchars is shell-dependent */ + if (pp && *pp == '=') { + while (p != pp) { + n = chartorune(&r, p); + switch(r) + { + default: + SYNERR(-1); + fprint(2, "unknown attribute '%c'\n",*p); + Exit(); + case 'U': + *attr = 1; + break; + } + p += n; + } + p++; /* skip trailing '=' */ + } + } + if((sep == ':') && *p && (*p != ' ') && (*p != '\t')){ + while (*p) { + n = chartorune(&r, p); + if (r == ':') + break; + p += n; + switch(r) + { + default: + SYNERR(-1); + fprint(2, "unknown attribute '%c'\n", p[-1]); + Exit(); + case 'D': + *attr |= DEL; + break; + case 'E': + *attr |= NOMINUSE; + break; + case 'n': + *attr |= NOVIRT; + break; + case 'N': + *attr |= NOREC; + break; + case 'P': + pp = utfrune(p, ':'); + if (pp == 0 || *pp == 0) + goto eos; + *pp = 0; + *prog = strdup(p); + *pp = ':'; + p = pp; + break; + case 'Q': + *attr |= QUIET; + break; + case 'R': + *attr |= REGEXP; + break; + case 'U': + *attr |= UPD; + break; + case 'V': + *attr |= VIR; + break; + } + } + if (*p++ != ':') { + eos: + SYNERR(-1); + fprint(2, 
"missing trailing :\n"); + Exit(); + } + } + *h = w = stow(line); + if(*w->s == 0 && sep != '<' && sep != '|' && sep != 'S') { + SYNERR(mkinline-1); + fprint(2, "no var on left side of assignment/rule\n"); + Exit(); + } + *t = stow(p); + return(sep); +} + +static char * +rbody(Biobuf *in) +{ + Bufblock *buf; + int r, lastr; + char *p; + + lastr = '\n'; + buf = newbuf(); + for(;;){ + r = Bgetrune(in); + if (r < 0) + break; + if (lastr == '\n') { + if (r == '#') + rinsert(buf, r); + else if (r != ' ' && r != '\t') { + Bungetrune(in); + break; + } + } else + rinsert(buf, r); + lastr = r; + if (r == '\n') + mkinline++; + } + insert(buf, 0); + p = strdup(buf->start); + freebuf(buf); + return p; +} + +struct input +{ + char *file; + int line; + struct input *next; +}; +static struct input *inputs = 0; + +void +ipush(void) +{ + struct input *in, *me; + + me = (struct input *)Malloc(sizeof(*me)); + me->file = infile; + me->line = mkinline; + me->next = 0; + if(inputs == 0) + inputs = me; + else { + for(in = inputs; in->next; ) + in = in->next; + in->next = me; + } +} + +void +ipop(void) +{ + struct input *in, *me; + + assert("pop input list", inputs != 0); + if(inputs->next == 0){ + me = inputs; + inputs = 0; + } else { + for(in = inputs; in->next->next; ) + in = in->next; + me = in->next; + in->next = 0; + } + infile = me->file; + mkinline = me->line; + free((char *)me); +} diff --git a/mk/mk/rc.c b/mk/mk/rc.c @@ -0,0 +1,194 @@ +#include "mk.h" + +/* + * This file contains functions that depend on rc's syntax. Most + * of the routines extract strings observing rc's escape conventions + */ + + +/* + * skip a token in single quotes. 
+ */ +static char * +squote(char *cp) +{ + Rune r; + int n; + + while(*cp){ + n = chartorune(&r, cp); + if(r == '\'') { + n += chartorune(&r, cp+n); + if(r != '\'') + return(cp); + } + cp += n; + } + SYNERR(-1); /* should never occur */ + fprint(2, "missing closing '\n"); + return 0; +} + +/* + * search a string for characters in a pattern set + * characters in quotes and variable generators are escaped + */ +char * +rccharin(char *cp, char *pat) +{ + Rune r; + int n, vargen; + + vargen = 0; + while(*cp){ + n = chartorune(&r, cp); + switch(r){ + case '\'': /* skip quoted string */ + cp = squote(cp+1); /* n must = 1 */ + if(!cp) + return 0; + break; + case '$': + if(*(cp+1) == '{') + vargen = 1; + break; + case '}': + if(vargen) + vargen = 0; + else if(utfrune(pat, r)) + return cp; + break; + default: + if(vargen == 0 && utfrune(pat, r)) + return cp; + break; + } + cp += n; + } + if(vargen){ + SYNERR(-1); + fprint(2, "missing closing } in pattern generator\n"); + } + return 0; +} + +/* + * extract an escaped token. Possible escape chars are single-quote, + * double-quote,and backslash. Only the first is valid for rc. the + * others are just inserted into the receiving buffer. + */ +char* +rcexpandquote(char *s, Rune r, Bufblock *b) +{ + if (r != '\'') { + rinsert(b, r); + return s; + } + + while(*s){ + s += chartorune(&r, s); + if(r == '\'') { + if(*s == '\'') + s++; + else + return s; + } + rinsert(b, r); + } + return 0; +} + +/* + * Input an escaped token. Possible escape chars are single-quote, + * double-quote and backslash. Only the first is a valid escape for + * rc; the others are just inserted into the receiving buffer. 
+ */ +int +rcescapetoken(Biobuf *bp, Bufblock *buf, int preserve, int esc) +{ + int c, line; + + if(esc != '\'') + return 1; + + line = mkinline; + while((c = nextrune(bp, 0)) > 0){ + if(c == '\''){ + if(preserve) + rinsert(buf, c); + c = Bgetrune(bp); + if (c < 0) + break; + if(c != '\''){ + Bungetrune(bp); + return 1; + } + } + rinsert(buf, c); + } + SYNERR(line); fprint(2, "missing closing %c\n", esc); + return 0; +} + +/* + * copy a single-quoted string; s points to char after opening quote + */ +static char * +copysingle(char *s, Bufblock *buf) +{ + Rune r; + + while(*s){ + s += chartorune(&r, s); + rinsert(buf, r); + if(r == '\'') + break; + } + return s; +} +/* + * check for quoted strings. backquotes are handled here; single quotes above. + * s points to char after opening quote, q. + */ +char * +rccopyq(char *s, Rune q, Bufblock *buf) +{ + if(q == '\'') /* copy quoted string */ + return copysingle(s, buf); + + if(q != '`') /* not quoted */ + return s; + + while(*s){ /* copy backquoted string */ + s += chartorune(&q, s); + rinsert(buf, q); + if(q == '}') + break; + if(q == '\'') + s = copysingle(s, buf); /* copy quoted string */ + } + return s; +} + +static int +rcmatchname(char *name) +{ + char *p; + + if((p = strrchr(name, '/')) != nil) + name = p+1; + if(name[0] == 'r' && name[1] == 'c') + return 1; + return 0; +} + +Shell rcshell = { + "rc", + "'= \t", + '\1', + rccharin, + rcexpandquote, + rcescapetoken, + rccopyq, + rcmatchname +}; diff --git a/mk/mk/recipe.c b/mk/mk/recipe.c @@ -0,0 +1,117 @@ +#include "mk.h" + +int +dorecipe(Node *node) +{ + char buf[BIGBLOCK]; + register Node *n; + Rule *r = 0; + Arc *a, *aa; + Word head, ahead, lp, ln, *w, *ww, *aw; + Symtab *s; + int did = 0; + + aa = 0; + /* + pick up the rule + */ + for(a = node->prereqs; a; a = a->next) + if(*a->r->recipe) + r = (aa = a)->r; + /* + no recipe? go to buggery! 
+ */ + if(r == 0){ + if(!(node->flags&VIRTUAL) && !(node->flags&NORECIPE)){ + fprint(2, "mk: no recipe to make '%s'\n", node->name); + Exit(); + } + if(strchr(node->name, '(') && node->time == 0) + MADESET(node, MADE); + else + update(0, node); + if(tflag){ + if(!(node->flags&VIRTUAL)) + touch(node->name); + else if(explain) + Bprint(&bout, "no touch of virtual '%s'\n", node->name); + } + return(did); + } + /* + build the node list + */ + node->next = 0; + head.next = 0; + ww = &head; + ahead.next = 0; + aw = &ahead; + if(r->attr&REGEXP){ + ww->next = newword(node->name); + aw->next = newword(node->name); + } else { + for(w = r->alltargets; w; w = w->next){ + if(r->attr&META) + subst(aa->stem, w->s, buf); + else + strcpy(buf, w->s); + aw->next = newword(buf); + aw = aw->next; + if((s = symlook(buf, S_NODE, 0)) == 0) + continue; /* not a node we are interested in */ + n = s->u.ptr; + if(aflag == 0 && n->time) { + for(a = n->prereqs; a; a = a->next) + if(a->n && outofdate(n, a, 0)) + break; + if(a == 0) + continue; + } + ww->next = newword(buf); + ww = ww->next; + if(n == node) continue; + n->next = node->next; + node->next = n; + } + } + for(n = node; n; n = n->next) + if((n->flags&READY) == 0) + return(did); + /* + gather the params for the job + */ + lp.next = ln.next = 0; + for(n = node; n; n = n->next){ + for(a = n->prereqs; a; a = a->next){ + if(a->n){ + addw(&lp, a->n->name); + if(outofdate(n, a, 0)){ + addw(&ln, a->n->name); + if(explain) + fprint(1, "%s(%ld) < %s(%ld)\n", + n->name, n->time, a->n->name, a->n->time); + } + } else { + if(explain) + fprint(1, "%s has no prerequisites\n", + n->name); + } + } + MADESET(n, BEINGMADE); + } + /*print("lt=%s ln=%s lp=%s\n",wtos(head.next, ' '),wtos(ln.next, ' '),wtos(lp.next, ' '));*/ + run(newjob(r, node, aa->stem, aa->match, lp.next, ln.next, head.next, ahead.next)); + return(1); +} + +void +addw(Word *w, char *s) +{ + Word *lw; + + for(lw = w; w = w->next; lw = w){ + if(strcmp(s, w->s) == 0) + return; + } + 
lw->next = newword(s); +} diff --git a/mk/mk/rule.c b/mk/mk/rule.c @@ -0,0 +1,112 @@ +#include "mk.h" + +static Rule *lr, *lmr; +static int rcmp(Rule *r, char *target, Word *tail); +static int nrules = 0; + +void +addrule(char *head, Word *tail, char *body, Word *ahead, int attr, int hline, char *prog) +{ + Rule *r; + Rule *rr; + Symtab *sym; + int reuse; + + r = 0; + reuse = 0; + if(sym = symlook(head, S_TARGET, 0)){ + for(r = sym->u.ptr; r; r = r->chain) + if(rcmp(r, head, tail) == 0){ + reuse = 1; + break; + } + } + if(r == 0) + r = (Rule *)Malloc(sizeof(Rule)); + r->shellt = shellt; + r->shellcmd = shellcmd; + r->target = head; + r->tail = tail; + r->recipe = body; + r->line = hline; + r->file = infile; + r->attr = attr; + r->alltargets = ahead; + r->prog = prog; + r->rule = nrules++; + if(!reuse){ + rr = symlook(head, S_TARGET, (void *)r)->u.ptr; + if(rr != r){ + r->chain = rr->chain; + rr->chain = r; + } else + r->chain = 0; + } + if(!reuse) + r->next = 0; + if((attr&REGEXP) || shellt->charin(head, "%&")){ + r->attr |= META; + if(reuse) + return; + if(attr&REGEXP){ + patrule = r; + r->pat = regcomp(head); + } + if(metarules == 0) + metarules = lmr = r; + else { + lmr->next = r; + lmr = r; + } + } else { + if(reuse) + return; + r->pat = 0; + if(rules == 0) + rules = lr = r; + else { + lr->next = r; + lr = r; + } + } +} + +void +dumpr(char *s, Rule *r) +{ + if(r == nil) + return; + Bprint(&bout, "%s: start=%ld shelltype=%s shellcmd=%s\n", + s, r, r->shellt->name, wtos(r->shellcmd, ' ')); + for(; r; r = r->next){ + Bprint(&bout, "\tRule %ld: %s[%d] attr=%x next=%ld chain=%ld alltarget='%s'", + r, r->file, r->line, r->attr, r->next, r->chain, wtos(r->alltargets, ' ')); + if(r->prog) + Bprint(&bout, " prog='%s'", r->prog); + Bprint(&bout, "\n\ttarget=%s: %s\n", r->target, wtos(r->tail, ' ')); + Bprint(&bout, "\trecipe@%ld='%s'\n", r->recipe, r->recipe); + } +} + +static int +rcmp(Rule *r, char *target, Word *tail) +{ + Word *w; + + if(strcmp(r->target, target)) + 
return 1; + for(w = r->tail; w && tail; w = w->next, tail = tail->next) + if(strcmp(w->s, tail->s)) + return 1; + return(w || tail); +} + +char * +rulecnt(void) +{ + char *s; + + s = Malloc(nrules); + memset(s, 0, nrules); + return(s); +} diff --git a/mk/mk/run.c b/mk/mk/run.c @@ -0,0 +1,296 @@ +#include "mk.h" + +typedef struct Event +{ + int pid; + Job *job; +} Event; +static Event *events; +static int nevents, nrunning, nproclimit; + +typedef struct Process +{ + int pid; + int status; + struct Process *b, *f; +} Process; +static Process *phead, *pfree; +static void sched(void); +static void pnew(int, int), pdelete(Process *); + +int pidslot(int); + +void +run(Job *j) +{ + Job *jj; + + if(jobs){ + for(jj = jobs; jj->next; jj = jj->next) + ; + jj->next = j; + } else + jobs = j; + j->next = 0; + /* this code also in waitup after parse redirect */ + if(nrunning < nproclimit) + sched(); +} + +static void +sched(void) +{ + char *flags; + Job *j; + Bufblock *buf; + int slot; + Node *n; + Envy *e; + + if(jobs == 0){ + usage(); + return; + } + j = jobs; + jobs = j->next; + if(DEBUG(D_EXEC)) + fprint(1, "firing up job for target %s\n", wtos(j->t, ' ')); + slot = nextslot(); + events[slot].job = j; + buf = newbuf(); + e = buildenv(j, slot); + shprint(j->r->recipe, e, buf, j->r->shellt); + if(!tflag && (nflag || !(j->r->attr&QUIET))) + Bwrite(&bout, buf->start, (long)strlen(buf->start)); + freebuf(buf); + if(nflag||tflag){ + for(n = j->n; n; n = n->next){ + if(tflag){ + if(!(n->flags&VIRTUAL)) + touch(n->name); + else if(explain) + Bprint(&bout, "no touch of virtual '%s'\n", n->name); + } + n->time = time((long *)0); + MADESET(n, MADE); + } + } else { + if(DEBUG(D_EXEC)) + fprint(1, "recipe='%s'", j->r->recipe);/**/ + Bflush(&bout); + if(j->r->attr&NOMINUSE) + flags = 0; + else + flags = "-e"; + events[slot].pid = execsh(flags, j->r->recipe, 0, e, j->r->shellt, j->r->shellcmd); + usage(); + nrunning++; + if(DEBUG(D_EXEC)) + fprint(1, "pid for target %s = %d\n", wtos(j->t, ' 
'), events[slot].pid); + } +} + +int +waitup(int echildok, int *retstatus) +{ + Envy *e; + int pid; + int slot; + Symtab *s; + Word *w; + Job *j; + char buf[ERRMAX]; + Bufblock *bp; + int uarg = 0; + int done; + Node *n; + Process *p; + extern int runerrs; + + /* first check against the proces slist */ + if(retstatus) + for(p = phead; p; p = p->f) + if(p->pid == *retstatus){ + *retstatus = p->status; + pdelete(p); + return(-1); + } +again: /* rogue processes */ + pid = waitfor(buf); + if(pid == -1){ + if(echildok > 0) + return(1); + else { + fprint(2, "mk: (waitup %d): %r\n", echildok); + Exit(); + } + } + if(DEBUG(D_EXEC)) + fprint(1, "waitup got pid=%d, status='%s'\n", pid, buf); + if(retstatus && pid == *retstatus){ + *retstatus = buf[0]? 1:0; + return(-1); + } + slot = pidslot(pid); + if(slot < 0){ + if(DEBUG(D_EXEC)) + fprint(2, "mk: wait returned unexpected process %d\n", pid); + pnew(pid, buf[0]? 1:0); + goto again; + } + j = events[slot].job; + usage(); + nrunning--; + events[slot].pid = -1; + if(buf[0]){ + e = buildenv(j, slot); + bp = newbuf(); + shprint(j->r->recipe, e, bp, j->r->shellt); + front(bp->start); + fprint(2, "mk: %s: exit status=%s", bp->start, buf); + freebuf(bp); + for(n = j->n, done = 0; n; n = n->next) + if(n->flags&DELETE){ + if(done++ == 0) + fprint(2, ", deleting"); + fprint(2, " '%s'", n->name); + delete(n->name); + } + fprint(2, "\n"); + if(kflag){ + runerrs++; + uarg = 1; + } else { + jobs = 0; + Exit(); + } + } + for(w = j->t; w; w = w->next){ + if((s = symlook(w->s, S_NODE, 0)) == 0) + continue; /* not interested in this node */ + update(uarg, s->u.ptr); + } + if(nrunning < nproclimit) + sched(); + return(0); +} + +void +nproc(void) +{ + Symtab *sym; + Word *w; + + if(sym = symlook("NPROC", S_VAR, 0)) { + w = sym->u.ptr; + if (w && w->s && w->s[0]) + nproclimit = atoi(w->s); + } + if(nproclimit < 1) + nproclimit = 1; + if(DEBUG(D_EXEC)) + fprint(1, "nprocs = %d\n", nproclimit); + if(nproclimit > nevents){ + if(nevents) + events = 
(Event *)Realloc((char *)events, nproclimit*sizeof(Event)); + else + events = (Event *)Malloc(nproclimit*sizeof(Event)); + while(nevents < nproclimit) + events[nevents++].pid = 0; + } +} + +int +nextslot(void) +{ + int i; + + for(i = 0; i < nproclimit; i++) + if(events[i].pid <= 0) return i; + assert("out of slots!!", 0); + return 0; /* cyntax */ +} + +int +pidslot(int pid) +{ + int i; + + for(i = 0; i < nevents; i++) + if(events[i].pid == pid) return(i); + if(DEBUG(D_EXEC)) + fprint(2, "mk: wait returned unexpected process %d\n", pid); + return(-1); +} + + +static void +pnew(int pid, int status) +{ + Process *p; + + if(pfree){ + p = pfree; + pfree = p->f; + } else + p = (Process *)Malloc(sizeof(Process)); + p->pid = pid; + p->status = status; + p->f = phead; + phead = p; + if(p->f) + p->f->b = p; + p->b = 0; +} + +static void +pdelete(Process *p) +{ + if(p->f) + p->f->b = p->b; + if(p->b) + p->b->f = p->f; + else + phead = p->f; + p->f = pfree; + pfree = p; +} + +void +killchildren(char *msg) +{ + Process *p; + + kflag = 1; /* to make sure waitup doesn't exit */ + jobs = 0; /* make sure no more get scheduled */ + for(p = phead; p; p = p->f) + expunge(p->pid, msg); + while(waitup(1, (int *)0) == 0) + ; + Bprint(&bout, "mk: %s\n", msg); + Exit(); +} + +static long tslot[1000]; +static long tick; + +void +usage(void) +{ + long t; + + time(&t); + if(tick) + tslot[nrunning] += (t-tick); + tick = t; +} + +void +prusage(void) +{ + int i; + + usage(); + for(i = 0; i <= nevents; i++) + fprint(1, "%d: %ld\n", i, tslot[i]); +} diff --git a/mk/mk/sh.c b/mk/mk/sh.c @@ -0,0 +1,206 @@ +#include "mk.h" + +/* + * This file contains functions that depend on the shell's syntax. Most + * of the routines extract strings observing the shell's escape conventions. + */ + + +/* + * skip a token in quotes. 
+ */ +static char * +squote(char *cp, int c) +{ + Rune r; + int n; + + while(*cp){ + n = chartorune(&r, cp); + if(r == c) + return cp; + if(r == '\\') + n += chartorune(&r, cp+n); + cp += n; + } + SYNERR(-1); /* should never occur */ + fprint(2, "missing closing '\n"); + return 0; +} +/* + * search a string for unescaped characters in a pattern set + */ +static char * +shcharin(char *cp, char *pat) +{ + Rune r; + int n, vargen; + + vargen = 0; + while(*cp){ + n = chartorune(&r, cp); + switch(r){ + case '\\': /* skip escaped char */ + cp += n; + n = chartorune(&r, cp); + break; + case '\'': /* skip quoted string */ + case '"': + cp = squote(cp+1, r); /* n must = 1 */ + if(!cp) + return 0; + break; + case '$': + if(*(cp+1) == '{') + vargen = 1; + break; + case '}': + if(vargen) + vargen = 0; + else if(utfrune(pat, r)) + return cp; + break; + default: + if(vargen == 0 && utfrune(pat, r)) + return cp; + break; + } + cp += n; + } + if(vargen){ + SYNERR(-1); + fprint(2, "missing closing } in pattern generator\n"); + } + return 0; +} + +/* + * extract an escaped token. Possible escape chars are single-quote, + * double-quote,and backslash. + */ +static char* +shexpandquote(char *s, Rune esc, Bufblock *b) +{ + Rune r; + + if (esc == '\\') { + s += chartorune(&r, s); + rinsert(b, r); + return s; + } + + while(*s){ + s += chartorune(&r, s); + if(r == esc) + return s; + if (r == '\\') { + rinsert(b, r); + s += chartorune(&r, s); + } + rinsert(b, r); + } + return 0; +} + +/* + * Input an escaped token. Possible escape chars are single-quote, + * double-quote and backslash. 
+ */ +static int +shescapetoken(Biobuf *bp, Bufblock *buf, int preserve, int esc) +{ + int c, line; + + if(esc == '\\') { + c = Bgetrune(bp); + if(c == '\r') + c = Bgetrune(bp); + if (c == '\n') + mkinline++; + rinsert(buf, c); + return 1; + } + + line = mkinline; + while((c = nextrune(bp, 0)) >= 0){ + if(c == esc){ + if(preserve) + rinsert(buf, c); + return 1; + } + if(c == '\\') { + rinsert(buf, c); + c = Bgetrune(bp); + if(c == '\r') + c = Bgetrune(bp); + if (c < 0) + break; + if (c == '\n') + mkinline++; + } + rinsert(buf, c); + } + SYNERR(line); fprint(2, "missing closing %c\n", esc); + return 0; +} + +/* + * copy a quoted string; s points to char after opening quote + */ +static char * +copysingle(char *s, Rune q, Bufblock *buf) +{ + Rune r; + + while(*s){ + s += chartorune(&r, s); + rinsert(buf, r); + if(r == q) + break; + } + return s; +} +/* + * check for quoted strings. backquotes are handled here; single quotes above. + * s points to char after opening quote, q. + */ +static char * +shcopyq(char *s, Rune q, Bufblock *buf) +{ + if(q == '\'' || q == '"') /* copy quoted string */ + return copysingle(s, q, buf); + + if(q != '`') /* not quoted */ + return s; + + while(*s){ /* copy backquoted string */ + s += chartorune(&q, s); + rinsert(buf, q); + if(q == '`') + break; + if(q == '\'' || q == '"') + s = copysingle(s, q, buf); /* copy quoted string */ + } + return s; +} + +static int +shmatchname(char *name) +{ + USED(name); + + return 1; +} + + +Shell shshell = { + "sh", + "\"'= \t", /*used in parse.c to isolate assignment attribute*/ + ' ', /* inter-word separator in env */ + shcharin, + shexpandquote, + shescapetoken, + shcopyq, + shmatchname +}; + diff --git a/mk/mk/shell.c b/mk/mk/shell.c @@ -0,0 +1,80 @@ +#include "mk.h" + +static Shell *shells[] = { + &rcshell, + &shshell +}; + +Shell *shelldefault = &shshell; + +Shell *shellt; +Word *shellcmd; + +typedef struct Shellstack Shellstack; +struct Shellstack +{ + Shell *t; + Word *w; + Shellstack *next; +}; + 
+Shellstack *shellstack; + +char* +setshell(Word *w) +{ + int i; + + if(w->s == nil) + return "shell name not found on line"; + + for(i=0; i<nelem(shells); i++) + if(shells[i]->matchname(w->s)) + break; + if(i == nelem(shells)) + return "cannot determine shell type"; + shellt = shells[i]; + shellcmd = w; + return nil; +} + +void +initshell(void) +{ + shellcmd = stow(shelldefault->name); + shellt = shelldefault; + setvar("MKSHELL", shellcmd); +} + +void +pushshell(void) +{ + Shellstack *s; + + /* save */ + s = Malloc(sizeof *s); + s->t = shellt; + s->w = shellcmd; + s->next = shellstack; + shellstack = s; + + initshell(); /* reset to defaults */ +} + +void +popshell(void) +{ + Shellstack *s; + + if(shellstack == nil){ + fprint(2, "internal shellstack error\n"); + Exit(); + } + + s = shellstack; + shellstack = s->next; + shellt = s->t; + shellcmd = s->w; + setvar("MKSHELL", shellcmd); + free(s); +} diff --git a/mk/mk/shprint.c b/mk/mk/shprint.c @@ -0,0 +1,125 @@ +#include "mk.h" + +static char *vexpand(char*, Envy*, Bufblock*); + +#define getfields mkgetfields + +static int +getfields(char *str, char **args, int max, int mflag, char *set) +{ + Rune r; + int nr, intok, narg; + + if(max <= 0) + return 0; + + narg = 0; + args[narg] = str; + if(!mflag) + narg++; + intok = 0; + for(;; str += nr) { + nr = chartorune(&r, str); + if(r == 0) + break; + if(utfrune(set, r)) { + if(narg >= max) + break; + *str = 0; + intok = 0; + args[narg] = str + nr; + if(!mflag) + narg++; + } else { + if(!intok && mflag) + narg++; + intok = 1; + } + } + return narg; +} + +void +shprint(char *s, Envy *env, Bufblock *buf, Shell *sh) +{ + int n; + Rune r; + + while(*s) { + n = chartorune(&r, s); + if (r == '$') + s = vexpand(s, env, buf); + else { + rinsert(buf, r); + s += n; + s = sh->copyq(s, r, buf); /*handle quoted strings*/ + } + } + insert(buf, 0); +} + +static char * +mygetenv(char *name, Envy *env) +{ + if (!env) + return 0; + if (symlook(name, S_WESET, 0) == 0 && symlook(name, 
S_INTERNAL, 0) == 0) + return 0; + /* only resolve internal variables and variables we've set */ + for(; env->name; env++){ + if (strcmp(env->name, name) == 0) + return wtos(env->values, ' '); + } + return 0; +} + +static char * +vexpand(char *w, Envy *env, Bufblock *buf) +{ + char *s, carry, *p, *q; + + assert("vexpand no $", *w == '$'); + p = w+1; /* skip dollar sign */ + if(*p == '{') { + p++; + q = utfrune(p, '}'); + if (!q) + q = strchr(p, 0); + } else + q = shname(p); + carry = *q; + *q = 0; + s = mygetenv(p, env); + *q = carry; + if (carry == '}') + q++; + if (s) { + bufcpy(buf, s, strlen(s)); + free(s); + } else /* copy name intact*/ + bufcpy(buf, w, q-w); + return(q); +} + +void +front(char *s) +{ + char *t, *q; + int i, j; + char *flds[512]; + + q = strdup(s); + i = getfields(q, flds, 512, 0, " \t\n"); + if(i > 5){ + flds[4] = flds[i-1]; + flds[3] = "..."; + i = 5; + } + t = s; + for(j = 0; j < i; j++){ + for(s = flds[j]; *s; *t++ = *s++); + *t++ = ' '; + } + *t = 0; + free(q); +} diff --git a/mk/mk/symtab.c b/mk/mk/symtab.c @@ -0,0 +1,97 @@ +#include "mk.h" + +#define NHASH 4099 +#define HASHMUL 79L /* this is a good value */ +static Symtab *hash[NHASH]; + +void +syminit(void) +{ + Symtab **s, *ss, *next; + + for(s = hash; s < &hash[NHASH]; s++){ + for(ss = *s; ss; ss = next){ + next = ss->next; + free((char *)ss); + } + *s = 0; + } +} + +Symtab * +symlook(char *sym, int space, void *install) +{ + long h; + char *p; + Symtab *s; + + for(p = sym, h = space; *p; h += *p++) + h *= HASHMUL; + if(h < 0) + h = ~h; + h %= NHASH; + for(s = hash[h]; s; s = s->next) + if((s->space == space) && (strcmp(s->name, sym) == 0)) + return(s); + if(install == 0) + return(0); + s = (Symtab *)Malloc(sizeof(Symtab)); + s->space = space; + s->name = sym; + s->u.ptr = install; + s->next = hash[h]; + hash[h] = s; + return(s); +} + +void +symdel(char *sym, int space) +{ + long h; + char *p; + Symtab *s, *ls; + + /* multiple memory leaks */ + + for(p = sym, h = space; *p; h += 
*p++) + h *= HASHMUL; + if(h < 0) + h = ~h; + h %= NHASH; + for(s = hash[h], ls = 0; s; ls = s, s = s->next) + if((s->space == space) && (strcmp(s->name, sym) == 0)){ + if(ls) + ls->next = s->next; + else + hash[h] = s->next; + free((char *)s); + } +} + +void +symtraverse(int space, void (*fn)(Symtab*)) +{ + Symtab **s, *ss; + + for(s = hash; s < &hash[NHASH]; s++) + for(ss = *s; ss; ss = ss->next) + if(ss->space == space) + (*fn)(ss); +} + +void +symstat(void) +{ + Symtab **s, *ss; + int n; + int l[1000]; + + memset((char *)l, 0, sizeof(l)); + for(s = hash; s < &hash[NHASH]; s++){ + for(ss = *s, n = 0; ss; ss = ss->next) + n++; + l[n]++; + } + for(n = 0; n < 1000; n++) + if(l[n]) Bprint(&bout, "%ld of length %d\n", l[n], n); +} diff --git a/mk/mk/sys.h b/mk/mk/sys.h @@ -0,0 +1,27 @@ +#include <utf.h> +#include <fmt.h> +#include <bio.h> +#include <regexp9.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <ctype.h> +#include <time.h> +#include <stdint.h> + +#define OREAD O_RDONLY +#define OWRITE O_WRONLY +#define ORDWR O_RDWR +#define nil 0 +#define nelem(x) (sizeof(x)/sizeof((x)[0])) +#define seek lseek +#define remove unlink +#define exits(x) exit(x && *(char*)x ? 
1 : 0) +#define USED(x) if(x){}else +#define create(name, mode, perm) open(name, mode|O_CREAT, perm) +#define ERRMAX 256 + +typedef uintptr_t uintptr; +#define uchar mk_uchar +typedef unsigned char uchar; diff --git a/mk/mk/unix.c b/mk/mk/unix.c @@ -0,0 +1,341 @@ +#define NOPLAN9DEFINES +#include "mk.h" +#include <sys/wait.h> +#include <signal.h> +#include <sys/stat.h> +#include <sys/time.h> + +char *shell = "/bin/sh"; +char *shellname = "sh"; + +extern char **environ; + +static void +mkperror(char *s) +{ + fprint(2, "%s: %r\n", s); +} + +void +readenv(void) +{ + char **p, *s; + Word *w; + + for(p = environ; *p; p++){ +/* rsc 5/5/2004 -- This misparses fn#cd={whatever} + s = shname(*p); + if(*s == '=') { + *s = 0; + w = newword(s+1); + } else + w = newword(""); +*/ + s = strchr(*p, '='); + if(s){ + *s = 0; + w = newword(s+1); + } else + w = newword(""); + if (symlook(*p, S_INTERNAL, 0)) + continue; + s = strdup(*p); + setvar(s, (void *)w); + symlook(s, S_EXPORTED, (void*)"")->u.ptr = ""; + } +} + +/* + * done on child side of fork, so parent's env is not affected + * and we don't care about freeing memory because we're going + * to exec immediately after this. 
+ */ +void +exportenv(Envy *e, Shell *sh) +{ + int i; + char **p; + static char buf[16384]; + + p = 0; + for(i = 0; e->name; e++, i++) { + p = (char**) Realloc(p, (i+2)*sizeof(char*)); + if(e->values) + snprint(buf, sizeof buf, "%s=%s", e->name, wtos(e->values, sh->iws)); + else + snprint(buf, sizeof buf, "%s=", e->name); + p[i] = strdup(buf); + } + p[i] = 0; + environ = p; +} + +int +waitfor(char *msg) +{ + int status; + int pid; + + *msg = 0; + pid = wait(&status); + if(pid > 0) { + if(status&0x7f) { + if(status&0x80) + snprint(msg, ERRMAX, "signal %d, core dumped", status&0x7f); + else + snprint(msg, ERRMAX, "signal %d", status&0x7f); + } else if(status&0xff00) + snprint(msg, ERRMAX, "exit(%d)", (status>>8)&0xff); + } + return pid; +} + +void +expunge(int pid, char *msg) +{ + if(strcmp(msg, "interrupt")) + kill(pid, SIGINT); + else + kill(pid, SIGHUP); +} + +int mypid; + +int +shargv(Word *cmd, int extra, char ***pargv) +{ + char **argv; + int i, n; + Word *w; + + n = 0; + for(w=cmd; w; w=w->next) + n++; + + argv = Malloc((n+extra+1)*sizeof(argv[0])); + i = 0; + for(w=cmd; w; w=w->next) + argv[i++] = w->s; + argv[n] = 0; + *pargv = argv; + return n; +} + +int +execsh(char *args, char *cmd, Bufblock *buf, Envy *e, Shell *sh, Word *shellcmd) +{ + char *p, **argv; + int tot, n, pid, in[2], out[2]; + + if(buf && pipe(out) < 0){ + mkperror("pipe"); + Exit(); + } + pid = fork(); + mypid = getpid(); + if(pid < 0){ + mkperror("mk fork"); + Exit(); + } + if(pid == 0){ + if(buf) + close(out[0]); + if(pipe(in) < 0){ + mkperror("pipe"); + Exit(); + } + pid = fork(); + if(pid < 0){ + mkperror("mk fork"); + Exit(); + } + if(pid != 0){ + dup2(in[0], 0); + if(buf){ + dup2(out[1], 1); + close(out[1]); + } + close(in[0]); + close(in[1]); + if (e) + exportenv(e, sh); + n = shargv(shellcmd, 1, &argv); + argv[n++] = args; + argv[n] = 0; + execvp(argv[0], argv); + mkperror(shell); + _exit(1); + } + close(out[1]); + close(in[0]); + if(DEBUG(D_EXEC)) + fprint(1, "starting: %s\n", cmd); 
+ p = cmd+strlen(cmd); + while(cmd < p){ + n = write(in[1], cmd, p-cmd); + if(n < 0) + break; + cmd += n; + } + close(in[1]); + _exit(0); + } + if(buf){ + close(out[1]); + tot = 0; + for(;;){ + if (buf->current >= buf->end) + growbuf(buf); + n = read(out[0], buf->current, buf->end-buf->current); + if(n <= 0) + break; + buf->current += n; + tot += n; + } + if (tot && buf->current[-1] == '\n') + buf->current--; + close(out[0]); + } + return pid; +} + +int +pipecmd(char *cmd, Envy *e, int *fd, Shell *sh, Word *shellcmd) +{ + int pid, pfd[2]; + int n; + char **argv; + + if(DEBUG(D_EXEC)) + fprint(1, "pipecmd='%s'\n", cmd);/**/ + + if(fd && pipe(pfd) < 0){ + mkperror("pipe"); + Exit(); + } + pid = fork(); + if(pid < 0){ + mkperror("mk fork"); + Exit(); + } + if(pid == 0){ + if(fd){ + close(pfd[0]); + dup2(pfd[1], 1); + close(pfd[1]); + } + if(e) + exportenv(e, sh); + n = shargv(shellcmd, 2, &argv); + argv[n++] = "-c"; + argv[n++] = cmd; + argv[n] = 0; + execvp(argv[0], argv); + mkperror(shell); + _exit(1); + } + if(fd){ + close(pfd[1]); + *fd = pfd[0]; + } + return pid; +} + +void +Exit(void) +{ + while(wait(0) >= 0) + ; + exits("error"); +} + +static struct +{ + int sig; + char *msg; +} sigmsgs[] = +{ + SIGALRM, "alarm", + SIGFPE, "sys: fp: fptrap", + SIGPIPE, "sys: write on closed pipe", + SIGILL, "sys: trap: illegal instruction", +/* SIGSEGV, "sys: segmentation violation", */ + 0, 0 +}; + +static void +notifyf(int sig) +{ + int i; + + for(i = 0; sigmsgs[i].msg; i++) + if(sigmsgs[i].sig == sig) + killchildren(sigmsgs[i].msg); + + /* should never happen */ + signal(sig, SIG_DFL); + kill(getpid(), sig); +} + +void +catchnotes(void) +{ + int i; + + for(i = 0; sigmsgs[i].msg; i++) + signal(sigmsgs[i].sig, notifyf); +} + +char* +maketmp(int *pfd) +{ + static char temp[] = "/tmp/mkargXXXXXX"; + static char buf[100]; + int fd; + + strcpy(buf, temp); + fd = mkstemp(buf); + if(fd < 0) + return 0; + *pfd = fd; + return buf; +} + +int +chgtime(char *name) +{ + if(access(name, 0) 
>= 0) + return utimes(name, 0); + return close(creat(name, 0666)); +} + +void +rcopy(char **to, Resub *match, int n) +{ + int c; + char *p; + + *to = match->s.sp; /* stem0 matches complete target */ + for(to++, match++; --n > 0; to++, match++){ + if(match->s.sp && match->e.ep){ + p = match->e.ep; + c = *p; + *p = 0; + *to = strdup(match->s.sp); + *p = c; + } + else + *to = 0; + } +} + +unsigned long +mkmtime(char *name) +{ + struct stat st; + + if(stat(name, &st) < 0) + return 0; + + return st.st_mtime; +} diff --git a/mk/mk/var.c b/mk/mk/var.c @@ -0,0 +1,41 @@ +#include "mk.h" + +void +setvar(char *name, void *ptr) +{ + symlook(name, S_VAR, ptr)->u.ptr = ptr; + symlook(name, S_MAKEVAR, (void*)""); +} + +static void +print1(Symtab *s) +{ + Word *w; + + Bprint(&bout, "\t%s=", s->name); + for (w = s->u.ptr; w; w = w->next) + Bprint(&bout, "'%s'", w->s); + Bprint(&bout, "\n"); +} + +void +dumpv(char *s) +{ + Bprint(&bout, "%s:\n", s); + symtraverse(S_VAR, print1); +} + +char * +shname(char *a) +{ + Rune r; + int n; + + while (*a) { + n = chartorune(&r, a); + if (!WORDCHR(r)) + break; + a += n; + } + return a; +} diff --git a/mk/mk/varsub.c b/mk/mk/varsub.c @@ -0,0 +1,252 @@ +#include "mk.h" + +static Word *subsub(Word*, char*, char*); +static Word *expandvar(char**); +static Bufblock *varname(char**); +static Word *extractpat(char*, char**, char*, char*); +static int submatch(char*, Word*, Word*, int*, char**); +static Word *varmatch(char *); + +Word * +varsub(char **s) +{ + Bufblock *b; + Word *w; + + if(**s == '{') /* either ${name} or ${name: A%B==C%D}*/ + return expandvar(s); + + b = varname(s); + if(b == 0) + return 0; + + w = varmatch(b->start); + freebuf(b); + return w; +} + +/* + * extract a variable name + */ +static Bufblock* +varname(char **s) +{ + Bufblock *b; + char *cp; + Rune r; + int n; + + b = newbuf(); + cp = *s; + for(;;){ + n = chartorune(&r, cp); + if (!WORDCHR(r)) + break; + rinsert(b, r); + cp += n; + } + if (b->current == b->start){ + 
SYNERR(-1); + fprint(2, "missing variable name <%s>\n", *s); + freebuf(b); + return 0; + } + *s = cp; + insert(b, 0); + return b; +} + +static Word* +varmatch(char *name) +{ + Word *w; + Symtab *sym; + + sym = symlook(name, S_VAR, 0); + if(sym){ + /* check for at least one non-NULL value */ + for (w = sym->u.ptr; w; w = w->next) + if(w->s && *w->s) + return wdup(w); + } + return 0; +} + +static Word* +expandvar(char **s) +{ + Word *w; + Bufblock *buf; + Symtab *sym; + char *cp, *begin, *end; + + begin = *s; + (*s)++; /* skip the '{' */ + buf = varname(s); + if (buf == 0) + return 0; + cp = *s; + if (*cp == '}') { /* ${name} variant*/ + (*s)++; /* skip the '}' */ + w = varmatch(buf->start); + freebuf(buf); + return w; + } + if (*cp != ':') { + SYNERR(-1); + fprint(2, "bad variable name <%s>\n", buf->start); + freebuf(buf); + return 0; + } + cp++; + end = shellt->charin(cp , "}"); + if(end == 0){ + SYNERR(-1); + fprint(2, "missing '}': %s\n", begin); + Exit(); + } + *end = 0; + *s = end+1; + + sym = symlook(buf->start, S_VAR, 0); + if(sym == 0 || sym->u.ptr == 0) + w = newword(buf->start); + else + w = subsub(sym->u.ptr, cp, end); + freebuf(buf); + return w; +} + +static Word* +extractpat(char *s, char **r, char *term, char *end) +{ + int save; + char *cp; + Word *w; + + cp = shellt->charin(s, term); + if(cp){ + *r = cp; + if(cp == s) + return 0; + save = *cp; + *cp = 0; + w = stow(s); + *cp = save; + } else { + *r = end; + w = stow(s); + } + return w; +} + +static Word* +subsub(Word *v, char *s, char *end) +{ + int nmid; + Word *head, *tail, *w, *h; + Word *a, *b, *c, *d; + Bufblock *buf; + char *cp, *enda; + + a = extractpat(s, &cp, "=%&", end); + b = c = d = 0; + if(PERCENT(*cp)) + b = extractpat(cp+1, &cp, "=", end); + if(*cp == '=') + c = extractpat(cp+1, &cp, "&%", end); + if(PERCENT(*cp)) + d = stow(cp+1); + else if(*cp) + d = stow(cp); + + head = tail = 0; + buf = newbuf(); + for(; v; v = v->next){ + h = w = 0; + if(submatch(v->s, a, b, &nmid, &enda)){ + /* 
enda points to end of A match in source; + * nmid = number of chars between end of A and start of B + */ + if(c){ + h = w = wdup(c); + while(w->next) + w = w->next; + } + if(PERCENT(*cp) && nmid > 0){ + if(w){ + bufcpy(buf, w->s, strlen(w->s)); + bufcpy(buf, enda, nmid); + insert(buf, 0); + free(w->s); + w->s = strdup(buf->start); + } else { + bufcpy(buf, enda, nmid); + insert(buf, 0); + h = w = newword(buf->start); + } + buf->current = buf->start; + } + if(d && *d->s){ + if(w){ + + bufcpy(buf, w->s, strlen(w->s)); + bufcpy(buf, d->s, strlen(d->s)); + insert(buf, 0); + free(w->s); + w->s = strdup(buf->start); + w->next = wdup(d->next); + while(w->next) + w = w->next; + buf->current = buf->start; + } else + h = w = wdup(d); + } + } + if(w == 0) + h = w = newword(v->s); + + if(head == 0) + head = h; + else + tail->next = h; + tail = w; + } + freebuf(buf); + delword(a); + delword(b); + delword(c); + delword(d); + return head; +} + +static int +submatch(char *s, Word *a, Word *b, int *nmid, char **enda) +{ + Word *w; + int n; + char *end; + + n = 0; + for(w = a; w; w = w->next){ + n = strlen(w->s); + if(strncmp(s, w->s, n) == 0) + break; + } + if(a && w == 0) /* a == NULL matches everything*/ + return 0; + + *enda = s+n; /* pointer to end a A part match */ + *nmid = strlen(s)-n; /* size of remainder of source */ + end = *enda+*nmid; + for(w = b; w; w = w->next){ + n = strlen(w->s); + if(strcmp(w->s, end-n) == 0){ + *nmid -= n; + break; + } + } + if(b && w == 0) /* b == NULL matches everything */ + return 0; + return 1; +} diff --git a/mk/mk/word.c b/mk/mk/word.c @@ -0,0 +1,189 @@ +#include "mk.h" + +static Word *nextword(char**); + +Word* +newword(char *s) +{ + Word *w; + + w = (Word *)Malloc(sizeof(Word)); + w->s = strdup(s); + w->next = 0; + return(w); +} + +Word * +stow(char *s) +{ + Word *head, *w, *new; + + w = head = 0; + while(*s){ + new = nextword(&s); + if(new == 0) + break; + if (w) + w->next = new; + else + head = w = new; + while(w->next) + w = w->next; + + 
} + if (!head) + head = newword(""); + return(head); +} + +char * +wtos(Word *w, int sep) +{ + Bufblock *buf; + char *cp; + + buf = newbuf(); + for(; w; w = w->next){ + for(cp = w->s; *cp; cp++) + insert(buf, *cp); + if(w->next) + insert(buf, sep); + } + insert(buf, 0); + cp = strdup(buf->start); + freebuf(buf); + return(cp); +} + +Word* +wdup(Word *w) +{ + Word *v, *new, *base; + + v = base = 0; + while(w){ + new = newword(w->s); + if(v) + v->next = new; + else + base = new; + v = new; + w = w->next; + } + return base; +} + +void +delword(Word *w) +{ + Word *v; + + while(v = w){ + w = w->next; + if(v->s) + free(v->s); + free(v); + } +} + +/* + * break out a word from a string handling quotes, executions, + * and variable expansions. + */ +static Word* +nextword(char **s) +{ + Bufblock *b; + Word *head, *tail, *w; + Rune r; + char *cp; + int empty; + + cp = *s; + b = newbuf(); +restart: + head = tail = 0; + while(*cp == ' ' || *cp == '\t') /* leading white space */ + cp++; + empty = 1; + while(*cp){ + cp += chartorune(&r, cp); + switch(r) + { + case ' ': + case '\t': + case '\n': + goto out; + case '\\': + case '\'': + case '"': + empty = 0; + cp = shellt->expandquote(cp, r, b); + if(cp == 0){ + fprint(2, "missing closing quote: %s\n", *s); + Exit(); + } + break; + case '$': + w = varsub(&cp); + if(w == 0){ + if(empty) + goto restart; + break; + } + empty = 0; + if(b->current != b->start){ + bufcpy(b, w->s, strlen(w->s)); + insert(b, 0); + free(w->s); + w->s = strdup(b->start); + b->current = b->start; + } + if(head){ + bufcpy(b, tail->s, strlen(tail->s)); + bufcpy(b, w->s, strlen(w->s)); + insert(b, 0); + free(tail->s); + tail->s = strdup(b->start); + tail->next = w->next; + free(w->s); + free(w); + b->current = b->start; + } else + tail = head = w; + while(tail->next) + tail = tail->next; + break; + default: + empty = 0; + rinsert(b, r); + break; + } + } +out: + *s = cp; + if(b->current != b->start){ + if(head){ + cp = b->current; + bufcpy(b, tail->s, 
strlen(tail->s)); + bufcpy(b, b->start, cp-b->start); + insert(b, 0); + free(tail->s); + tail->s = strdup(cp); + } else { + insert(b, 0); + head = newword(b->start); + } + } + freebuf(b); + return head; +} + +void +dumpw(char *s, Word *w) +{ + Bprint(&bout, "%s", s); + for(; w; w = w->next) + Bprint(&bout, " '%s'", w->s); + Bputc(&bout, '\n'); +} diff --git a/mk/mkfile b/mk/mkfile @@ -0,0 +1,4 @@ +TARG = libbio libutf libregexp libfmt mk + +<$mkbuild/mk.parent + diff --git a/mkfile b/mkfile @@ -0,0 +1,5 @@ +TARG = _install find sed ed grep expr od stty nawk \ + patch diff printf dc dd fmt hd bc ps pgrep tar cp mk \ + libcommon libuxre lex yacc + +<$mkbuild/mk.parent diff --git a/nawk/COPYING b/nawk/COPYING @@ -0,0 +1,340 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Library General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. 
+ + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. 
You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. (Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. 
+ +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. 
However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. + +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. 
+You are not responsible for enforcing compliance by third parties to +this License. + + 7. If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. 
+ + <signature of Ty Coon>, 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General +Public License instead of this License. diff --git a/nawk/NOTES b/nawk/NOTES @@ -0,0 +1,20 @@ +Notes for the 'nawk' utility +============================ + +Changes since the version published by Caldera in OS Utilities 0.1a +(<http://unixtools.sourceforge.net/>) include: + +- The lex part of the code can be built with both Unix lex and flex. +- Support for multibyte characters. +- Proper support for LC_TIME (locale-specific radix character recognized + in input data, but not in scripts). +- No line length limitations on input and output data. +- No limit on the number of fields per record. + +4.4BSD old awk and Brian W. Kernighan's 'One True awk' (available at +<http://cm.bell-labs.com/cm/cs/who/bwk/index.html>) have been used as +reference for some of the changes; in addition, the manual page +enclosed here has been derived from 'One True awk' since Caldera +did not enclose one. + + Gunnar Ritter 7/30/05 diff --git a/nawk/awk.g.y b/nawk/awk.g.y @@ -0,0 +1,468 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)awk.g.y 1.9 (gritter) 5/14/06> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from RCS Header: awk.g.y 1.2 91/06/25 */ + +%{ +#include "awk.h" +#include <unistd.h> +#include <inttypes.h> +#include <pfmt.h> +int yywrap(void) { return(1); } +#ifndef DEBUG +# define PUTS(x) +#endif +Node *beginloc = 0, *endloc = 0; +int infunc = 0; /* = 1 if in arglist or body of func */ +unsigned char *curfname = 0; +Node *arglist = 0; /* list of args for current function */ +static void setfname(Cell *); +static int constnode(Node *); +static unsigned char *strnode(Node *); +static Node *notnull(Node *); +extern const char illstat[]; + +extern int yylex(void); +%} + +%union { + Node *p; + Cell *cp; + intptr_t i; + unsigned char *s; +} + +%token <i> FIRSTTOKEN /* must be first */ +%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND +%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']' +%token <i> ARRAY +%token <i> MATCH NOTMATCH MATCHOP +%token <i> FINAL DOT ALL CCL NCCL CHAR MCHAR OR STAR QUEST PLUS +%token <i> AND BOR APPEND EQ GE GT LE LT NE IN +%token <i> ARG BLTIN BREAK CONTINUE DELETE DO EXIT FOR FUNC +%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT +%token <i> ADD MINUS MULT DIVIDE MOD +%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ +%token <i> PRINT PRINTF SPRINTF +%token <p> ELSE INTEST CONDEXPR +%token <i> POSTINCR PREINCR POSTDECR PREDECR +%token <cp> VAR IVAR VARNF CALL NUMBER STRING FIELD +%token <s> REGEXPR + +%type <p> pas pattern ppattern plist pplist patlist prarg term re +%type <p> pa_pat pa_stat pa_stats +%type <s> reg_expr +%type <p> simple_stmt opt_simple_stmt stmt stmtlist +%type <p> var varname funcname varlist +%type <p> for if while +%type <i> pst opt_pst lbrace rparen comma nl opt_nl and bor +%type <i> subop print + +%right ASGNOP 
+%right '?' +%right ':' +%left BOR +%left AND +%left GETLINE +%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|' +%left ARG BLTIN BREAK CALL CONTINUE DELETE DO EXIT FOR FIELD FUNC +%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER +%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR +%left REGEXPR VAR VARNF IVAR WHILE '(' +%left CAT +%left '+' '-' +%left '*' '/' '%' +%left NOT UMINUS +%right POWER +%right DECR INCR +%left INDIRECT +%token LASTTOKEN /* must be last */ + +%% + +program: + pas { if (errorflag==0) + winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); } + | error { yyclearin; bracecheck(); vyyerror(":95:Bailing out"); } + ; + +and: + AND | and NL + ; + +bor: + BOR | bor NL + ; + +comma: + ',' | comma NL + ; + +do: + DO { } | do NL + ; + +else: + ELSE { } | else NL + ; + +for: + FOR '(' opt_simple_stmt ';' pattern ';' opt_simple_stmt rparen stmt + { $$ = stat4(FOR, $3, notnull($5), $7, $9); } + | FOR '(' opt_simple_stmt ';' ';' opt_simple_stmt rparen stmt + { $$ = stat4(FOR, $3, NIL, $6, $8); } + | FOR '(' varname IN varname rparen stmt + { $$ = stat3(IN, $3, makearr($5), $7); } + ; + +funcname: + VAR { setfname($1); } + | CALL { setfname($1); } + ; + +if: + IF '(' pattern rparen { $$ = notnull($3); } + ; + +lbrace: + '{' | lbrace NL + ; + +nl: + NL | nl NL + ; + +opt_nl: + /* empty */ { $$ = 0; } + | nl + ; + +opt_pst: + /* empty */ { $$ = 0; } + | pst + ; + + +opt_simple_stmt: + /* empty */ { $$ = 0; } + | simple_stmt + ; + +pas: + opt_pst { $$ = 0; } + | opt_pst pa_stats opt_pst { $$ = $2; } + ; + +pa_pat: + pattern { $$ = notnull($1); } + ; + +pa_stat: + pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); } + | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); } + | pa_pat ',' pa_pat { $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); } + | pa_pat ',' pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $3, $5); } + | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); } + | XBEGIN lbrace stmtlist '}' + { beginloc = 
linkum(beginloc, $3); $$ = 0; } + | XEND lbrace stmtlist '}' + { endloc = linkum(endloc, $3); $$ = 0; } + | FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}' + { infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; } + ; + +pa_stats: + pa_stat + | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); } + ; + +patlist: + pattern + | patlist comma pattern { $$ = linkum($1, $3); } + ; + +ppattern: + var ASGNOP ppattern { $$ = op2($2, $1, $3); } + | ppattern '?' ppattern ':' ppattern %prec '?' + { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + | ppattern bor ppattern %prec BOR + { $$ = op2(BOR, notnull($1), notnull($3)); } + | ppattern and ppattern %prec AND + { $$ = op2(AND, notnull($1), notnull($3)); } + | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | ppattern MATCHOP ppattern + { if (constnode($3)) + $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); + else + $$ = op3($2, (Node *)1, $1, $3); } + | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } + | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } + | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); } + | term + ; + +pattern: + var ASGNOP pattern { $$ = op2($2, $1, $3); } + | pattern '?' pattern ':' pattern %prec '?' 
+ { $$ = op3(CONDEXPR, notnull($1), $3, $5); } + | pattern bor pattern %prec BOR + { $$ = op2(BOR, notnull($1), notnull($3)); } + | pattern and pattern %prec AND + { $$ = op2(AND, notnull($1), notnull($3)); } + | NOT pattern + { $$ = op1(NOT, op2(NE,$2,valtonode(lookup("$zero&null",symtab),CCON))); } + | pattern EQ pattern { $$ = op2($2, $1, $3); } + | pattern GE pattern { $$ = op2($2, $1, $3); } + | pattern GT pattern { $$ = op2($2, $1, $3); } + | pattern LE pattern { $$ = op2($2, $1, $3); } + | pattern LT pattern { $$ = op2($2, $1, $3); } + | pattern NE pattern { $$ = op2($2, $1, $3); } + | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | pattern MATCHOP pattern + { if (constnode($3)) + $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); + else + $$ = op3($2, (Node *)1, $1, $3); } + | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); } + | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); } + | pattern '|' GETLINE var { $$ = op3(GETLINE, $4, (Node*)$2, $1); } + | pattern '|' GETLINE { $$ = op3(GETLINE, (Node*)0, (Node*)$2, $1); } + | pattern term %prec CAT { $$ = op2(CAT, $1, $2); } + | term + ; + +plist: + pattern comma pattern { $$ = linkum($1, $3); } + | plist comma pattern { $$ = linkum($1, $3); } + ; + +pplist: + ppattern + | pplist comma ppattern { $$ = linkum($1, $3); } + ; + +prarg: + /* empty */ { $$ = rectonode(); } + | pplist + | '(' plist ')' { $$ = $2; } + ; + +print: + PRINT | PRINTF + ; + +pst: + NL | ';' | pst NL | pst ';' + ; + +rbrace: + '}' { } | rbrace NL + ; + +re: + reg_expr + { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1,0)); } + | NOT re {$$ = op1(NOT, notnull($2)); } + ; + +reg_expr: + '/' {startreg();} REGEXPR '/' { $$ = $3; } + ; + +rparen: + ')' | rparen NL + ; + +simple_stmt: + print prarg '|' term { $$ = stat3($1, $2, (Node *) $3, $4); } + | print prarg APPEND term { $$ = stat3($1, $2, (Node *) $3, $4); } + | print prarg GT term { $$ = stat3($1, $2, (Node *) $3, $4); } + | 
print prarg { $$ = stat3($1, $2, NIL, NIL); } + | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); } + | DELETE varname { yyclearin; vyyerror(":96:You can only delete array[element]"); $$ = stat1(DELETE, $2); } + | pattern { $$ = exptostat($1); } + | error { yyclearin; vyyerror(illstat); } + ; + +st: + nl { } | ';' opt_nl { } + ; + +stmt: + BREAK st { $$ = stat1(BREAK, NIL); } + | CONTINUE st { $$ = stat1(CONTINUE, NIL); } + | do stmt WHILE '(' pattern ')' st + { $$ = stat2(DO, $2, notnull($5)); } + | EXIT pattern st { $$ = stat1(EXIT, $2); } + | EXIT st { $$ = stat1(EXIT, NIL); } + | for + | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); } + | if stmt { $$ = stat3(IF, $1, $2, NIL); } + | lbrace stmtlist rbrace { $$ = $2; } + | NEXT st { if (infunc) + vyyerror(":97:Next is illegal inside a function"); + $$ = stat1(NEXT, NIL); } + | RETURN pattern st { $$ = stat1(RETURN, $2); } + | RETURN st { $$ = stat1(RETURN, NIL); } + | simple_stmt st + | while stmt { $$ = stat2(WHILE, $1, $2); } + | ';' opt_nl { $$ = 0; } + ; + +stmtlist: + stmt + | stmtlist stmt { $$ = linkum($1, $2); } + ; + +subop: + SUB | GSUB + ; + +term: + term '+' term { $$ = op2(ADD, $1, $3); } + | term '-' term { $$ = op2(MINUS, $1, $3); } + | term '*' term { $$ = op2(MULT, $1, $3); } + | term '/' term { $$ = op2(DIVIDE, $1, $3); } + | term '%' term { $$ = op2(MOD, $1, $3); } + | term POWER term { $$ = op2(POWER, $1, $3); } + | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); } + | '+' term %prec UMINUS { $$ = $2; } + | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); } + | BLTIN '(' ')' { $$ = op2(BLTIN, (Node *) $1, rectonode()); } + | BLTIN '(' patlist ')' { $$ = op2(BLTIN, (Node *) $1, $3); } + | BLTIN { $$ = op2(BLTIN, (Node *) $1, rectonode()); } + | CALL '(' ')' { $$ = op2(CALL, valtonode($1,CVAR), NIL); } + | CALL '(' patlist ')' { $$ = op2(CALL, valtonode($1,CVAR), $3); } + | DECR var { $$ = op1(PREDECR, $2); } + | INCR var { $$ = op1(PREINCR, $2); } + | var DECR { $$ = 
op1(POSTDECR, $1); } + | var INCR { $$ = op1(POSTINCR, $1); } + | GETLINE var LT term { $$ = op3(GETLINE, $2, (Node *)$3, $4); } + | GETLINE LT term { $$ = op3(GETLINE, NIL, (Node *)$2, $3); } + | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); } + | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); } + | INDEX '(' pattern comma pattern ')' + { $$ = op2(INDEX, $3, $5); } + | INDEX '(' pattern comma reg_expr ')' + { vyyerror(":98:Index() doesn't permit regular expressions"); + $$ = op2(INDEX, $3, (Node*)$5); } + | '(' pattern ')' { $$ = $2; } + | MATCHFCN '(' pattern comma reg_expr ')' + { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); } + | MATCHFCN '(' pattern comma pattern ')' + { if (constnode($5)) + $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1)); + else + $$ = op3(MATCHFCN, (Node *)1, $3, $5); } + | NUMBER { $$ = valtonode($1, CCON); } + | SPLIT '(' pattern comma varname comma pattern ')' /* string */ + { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); } + | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */ + { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); } + | SPLIT '(' pattern comma varname ')' + { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ + | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } + | STRING { $$ = valtonode($1, CCON); } + | subop '(' reg_expr comma pattern ')' + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } + | subop '(' pattern comma pattern ')' + { if (constnode($3)) + $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode()); + else + $$ = op4($1, (Node *)1, $3, $5, rectonode()); } + | subop '(' reg_expr comma pattern comma var ')' + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); } + | subop '(' pattern comma pattern comma var ')' + { if (constnode($3)) + $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7); + else + $$ = op4($1, (Node *)1, $3, $5, $7); } + | SUBSTR '(' pattern comma pattern comma pattern ')' + { $$ = 
op3(SUBSTR, $3, $5, $7); } + | SUBSTR '(' pattern comma pattern ')' + { $$ = op3(SUBSTR, $3, $5, NIL); } + | var + | re + ; + +var: + varname + | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); } + | FIELD { $$ = valtonode($1, CFLD); } + | IVAR { $$ = op1(INDIRECT, valtonode($1, CVAR)); } + | INDIRECT term { $$ = op1(INDIRECT, $2); } + ; + +varlist: + /* nothing */ { arglist = $$ = 0; } + | VAR { arglist = $$ = valtonode($1,CVAR); } + | varlist comma VAR { arglist = $$ = linkum($1,valtonode($3,CVAR)); } + ; + +varname: + VAR { $$ = valtonode($1, CVAR); } + | ARG { $$ = op1(ARG, (Node *) $1); } + | VARNF { $$ = op1(VARNF, (Node *) $1); } + ; + + +while: + WHILE '(' pattern rparen { $$ = notnull($3); } + ; + +%% + +static void +setfname(Cell *p) +{ + if (isarr(p)) + vyyerror(":99:%s is an array, not a function", p->nval); + else if (isfunc(p)) + vyyerror(":100:You cannot define function %s more than once", p->nval); + curfname = p->nval; +} + +static int +constnode(Node *p) +{ + return p->ntype == NVALUE && ((Cell *) (p->narg[0]))->csub == CCON; +} + +static unsigned char *strnode(Node *p) +{ + return ((Cell *)(p->narg[0]))->sval; +} + +static Node *notnull(Node *n) +{ + switch (n->nobj) { + case LE: case LT: case EQ: case NE: case GT: case GE: + case BOR: case AND: case NOT: + return n; + default: + return op2(NE, n, nullnode); + } +} diff --git a/nawk/awk.h b/nawk/awk.h @@ -0,0 +1,387 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)awk.h 1.23 (gritter) 12/25/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/awk.h /main/uw7_nj/1 */ +/* from RCS Header: awk.h 1.2 91/06/25 */ + +typedef double Awkfloat; + +#define xfree(a) { if ((a) != NULL) { free(a); a = NULL; } } +#define MAXLABEL 25 + +extern const char version[]; + +extern char errbuf[200]; +#define ERROR snprintf(errbuf, sizeof errbuf, +#define FATAL ), error(1, errbuf) +#define WARNING ), error(0, errbuf) +#define SYNTAX ), yyerror(errbuf) + +extern int compile_time; /* 1 if compiling, 0 if running */ + +extern int posix; /* if POSIX behavior is desired */ + +/* + * This is done to prevent redefinition of our own definitions for FS with + * those defined in the system's header files. Same for RS (on HP-UX/PA-RISC). 
+ */ +#undef FS +#undef RS + +extern unsigned char **FS; +extern unsigned char **RS; +extern unsigned char **ORS; +extern unsigned char **OFS; +extern unsigned char **OFMT; +extern unsigned char **CONVFMT; +extern Awkfloat *NR; +extern Awkfloat *FNR; +extern Awkfloat *NF; +extern unsigned char **FILENAME; +extern unsigned char **SUBSEP; +extern Awkfloat *RSTART; +extern Awkfloat *RLENGTH; + +#define CHUNK 512 /* record and string increment */ + +extern unsigned char *record; +extern int recsize; +extern int dbg; +extern int lineno; +extern int errorflag; +extern int donefld; /* 1 if record broken into fields */ +extern int donerec; /* 1 if record is valid (no fld has changed) */ + +#define CBUFLEN 5120 +extern unsigned char cbuf[CBUFLEN]; /* miscellaneous character collection */ + +extern unsigned char *patbeg; /* beginning of pattern matched */ +extern int patlen; /* length. set in b.c */ + +extern int mb_cur_max; /* MB_CUR_MAX, for acceleration purposes */ + +extern const char outofspace[]; /* message */ + +/* Cell: all information about a variable or constant */ + +typedef struct Cell { + unsigned char ctype; /* OCELL, OBOOL, OJUMP, etc. */ + unsigned char csub; /* CCON, CTEMP, CFLD, etc. 
*/ + unsigned char *nval; /* name, for variables only */ + unsigned char *sval; /* string value */ + Awkfloat fval; /* value as number */ + unsigned tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE */ + struct Cell *cnext; /* ptr to next if chained */ +} Cell; + +typedef struct { /* symbol table array */ + int nelem; /* elements in table right now */ + int size; /* size of tab */ + Cell **tab; /* hash table pointers */ +} Array; + +#define NSYMTAB 50 /* initial size of a symbol table */ +extern Array *symtab, *makesymtab(int); +#define setsymtab(n, s, f, t, tp) ssetsymtab((unsigned char *)n, \ + (unsigned char *)s, \ + f, t, tp) +extern Cell *ssetsymtab(unsigned char *, unsigned char *, Awkfloat, + unsigned, Array *); +#define lookup(s, tp) slookup((unsigned char *)s, tp) +extern Cell *slookup(unsigned char *, Array *); + +extern Cell *recloc; /* location of input record */ +extern Cell *nrloc; /* NR */ +extern Cell *fnrloc; /* FNR */ +extern Cell *fsloc; /* FS */ +extern Cell *nfloc; /* NF */ +extern Cell *rstartloc; /* RSTART */ +extern Cell *rlengthloc; /* RLENGTH */ + +/* Cell.tval values: */ +#define NUM 01 /* number value is valid */ +#define STR 02 /* string value is valid */ +#define DONTFREE 04 /* string space is not freeable */ +#define CON 010 /* this is a constant */ +#define ARR 020 /* this is an array */ +#define FCN 040 /* this is a function name */ +#define FLD 0100 /* this is a field $1, $2, ... 
*/ +#define REC 0200 /* this is $0 */ +#define CANBENUM 0400 /* tells setsymtab() to try for NUM, too */ + +#define freeable(p) (!((p)->tval & DONTFREE)) + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(c) _IO_getc_unlocked(c) +#endif /* _IO_getc_unlocked */ +#endif /* __GLIBC__ */ + +#define getline xxgetline /* avoid glibc _GNU_SOURCE collision */ + +#define DEBUG +#ifdef DEBUG + /* uses have to be doubly parenthesized */ +# define dprintf(x) if (dbg) printf x +#else +# define dprintf(x) +#endif + +#ifndef IN_MAKETAB +#include <wchar.h> + +/* + * Get next character from string s and store it in wc; n is set to + * the length of the corresponding byte sequence. + */ +#define next(wc, s, n) (mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowc(&(wc), (char *)(s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc=WEOF, 1) : 1)) :\ + ((wc) = *(s), (n) = 1)) +#endif /* !IN_MAKETAB */ + +/* function types */ +#define FLENGTH 1 +#define FSQRT 2 +#define FEXP 3 +#define FLOG 4 +#define FINT 5 +#define FSYSTEM 6 +#define FRAND 7 +#define FSRAND 8 +#define FSIN 9 +#define FCOS 10 +#define FATAN 11 +#define FTOUPPER 12 +#define FTOLOWER 13 +#define FCLOSE 14 + +/* Node: parse tree is made of nodes, with Cell's at bottom */ + +typedef struct Node { + int ntype; + struct Node *nnext; + int lineno; + int nobj; + struct Node *narg[1]; /* variable: actual size set by calling malloc */ +} Node; + +#define NIL ((Node *) 0) + +extern Node *winner; +extern Node *nullstat; +extern Node *nullnode; + +/* ctypes */ +#define OCELL 1 +#define OBOOL 2 +#define OJUMP 3 + +/* Cell subtypes: csub */ +#define CFREE 7 +#define CCOPY 6 +#define CCON 5 +#define CTEMP 4 +#define CNAME 3 +#define CVAR 2 +#define CFLD 1 + +/* bool subtypes */ +#define BTRUE 11 +#define BFALSE 12 + +/* jump subtypes */ +#define JEXIT 21 +#define JNEXT 22 +#define JBREAK 23 +#define JCONT 24 +#define JRET 25 + +/* node types */ 
+#define NVALUE 1 +#define NSTAT 2 +#define NEXPR 3 +#define NFIELD 4 + +extern Cell *(*proctab[])(Node **, int); +extern int pairstack[]; +extern long paircnt; + +#define notlegal(n) (n <= FIRSTTOKEN || n >= LASTTOKEN || proctab[n-FIRSTTOKEN] == nullproc) +#define isvalue(n) ((n)->ntype == NVALUE) +#define isexpr(n) ((n)->ntype == NEXPR) +#define isjump(n) ((n)->ctype == OJUMP) +#define isexit(n) ((n)->csub == JEXIT) +#define isbreak(n) ((n)->csub == JBREAK) +#define iscont(n) ((n)->csub == JCONT) +#define isnext(n) ((n)->csub == JNEXT) +#define isret(n) ((n)->csub == JRET) +#define isstr(n) ((n)->tval & STR) +#define isnum(n) ((n)->tval & NUM) +#define isarr(n) ((n)->tval & ARR) +#define isfunc(n) ((n)->tval & FCN) +#define istrue(n) ((n)->csub == BTRUE) +#define istemp(n) ((n)->csub == CTEMP) + +#include <regex.h> + +typedef struct fa { + unsigned char *restr; + int use; + int notbol; + regex_t re; +} fa; + +/* awk.g.c */ +extern int yywrap(void); +extern int yyparse(void); +/* awk.lx.c */ +extern int yylex(void); +extern void startreg(void); +extern int awk_input(void); +/* b.c */ +extern fa *makedfa(unsigned char *, int); +extern int match(void *, unsigned char *); +extern int pmatch(void *, unsigned char *); +extern int nematch(void *, unsigned char *); +/* lib.c */ +extern void fldinit(void); +extern void initgetrec(void); +extern int getrec(unsigned char **, int *); +extern int readrec(unsigned char **, int *, FILE *); +extern unsigned char *getargv(int); +extern void setclvar(unsigned char *); +extern void fldbld(void); +extern void newfld(int); +extern void recbld(void); +extern Cell *fieldadr(int); +extern void vyyerror(const char *, ...); +extern void yyerror(char *); +extern void fpecatch(int); +extern void bracecheck(void); +extern void error(int, const char *, ...); +extern void bclass(int); +extern double errcheck(double, unsigned char *); +extern void PUTS(unsigned char *); +extern int isclvar(unsigned char *); +extern int is2number(unsigned char 
*, Cell *); +extern double awk_atof(const char *); +extern unsigned char *makerec(const unsigned char *, int); +/* main.c */ +extern int pgetc(void); +/* parse.c */ +extern Node *nodealloc(int); +extern Node *exptostat(Node *); +extern Node *node1(int, Node *); +extern Node *node2(int, Node *, Node *); +extern Node *node3(int, Node *, Node *, Node *); +extern Node *node4(int, Node *, Node *, Node *, Node *); +extern Node *stat3(int, Node *, Node *, Node *); +extern Node *op2(int, Node *, Node *); +extern Node *op1(int, Node *); +extern Node *stat1(int, Node *); +extern Node *op3(int, Node *, Node *, Node *); +extern Node *op4(int, Node *, Node *, Node *, Node *); +extern Node *stat2(int, Node *, Node *); +extern Node *stat4(int, Node *, Node *, Node *, Node *); +extern Node *valtonode(Cell *, int); +extern Node *rectonode(void); +extern Node *makearr(Node *); +extern Node *pa2stat(Node *, Node *, Node *); +extern Node *linkum(Node *, Node *); +extern void defn(Cell *, Node *, Node *); +extern int isarg(const char *); +/* proctab.c */ +extern unsigned char *tokname(int); +/* run.c */ +extern int run(Node *); +extern Cell *r_execute(Node *); +extern Cell *program(Node **, int); +extern Cell *call(Node **, int); +extern Cell *copycell(Cell *); +extern Cell *arg(Node **, int); +extern Cell *jump(Node **, int); +extern Cell *getline(Node **, int); +extern Cell *getnf(Node **, int); +extern Cell *array(Node **, int); +extern Cell *delete(Node **, int); +extern Cell *intest(Node **, int); +extern Cell *matchop(Node **, int); +extern Cell *boolop(Node **, int); +extern Cell *relop(Node **, int); +extern Cell *gettemp(const char *); +extern Cell *indirect(Node **, int); +extern Cell *substr(Node **, int); +extern Cell *sindex(Node **, int); +extern int format(unsigned char **, int *, const unsigned char *, Node *); +extern Cell *awsprintf(Node **, int); +extern Cell *aprintf(Node **, int); +extern Cell *arith(Node **, int); +extern double ipow(double, int); +extern Cell 
*incrdecr(Node **, int); +extern Cell *assign(Node **, int); +extern Cell *cat(Node **, int); +extern Cell *pastat(Node **, int); +extern Cell *dopa2(Node **, int); +extern Cell *split(Node **, int); +extern Cell *condexpr(Node **, int); +extern Cell *ifstat(Node **, int); +extern Cell *whilestat(Node **, int); +extern Cell *dostat(Node **, int); +extern Cell *forstat(Node **, int); +extern Cell *instat(Node **, int); +extern Cell *bltin(Node **, int); +extern Cell *print(Node **, int); +extern Cell *nullproc(Node **, int); +extern FILE *redirect(int, Node *); +extern FILE *openfile(int, unsigned char *); +extern Cell *sub(Node **, int); +extern Cell *gsub(Node **, int); +extern int chrlen(const unsigned char *); +extern int chrdist(const unsigned char *, const unsigned char *); +/* tran.c */ +extern void syminit(void); +extern void arginit(int, unsigned char **); +extern void envinit(unsigned char **); +extern Array *makesymtab(int); +extern void freesymtab(Cell *); +extern void freeelem(Cell *, unsigned char *); +extern Cell *ssetsymtab(unsigned char *, unsigned char *, + Awkfloat, unsigned, Array *); +extern Cell *slookup(unsigned char *, Array *); +extern Awkfloat setfval(Cell *, Awkfloat); +extern void funnyvar(Cell *, char *); +extern unsigned char *setsval(Cell *, unsigned char *); +extern Awkfloat r_getfval(Cell *); +extern unsigned char *r_getsval(Cell *); +#define tostring(s) stostring((unsigned char *)s) +extern unsigned char *stostring(const unsigned char *); +extern unsigned char *qstring(unsigned char *, int); diff --git a/nawk/awk.lx.l b/nawk/awk.lx.l @@ -0,0 +1,383 @@ +%{ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)awk.lx.l 1.13 (gritter) 11/22/05> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* + * flex port partially taken from 4.4BSD awk, + * + * Copyright (c) 1991 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/awk.lx.l /main/uw7_nj/1 */ +/* from RCS Header: awk.lx.l 1.2 91/06/25 */ + +/*%Start A str sc reg comment*/ +/*%X A str sc reg comment*/ +%} +%X A str reg + +%{ + +#include "awk.h" +#include "y.tab.h" +#include <pfmt.h> +#include <unistd.h> + +static void awk_unputstr(const char *s); + +#ifdef FLEX_SCANNER +static int awk_yytchar; +int awk_input(void); +static void awk_unput(int c); +#undef YY_INPUT +#define YY_INPUT(buf, result, max_size) { \ + int c = awk_input(); \ + result = (c == EOF || c == '\0') ? YY_NULL : (buf[0] = c, 1); \ +} +#else /* !FLEX_SCANNER */ +#undef input /* defeat lex */ +#undef unput +int input(void); +void unput(int c); +#define awk_unput(c) unput(c) +#define awk_yytchar yytchar +#endif /* !FLEX_SCANNER */ + + +extern YYSTYPE yylval; +extern int infunc; + +int lineno = 1; +int bracecnt = 0; +int brackcnt = 0; +int parencnt = 0; +#define DEBUG +#ifdef DEBUG +# define RET(x) {if(dbg)printf("lex %s [%s]\n", tokname(x), yytext); return(x); } +#else +# define RET(x) return(x) +#endif + +#define CADD cbuf[clen++] = yytext[0]; \ + if (clen >= CBUFLEN-1) { \ + vyyerror(":90:String/reg expr %.10s ... 
too long", cbuf); \ + BEGIN INITIAL; \ + } + +static const char extra[] = ":91:Extra %c"; +extern const char nlstring[]; + +unsigned char cbuf[CBUFLEN]; +unsigned char *s; +int clen, cflag; +%} + +A [a-zA-Z_] +B [a-zA-Z0-9_] +D [0-9] +O [0-7] +H [0-9a-fA-F] +WS [ \t] + +%% + static int sc_flag = 0; + + if (sc_flag) { + BEGIN INITIAL; + sc_flag = 0; + RET('}'); + } + +\n { lineno++; RET(NL); } +#.* { ; } /* strip comments */ +{WS}+ { ; } +<INITIAL,reg>"\\"\n lineno++; +; { RET(';'); } + +BEGIN { RET(XBEGIN); } +END { RET(XEND); } +func(tion)? { if (infunc) vyyerror(":92:Illegal nested function"); RET(FUNC); } +return { if (!infunc) vyyerror(":93:Return not in function"); RET(RETURN); } +"&&" { RET(AND); } +"||" { RET(BOR); } +"!" { RET(NOT); } +"!=" { yylval.i = NE; RET(NE); } +"~" { yylval.i = MATCH; RET(MATCHOP); } +"!~" { yylval.i = NOTMATCH; RET(MATCHOP); } +"<" { yylval.i = LT; RET(LT); } +"<=" { yylval.i = LE; RET(LE); } +"==" { yylval.i = EQ; RET(EQ); } +">=" { yylval.i = GE; RET(GE); } +">" { yylval.i = GT; RET(GT); } +">>" { yylval.i = APPEND; RET(APPEND); } +"++" { yylval.i = INCR; RET(INCR); } +"--" { yylval.i = DECR; RET(DECR); } +"+=" { yylval.i = ADDEQ; RET(ASGNOP); } +"-=" { yylval.i = SUBEQ; RET(ASGNOP); } +"*=" { yylval.i = MULTEQ; RET(ASGNOP); } +"/=" { yylval.i = DIVEQ; RET(ASGNOP); } +"%=" { yylval.i = MODEQ; RET(ASGNOP); } +"^=" { yylval.i = POWEQ; RET(ASGNOP); } +"**=" { yylval.i = POWEQ; RET(ASGNOP); } +"=" { yylval.i = ASSIGN; RET(ASGNOP); } +"**" { RET(POWER); } +"^" { RET(POWER); } + +"$"{D}+ { yylval.cp = fieldadr(atoi(yytext+1)); RET(FIELD); } +"$NF" { awk_unputstr("(NF)"); return(INDIRECT); } +"$"{A}{B}* { int c, n; + c = awk_yytchar; + if (c == '(' || c == '[' || infunc && (n=isarg(yytext+1)) >= 0) { + awk_unputstr(yytext+1); + return(INDIRECT); + } else { + yylval.cp = setsymtab((unsigned char *)yytext+1,"",0.0,STR|NUM,symtab); + RET(IVAR); + } + } +"$" { RET(INDIRECT); } +NF { yylval.cp = setsymtab((unsigned char *)yytext, "", 0.0, 
NUM, symtab); RET(VARNF); } + +({D}+("."?){D}*|"."{D}+)((e|E)("+"|-)?{D}+)? { + yylval.cp = setsymtab((unsigned char *)yytext, tostring((unsigned char *)yytext), awk_atof(yytext), CON|NUM, symtab); + RET(NUMBER); } + +while { RET(WHILE); } +for { RET(FOR); } +do { RET(DO); } +if { RET(IF); } +else { RET(ELSE); } +next { RET(NEXT); } +exit { RET(EXIT); } +break { RET(BREAK); } +continue { RET(CONTINUE); } +print { yylval.i = PRINT; RET(PRINT); } +printf { yylval.i = PRINTF; RET(PRINTF); } +sprintf { yylval.i = SPRINTF; RET(SPRINTF); } +split { yylval.i = SPLIT; RET(SPLIT); } +substr { RET(SUBSTR); } +sub { yylval.i = SUB; RET(SUB); } +gsub { yylval.i = GSUB; RET(GSUB); } +index { RET(INDEX); } +match { RET(MATCHFCN); } +in { RET(IN); } +getline { RET(GETLINE); } +delete { RET(DELETE); } +length { yylval.i = FLENGTH; RET(BLTIN); } +log { yylval.i = FLOG; RET(BLTIN); } +int { yylval.i = FINT; RET(BLTIN); } +exp { yylval.i = FEXP; RET(BLTIN); } +sqrt { yylval.i = FSQRT; RET(BLTIN); } +sin { yylval.i = FSIN; RET(BLTIN); } +cos { yylval.i = FCOS; RET(BLTIN); } +atan2 { yylval.i = FATAN; RET(BLTIN); } +system { yylval.i = FSYSTEM; RET(BLTIN); } +rand { yylval.i = FRAND; RET(BLTIN); } +srand { yylval.i = FSRAND; RET(BLTIN); } +toupper { yylval.i = FTOUPPER; RET(BLTIN); } +tolower { yylval.i = FTOLOWER; RET(BLTIN); } +close { yylval.i = FCLOSE; RET(BLTIN); } + +{A}{B}* { int n, c; + c = awk_yytchar; /* look for '(' */ + if (c != '(' && infunc && (n=isarg(yytext)) >= 0) { + yylval.i = n; + RET(ARG); + } else { + yylval.cp = setsymtab((unsigned char *)yytext,"",0.0,STR|NUM,symtab); + if (c == '(') { + RET(CALL); + } else { + RET(VAR); + } + } + } +\" { BEGIN str; clen = 0; } + +"}" { if (--bracecnt < 0) vyyerror(extra, '}'); sc_flag = 1; RET(';'); } +"]" { if (--brackcnt < 0) vyyerror(extra, ']'); RET(']'); } +")" { if (--parencnt < 0) vyyerror(extra, ')'); RET(')'); } + +. 
{ if (yytext[0] == '{') bracecnt++; + else if (yytext[0] == '[') brackcnt++; + else if (yytext[0] == '(') parencnt++; + RET(yylval.i = yytext[0]); /* everything else */ }

<reg>\\.        { /*
                   * Escaped character inside a regular expression: keep the
                   * backslash so the regex compiler sees the escape.  Both
                   * bytes are stored through CADD so that the CBUFLEN limit
                   * is enforced for escapes as well as for plain characters.
                   * yytext is only overwritten in place, never lengthened.
                   */
                  CADD; yytext[0] = yytext[1]; CADD; }
<reg>\n         { vyyerror(":94:Newline in regular expression %.10s ...", cbuf); lineno++; BEGIN INITIAL; }
<reg>"/"        { BEGIN INITIAL;
                  cbuf[clen] = 0;
                  yylval.s = tostring(cbuf);
                  awk_unput('/');
                  RET(REGEXPR); }
<reg>.          { CADD; }

<str>\"         { BEGIN INITIAL;
                  cbuf[clen] = 0; s = tostring(cbuf);
                  cbuf[clen] = ' '; cbuf[++clen] = 0;
                  yylval.cp = setsymtab(cbuf, s, 0.0, CON|STR, symtab);
                  RET(STRING); }
<str>\n         { vyyerror(nlstring, cbuf); lineno++; BEGIN INITIAL; }
<str>"\\\""     { /*
                   * All escape rules below place the decoded byte in
                   * yytext[0] and append it with CADD instead of writing
                   * cbuf[clen++] directly, so a long run of escapes can no
                   * longer write past the end of cbuf.
                   */
                  yytext[0] = '"'; CADD; }
<str>"\\"n      { yytext[0] = '\n'; CADD; }
<str>"\\"t      { yytext[0] = '\t'; CADD; }
<str>"\\"f      { yytext[0] = '\f'; CADD; }
<str>"\\"r      { yytext[0] = '\r'; CADD; }
<str>"\\"b      { yytext[0] = '\b'; CADD; }
<str>"\\"v      { yytext[0] = '\v'; CADD; }     /* these ANSIisms may not be known by */
<str>"\\"a      { yytext[0] = '\007'; CADD; }   /* your compiler. hence 007 for bell */
<str>"\\\\"     { yytext[0] = '\\'; CADD; }
<str>"\\"({O}{O}{O}|{O}{O}|{O}) { unsigned int n = 0;
                  sscanf(yytext+1, "%o", &n);
                  yytext[0] = (char)(n & 0377); CADD; }
<str>"\\"x({H}+) { unsigned int n = 0;  /* ANSI permits any number! */
                  sscanf(yytext+2, "%x", &n);
                  yytext[0] = (char)(n & 0377); CADD; }
<str>"\\".      { yytext[0] = yytext[1]; CADD; }
<str>.          { CADD; }

%%

/*
 * Switch the scanner into the <reg> start state and empty cbuf so that the
 * following input is collected as the text of a regular expression.
 */
void
startreg(void)
{
    BEGIN reg;
    clen = 0;
}

/* input() and unput() were transcriptions of the standard lex
   macros for input and output with additions for error message
   printing. God help us all if someone changes how lex works.
   - Luckily, the BSD people did most of the flex porting already
   for oawk.
*/

unsigned char ebuf[300];    /* ring buffer of recently read program text */
unsigned char *ep = ebuf;   /* next write position inside ebuf */

+#ifdef FLEX_SCANNER +int +awk_input(void) +{ + register int c; + extern unsigned char *lexprog; + + if (lexprog != NULL) { /* awk '...' */ + if (c = *lexprog & 0377) + lexprog++; + } else /* awk -f ... 
*/ + c = pgetc(); + if (c == EOF) + c = 0; + if (ep >= ebuf + sizeof ebuf) + ep = ebuf; + awk_yytchar = c; + return *ep++ = c; +} + +static void +awk_unput(int c) +{ + awk_yytchar = c; + unput(c); +} + + +#else /* !FLEX_SCANNER */ +int +input(void) +{ + register int c; + extern unsigned char *lexprog; + + if (yysptr > yysbuf) + c = U(*--yysptr) & 0377; + else if (lexprog != NULL) { /* awk '...' */ + if (c = *lexprog & 0377) + lexprog++; + } else /* awk -f ... */ + c = pgetc(); + if (c == '\n') + yylineno++; + else if (c == EOF) + c = 0; + if (ep >= ebuf + sizeof ebuf) + ep = ebuf; + return *ep++ = c; +} + +int +awk_input(void) +{ + return input(); +} + +void +unput(int c) +{ + yytchar = c; + if (yytchar == '\n') + yylineno--; + *yysptr++ = yytchar; + if (--ep < ebuf) + ep = ebuf + sizeof(ebuf) - 1; +} +#endif /* !FLEX_SCANNER */ + +static void +awk_unputstr(const char *s) +{ + int i; + + for (i = strlen(s)-1; i >= 0; i--) + awk_unput(s[i]); +} diff --git a/nawk/b.c b/nawk/b.c @@ -0,0 +1,174 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)b.c 1.6 (gritter) 5/15/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/b.c /main/uw7_nj/1 */ + +#include <stdio.h> +#include "awk.h" +#include <ctype.h> +#include "y.tab.h" +#include <pfmt.h> + +unsigned char *patbeg; +int patlen; + +static void +reprob(fa *f, int e) +{ + char msg[BUFSIZ]; + + regerror(e, &f->re, msg, sizeof(msg)); + error(MM_ERROR, ":104:Error in RE `%s': %s", f->restr, msg); +} + +static fa * +mkdfa(unsigned char *s) /* build DFA from s */ +{ + fa *pfa; + int i; + int flags; + + if ((pfa = (fa *)malloc(sizeof(fa))) == 0) + { + error(MM_ERROR, + "5:Regular expression too big: out of space in %s", s); + } + flags = posix ? REG_EXTENDED : REG_OLDERE | REG_OLDESC | REG_NOI18N; + flags |= REG_ONESUB | REG_BKTEMPTY | REG_BKTESCAPE | REG_ESCSEQ; + if ((i = regcomp(&pfa->re, (char *)s, flags)) != 0) + { + pfa->restr = s; + reprob(pfa, i); + } + pfa->restr = tostring(s); + pfa->use = 1; + pfa->notbol = 0; + return pfa; +} + +fa * +makedfa(unsigned char *s, int leftmost) /* build and cache DFA from s */ +{ + static fa *fatab[20]; + static int nfatab; + int i, n, u; + fa *pfa; + + if (compile_time) + return mkdfa(s); + /* + * Search for a match to those cached. + * If not found, save it, tossing least used one when full. + */ + for (i = 0; i < nfatab; i++) + { + if (strcmp((char *)fatab[i]->restr, (char *)s) == 0) + { + fatab[i]->use++; + return fatab[i]; + } + } + pfa = mkdfa(s); + if ((n = nfatab) < sizeof(fatab) / sizeof(fa *)) + nfatab++; + else + { + n = 0; + u = fatab[0]->use; + for (i = 1; i < sizeof(fatab) / sizeof(fa *); i++) + { + if (fatab[i]->use < u) + { + n = i; + u = fatab[n]->use; + } + } + free((void *)fatab[n]->restr); + regfree(&fatab[n]->re); + free((void *)fatab[n]); + } + fatab[n] = pfa; + return pfa; +} + +int +match(void *v, unsigned char *p) /* does p match f anywhere? 
*/ +{ + int err; + fa *f = v; + + if ((err = regexec(&f->re, (char *)p, (size_t)0, (regmatch_t *)0, 0)) == 0) + return 1; + if (err != REG_NOMATCH) + reprob(f, err); + return 0; +} + +int +pmatch(void *v, unsigned char *p) /* find leftmost longest (maybe empty) match */ +{ + regmatch_t m; + int err; + fa *f = v; + + if ((err = regexec(&f->re, (char *)p, (size_t)1, &m, f->notbol)) == 0) + { + patbeg = &p[m.rm_so]; + patlen = m.rm_eo - m.rm_so; + return 1; + } + if (err != REG_NOMATCH) + reprob(f, err); + patlen = -1; + return 0; +} + +int +nematch(void *v, unsigned char *p) /* find leftmost longest nonempty match */ +{ + regmatch_t m; + int err; + fa *f = v; + + for (;;) + { + if ((err = regexec(&f->re, (char *)p, (size_t)1, &m, + f->notbol | REG_NONEMPTY)) == 0) + { + if ((patlen = m.rm_eo - m.rm_so) == 0) + { + p += m.rm_eo; + continue; + } + patbeg = &p[m.rm_so]; + return 1; + } + if (err != REG_NOMATCH) + reprob(f, err); + patlen = -1; + return 0; + } +} diff --git a/nawk/lib.c b/nawk/lib.c @@ -0,0 +1,852 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)lib.c 1.27 (gritter) 12/25/06> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/lib.c /main/uw7_nj/1 */ +/* from RCS Header: lib.c 1.2 91/06/25 */ + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <errno.h> +#include <string.h> +#include <strings.h> +#include "awk.h" +#include "y.tab.h" +#include <pfmt.h> +#include <stdarg.h> +#include <wctype.h> +#include "asciitype.h" + +#undef RS + +static void eprint(void); + +#define getfval(p) (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p)) +#define getsval(p) (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p)) + +FILE *infile = NULL; +unsigned char *file = (unsigned char*) ""; +unsigned char *record; +unsigned char *recdata; +int recsize; +unsigned char *fields; + +int donefld; /* 1 = implies rec broken into fields */ +int donerec; /* 1 = record is valid (no flds have changed) */ + +Cell **fldtab; /* room for fields */ + +static Cell dollar0 = { + OCELL, CFLD, (unsigned char*) "$0", (unsigned char *)"", 0.0, REC|STR|DONTFREE +}; +static Cell FINIT = { + OCELL, CFLD, NULL, (unsigned char*) "", 0.0, FLD|STR|DONTFREE +}; + + +static int MAXFLD; /* number of allocated fields */ +int maxfld = 0; /* last used field */ +int argno = 1; /* current input argument number */ +extern Awkfloat *ARGC; + +static void growrec(unsigned char **, int *, int, unsigned char **, int); + +char badopen[] = ":11:Cannot open %s: %s"; + +/* Dynamic field and record allocation inspired by Bell Labs awk. */ +static void morefields(void) +{ + int i; + const int n = 32; + + fldtab = realloc(fldtab, (MAXFLD + n + 1) * sizeof *fldtab); + if (fldtab == NULL) + error(MM_ERROR, ":13:Record `%.20s...' has too many fields", + record); + recloc = fldtab[0]; + for (i = MAXFLD; i < MAXFLD + n; i++) { + fldtab[i] = malloc(sizeof **fldtab); + if (fldtab[i] == NULL) + error(MM_ERROR, + ":13:Record `%.20s...' 
has too many fields", + record); + *fldtab[i] = FINIT; + } + MAXFLD += n; +} + +void fldinit(void) +{ + record = recdata = malloc(recsize = CHUNK); + fields = malloc(recsize); + if (record == NULL || fields == NULL) + error(MM_ERROR, outofspace, "fldinit"); + *record = '\0'; + morefields(); + *fldtab[0] = dollar0; +} + +void initgetrec(void) +{ + extern unsigned char **start_delayed, **after_delayed; + unsigned char **pp; + int i; + unsigned char *p; + + /* first handle delayed name=val arguments */ + for (pp = start_delayed; pp != after_delayed; pp++) + setclvar(*pp); + for (i = 1; i < *ARGC; i++) { + if (!isclvar(p = getargv(i))) /* find 1st real filename */ + return; + setclvar(p); /* a commandline assignment before filename */ + argno++; + } + infile = stdin; /* no filenames, so use stdin */ + /* *FILENAME = file = (unsigned char*) "-"; */ +} + +int getrec(unsigned char **buf, int *bufsize) +{ + int c, saved; + static int firsttime = 1; + + if (firsttime) { + firsttime = 0; + initgetrec(); + } + dprintf( ("RS=<%s>, FS=<%s>, ARGC=%d, FILENAME=%s\n", + *RS ? *RS : tostring(""), + *FS ? *FS : tostring(""), + (int) *ARGC, + *FILENAME ? 
*FILENAME : tostring("")) ); + donefld = 0; + donerec = 1; + if (*bufsize == 0) { + if ((*buf = malloc(*bufsize = CHUNK)) == NULL) + error(MM_ERROR, outofspace, "getrec"); + **buf = '\0'; + } + saved = (*buf)[0]; + (*buf)[0] = 0; + while (argno < *ARGC || infile == stdin) { + dprintf( ("argno=%d, file=|%s|\n", argno, file) ) + ; + if (infile == NULL) { /* have to open a new file */ + file = getargv(argno); + if (*file == '\0') { /* it's been zapped */ + argno++; + continue; + } + if (isclvar(file)) { /* a var=value arg */ + setclvar(file); + argno++; + continue; + } + *FILENAME = file; + dprintf( ("opening file %s\n", file) ); + if (*file == '-' && *(file+1) == '\0') + infile = stdin; + else if ((infile = fopen((char *)file, "r")) == NULL) + error(MM_ERROR, badopen, file, strerror(errno)); + setfval(fnrloc, 0.0); + } + c = readrec(buf, bufsize, infile); + if (c != 0 || (*buf)[0] != '\0') { /* normal record */ + if (*buf == record) { + if (!(recloc->tval & DONTFREE)) + xfree(recloc->sval); + recloc->sval = record; + recloc->tval = REC | STR | DONTFREE; + (void)is2number(0, recloc); + } + setfval(nrloc, nrloc->fval+1); + setfval(fnrloc, fnrloc->fval+1); + return 1; + } + /* EOF arrived on this file; set up next */ + if (infile != stdin) + fclose(infile); + infile = NULL; + argno++; + } + /* + * POSIX.2 requires that NF stick with its last value + * at the start of the END code. The most straightforward + * way to do this is to restore the contents of record + * [==buf when called from program()] so that getnf() will + * recompute the same NF value unless something strange + * occurs. This has the side effect of $0...$NF *also* + * having sticky values into END, but that seems to match + * the spirit of POSIX.2's rule for NF. 
+ */ + if (posix) + (*buf)[0] = saved; + return 0; /* true end of file */ +} + +int readrec(unsigned char **buf, int *bufsize, FILE *inf) + /* read one record into buf */ +{ + register int sep, c, k, m, n; + unsigned char *rr; + register int nrr; + wchar_t wc; + + next(wc, *RS, n); + if ((sep = **RS) == 0) { + sep = '\n'; + while ((c=getc(inf)) == '\n' && c != EOF) /* skip leading \n's */ + ; + if (c != EOF) + ungetc(c, inf); + } + if (*bufsize == 0) + growrec(buf, bufsize, CHUNK, NULL, 0); + for (rr = *buf, nrr = *bufsize; ; ) { + cont: for (; (c=getc(inf)) != sep && c != EOF; *rr++ = c) + if (--nrr < n + 3) { + growrec(buf, bufsize, *bufsize + CHUNK, &rr, 0); + nrr += CHUNK; + } + if (c != EOF) { + /* + * Note: This code does not restrict occurences of + * the multibyte sequence in RS to the start of an + * input character. + */ + for (m = 1; m < n; m++) { + if ((c = getc(inf)) == EOF || c != (*RS)[m]) { + for (k = 0; k < m; k++) + *rr++ = (*RS)[k]; + nrr -= k; + if (c == EOF) + break; + *rr++ = c; + nrr--; + goto cont; + } + } + } + if (**RS == sep || c == EOF) + break; + if ((c = getc(inf)) == '\n' || c == EOF) /* 2 in a row */ + break; + *rr++ = '\n'; + *rr++ = c; + } + /*if (rr > *buf + *bufsize) + error(MM_ERROR, ":12:Input record `%.20s...' too long", *buf);*/ + *rr = 0; + dprintf( ("readrec saw <%s>, returns %d\n", *buf, c == EOF + && rr == *buf ? 0 : 1) ); + return c == EOF && rr == *buf ? 
0 : 1; +} + +unsigned char *getargv(int n) /* get ARGV[n] */ +{ + Cell *x; + unsigned char *s, temp[25]; + extern Array *ARGVtab; + + snprintf((char *)temp, sizeof temp, "%d", n); + x = setsymtab(temp, "", 0.0, STR, ARGVtab); + s = getsval(x); + dprintf( ("getargv(%d) returns |%s|\n", n, s) ); + return s; +} + +void setclvar(unsigned char *s) /* set var=value from s */ +{ + unsigned char *p; + Cell *q; + + for (p=s; *p != '='; p++) + ; + *p++ = 0; + p = qstring(p, '\0'); + q = setsymtab(s, p, 0.0, STR, symtab); + setsval(q, p); + (void)is2number(0, q); + dprintf( ("command line set %s to |%s|\n", s, p) ); +} + +static void cleanfld(int n1, int n2); +static int refldbld(unsigned char *rec, unsigned char *fs); + +void +fldbld(void) +{ + register unsigned char *r, *fr; + Cell **p; + wchar_t wc, sep; + int i, n; + + if (donefld) + return; + if (!(recloc->tval & STR)) + getsval(recloc); + r = recloc->sval; /* was record! */ + fr = fields; + i = 0; /* number of fields accumulated here */ + if ((sep = **FS) != '\0' && (next(sep, *FS, n), (*FS)[n] != '\0')) { + /* it's a regular expression */ + i = refldbld(r, *FS); + } else if (sep == ' ') { + for (i = 0; ; ) { + while (*r == ' ' || *r == '\t' || *r == '\n') + r++; + if (*r == 0) + break; + i++; + if (i >= MAXFLD) + morefields(); + if (!(fldtab[i]->tval & DONTFREE)) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + next(wc, r, n); + do { + do + *fr++ = *r++; + while (--n); + next(wc, r, n); + } while (wc != ' ' && wc != '\t' && wc != '\n' && + wc != '\0'); + *fr++ = 0; + } + *fr = 0; + } else if (*r != 0) { /* if 0, it's a null field */ + for (;;) { + i++; + if (i >= MAXFLD) + morefields(); + if (!(fldtab[i]->tval & DONTFREE)) + xfree(fldtab[i]->sval); + fldtab[i]->sval = fr; + fldtab[i]->tval = FLD | STR | DONTFREE; + while (next(wc, r, n), + wc != sep && wc != '\n' && wc != '\0') { + /* \n always a separator */ + do + *fr++ = *r++; + while (--n); + } + *fr++ = '\0'; + if (wc 
== '\0') + break; + r += n; + } + *fr = 0; + } + /*if (i >= MAXFLD) + error(MM_ERROR, ":13:Record `%.20s...' has too many fields", + record);*/ + /* clean out junk from previous record */ + cleanfld(i, maxfld); + maxfld = i; + donefld = 1; + for (p = &fldtab[1]; p <= &fldtab[0]+maxfld; p++) + (void)is2number(0, *p); + setfval(nfloc, (Awkfloat) maxfld); + if (dbg) + for (p = &fldtab[0]; p <= &fldtab[0]+maxfld; p++) + pfmt(stdout, MM_INFO, ":14:field %d: |%s|\n", p-&fldtab[0], + (*p)->sval); +} + +static void cleanfld(int n1, int n2) /* clean out fields n1..n2 inclusive */ +{ + static unsigned char *nullstat = (unsigned char *) ""; + register Cell **p, **q; + + for (p = &fldtab[n2], q = &fldtab[n1]; p > q; p--) { + if (!((*p)->tval & DONTFREE)) + xfree((*p)->sval); + (*p)->tval = FLD | STR | DONTFREE; + (*p)->sval = nullstat; + } +} + +void newfld(int n) /* add field n (after end) */ +{ + /*if (n >= MAXFLD) + error(MM_ERROR, ":15:Creating too many fields", record);*/ + while (n >= MAXFLD) + morefields(); + cleanfld(maxfld, n); + maxfld = n; + setfval(nfloc, (Awkfloat) n); +} + +static int refldbld(unsigned char *rec, + unsigned char *fs) /* build fields from reg expr in FS */ +{ + unsigned char *fr; + int i; + fa *pfa; + + fr = fields; + *fr = '\0'; + if (*rec == '\0') + return 0; + pfa = makedfa(fs, 1); + dprintf( ("into refldbld, rec = <%s>, pat = <%s>\n", rec, + fs) ); + pfa->notbol = 0; + for (i = 1; ; i++) { + if (i >= MAXFLD) + morefields(); + if (!(fldtab[i]->tval & DONTFREE)) + xfree(fldtab[i]->sval); + fldtab[i]->tval = FLD | STR | DONTFREE; + fldtab[i]->sval = fr; + dprintf( ("refldbld: i=%d\n", i) ); + if (nematch(pfa, rec)) { + pfa->notbol = REG_NOTBOL; + dprintf( ("match %s (%d chars\n", + patbeg, patlen) ); + strncpy((char*) fr, (char*) rec, patbeg-rec); + fr += patbeg - rec + 1; + *(fr-1) = '\0'; + rec = patbeg + patlen; + } else { + dprintf( ("no match %s\n", rec) ); + strcpy((char*) fr, (char*) rec); + pfa->notbol = 0; + break; + } + } + return i; +} 
+ +void recbld(void) +{ + int i; + unsigned char *r, *p; + + if (donerec == 1) + return; + r = recdata; + for (i = 1; i <= *NF; i++) { + p = getsval(fldtab[i]); + while ((*r = *p++)) { + if (++r >= &recdata[recsize]) { + recsize += CHUNK; + growrec(&recdata, &recsize, recsize, &r, 1); + } + } + if (i < *NF) + for ((p = *OFS); (*r = *p++); ) { + if (++r >= &recdata[recsize]) { + recsize += CHUNK; + growrec(&recdata, &recsize, + recsize, &r, 1); + } + } + } + *r = '\0'; + dprintf( ("in recbld FS=%o, recloc=%lo\n", **FS, + (long)recloc) ); + recloc->tval = REC | STR | DONTFREE; + recloc->sval = record = recdata; + dprintf( ("in recbld FS=%o, recloc=%lo\n", **FS, + (long)recloc) ); + dprintf( ("recbld = |%s|\n", record) ); + donerec = 1; +} + +Cell *fieldadr(int n) +{ + if (n < 0) + error(MM_ERROR, ":17:Trying to access field %d", n); + while (n >= MAXFLD) + morefields(); + return(fldtab[n]); +} + +int errorflag = 0; +char errbuf[200]; + +static int been_here = 0; +static char + atline[] = ":18: at source line %d", + infunc[] = ":19: in function %s"; + +void +vyyerror(const char *msg, ...) 
+{ + extern unsigned char *curfname; + va_list args; + + if (been_here++ > 2) + return; + va_start(args, msg); + vpfmt(stderr, MM_ERROR, msg, args); + pfmt(stderr, MM_NOSTD, atline, lineno); + if (curfname != NULL) + pfmt(stderr, MM_NOSTD, infunc, curfname); + fprintf(stderr, "\n"); + errorflag = 2; + eprint(); + + va_end(args); +} + +void +yyerror(char *s) +{ + extern unsigned char /**cmdname,*/ *curfname; + static int been_here = 0; + + if (been_here++ > 2) + return; + pfmt(stderr, (MM_ERROR | MM_NOGET), "%s", s); + pfmt(stderr, MM_NOSTD, atline, lineno); + if (curfname != NULL) + pfmt(stderr, MM_NOSTD, infunc, curfname); + fprintf(stderr, "\n"); + errorflag = 2; + eprint(); +} + +/*ARGSUSED*/ +void fpecatch(int signo) +{ + error(MM_ERROR, ":20:Floating point exception"); +} + +extern int bracecnt, brackcnt, parencnt; +static void bcheck2(int n, int c1, int c2); + +void bracecheck(void) +{ + int c; + static int beenhere = 0; + + if (beenhere++) + return; + while ((c = awk_input()) != EOF && c != '\0') + bclass(c); + bcheck2(bracecnt, '{', '}'); + bcheck2(brackcnt, '[', ']'); + bcheck2(parencnt, '(', ')'); +} + +static void bcheck2(int n, int c1, int c2) +{ + if (n == 1) + pfmt(stderr, MM_ERROR, ":21:Missing %c\n", c2); + else if (n > 1) + pfmt(stderr, MM_ERROR, ":22:%d missing %c's\n", n, c2); + else if (n == -1) + pfmt(stderr, MM_ERROR, ":23:Extra %c\n", c2); + else if (n < -1) + pfmt(stderr, MM_ERROR, ":24:%d extra %c's\n", -n, c2); +} + +void +error(int flag, const char *msg, ...) 
+{ + int errline; + extern Node *curnode; + /*extern unsigned char *cmdname;*/ + va_list args; + + fflush(stdout); + va_start(args, msg); + vpfmt(stderr, flag, msg, args); + putc('\n', stderr); + + if (compile_time != 2 && NR && *NR > 0) { + pfmt(stderr, MM_INFO, + ":25:Input record number %g", *FNR); + if (strcmp((char*) *FILENAME, "-") != 0) + pfmt(stderr, MM_NOSTD, + ":26:, file %s", *FILENAME); + fprintf(stderr, "\n"); + } + errline = 0; + if (compile_time != 2 && curnode) + errline = curnode->lineno; + else if (compile_time != 2 && lineno) + errline = lineno; + if (errline) + pfmt(stderr, MM_INFO, ":27:Source line number %d\n", errline); + eprint(); + if (flag == MM_ERROR) { + if (dbg) + abort(); + exit(2); + } + va_end(args); +} + +static void eprint(void) /* try to print context around error */ +{ + unsigned char *p, *q, *r; + int c, episnul; + static int been_here = 0; + extern unsigned char ebuf[300], *ep; + + if (compile_time == 2 || compile_time == 0 || been_here++ > 0) + return; + episnul = ep > ebuf && ep[-1] == '\0'; + p = ep - 1 - episnul; + if (p > ebuf && *p == '\n') + p--; + for ( ; p > ebuf && *p != '\n' && *p != '\0'; p--) + ; + while (*p == '\n') + p++; + if (0 /* posix */) + pfmt(stderr, MM_INFO, ":28:Context is\n\t"); + else + pfmt(stderr, MM_INFO|MM_NOSTD, ":2228: context is\n\t"); + for (q=ep-1-episnul; q>=p && *q!=' ' && *q!='\t' && *q!='\n'; q--) + ; + for (r = q; r < ep; r++) { + if (*r != ' ' && *r != '\t' && *r != '\n') { + for ( ; p < q; p++) + if (*p) + putc(*p, stderr); + break; + } + } + fprintf(stderr, " >>> "); + for ( ; p < ep; p++) + if (*p) + putc(*p, stderr); + fprintf(stderr, " <<< "); + if (*ep) + while ((c = awk_input()) != '\n' && c != '\0' && c != EOF) { + putc(c, stderr); + bclass(c); + } + putc('\n', stderr); + ep = ebuf; +} + +void bclass(int c) +{ + switch (c) { + case '{': bracecnt++; break; + case '}': bracecnt--; break; + case '[': brackcnt++; break; + case ']': brackcnt--; break; + case '(': parencnt++; break; + 
case ')': parencnt--; break; + } +} + +double errcheck(double x, unsigned char *s) +{ + if (errno == EDOM) { + errno = 0; + error(MM_WARNING, ":29:%s argument out of domain", s); + x = 1; + } else if (errno == ERANGE) { + errno = 0; + error(MM_WARNING, ":30:%s result out of range", s); + x = 1; + } + return x; +} + +void PUTS(unsigned char *s) { + dprintf( ("%s\n", s) ); +} + +int isclvar(unsigned char *s) /* is s of form var=something? */ +{ + unsigned char *os = s; + + for ( ; *s; s++) + if (!(alnumchar(*s) || *s == '_')) + break; + return *s == '=' && s > os && *(s+1) != '=' && !digitchar(*os); +} + +int is2number(register unsigned char *s, Cell *p) +{ + unsigned char *after; + Awkfloat val; + + /* + * POSIX.2 says leading <blank>s are skipped and that + * <blank> is at least ' ' and '\t' and can include other + * characters, but not in the "POSIX" (aka "C") locale. + * + * The historic code skipped those two and newline. So, + * unless it's noticed by some test suite, we choose to + * keep things compatible. To be safe, reject the string + * if it starts with other white space characters since + * strtod() skips any form of white space. + * + * Permit similarly spelled trailing white space for + * compatibility. + */ + if (p != 0) + s = p->sval; + while (*s == ' ' || *s == '\t' || *s == '\n') + s++; + if (isspace(*s)) + return 0; + /* + * Reject hexadecimal numbers, infinity and NaN strings which + * are recognized by C99 strtod() implementations. 
+ */ + switch (*s) { + case '0': + if (s[1] == 'x' || s[1] == 'X') + return 0; + break; + case 'i': + case 'I': + if (strncasecmp((char *)s, "inf", 3) == 0) + return 0; + break; + case 'n': + case 'N': + if (strncasecmp((char *)s, "NaN", 3) == 0) + return 0; + break; + } + val = strtod((char *)s, (char **)&after); + for (s = after; *s == ' ' || *s == '\t' || *s == '\n'; s++) + ; + if (*s != '\0') + return 0; + if (p != 0) { + p->fval = val; + p->tval |= NUM; + } + return 1; +} + +double +awk_atof(const char *s) +{ + wchar_t wc; + int n; + + while (*s) { + next(wc, s, n); + if (!(mb_cur_max > 1 ? iswspace(wc) : isspace(wc))) + break; + s += n; + } + /* + * Return 0 for hexadecimal numbers, infinity and NaN strings which + * are recognized by C99 atof() implementations. + */ + switch (*s) { + case '0': + if (s[1] == 'x' || s[1] == 'X') + return 0; + break; + case 'i': + case 'I': + if (strncasecmp(s, "inf", 3) == 0) + return 0; + break; + case 'n': + case 'N': + if (strncasecmp(s, "NaN", 3) == 0) + return 0; + break; + } + return atof(s); +} + +unsigned char *makerec(const unsigned char *data, int size) +{ + if (!(recloc->tval & DONTFREE)) + xfree(recloc->sval); + if (recsize < size) + growrec(&recdata, &recsize, size, NULL, 0); + record = recdata; + strcpy((char*)record, (char*)data); + recloc->sval = record; + recloc->tval = REC | STR | DONTFREE; + donerec = 1; donefld = 0; + return record; +} + +static void growrec(unsigned char **buf, int *bufsize, int newsize, + unsigned char **ptr, int bld) +{ + unsigned char *np, *op; + + op = *buf; + if ((np = realloc(op, *bufsize = newsize)) == 0) { + oflo: if (bld) + error(MM_ERROR, + ":16:Built giant record `%.20s...'", + *buf); + else + error(MM_ERROR, + ":12:Input record `%.20s...' 
too long", + *buf); + } + if (ptr && *ptr) + *ptr = &np[*ptr - op]; + if (record == op) + record = np; + if (recdata == op) { + recdata = np; + recsize = *bufsize; + if ((fields = realloc(fields, recsize)) == NULL) + goto oflo; + } + if (fldtab[0]->sval == op) + fldtab[0]->sval = np; + if (recloc->sval == op) + recloc->sval = np; + *buf = np; +} + +int +vpfmt(FILE *stream, long flags, const char *fmt, va_list ap) +{ + extern char *pfmt_label__; + int n = 0; + + if ((flags & MM_NOGET) == 0) { + if (*fmt == ':') { + do + fmt++; + while (*fmt != ':'); + fmt++; + } + } + if ((flags & MM_NOSTD) == 0) + n += fprintf(stream, "%s: ", pfmt_label__); + if ((flags & MM_ACTION) == 0 && isupper(*fmt&0377)) + n += fprintf(stream, "%c", tolower(*fmt++&0377)); + n += vfprintf(stream, fmt, ap); + return n; +} diff --git a/nawk/main.c b/nawk/main.c @@ -0,0 +1,215 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)main.c 1.14 (gritter) 12/19/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. 
+ 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/main.c /main/uw7_nj/2 */ +/* from RCS Header: main.c 1.3 91/08/12 */ + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <signal.h> +#include <pfmt.h> +#include <errno.h> +#include <string.h> +#include <locale.h> +#include <langinfo.h> +#include <libgen.h> + +#define CMDCLASS ""/*"UX:"*/ /* Command classification */ + +#include <locale.h> + +#include "awk.h" +#include "y.tab.h" + +int dbg = 0; +unsigned char *cmdname; /* gets argv[0] for error messages */ +extern FILE *yyin; /* lex input file */ +static FILE *awk_yyin; +extern FILE *yyout; +unsigned char *lexprog; /* points to program argument if it exists */ +unsigned char **start_delayed; /* first name=val argument delayed for BEGIN code */ +unsigned char **after_delayed; /* first argument after the delayed name=val's */ +extern int errorflag; /* non-zero if any syntax errors; set by yyerror */ +int compile_time = 2; /* for error printing: */ + /* 2 = cmdline, 1 = compile, 0 = running */ + +#define MAXPFILE 100 +unsigned char *pfile[MAXPFILE]; /* program filenames from -f's */ +int npfile = 0; /* number of filenames */ +int curpfile = 0; /* current filename */ + +int mb_cur_max; /* MB_CUR_MAX, for acceleration */ + +extern const char badopen[]; +/* main: set up the locale and the message label, print the usage text and exit(1) when invoked without arguments, then consume leading options (-f progfile, -F fieldsep, -v var=value, -d[level], "--" ends options). With no -f the first remaining argument is the program text. In posix mode leading name=value operands are held back between start_delayed and after_delayed. The program is parsed with yyparse() and run only if errorflag is 0; the process exits with errorflag. */ +int main(int argc, unsigned char *argv[], unsigned char *envp[]) +{ + unsigned char *fs = NULL; + char label[MAXLABEL+1]; /* Space for the catalogue label */ + + (void)setlocale(LC_COLLATE, ""); + (void)setlocale(LC_CTYPE, ""); + /*(void)setlocale(LC_MESSAGES, "");*/ + (void)setlocale(LC_NUMERIC, "POSIX"); /* redundant */ + mb_cur_max = MB_CUR_MAX; + cmdname = (unsigned char *)basename((char *)argv[0]); + (void)strcpy(label, CMDCLASS); + (void)strncat(label, (char*) cmdname, (MAXLABEL - sizeof(CMDCLASS) - 1)); + (void)setcat("uxawk"); + (void)setlabel(label); + /*version = (char*) gettxt(":31", "version Oct 11, 1989");*/ + if (argc 
== 1) { + if (0 /* posix */) + pfmt(stderr, MM_ERROR, ":32:Incorrect usage\n"); + pfmt(stderr, MM_ACTION | (0 /* posix */ ? 0 : MM_NOSTD), + ":210107:Usage: %s [-f programfile | 'program'] [-Ffieldsep] [-v var=value] [files]\n", + cmdname); + exit(1); + } + signal(SIGFPE, fpecatch); + awk_yyin = NULL; + yyout = stdout; + fldinit(); + syminit(); + while (argc > 1 && argv[1][0] == '-' && argv[1][1] != '\0') { + if (strcmp((char*) argv[1], "--") == 0) { /* explicit end of args */ + argc--; + argv++; + break; + } + switch (argv[1][1]) { + case 'f': /* next argument is program filename */ + if (npfile >= MAXPFILE) + error(MM_ERROR, ":106:Too many program filenames"); + if (argv[1][2] != '\0') { /* arg is -fname */ + pfile[npfile++] = &argv[1][2]; + } else { + argc--; + argv++; + if (argc <= 1) + error(MM_ERROR, ":34:No program filename"); + pfile[npfile++] = argv[1]; + } + break; + case 'F': /* set field separator */ + if (argv[1][2] != 0) { /* arg is -Fsomething */ + if (argv[1][2] == 't' && argv[1][3] == 0) /* wart: t=>\t */ + fs = (unsigned char *) "\t"; + else if (argv[1][2] != 0) + fs = &argv[1][2]; + } else { /* arg is -F something */ + argc--; argv++; /* NOTE(review): when a bare -F is the last argument this step plus the common argc--/argv++ after the switch leaves argc == 0 with argv at the NULL terminator -- confirm the later argv[0] = cmdname store and arginit() cope with that */ + if (argc > 1 && argv[1][0] == 't' && argv[1][1] == 0) /* wart: t=>\t */ + fs = (unsigned char *) "\t"; + else if (argc > 1 && argv[1][0] != 0) + fs = &argv[1][0]; + } + if (fs == NULL || *fs == '\0') + error(MM_WARNING, ":35:Field separator FS is empty"); + break; + case 'v': /* -v a=1 to be done NOW. 
one -v for each */ + if (argv[1][2] != '\0') { /* arg is -va=1 */ + if (!isclvar(&argv[1][2])) + error(MM_ERROR, ":105:malformed -v assignment"); + setclvar(&argv[1][2]); + } else if (--argc > 1 && isclvar((++argv)[1])) { + setclvar(argv[1]); + } else { + error(MM_ERROR, ":105:malformed -v assignment"); + } + break; + case 'd': + dbg = atoi((char *)&argv[1][2]); + if (dbg == 0) + dbg = 1; + pfmt(stdout, (MM_INFO | MM_NOGET), "%s %s\n", + cmdname, version); + break; + default: + pfmt(stderr, MM_WARNING, + ":36:Unknown option %s ignored\n", argv[1]); + break; + } + argc--; + argv++; + } + /* argv[1] is now the first argument */ + if (npfile == 0) { /* no -f; first argument is program */ + if (argc <= 1) + error(MM_ERROR, ":37:No program given"); + dprintf( ("program = |%s|\n", argv[1]) ); + lexprog = argv[1]; + argc--; + argv++; + } + /* hold leading name=val arguments until just after BEGIN */ + if (posix && argc > 1 && isclvar(argv[1])) { + start_delayed = &argv[0]; + do { + argv[0] = argv[1]; + argv++; + } while (--argc > 1 && isclvar(argv[1])); + after_delayed = &argv[0]; + } + compile_time = 1; + argv[0] = cmdname; /* put prog name at front of arglist */ + dprintf( ("argc=%d, argv[0]=%s\n", argc, argv[0]) ); + arginit(argc, argv); + envinit(envp); + yyparse(); + if (fs) + *FS = tostring(qstring(fs, '\0')); + dprintf( ("errorflag=%d\n", errorflag) ); + if (errorflag == 0) { + compile_time = 0; + (void)setlocale(LC_NUMERIC, ""); + run(winner); + } else + bracecheck(); + exit(errorflag); +} + +int pgetc(void) /* get program character */ +{ + int c; + + for (;;) { + if (awk_yyin == NULL) { + if (curpfile >= npfile) + return EOF; + if (!strcmp((char *)pfile[curpfile], "-")) + awk_yyin = stdin; + else if ((awk_yyin = fopen((char *) pfile[curpfile], "r")) == NULL) + error(MM_ERROR, badopen, + pfile[curpfile], strerror(errno)); + } + if ((c = getc(awk_yyin)) != EOF) + return c; + awk_yyin = NULL; + curpfile++; + } +} diff --git a/nawk/maketab.c b/nawk/maketab.c @@ -0,0 
+1,177 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)maketab.c 1.11 (gritter) 12/4/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/maketab.c /main/uw7_nj/1 */ +/* from RCS Header: maketab.c 1.2 91/06/25 */ +static const char sccsid[] = "@(#)maketab.c 1.11 (gritter) 12/4/04"; + +#include <stdio.h> +#include <string.h> +#include "awk.h" +#include "y.tab.h" + +struct xx +{ int token; + char *name; + char *pname; +} proc[] = { + { PROGRAM, "program", NULL }, + { BOR, "boolop", " || " }, + { AND, "boolop", " && " }, + { NOT, "boolop", " !" 
}, + { NE, "relop", " != " }, + { EQ, "relop", " == " }, + { LE, "relop", " <= " }, + { LT, "relop", " < " }, + { GE, "relop", " >= " }, + { GT, "relop", " > " }, + { ARRAY, "array", NULL }, + { INDIRECT, "indirect", "$(" }, + { SUBSTR, "substr", "substr" }, + { SUB, "sub", "sub" }, + { GSUB, "gsub", "gsub" }, + { INDEX, "sindex", "sindex" }, + { SPRINTF, "awsprintf", "sprintf " }, + { ADD, "arith", " + " }, + { MINUS, "arith", " - " }, + { MULT, "arith", " * " }, + { DIVIDE, "arith", " / " }, + { MOD, "arith", " % " }, + { UMINUS, "arith", " -" }, + { POWER, "arith", " **" }, + { PREINCR, "incrdecr", "++" }, + { POSTINCR, "incrdecr", "++" }, + { PREDECR, "incrdecr", "--" }, + { POSTDECR, "incrdecr", "--" }, + { CAT, "cat", " " }, + { PASTAT, "pastat", NULL }, + { PASTAT2, "dopa2", NULL }, + { MATCH, "matchop", " ~ " }, + { NOTMATCH, "matchop", " !~ " }, + { MATCHFCN, "matchop", "matchop" }, + { INTEST, "intest", "intest" }, + { PRINTF, "aprintf", "printf" }, + { PRINT, "print", "print" }, + { DELETE, "delete", "delete" }, + { SPLIT, "split", "split" }, + { ASSIGN, "assign", " = " }, + { ADDEQ, "assign", " += " }, + { SUBEQ, "assign", " -= " }, + { MULTEQ, "assign", " *= " }, + { DIVEQ, "assign", " /= " }, + { MODEQ, "assign", " %= " }, + { POWEQ, "assign", " ^= " }, + { CONDEXPR, "condexpr", " ?: " }, + { IF, "ifstat", "if(" }, + { WHILE, "whilestat", "while(" }, + { FOR, "forstat", "for(" }, + { DO, "dostat", "do" }, + { IN, "instat", "instat" }, + { NEXT, "jump", "next" }, + { EXIT, "jump", "exit" }, + { BREAK, "jump", "break" }, + { CONTINUE, "jump", "continue" }, + { RETURN, "jump", "ret" }, + { BLTIN, "bltin", "bltin" }, + { CALL, "call", "call" }, + { ARG, "arg", "arg" }, + { VARNF, "getnf", "NF" }, + { GETLINE, "getline", "getline" }, + { 0, "", "" }, +}; + +/* parenthesized so that SIZE stays one operand wherever it is expanded */ +#define SIZE (LASTTOKEN - FIRSTTOKEN + 1) +char *table[SIZE]; +char *names[SIZE]; + +/* main: write the token tables to stdout. Reads y.tab.h and emits printname[] with one name string per "#define NAME number" line whose number lies in FIRSTTOKEN..LASTTOKEN (names starting with YY/yy are skipped), then proctab[] giving each token its handler name from proc[] (nullproc where proc[] has no entry), then a tokname() function. Exits 1 if y.tab.h cannot be opened or memory runs out, 0 otherwise. */ +int main(void) +{ + struct xx *p; + int i, n, tok; + char c; + FILE *fp; + char buf[100], name[100], def[100]; + + 
printf("#include <stdio.h>\n"); + printf("#include \"awk.h\"\n"); + printf("#include \"y.tab.h\"\n\n"); +/* printf("Cell *nullproc();\n"); + for (i = SIZE; --i >= 0; ) + names[i] = ""; + for (p=proc; p->token!=0; p++) + if (p == proc || strcmp(p->name, (p-1)->name)) + printf("extern Cell *%s();\n", p->name);*/ + + if ((fp = fopen("y.tab.h", "r")) == NULL) { + fprintf(stderr, "maketab can't open y.tab.h!\n"); + exit(1); + } + printf("static unsigned char *printname[%d] = {\n", SIZE); + i = 0; + while (fgets(buf, sizeof buf, fp) != NULL) { + if (*buf == '\n') + continue; + /* field widths keep def[] and name[] (100 bytes each) from being overrun */ + n = sscanf(buf, "%1c %99s %99s %d", &c, def, name, &tok); + /* require all four conversions: with fewer, def/name/tok still hold data from an earlier line or are indeterminate */ + if (n != 4 || c != '#' || strcmp(def, "define") != 0) /* not a valid #define */ + continue; + if (strncmp(name, "YY", 2) == 0 || strncmp(name, "yy", 2) == 0) + continue; + if (tok < FIRSTTOKEN || tok > LASTTOKEN) { + continue; + /* + fprintf(stderr, "maketab funny token %d %s\n", tok, buf); + exit(1); + */ + } + names[tok-FIRSTTOKEN] = (char *) malloc(strlen(name)+1); + if (names[tok-FIRSTTOKEN] == NULL) { + fprintf(stderr, "maketab out of space\n"); + exit(1); + } + strcpy(names[tok-FIRSTTOKEN], name); + printf("\t(unsigned char *) \"%s\",\t/* %d */\n", name, tok); + i++; + } + fclose(fp); + printf("};\n\n"); + + for (p=proc; p->token!=0; p++) + table[p->token-FIRSTTOKEN] = p->name; + printf("\nCell *(*proctab[%d])(Node **, int) = {\n", SIZE); + for (i=0; i<SIZE; i++) { + /* a token number that never appeared in y.tab.h leaves names[i] NULL; never hand NULL to %s */ + const char *nm = names[i] ? names[i] : ""; + if (table[i]==0) + printf("\tnullproc,\t/* %s */\n", nm); + else + printf("\t%s,\t/* %s */\n", table[i], nm); + } + printf("};\n\n"); + + printf("unsigned char *tokname(int n)\n"); /* print a tokname() function */ + + printf("{\n"); + printf(" static unsigned char buf[100];\n\n"); + printf(" if (n < FIRSTTOKEN || n > LASTTOKEN) {\n"); + printf(" snprintf((char *)buf, sizeof buf, \"token %%d\", n);\n"); + printf(" return buf;\n"); + printf(" }\n"); + /* index relative to FIRSTTOKEN, matching the range check above, rather than a hard-coded 257 */ + printf(" return printname[n-FIRSTTOKEN];\n"); + printf("}\n"); + exit(0); +} diff --git a/nawk/mkfile b/nawk/mkfile @@ -0,0 +1,53 @@ +BIN = nawk +OBJ = awk.lx.o b.o lib.o main.o parse.o proctab.o run.o tran.o \ + awk.g.o version.o 
+LOCAL_CFLAGS = -DSU3 +LOCAL_LDFLAGS = -lm +CLEAN_FILES = maketab maketab.o awk.g.c y.tab.h awk.lx.c proctab.c awk.1 +DEPS = libcommon libuxre lex yacc ed + +<$mkbuild/mk.common + +INSTALL_BIN = nawk +INSTALL_MAN1 = nawk.1 +INSTALL_SYMLINK = \ + nawk $BINDIR/awk \ + nawk.1 $MANDIR/man1/awk.1 + +nawk: $OBJ + +awk.g.c:Q: awk.g.y + echo YACC $target + $YACC -d awk.g.y + mv -f y.tab.c awk.g.c + +y.tab.h:Q: awk.g.c + (echo '1i'; echo '#include <inttypes.h>'; echo '.'; echo 'w';) | \ + $ED -s y.tab.h + +maketab:Q: maketab.o + echo CC $target + $CC $LDFLAGS maketab.o -o maketab + +proctab.c: maketab + ./maketab > proctab.c + +awk.lx.c:Q: awk.lx.l + echo LEX $target + $LEX -t awk.lx.l > awk.lx.c + +maketab.o:Q: maketab.c + echo CC $target + $CC -DIN_MAKETAB -c maketab.c + +awk.g.o: awk.h y.tab.h +awk.lx.o: awk.h y.tab.h +b.o: awk.h y.tab.h +lib.o: awk.h y.tab.h +main.o: awk.h y.tab.h +maketab.o: awk.h y.tab.h +parse.o: awk.h y.tab.h +proctab.o: awk.h y.tab.h +run.o: awk.h y.tab.h +tran.o: awk.h y.tab.h +version.o: awk.h y.tab.h diff --git a/nawk/nawk.1 b/nawk/nawk.1 @@ -0,0 +1,585 @@ +.\" +.\" Sccsid @(#)nawk.1 1.21 (gritter) 2/6/05 +.\" Derived from awk.1, Bell Labs: +.\" +.\" Copyright (C) Lucent Technologies 1997 +.\" All Rights Reserved +.\" +.\" Permission to use, copy, modify, and distribute this software and +.\" its documentation for any purpose and without fee is hereby +.\" granted, provided that the above copyright notice appear in all +.\" copies and that both that the copyright notice and this +.\" permission notice and warranty disclaimer appear in supporting +.\" documentation, and that the name Lucent Technologies or any of +.\" its entities not be used in advertising or publicity pertaining +.\" to distribution of the software without specific, written prior +.\" permission. +.\" +.\" LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, +.\" INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. 
+.\" IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY +.\" SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER +.\" IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, +.\" ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF +.\" THIS SOFTWARE. +.TH NAWK 1 "2/6/05" "Heirloom Toolchest" "User Commands" +.SH NAME +nawk \- pattern scanning and processing language +.SH SYNOPSIS +.HP +.ad l +\fBnawk\fR +[\fB\-f \fIprogfile\fR | \fI'prog'\fR] +[\fB\-F\fIfieldsep\fR] +[\fB\-v \fIvar=value\fR] +[\fIfile . . .\fR] +.br +.ad b +.SH DESCRIPTION +.I Nawk +scans each input +.I file +for lines that match any of a set of patterns specified literally in +.IR prog +or in one or more files +specified as +.B \-f +.IR progfile . +With each pattern +there can be an associated action that will be performed +when a line of a +.I file +matches the pattern. +Each line is matched against the +pattern portion of every pattern-action statement; +the associated action is performed for each matched pattern. +The file name +.B \- +means the standard input. +Any +.IR file +of the form +.I var=value +is treated as an assignment, not a filename, +and is executed at the time it would have been opened if it were a filename +.RB ( /usr/5bin/s42/awk , +.BR /usr/5bin/posix/awk , +and +.B /usr/5bin/posix2001/awk +only). +The option +.B \-v +followed by +.I var=value +is an assignment to be done before +.I prog +is executed; +any number of +.B \-v +options may be present. +The +.B \-F +.IR fs +option defines the input field separator to be the regular expression +.IR fs. +.PP +An input line is normally made up of fields separated by white space, +or by regular expression +.BR FS . +The fields are denoted +.BR $1 , +.BR $2 , +\&..., while +.B $0 +refers to the entire line. 
+.PP +A pattern-action statement has the form +.IP +.IB pattern " { " action " }" +.PP +A missing +.BI { " action " } +means print the line; +a missing pattern always matches. +Pattern-action statements are separated by newlines or semicolons. +.PP +An action is a sequence of statements. +A statement can be one of the following: +.PP +.\".ta \w'\f(CWdelete array[subscript]'u +.RS +.nf +\fBif (\fI expression \fB)\fI statement \fR[ \fBelse\fI statement \fR] +\fBwhile (\fI expression \fB)\fI statement\fR +\fBfor (\fI expression \fB;\fI expression \fB;\fI expression \fB)\fI statement\fR +\fBfor (\fI var \fBin\fI array \fB)\fI statement\fR +\fBdo\fI statement \fBwhile (\fI expression \fB)\fR +\fBbreak\fR +\fBcontinue\fR +\fB{\fR [\fIstatement \fR...] \fB}\fR +\fIexpression\fR # commonly \fIvar \fB=\fI expression\fR +\fBprint\fR [\fIexpression-list\fR] [\fB>\fI expression\fR] +\fBprintf\fI format \fR[\fB,\fI expression-list\fR] [\fB>\fI expression\fR] +\fBnext \fR# skip remaining patterns on this input line +\fBdelete\fI array\fB[\fIsubscript\fB]\fR # delete an array element +\fBexit\fR [\fIexpr\fR] # exit immediately; status is \fIexpr\fR +\fBreturn\fR [\fIexpr\fR] +.fi +.RE +.br +.DT +.PP +Statements are terminated by +semicolons, newlines or right braces. +An empty +.I expression-list +stands for +.BR $0 . +String constants are quoted \&\f(CW"\ "\fR, +with the usual C escapes recognized within. +Expressions take on string or numeric values as appropriate, +and are built using the operators +.B + \- * / % ^ +(exponentiation), and concatenation (indicated by white space). +The operators +.B ! ++ \-\- += \-= *= +.B /= %= ^= > >= < +.B <= == != ?: +are also available in expressions. +Variables may be scalars, array elements +(denoted \fIx\fB[\fIi\fB]\fR) +or fields. +Variables are initialized to the null string. +Array subscripts may be any string, +not necessarily numeric; +this allows for a form of associative memory. 
+Multiple subscripts such as +\fB[\fIi\fB,\fIj\fB,\fIk\fB]\fR +are permitted; the constituents are concatenated, +separated by the value of +.BR SUBSEP . +.PP +The +.B print +statement prints its arguments on the standard output +(or on a file if +.BI > file +or +.BI >> file +is present or on a pipe if +.BI | cmd +is present), separated by the current output field separator, +and terminated by the output record separator. +.I file +and +.I cmd +may be literal names or parenthesized expressions; +identical string values in different statements denote +the same open file. +The +.B printf +statement formats its expression list according to the format +(see +.IR printf (3)) . +The built-in function +.BI close( expr ) +closes the file or pipe +.IR expr . +.PP +The mathematical functions +.BR exp , +.BR log , +.BR sqrt , +.BR sin , +.BR cos , +and +.BR atan2 +are built in. +Other built-in functions: +.\".TF length +.TP +.B gsub +same as +.B sub +except that all occurrences of the regular expression +are replaced; +.B sub +and +.B gsub +return the number of replacements. +.TP +.BI index( s , " t" ) +the position in +.I s +where the string +.I t +occurs, or 0 if it does not. +.TP +.B int +truncates to an integer value +.TP +.B length +the length of its argument +taken as a string, +or of +.B $0 +if no argument. +.TP +.BI match( s , " r" ) +the position in +.I s +where the regular expression +.I r +occurs, or 0 if it does not. +The variables +.B RSTART +and +.B RLENGTH +are set to the position and length of the matched string. +.TP +.B rand +random number on (0,1) +.TP +\fBsplit(\fIs\fB, \fIa\fB, \fIfs\fB)\fR +splits the string +.I s +into array elements +.IB a [1] , +.IB a [2] , +\&..., +.IB a [ n ] , +and returns +.IR n . +The separation is done with the regular expression +.I fs +or with the field separator +.B FS +if +.I fs +is not given. +.TP +\fBsprintf(\fIfmt\fB, \fIexpr\fB, \fI...\fB)\fR +the string resulting from formatting +.I expr ... 
+according to the +.IR printf (3) +format +.I fmt +.TP +.B srand +sets seed for +.B rand +and returns the previous seed. +.TP +\fBsub(\fIr\fB, \fIt\fB, \fIs\fB)\fR +substitutes +.I t +for the first occurrence of the regular expression +.I r +in the string +.IR s . +If +.I s +is not given, +.B $0 +is used. +.TP +\fBsubstr(\fIs\fB, \fIm\fB, \fIn\fB)\fR +the +.IR n -character +substring of +.I s +that begins at position +.IR m +counted from 1. +.TP +.BI system( cmd ) +executes +.I cmd +and returns its exit status +.TP +.BI tolower( str ) +returns a copy of +.I str +with all upper-case characters translated to their +corresponding lower-case equivalents. +.TP +.BI toupper( str ) +returns a copy of +.I str +with all lower-case characters translated to their +corresponding upper-case equivalents. +.PD +.PP +The ``function'' +.B getline +sets +.B $0 +to the next input record from the current input file; +.B getline +.BI < file +sets +.B $0 +to the next record from +.IR file . +.B getline +.I x +sets variable +.I x +instead. +Finally, +.IB cmd " |getline" +pipes the output of +.I cmd +into +.BR getline ; +each call of +.B getline +returns the next line of output from +.IR cmd . +In all cases, +.B getline +returns 1 for a successful input, +0 for end of file, and \-1 for an error. +.PP +Additional functions may be defined +(at the position of a pattern-action statement) thus: +.IP +\fBfunction \fIfoo\fB(\fIa\fB, \fIb\fB, \fIc\fB) +{ \fI...\fB; return \fIx\fB }\fR +.PP +or: +.IP +\fBfunc \fIfoo\fB(\fIa\fB, \fIb\fB, \fIc\fB) +{ \fI...\fB; return \fIx\fB }\fR +.PP +Parameters are passed by value if scalar and by reference if array name; +functions may be called recursively. +Parameters are local to the function; all other variables are global. +Thus local variables may be created by providing excess parameters in +the function definition. +.PP +Patterns are arbitrary Boolean combinations +(with +.BR "! || &&" ) +of regular expressions and +relational expressions. 
+Regular expressions are +full regular expressions with +.B /usr/5bin/nawk +and +extended regular expressions with +.BR /usr/5bin/s42/awk , +.BR /usr/5bin/posix/awk , +and +.BR /usr/5bin/posix2001/awk ; +both are as described in +.IR egrep (1). +Isolated regular expressions +in a pattern apply to the entire line. +Regular expressions may also occur in +relational expressions, using the operators +.BR ~ +and +.BR !~ . +.BI / re / +is a constant regular expression; +any string (constant or variable) may be used +as a regular expression, except in the position of an isolated regular expression +in a pattern. +For +.BR /usr/5bin/posix2001/awk , +regular expressions may be part of arithmetic expressions. +.PP +A pattern may consist of two patterns separated by a comma; +in this case, the action is performed for all lines +from an occurrence of the first pattern +through an occurrence of the second. +.PP +A relational expression is one of the following: +.IP +.I expression matchop regular-expression +.br +.I expression relop expression +.br +.IB expression " in " array-name +.br +.BI ( expr , expr,... ") in " array-name +.PP +where a relop is any of the six relational operators in C, +and a matchop is either +.B ~ +(matches) +or +.B !~ +(does not match). +A conditional is an arithmetic expression, +a relational expression, +or a Boolean combination +of these. +.PP +The special patterns +.B BEGIN +and +.B END +may be used to capture control before the first input line is read +and after the last. +.B BEGIN +and +.B END +do not combine with other patterns. 
+.PP +Variable names with special meanings: +.\".TF FILENAME +.TP 10 +.B ARGC +argument count, assignable +.TP 10 +.B ARGV +argument array, assignable; +non-null members are taken as filenames +.TP 10 +.B CONVFMT +.RB ( /usr/5bin/s42/awk , +.BR /usr/5bin/posix2001/awk , +and +.B /usr/5bin/posix/awk +only) +conversion format used when converting numbers +(default +.BR "%.6g" ) +.TP 10 +.B ENVIRON +array of environment variables; subscripts are names. +.TP 10 +.B FILENAME +the name of the current input file +.TP 10 +.B FNR +ordinal number of the current record in the current file +.TP 10 +.B FS +regular expression used to separate fields; also settable +by option +.BI \-F fs. +.TP 10 +.BR NF +number of fields in the current record +.TP 10 +.B NR +ordinal number of the current record +.TP 10 +.B OFMT +output format for numbers (default +.BR "%.6g" ) +.TP 10 +.B OFS +output field separator (default blank) +.TP 10 +.B ORS +output record separator (default newline) +.TP 10 +.B RS +input record separator (default newline) +.TP 10 +.B SUBSEP +separates multiple subscripts (default 034) +.PD +.SH EXAMPLES +.TP +.nf +length($0) > 72 +.br +.fi +Print lines longer than 72 characters. +.TP +.nf +{ print $2, $1 } +.br +.fi +Print first two fields in opposite order. +.PP +.nf +BEGIN { FS = ",[ \et]*|[ \et]+" } + { print $2, $1 } +.br +.fi +.ns +.IP +Same, with input fields separated by comma and/or blanks and tabs. +.PP +.nf + { s += $1 } +END { print "sum is", s, " average is", s/NR } +.fi +.br +.ns +.IP +Add up first column, print sum and average. +.TP +.nf +/start/, /stop/ +.br +.fi +Print all lines between start/stop pairs. +.PP +.nf +BEGIN { # Simulate echo(1) + for (i = 1; i < ARGC; i++) printf "%s ", ARGV[i] + printf "\en" + exit } +.fi +.br +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). 
+.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in regular expressions +as well as string comparison. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters, +the availability and composition of character classes +in regular expressions, +and the case mapping for the toupper() and tolower() functions. +.TP +.B LC_NUMERIC +Determines the radix character used when interpreting numeric input, +performing conversions between numeric and string values +and formatting numeric output. +Regardless of locale, the period character +(the decimal-point character of the C locale) +is the decimal-point character recognized in processing awk programs. +.SH SEE ALSO +egrep(1), +lex(1), +oawk(1), +sed(1), +printf(3), +locale(7) +.br +A. V. Aho, B. W. Kernighan, P. J. Weinberger, +.I +The AWK Programming Language, +Addison-Wesley, 1988. ISBN 0-201-07981-X +.SH NOTES +There are no explicit conversions between numbers and strings. +To force an expression to be treated as a number add 0 to it; +to force it to be treated as a string concatenate +\&\fB""\fR to it. +.\".sp +.\"The scope rules for variables in functions are a botch; +.\"the syntax is worse. +.PP +The LC_COLLATE variable currently has no effect in regular expressions. +Ranges in bracket expressions are ordered +as byte values in single-byte locales +and as wide character values in multibyte locales; +equivalence classes match the given character only, +and multi-character collating elements are not available. diff --git a/nawk/parse.c b/nawk/parse.c @@ -0,0 +1,248 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)parse.c 1.7 (gritter) 12/4/04> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/parse.c /main/uw7_nj/1 */ +/* from RCS Header: parse.c 1.2 91/06/25 */ + +#define DEBUG +#include <stdio.h> +#include <string.h> +#include <pfmt.h> +#include "awk.h" +#include "y.tab.h" +/* nodealloc: allocate a Node sized for n narg[] entries (the n-1 presumably accounts for one slot already inside Node -- TODO confirm in awk.h). A failed malloc is reported through error() with outofspace. nnext is set to NULL and lineno to the current lineno. */ +Node *nodealloc(int n) +{ + register Node *x; + x = (Node *) malloc(sizeof(Node) + (n-1)*sizeof(Node *)); + if (x == NULL) + error(MM_ERROR, outofspace, "nodealloc"); + x->nnext = NULL; + x->lineno = lineno; + return(x); +} +/* exptostat: retag node a as a statement (ntype = NSTAT) and return it. */ +Node *exptostat(Node *a) +{ + a->ntype = NSTAT; + return(a); +} +/* node1: new node with nobj = a and one argument b; ntype is left for the caller to set. */ +Node *node1(int a, Node *b) +{ + register Node *x; + x = nodealloc(1); + x->nobj = a; + x->narg[0]=b; + return(x); +} +/* node2: new node with nobj = a and arguments b, c. */ +Node *node2(int a, Node *b, Node *c) +{ + register Node *x; + x = nodealloc(2); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + return(x); +} +/* node3: new node with nobj = a and arguments b, c, d. */ +Node *node3(int a, Node *b, Node *c, Node *d) +{ + register Node *x; + x = nodealloc(3); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + return(x); +} +/* node4: new node with nobj = a and arguments b, c, d, e. */ +Node *node4(int a, Node *b, Node *c, Node *d, Node *e) +{ + register Node *x; + x = nodealloc(4); + x->nobj = a; + x->narg[0] = b; + x->narg[1] = c; + x->narg[2] = d; + x->narg[3] = e; + return(x); +} +/* stat3: node3() result tagged as a statement (NSTAT). */ +Node *stat3(int a, Node *b, Node *c, Node *d) +{ + register Node *x; + x = node3(a,b,c,d); + 
x->ntype = NSTAT; + return(x); +} +/* op2: node2() result tagged as an expression (NEXPR). */ +Node *op2(int a, Node *b, Node *c) +{ + register Node *x; + x = node2(a,b,c); + x->ntype = NEXPR; + return(x); +} +/* op1: node1() result tagged as an expression (NEXPR). */ +Node *op1(int a, Node *b) +{ + register Node *x; + x = node1(a,b); + x->ntype = NEXPR; + return(x); +} +/* stat1: node1() result tagged as a statement (NSTAT). */ +Node *stat1(int a, Node *b) +{ + register Node *x; + x = node1(a,b); + x->ntype = NSTAT; + return(x); +} +/* op3: node3() result tagged as an expression (NEXPR). */ +Node *op3(int a, Node *b, Node *c, Node *d) +{ + register Node *x; + x = node3(a,b,c,d); + x->ntype = NEXPR; + return(x); +} +/* op4: node4() result tagged as an expression (NEXPR). */ +Node *op4(int a, Node *b, Node *c, Node *d, Node *e) +{ + register Node *x; + x = node4(a,b,c,d,e); + x->ntype = NEXPR; + return(x); +} +/* stat2: node2() result tagged as a statement (NSTAT). */ +Node *stat2(int a, Node *b, Node *c) +{ + register Node *x; + x = node2(a,b,c); + x->ntype = NSTAT; + return(x); +} +/* stat4: node4() result tagged as a statement (NSTAT). */ +Node *stat4(int a, Node *b, Node *c, Node *d, Node *e) +{ + register Node *x; + x = node4(a,b,c,d,e); + x->ntype = NSTAT; + return(x); +} +/* valtonode: wrap cell a in a one-argument node of type NVALUE with nobj 0; the cell itself is stamped with ctype OCELL and csub b. */ +Node *valtonode(Cell *a, int b) +{ + register Node *x; + + a->ctype = OCELL; + a->csub = b; + x = node1(0, (Node *) a); + x->ntype = NVALUE; + return(x); +} +/* rectonode: value node for recloc with subtype CFLD. */ +Node *rectonode(void) +{ + /* return valtonode(lookup("$0", symtab), CFLD); */ + return valtonode(recloc, CFLD); +} +/* makearr: if p is a value node, make its cell usable as an array: a function cell is reported through vyyerror(); a cell that is not yet an array has its sval freed and replaced by a new symbol table from makesymtab(NSYMTAB), with tval set to ARR. Returns p in every case. */ +Node *makearr(Node *p) +{ + Cell *cp; + + if (isvalue(p)) { + cp = (Cell *) (p->narg[0]); + if (isfunc(cp)) + vyyerror(":38:%s is a function, not an array", + cp->nval); + else if (!isarr(cp)) { + xfree(cp->sval); + cp->sval = (unsigned char *) makesymtab(NSYMTAB); + cp->tval = ARR; + } + } + return p; +} +/* pa2stat: build a PASTAT2 statement node from a, b, c; the fourth argument carries the current paircnt cast to a pointer, after which paircnt is incremented. NOTE(review): paircnt is not compared with PA2NUM here -- confirm pairstack[] cannot be indexed past its end. */ +Node *pa2stat(Node *a,Node *b,Node *c) +{ + register Node *x; + x = node4(PASTAT2, a, b, c, (Node *) paircnt); + paircnt++; + x->ntype = NSTAT; + return(x); +} +/* linkum: append list b to the end of list a through the nnext links and return a. Returns a untouched when errorflag is set, b when a is NULL, and a when b is NULL. */ +Node *linkum(Node *a,Node *b) +{ + register Node *c; + + if (errorflag) /* don't link things that are wrong */ + return a; + if (a == NULL) return(b); + else if (b == NULL) return(a); + for (c = a; c->nnext != NULL; c = c->nnext) + ; + c->nnext = b; + return(a); +} +/* defn: record a function definition in cell v: tval becomes FCN, sval holds the body st, fval the number of nodes in the parameter list vl. If v is already an array, vyyerror() is called and v is left unchanged. */ +void defn(Cell *v, /* turn on FCN bit in definition */ + Node *vl, Node *st) /* 
arglist, body of function */ +{ + Node *p; + int n; + + if (isarr(v)) { + vyyerror(":39:`%s' is an array name and a function name", + v->nval); + return; + } + v->tval = FCN; + v->sval = (unsigned char *) st; + n = 0; /* count arguments */ + for (p = vl; p; p = p->nnext) + n++; + v->fval = n; + dprintf( ("defining func %s (%d args)\n", v->nval, n) ); +} +/* isarg: return the zero-based position of the name s in arglist, or -1 when it is not there. */ +int isarg(const char *s) /* is s in argument list for current function? */ +{ + extern Node *arglist; + Node *p = arglist; + int n; + + for (n = 0; p != 0; p = p->nnext, n++) + if (strcmp((char *)((Cell *)(p->narg[0]))->nval, s) == 0) + return n; + return -1; +} diff --git a/nawk/run.c b/nawk/run.c @@ -0,0 +1,1962 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + + Sccsid @(#)run.c 1.33 (gritter) 12/25/06> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + + +/* from unixsrc:usr/src/common/cmd/awk/run.c /main/uw7_nj/1 */ +/* from RCS Header: run.c 1.3 91/08/12 */ + +#define tempfree(x,s) if (istemp(x)) tfree(x,s); else + +/* #define execute(p) (isvalue(p) ? 
(Cell *)((p)->narg[0]) : r_execute(p)) */ +#define execute(p) r_execute((Node *)p) + +#define DEBUG +#include <math.h> +#include <stdio.h> +#include <ctype.h> +#include <setjmp.h> +#include <pfmt.h> +#include <string.h> +#include <errno.h> +#include <wctype.h> +#include <inttypes.h> +#include <time.h> +#include "awk.h" +#include "y.tab.h" + +jmp_buf env; + +#define getfval(p) (((p)->tval & (ARR|FLD|REC|NUM)) == NUM ? (p)->fval : r_getfval(p)) +#define getsval(p) (((p)->tval & (ARR|FLD|REC|STR)) == STR ? (p)->sval : r_getsval(p)) + +static void tfree(register Cell *a, char *s); + +#define PA2NUM 29 +int pairstack[PA2NUM]; +long paircnt; +Node *winner = NULL; +Cell *tmps; + +static Cell truecell ={ OBOOL, BTRUE, 0, 0, 1.0, NUM }; +Cell *true = &truecell; +static Cell falsecell ={ OBOOL, BFALSE, 0, 0, 0.0, NUM }; +Cell *false = &falsecell; +static Cell breakcell ={ OJUMP, JBREAK, 0, 0, 0.0, NUM }; +Cell *jbreak = &breakcell; +static Cell contcell ={ OJUMP, JCONT, 0, 0, 0.0, NUM }; +Cell *jcont = &contcell; +static Cell nextcell ={ OJUMP, JNEXT, 0, 0, 0.0, NUM }; +Cell *jnext = &nextcell; +static Cell exitcell ={ OJUMP, JEXIT, 0, 0, 0.0, NUM }; +Cell *jexit = &exitcell; +static Cell retcell ={ OJUMP, JRET, 0, 0, 0.0, NUM }; +Cell *jret = &retcell; +static Cell tempcell ={ OCELL, CTEMP, 0, 0, 0.0, NUM }; + +Node *curnode = NULL; /* the node being executed, for debugging */ + +static const char + restoobig[] = ":40:%s() result %.20s too big", + notarray[] = ":41:%s is not an array", + ioerror[] = ":42:I/O error occurred on %s"; +const char + illstat[] = ":43:Illegal statement"; + +extern const char readvofid[], readvof[], badopen[]; + +static int growsprintf(unsigned char **, unsigned char **, + int *, const char *, ...); +static void growbuf(unsigned char **buf, int *bufsize, int incr, + unsigned char **ptr, const char *fn); +static void closeall(void); +static void caseconv(unsigned char *s, wint_t (*conv)(wint_t)); + +int run(Node *a) +{ + execute(a); + closeall(); + 
return 0; +} + +Cell *r_execute(Node *u) +{ + register Cell *(*proc)(Node **, int); + register Cell *x; + register Node *a; + + if (u == NULL) + return(true); + for (a = u; ; a = a->nnext) { + curnode = a; + if (isvalue(a)) { + x = (Cell *) (a->narg[0]); + if ((x->tval & FLD) && !donefld) + fldbld(); + else if ((x->tval & REC) && !donerec) + recbld(); + return(x); + } + if (notlegal(a->nobj)) /* probably a Cell* but too risky to print */ + error(MM_ERROR, illstat); + proc = proctab[a->nobj-FIRSTTOKEN]; + x = (*proc)(a->narg, a->nobj); + if ((x->tval & FLD) && !donefld) + fldbld(); + else if ((x->tval & REC) && !donerec) + recbld(); + if (isexpr(a)) + return(x); + /* a statement, goto next statement */ + if (isjump(x)) + return(x); + if (a->nnext == (Node *)NULL) + return(x); + tempfree(x, "execute"); + } +} + + +Cell *program(register Node **a, int n) +{ + register Cell *x = 0; + + if (setjmp(env) != 0) + goto ex; + if (a[0]) { /* BEGIN */ + x = execute(a[0]); + if (isexit(x)) + return(true); + if (isjump(x)) + error(MM_ERROR, + ":44:Illegal break, continue or next from BEGIN"); + if(x != 0) { tempfree(x, ""); } + } + loop: + if (a[1] || a[2]) + while (getrec(&record, &recsize) > 0) { + x = execute(a[1]); + if (isexit(x)) + break; + if(x != 0) { tempfree(x, ""); } + } + ex: + if (setjmp(env) != 0) + goto ex1; + if (a[2]) { /* END */ + x = execute(a[2]); + if (iscont(x)) /* read some more */ + goto loop; + if (isbreak(x) || isnext(x)) + error(MM_ERROR, ":45:Illegal break or next from END"); + if(x != 0) { tempfree(x, ""); } + } + ex1: + return(true); +} + +struct Frame { + int nargs; /* number of arguments in this call */ + Cell *fcncell; /* pointer to Cell for function */ + Cell **args; /* pointer to array of arguments after execute */ + Cell *retval; /* return value */ +}; + +#define NARGS 30 + +struct Frame *frame = NULL; /* base of stack frames; dynamically allocated */ +int nframe = 0; /* number of frames allocated */ +struct Frame *fp = NULL; /* frame pointer. 
bottom level unused */ + +Cell *call(Node **a, int n) +{ + static Cell newcopycell = { OCELL, CCOPY, 0, (unsigned char *) "", 0.0, NUM|STR|DONTFREE }; + int i, ncall, ndef; + Node *x; + Cell *args[NARGS], *oargs[NARGS], *y, *z, *fcn; + unsigned char *s; + + fcn = execute(a[0]); /* the function itself */ + s = fcn->nval; + if (!isfunc(fcn)) + error(MM_ERROR, ":46:Calling undefined function %s", s); + if (frame == NULL) { + fp = frame = (struct Frame *) calloc(nframe += 100, sizeof(struct Frame)); + if (frame == NULL) + error(MM_ERROR, ":47:Out of space for stack frames calling %s", s); + } + for (ncall = 0, x = a[1]; x != NULL; x = x->nnext) /* args in call */ + ncall++; + ndef = (int) fcn->fval; /* args in defn */ + dprintf( ("calling %s, %d args (%d in defn), fp=%ld\n", s, + ncall, ndef, (long)(fp-frame)) ); + if (ncall > ndef) { + if (ncall == 1) + error(MM_WARNING, ":48:Function %s called with 1 arg, uses only %d", + s, ndef); + else + error(MM_WARNING, ":49:Function %s called with %d args, uses only %d", + s, ncall, ndef); + } + if (ncall + ndef > NARGS) + error(MM_ERROR, ":50:Function %s has %d arguments, limit %d", + s, ncall+ndef, NARGS); + for (i = 0, x = a[1]; x != NULL; i++, x = x->nnext) { /* get call args */ + dprintf( ("evaluate args[%d], fp=%ld:\n", i, + (long)(fp-frame)) ); + y = execute(x); + oargs[i] = y; + dprintf( ("args[%d]: %s %f <%s>, t=%o\n", + i, y->nval, y->fval, isarr(y) ? 
+ "(array)" : (char*) y->sval, y->tval) ); + if (isfunc(y)) + error(MM_ERROR, ":51:Cannot use function %s as argument in %s", + y->nval, s); + if (isarr(y)) + args[i] = y; /* arrays by ref */ + else + args[i] = copycell(y); + tempfree(y, "callargs"); + } + for ( ; i < ndef; i++) { /* add null args for ones not provided */ + args[i] = gettemp("nullargs"); + *args[i] = newcopycell; + } + fp++; /* now ok to up frame */ + if (fp >= frame + nframe) { + int dfp = fp - frame; /* old index */ + frame = (struct Frame *) + realloc(frame, (nframe += 100) * sizeof(struct Frame)); + if (frame == NULL) + error(MM_ERROR, ":52:Out of space for stack frames in %s", s); + fp = frame + dfp; + } + fp->fcncell = fcn; + fp->args = args; + fp->nargs = ndef; /* number defined with (excess are locals) */ + fp->retval = gettemp("retval"); + + dprintf( ("start exec of %s, fp=%ld\n", s, (long)(fp-frame)) ); + y = execute((Node *)(fcn->sval)); /* execute body */ + dprintf( ("finished exec of %s, fp=%ld\n", s, (long)(fp-frame)) ); + + for (i = 0; i < ndef; i++) { + Cell *t = fp->args[i]; + if (isarr(t)) { + if (t->csub == CCOPY) { + if (i >= ncall) { + freesymtab(t); + t->csub = CTEMP; + } else { + oargs[i]->tval = t->tval; + oargs[i]->tval &= ~(STR|NUM|DONTFREE); + oargs[i]->sval = t->sval; + tempfree(t, "oargsarr"); + } + } + } else if (t != y) { /* kludge to prevent freeing twice */ + t->csub = CTEMP; + tempfree(t, "fp->args"); + } + } + tempfree(fcn, "call.fcn"); + if (isexit(y) || isnext(y)) + return y; + tempfree(y, "fcn ret"); /* this can free twice! */ + z = fp->retval; /* return value */ + dprintf( ("%s returns %g |%s| %o\n", s, getfval(z), + getsval(z), z->tval) ); + fp--; + return(z); +} + +Cell *copycell(Cell *x) /* make a copy of a cell in a temp */ +{ + Cell *y; + + y = gettemp("copycell"); + y->csub = CCOPY; /* prevents freeing until call is over */ + y->nval = x->nval; + y->sval = x->sval ? 
tostring(x->sval) : NULL; + y->fval = x->fval; + y->tval = x->tval & ~(CON|FLD|REC|DONTFREE); /* copy is not constant or field */ + /* is DONTFREE right? */ + return y; +} + +/*ARGSUSED2*/ +Cell *arg(Node **a, int nnn) +{ + int n; + + n = (intptr_t) a[0]; /* argument number, counting from 0 */ + dprintf( ("arg(%d), fp->nargs=%d\n", n, fp->nargs) ); + if (n+1 > fp->nargs) + error(MM_ERROR, ":53:Argument #%d of function %s was not supplied", + n+1, fp->fcncell->nval); + return fp->args[n]; +} + +static int in_loop = 0; /* Flag : are we in a [while|do|for] loop ? */ + +Cell *jump(Node **a, int n) +{ + register Cell *y; + + switch (n) { + case EXIT: + if (a[0] != NULL) { + y = execute(a[0]); + errorflag = getfval(y); + tempfree(y, ""); + } + longjmp(env, 1); + case RETURN: + if (a[0] != NULL) { + y = execute(a[0]); + if ((y->tval & (STR|NUM)) == (STR|NUM)) { + setsval(fp->retval, getsval(y)); + fp->retval->fval = getfval(y); + fp->retval->tval |= NUM; + } + else if (y->tval & STR) + setsval(fp->retval, getsval(y)); + else if (y->tval & NUM) + setfval(fp->retval, getfval(y)); + tempfree(y, ""); + } + return(jret); + case NEXT: + return(jnext); + case BREAK: + if (posix && !in_loop) + error(MM_ERROR, ":101:break-statement outside of a loop"); + return(jbreak); + case CONTINUE: + if (posix && !in_loop) + error(MM_ERROR, ":102:continue-statement outside of a loop"); + return(jcont); + default: /* can't happen */ + error(MM_ERROR, ":54:Illegal jump type %d", n); + /*NOTREACHED*/ + return 0; + } +} + +Cell *getline(Node **a, int n) +{ + /* a[0] is variable, a[1] is operator, a[2] is filename */ + register Cell *r, *x; + unsigned char *buf = NULL; + int bufsize = 0; + FILE *fp; + + fflush(stdout); /* in case someone is waiting for a prompt */ + r = gettemp(""); + if (a[1] != NULL) { /* getline < file */ + x = execute(a[2]); /* filename */ + if ((intptr_t) a[1] == '|') /* input pipe */ + a[1] = (Node *) LE; /* arbitrary flag */ + fp = openfile((intptr_t) a[1], getsval(x)); + 
tempfree(x, ""); + if (fp == NULL) + n = -1; + else + n = readrec(&buf, &bufsize, fp); + if (n <= 0) { + ; + } else if (a[0] != NULL) { /* getline var <file */ + setsval(execute(a[0]), buf); + } else { /* getline <file */ + makerec(buf, bufsize); + } + } else { /* bare getline; use current input */ + if (a[0] == NULL) /* getline */ + n = getrec(&record, &recsize); + else { /* getline var */ + n = getrec(&buf, &bufsize); + setsval(execute(a[0]), buf); + } + } + setfval(r, (Awkfloat) n); + if (bufsize) + free(buf); + return r; +} + +Cell *getnf(register Node **a, int n) +{ + if (donefld == 0) + fldbld(); + return (Cell *) a[0]; +} + +Cell *array(register Node **a, int n) +{ + register Cell *x, *y, *z; + register unsigned char *s; + register Node *np; + unsigned char *buf = NULL; + int bufsz = 0, subseplen, len = 1, l; + + x = execute(a[0]); /* Cell* for symbol table */ + subseplen = strlen((char *)*SUBSEP); + growbuf(&buf, &bufsz, CHUNK, NULL, "array"); + buf[0] = 0; + for (np = a[1]; np; np = np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + len += (l = strlen((char *)s) + subseplen); + if (len >= bufsz) + growbuf(&buf, &bufsz, l, NULL, "array"); + strcat((char*)buf, (char*)s); + if (np->nnext) + strcat((char*)buf, (char*)*SUBSEP); + tempfree(y, ""); + } + if (!isarr(x)) { + dprintf( ("making %s into an array\n", x->nval) ); + if (freeable(x)) + xfree(x->sval); + x->tval &= ~(STR|NUM|DONTFREE); + x->tval |= ARR; + x->sval = (unsigned char *) makesymtab(NSYMTAB); + } + z = setsymtab(buf, "", 0.0, STR|NUM, (Array *) x->sval); + z->ctype = OCELL; + z->csub = CVAR; + tempfree(x, ""); + free(buf); + return(z); +} + +Cell *delete(Node **a, int n) +{ + Cell *x, *y; + Node *np; + unsigned char *buf = NULL, *s; + int bufsz = 0, subseplen, len = 1, l; + + x = execute(a[0]); /* Cell* for symbol table */ + if (!isarr(x)) + return true; + subseplen = strlen((char *)*SUBSEP); + growbuf(&buf, &bufsz, CHUNK, NULL, "delete"); + buf[0] = 0; + for (np = a[1]; np; np 
= np->nnext) { + y = execute(np); /* subscript */ + s = getsval(y); + len += (l = strlen((char *)s) + subseplen); + if (len >= bufsz) + growbuf(&buf, &bufsz, l, NULL, "delete"); + strcat((char*)buf, (char*)s); + if (np->nnext) + strcat((char*)buf, (char*)*SUBSEP); + tempfree(y, ""); + } + freeelem(x, buf); + tempfree(x, ""); + free(buf); + return true; +} + +Cell *intest(Node **a, int n) +{ + register Cell *x, *ap, *k; + Node *p; + unsigned char *s; + unsigned char *buf = NULL; + int bufsz = 0, subseplen, len = 1, l; + + ap = execute(a[1]); /* array name */ + if (!isarr(ap)) + error(MM_ERROR, notarray, ap->nval); + subseplen = strlen((char *)*SUBSEP); + growbuf(&buf, &bufsz, CHUNK, NULL, "intest"); + buf[0] = 0; + for (p = a[0]; p; p = p->nnext) { + x = execute(p); /* expr */ + s = getsval(x); + len += (l = strlen((char *)s) + subseplen); + if (len >= bufsz) + growbuf(&buf, &bufsz, l, NULL, "array"); + strcat((char *)buf, (char*)s); + tempfree(x, ""); + if (p->nnext) + strcat((char *)buf, (char*)*SUBSEP); + } + k = lookup(buf, (Array *) ap->sval); + tempfree(ap, ""); + free(buf); + if (k == NULL) + return(false); + else + return(true); +} + + +Cell *matchop(Node **a, int n) +{ + register Cell *x, *y; + register unsigned char *s, *t; + register int i; + fa *pfa; + int (*mf)(void *, unsigned char *) = match, mode = 0; + + if (n == MATCHFCN) { + mf = pmatch; + mode = 1; + } + x = execute(a[1]); + s = getsval(x); + if (a[0] == 0) + i = (*mf)(a[2], s); + else { + y = execute(a[2]); + t = getsval(y); + pfa = makedfa(t, mode); + i = (*mf)(pfa, s); + tempfree(y, ""); + } + tempfree(x, ""); + if (n == MATCHFCN) { + int start, length; + if (patlen < 0) { + start = 0; + length = patlen; + } else { + start = chrdist(s, patbeg); + length = chrdist(patbeg, &patbeg[patlen - 1]); + } + setfval(rstartloc, (Awkfloat) start); + setfval(rlengthloc, (Awkfloat) length); + x = gettemp(""); + x->tval = NUM; + x->fval = start; + return x; + } else if ((n == MATCH && i == 1) || (n == 
NOTMATCH && i == 0)) + return(true); + else + return(false); +} + + +Cell *boolop(Node **a, int n) +{ + register Cell *x, *y; + register int i; + + x = execute(a[0]); + i = istrue(x); + tempfree(x, ""); + switch (n) { + case BOR: + if (i) return(true); + y = execute(a[1]); + i = istrue(y); + tempfree(y, ""); + if (i) return(true); + else return(false); + case AND: + if ( !i ) return(false); + y = execute(a[1]); + i = istrue(y); + tempfree(y, ""); + if (i) return(true); + else return(false); + case NOT: + if (i) return(false); + else return(true); + default: /* can't happen */ + error(MM_ERROR, ":55:Unknown boolean operator %d", n); + } + /*NOTREACHED*/ + return 0; +} + +Cell *relop(Node **a, int n) +{ + register int i; + register Cell *x, *y; + Awkfloat j; + + x = execute(a[0]); + y = execute(a[1]); + if (x->tval&NUM && y->tval&NUM) { + j = x->fval - y->fval; + i = j<0? -1: (j>0? 1: 0); + } else { + i = strcoll((char*)getsval(x), (char*)getsval(y)); + } + tempfree(x, ""); + tempfree(y, ""); + switch (n) { + case LT: if (i<0) return(true); + else return(false); + case LE: if (i<=0) return(true); + else return(false); + case NE: if (i!=0) return(true); + else return(false); + case EQ: if (i == 0) return(true); + else return(false); + case GE: if (i>=0) return(true); + else return(false); + case GT: if (i>0) return(true); + else return(false); + default: /* can't happen */ + error(MM_ERROR, ":56:Unknown relational operator %d", n); + } + /*NOTREACHED*/ + return 0; +} + +static void tfree(register Cell *a, char *s) +{ + if (dbg>1) printf("## tfree %.8s %06lo %s\n", + s, (long)a, a->sval ? 
a->sval : (unsigned char *)""); + if (freeable(a)) + xfree(a->sval); + if (a == tmps) + error(MM_ERROR, ":57:Tempcell list is curdled"); + a->cnext = tmps; + tmps = a; +} + +Cell *gettemp(const char *s) +{ int i; + register Cell *x; + + if (!tmps) { + tmps = (Cell *) calloc(100, sizeof(Cell)); + if (!tmps) + error(MM_ERROR, ":58:No space for temporaries"); + for(i = 1; i < 100; i++) + tmps[i-1].cnext = &tmps[i]; + tmps[i-1].cnext = 0; + } + x = tmps; + tmps = x->cnext; + *x = tempcell; + if (dbg>1) printf("## gtemp %.8s %06lo\n", s, (long)x); + return(x); +} + +Cell *indirect(Node **a, int n) +{ + register Cell *x; + register int m; + register unsigned char *s; + + x = execute(a[0]); + m = getfval(x); + if (m == 0 && !is2number(s = getsval(x), 0)) /* suspicion! */ + error(MM_ERROR, ":59:Illegal field $(%s)", s); + tempfree(x, ""); + x = fieldadr(m); + x->ctype = OCELL; + x->csub = CFLD; + return(x); +} + +Cell *substr(Node **a, int nnn) +{ + register int k, m, n; + wchar_t wc; + register unsigned char *s, *sp, *sq; + int temp; + register Cell *x, *y, *z = 0; + + x = execute(a[0]); + y = execute(a[1]); + if (a[2] != 0) + z = execute(a[2]); + s = getsval(x); + k = strlen((char*)s) + 1; + if (k <= 1) { + tempfree(x, ""); + tempfree(y, ""); + if (a[2] != 0) { + tempfree(z, ""); + } + x = gettemp(""); + setsval(x, (unsigned char *)""); + return(x); + } + m = getfval(y); + if (m <= 0) + m = 1; + else if (m > k) + m = k; + tempfree(y, ""); + if (a[2] != 0) { + n = getfval(z); + tempfree(z, ""); + } else + n = k - 1; + if (n < 0) + n = 0; + else if (n > k - m) + n = k - m; + dprintf( ("substr: m=%d, n=%d, s=%s\n", m, n, s) ); + if (mb_cur_max > 1) { + for (sp = s; m > 1 && *sp; m--) { + next(wc, sp, k); + sp += k; + } + m = sp - s + 1; + for (sq = sp ; n > 0 && *sq; n--) { + next(wc, sq, k); + sq += k; + } + n = sq - sp; + dprintf( ("substr: multibyte: m=%d, n=%d, s=%s\n", m, n, s) ); + } + y = gettemp(""); + temp = s[n+m-1]; /* with thanks to John Linderman */ + s[n+m-1] 
= '\0'; + setsval(y, s + m - 1); + s[n+m-1] = temp; + tempfree(x, ""); + return(y); +} + +Cell *sindex(Node **a, int nnn) +{ + register Cell *x, *y, *z; + register unsigned char *s1, *s2, *p1, *p2, *q; + int n, nq, n2; + wchar_t wc, wq, w2; + Awkfloat v = 0.0; + + x = execute(a[0]); + s1 = getsval(x); + y = execute(a[1]); + s2 = getsval(y); + + z = gettemp(""); + for (p1 = s1; next(wc, p1, n), wc != '\0'; p1 += n) { + for (q = p1, p2 = s2; + next(wq, q, nq), + next(w2, p2, n2), + w2 != '\0' && wq == w2; + q += nq, p2 += n2) + ; + if (w2 == '\0') { + v = (Awkfloat) chrdist(s1, p1); + break; + } + } + tempfree(x, ""); + tempfree(y, ""); + setfval(z, v); + return(z); +} + + +int format(unsigned char **buf, int *bufsize, const unsigned char *s, Node *a) +{ + unsigned char *fmt = NULL; + int fmtsz = 0; + unsigned char *p, *t; + const unsigned char *os; + register Cell *x; + int flag = 0; + + os = s; + fmt = malloc(fmtsz = CHUNK); + if (*bufsize == 0) + *buf = malloc(*bufsize = CHUNK); + if (fmt == NULL || *buf == NULL) + error(MM_ERROR, outofspace, "format"); + p = *buf; + while (*s) { + if (p >= &(*buf)[*bufsize]) + growbuf(buf, bufsize, CHUNK, &p, "format"); + if (*s != '%') { + *p++ = *s++; + continue; + } + if (*(s+1) == '%') { + *p++ = '%'; + s += 2; + continue; + } + for (t=fmt; (*t++ = *s) != '\0'; s++) { + if (t >= &fmt[fmtsz]) + growbuf(&fmt, &fmtsz, CHUNK, &t, "format"); + if (isalpha(*s) && *s != 'l' && *s != 'h' && *s != 'L') + break; /* the ansi panoply */ + if (*s == '*') { + x = execute(a); + a = a->nnext; + t--; + growsprintf(&fmt, &t, &fmtsz, "%d", (int) getfval(x)); + tempfree(x, ""); + } + } + *t = '\0'; + switch (*s) { + case 'a': case 'A': + case 'e': case 'E': + case 'f': case 'F': + case 'g': case 'G': + flag = 1; + break; + case 'd': case 'i': + flag = 2; + if(*(s-1) == 'l') break; + *(t-1) = 'l'; + *t = 'd'; + *++t = '\0'; + break; + case 'o': case 'x': case 'X': case 'u': + flag = *(s-1) == 'l' ? 
12 : 13; + break; + case 's': + /* + * Note: If MB_CUR_MAX > 1, the precision is in + * bytes, not characters. This doesn't make much + * sense in awk context, but it seems to match + * what POSIX demands. + */ + flag = 4; + break; + case 'c': + if (mb_cur_max > 1) { + *(t-1) = 'l'; + *t = 'c'; + *++t = '\0'; + flag = 6; + } else + flag = 5; + break; + default: + flag = 0; + break; + } + if (flag == 0) { + growsprintf(buf, &p, bufsize, "%s", fmt); + continue; + } + if (a == NULL) + error(MM_ERROR, ":61:Not enough args in printf(%s)", + os); + x = execute(a); + a = a->nnext; + switch (flag) { + case 1: growsprintf(buf, &p, bufsize, (char *)fmt, getfval(x)); + break; + case 2: growsprintf(buf, &p, bufsize, (char *)fmt, + (long) getfval(x)); + break; + case 3: growsprintf(buf, &p, bufsize, (char *)fmt, + (int) getfval(x)); + break; + case 12:growsprintf(buf, &p, bufsize, (char *)fmt, + (unsigned long) getfval(x)); + break; + case 13:growsprintf(buf, &p, bufsize, (char *)fmt, + (unsigned int) getfval(x)); + break; + case 4: growsprintf(buf, &p, bufsize, (char *)fmt, getsval(x)); + break; + case 5: isnum(x) ? growsprintf(buf, &p, bufsize, (char *)fmt, + (int) getfval(x)) + : growsprintf(buf, &p, bufsize, (char *)fmt, + getsval(x)[0]); + break; + case 6: isnum(x) ? growsprintf(buf, &p, bufsize, (char *)fmt, + (wint_t) getfval(x)) + : growsprintf(buf, &p, bufsize, (char *)fmt, + (wint_t) getsval(x)[0]); + break; + } + tempfree(x, ""); + s++; + } + *p = '\0'; + for ( ; a; a = a->nnext) /* evaluate any remaining args */ + execute(a); + xfree(fmt); + return 0; +} + +Cell *awsprintf(Node **a, int n) +{ + register Cell *x; + register Node *y; + unsigned char *buf = NULL; + int bufsize = 0; + + y = a[0]->nnext; + x = execute(a[0]); + if (format(&buf, &bufsize, getsval(x), y) == -1) + error(MM_ERROR, ":62:sprintf string %.40s ... too long", buf); + tempfree(x, ""); + x = gettemp(""); + x->sval = /*tostring(buf);*/ buf ? 
buf : tostring(""); + x->tval = STR; + return(x); +} + +Cell *aprintf(Node **a, int n) +{ + FILE *fp; + register Cell *x; + register Node *y; + unsigned char *buf = NULL; + int bufsize = 0; + + y = a[0]->nnext; + x = execute(a[0]); + if (format(&buf, &bufsize, getsval(x), y) == -1) + error(MM_ERROR, ":63:printf string %.40s ... too long", buf); + tempfree(x, ""); + if (buf) { + if (a[1] == NULL) + fputs((char *)buf, stdout); + else { + fp = redirect((intptr_t)a[1], a[2]); + fputs((char *)buf, fp); + fflush(fp); + } + free(buf); + } + return(true); +} + +Cell *arith(Node **a, int n) +{ + Awkfloat i, j = 0; + double v; + register Cell *x, *y, *z; + + x = execute(a[0]); + i = getfval(x); + tempfree(x, ""); + if (n != UMINUS) { + y = execute(a[1]); + j = getfval(y); + tempfree(y, ""); + } + z = gettemp(""); + switch (n) { + case ADD: + i += j; + break; + case MINUS: + i -= j; + break; + case MULT: + i *= j; + break; + case DIVIDE: + if (j == 0) + error(MM_ERROR, ":64:Division by zero"); + i /= j; + break; + case MOD: + if (j == 0) + error(MM_ERROR, ":65:Division by zero in mod"); + modf(i/j, &v); + i = i - j * v; + break; + case UMINUS: + i = -i; + break; + case POWER: + if (j >= 0 && modf(j, &v) == 0.0) /* pos integer exponent */ + i = ipow(i, (int) j); + else + i = errcheck(pow(i, j), (unsigned char *)"pow"); + break; + default: /* can't happen */ + error(MM_ERROR, ":66:Illegal arithmetic operator %d", n); + } + setfval(z, i); + return(z); +} + +double ipow(double x, int n) +{ + double v; + + if (n <= 0) + return 1; + v = ipow(x, n/2); + if (n % 2 == 0) + return v * v; + else + return x * v * v; +} + +Cell *incrdecr(Node **a, int n) +{ + register Cell *x, *z; + register int k; + Awkfloat xf; + + x = execute(a[0]); + xf = getfval(x); + k = (n == PREINCR || n == POSTINCR) ? 
1 : -1; + if (n == PREINCR || n == PREDECR) { + setfval(x, xf + k); + return(x); + } + z = gettemp(""); + setfval(z, xf); + setfval(x, xf + k); + tempfree(x, ""); + return(z); +} + +Cell *assign(Node **a, int n) +{ + register Cell *x, *y; + Awkfloat xf, yf; + double v; + + y = execute(a[1]); + x = execute(a[0]); /* order reversed from before... */ + if (n == ASSIGN) { /* ordinary assignment */ + if ((y->tval & (STR|NUM)) == (STR|NUM)) { + setsval(x, getsval(y)); + x->fval = getfval(y); + x->tval |= NUM; + } + else if (y->tval & STR) + setsval(x, getsval(y)); + else if (y->tval & NUM) + setfval(x, getfval(y)); + else + funnyvar(y, (char *)gettxt(readvofid, readvof)); + tempfree(y, ""); + return(x); + } + xf = getfval(x); + yf = getfval(y); + switch (n) { + case ADDEQ: + xf += yf; + break; + case SUBEQ: + xf -= yf; + break; + case MULTEQ: + xf *= yf; + break; + case DIVEQ: + if (yf == 0) + error(MM_ERROR, ":67:Division by zero in /="); + xf /= yf; + break; + case MODEQ: + if (yf == 0) + error(MM_ERROR, ":68:Division by zero in %%="); + modf(xf/yf, &v); + xf = xf - yf * v; + break; + case POWEQ: + if (yf >= 0 && modf(yf, &v) == 0.0) /* pos integer exponent */ + xf = ipow(xf, (int) yf); + else + xf = errcheck(pow(xf, yf), (unsigned char *)"pow"); + break; + default: + error(MM_ERROR, ":69:Illegal assignment operator %d", n); + break; + } + tempfree(y, ""); + setfval(x, xf); + return(x); +} + +Cell *cat(Node **a, int q) +{ + register Cell *x, *y, *z; + register int n1, n2; + register unsigned char *s; + + x = execute(a[0]); + y = execute(a[1]); + getsval(x); + getsval(y); + n1 = (int)strlen((char*)x->sval); + n2 = (int)strlen((char*)y->sval); + s = (unsigned char *) malloc(n1 + n2 + 1); + if (s == NULL) + error(MM_ERROR, ":70:Out of space concatenating %.15s and %.15s", + x->sval, y->sval); + strcpy((char*)s, (char*)x->sval); + strcpy((char*)s+n1, (char*)y->sval); + tempfree(y, ""); + z = gettemp(""); + z->sval = s; + z->tval = STR; + tempfree(x, ""); + return(z); +} + 
+Cell *pastat(Node **a, int n) +{ + register Cell *x; + + if (a[0] == 0) + x = execute(a[1]); + else { + x = execute(a[0]); + if (istrue(x)) { + tempfree(x, ""); + x = execute(a[1]); + } + } + return x; +} + +Cell *dopa2(Node **a, int n) +{ + register Cell *x; + register int pair; + + pair = (intptr_t) a[3]; + if (pairstack[pair] == 0) { + x = execute(a[0]); + if (istrue(x)) + pairstack[pair] = 1; + tempfree(x, ""); + } + if (pairstack[pair] == 1) { + x = execute(a[1]); + if (istrue(x)) + pairstack[pair] = 0; + tempfree(x, ""); + x = execute(a[2]); + return(x); + } + return(false); +} + +Cell *split(Node **a, int nnn) +{ + Cell *x = 0, *y, *ap; + register unsigned char *s; + wchar_t sep, wc; + unsigned char *t, temp, num[25], *fs = 0; + int m, n, sepl; + + y = execute(a[0]); /* source string */ + s = getsval(y); + if (a[2] == 0) /* fs string */ + fs = *FS; + else if ((intptr_t) a[3] == STRING) { /* split(str,arr,"string") */ + x = execute(a[2]); + fs = getsval(x); + } else if ((intptr_t) a[3] == REGEXPR) + fs = (unsigned char*) "(regexpr)"; /* split(str,arr,/regexpr/) */ + else + error(MM_ERROR, ":71:Illegal type of split()"); + next(sep, fs, sepl); + ap = execute(a[1]); /* array name */ + freesymtab(ap); + dprintf( ("split: s=|%s|, a=%s, sep=|%s|\n", s, ap->nval, fs) ); + ap->tval &= ~STR; + ap->tval |= ARR; + ap->sval = (unsigned char *) makesymtab(NSYMTAB); + + n = 0; + if ((*s != '\0' && sep != '\0' && fs[sepl] != '\0') || + ((intptr_t) a[3] == REGEXPR)) { /* reg expr */ + fa *pfa; + if ((intptr_t) a[3] == REGEXPR) { /* it's ready already */ + pfa = (fa *) a[2]; + } else { + pfa = makedfa(fs, 1); + } + pfa->notbol = 0; + if (nematch(pfa,s)) { + pfa->notbol = REG_NOTBOL; + do { + n++; + snprintf((char *)num, sizeof num, "%d", n); + temp = *patbeg; + *patbeg = '\0'; + setsymtab(num, s, 0.0, STR|CANBENUM, (Array *)ap->sval); + *patbeg = temp; + s = patbeg + patlen; + if (*(patbeg+patlen-1) == 0 || *s == 0) { + n++; + snprintf((char *)num, sizeof num, "%d", n); + 
setsymtab(num, "", 0.0, STR, (Array *) ap->sval); + pfa->notbol = 0; + goto spdone; + } + } while (nematch(pfa,s)); + } + n++; + snprintf((char *)num, sizeof num, "%d", n); + setsymtab(num, s, 0.0, STR|CANBENUM, (Array *)ap->sval); + spdone: + pfa = NULL; + } else if (sep == ' ') { + for (n = 0; ; ) { + while (*s == ' ' || *s == '\t' || *s == '\n') + s++; + if (*s == 0) + break; + n++; + t = s; + next(wc, s, m); + do { + s += m; + next(wc, s, m); + } while (wc!=' ' && wc!='\t' && wc!='\n' && wc!='\0'); + temp = *s; + *s = '\0'; + snprintf((char *)num, sizeof num, "%d", n); + setsymtab(num, t, 0.0, STR|CANBENUM, (Array *)ap->sval); + *s = temp; + if (*s != 0) + s++; + } + } else if (*s != 0) { + for (;;) { + n++; + t = s; + while (next(wc, s, m), + wc != sep && wc != '\n' && wc != '\0') + s += m; + temp = *s; + *s = '\0'; + snprintf((char *)num, sizeof num, "%d", n); + setsymtab(num, t, 0.0, STR|CANBENUM, (Array *)ap->sval); + *s = temp; + if (wc == '\0') + break; + s += m; + } + } + tempfree(ap, ""); + tempfree(y, ""); + if (a[2] != 0 && (intptr_t) a[3] == STRING) { + tempfree(x, ""); + } + x = gettemp(""); + x->tval = NUM; + x->fval = n; + return(x); +} + +Cell *condexpr(Node **a, int n) +{ + register Cell *x; + + x = execute(a[0]); + if (istrue(x)) { + tempfree(x, ""); + x = execute(a[1]); + } else { + tempfree(x, ""); + x = execute(a[2]); + } + return(x); +} + +Cell *ifstat(Node **a, int n) +{ + register Cell *x; + + x = execute(a[0]); + if (istrue(x)) { + tempfree(x, ""); + x = execute(a[1]); + } else if (a[2] != 0) { + tempfree(x, ""); + x = execute(a[2]); + } + return(x); +} + +Cell *whilestat(Node **a, int n) +{ + register Cell *x; + + in_loop++; + for (;;) { + x = execute(a[0]); + if (!istrue(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + x = execute(a[1]); + if (isbreak(x)) { + x = true; + in_loop--; + return(x); + } + if (isnext(x) || isexit(x) || isret(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + } + /*in_loop--;*/ +} + +Cell 
*dostat(Node **a, int n) +{ + register Cell *x; + + in_loop++; + for (;;) { + x = execute(a[0]); + if (isbreak(x)) { + in_loop--; + return true; + } + if (isnext(x) || isexit(x) || isret(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + x = execute(a[1]); + if (!istrue(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + } + /*in_loop--;*/ +} + +Cell *forstat(Node **xa, int n) +{ + char **a = (char **)xa; + register Cell *x; + + in_loop++; + x = execute(a[0]); + tempfree(x, ""); + for (;;) { + if (a[1]!=0) { + x = execute(a[1]); + if (!istrue(x)) { + in_loop--; + return(x); + } + else tempfree(x, ""); + } + x = execute(a[3]); + if (isbreak(x)) { /* turn off break */ + in_loop--; + return true; + } + if (isnext(x) || isexit(x) || isret(x)) { + in_loop--; + return(x); + } + tempfree(x, ""); + x = execute(a[2]); + tempfree(x, ""); + } + /*in_loop--;*/ +} + +Cell *instat(Node **a, int n) +{ + register Cell *x, *vp, *arrayp, *cp, *ncp; + Array *tp; + int i; + + in_loop++; + vp = execute(a[0]); + arrayp = execute(a[1]); + if (!isarr(arrayp)) + error(MM_ERROR, notarray, arrayp->nval); + tp = (Array *) arrayp->sval; + tempfree(arrayp, ""); + for (i = 0; i < tp->size; i++) { /* this routine knows too much */ + for (cp = tp->tab[i]; cp != NULL; cp = ncp) { + setsval(vp, cp->nval); + ncp = cp->cnext; + x = execute(a[2]); + if (isbreak(x)) { + tempfree(vp, ""); + in_loop--; + return true; + } + if (isnext(x) || isexit(x) || isret(x)) { + tempfree(vp, ""); + in_loop--; + return(x); + } + tempfree(x, ""); + } + } + in_loop--; + return true; +} + +static int closefile(const char *a); + +Cell *bltin(Node **a, int n) +{ + static unsigned saved_srand = 1; + register Cell *x, *y; + Awkfloat u; + register int t; + unsigned char *p, *buf; + Node *nextarg; + + t = (intptr_t) a[0]; + x = execute(a[1]); + nextarg = a[1]->nnext; + switch (t) { + case FLENGTH: + u = (Awkfloat) chrlen(getsval(x)); break; + case FLOG: + u = errcheck(log(getfval(x)), (unsigned char *)"log"); break; 
+ case FINT: + modf(getfval(x), &u); break; + case FEXP: + u = errcheck(exp(getfval(x)), (unsigned char *)"exp"); break; + case FSQRT: + u = errcheck(sqrt(getfval(x)), (unsigned char *)"sqrt"); break; + case FSIN: + u = sin(getfval(x)); break; + case FCOS: + u = cos(getfval(x)); break; + case FATAN: + if (nextarg == 0) { + error(MM_WARNING, + ":72:atan2 requires two arguments; returning 1.0"); + u = 1.0; + } else { + y = execute(a[1]->nnext); + u = atan2(getfval(x), getfval(y)); + tempfree(y, ""); + nextarg = nextarg->nnext; + } + break; + case FSYSTEM: + fflush(stdout); /* in case something is buffered already */ + u = (Awkfloat) system((char *)getsval(x)) / 256; /* 256 is unix-dep */ + break; + case FRAND: + u = (Awkfloat) (rand() % 32767) / 32767.0; + break; + case FSRAND: + u = saved_srand; /* return previous seed */ + if (x->tval & REC) /* no argument provided */ + saved_srand = time(NULL); + else + saved_srand = getfval(x); + srand((int) saved_srand); + break; + case FTOUPPER: + case FTOLOWER: + p = getsval(x); + if ((buf = malloc(strlen((char *)p) + 1)) == 0) + error(MM_ERROR, outofspace, "case-conversion"); + strcpy((char*)buf, (char*)getsval(x)); + if (t == FTOUPPER) { + if (mb_cur_max == 1) { + for (p = buf; *p; p++) + if (islower(*p)) + *p = toupper(*p); + } else + caseconv(buf, towupper); + } else { + if (mb_cur_max == 1) { + for (p = buf; *p; p++) + if (isupper(*p)) + *p = tolower(*p); + } else + caseconv(buf, towlower); + } + tempfree(x, ""); + x = gettemp(""); + setsval(x, buf); + free(buf); + return x; + case FCLOSE: + u = (Awkfloat)closefile((char *)getsval(x)); + break; + default: /* can't happen */ + error(MM_ERROR, ":73:Illegal function type %d", t); + break; + } + tempfree(x, ""); + x = gettemp(""); + setfval(x, u); + if (nextarg != 0) { + error(MM_WARNING, ":74:Function has too many arguments"); + for ( ; nextarg; nextarg = nextarg->nnext) + execute(nextarg); + } + return(x); +} + +Cell *print(Node **a, int n) +{ + register Node *x; + register 
Cell *y; + FILE *fp; + + if (a[1] == 0) + fp = stdout; + else + fp = redirect((intptr_t)a[1], a[2]); + for (x = a[0]; x != NULL; x = x->nnext) { + y = execute(x); + /* + * ALMOST getsval(). POSIX.2 requires that + * numeric values be converted according to OFMT + * (not CONVFMT) for print. + */ + if (posix && (y->tval & (ARR|FLD|REC|STR)) == STR) + fputs((char *)y->sval, fp); + else if (!posix || (y->tval & (ARR|FLD|REC|NUM)) != NUM) + fputs((char *)r_getsval(y), fp); + else if ((long)y->fval == y->fval) + fprintf(fp, "%ld", (long)y->fval); + else + fprintf(fp, (char *)*OFMT, y->fval); + tempfree(y, ""); + if (x->nnext == NULL) + fputs((char *)*ORS, fp); + else + fputs((char *)*OFS, fp); + } + if (a[1] != 0) + fflush(fp); + return(true); +} + +/*ARGSUSED*/ +Cell *nullproc(Node **a, int n) { return 0; } + + +static struct afile +{ + FILE *fp; + unsigned char *fname; + int mode; /* '|', 'a', 'w' */ +} *files; +static int fopen_max; + +FILE *redirect(int a, Node *b) +{ + FILE *fp; + Cell *x; + unsigned char *fname; + + x = execute(b); + fname = getsval(x); + fp = openfile(a, fname); + if (fp == NULL) + error(MM_ERROR, badopen, fname, strerror(errno)); + tempfree(x, ""); + return fp; +} + +FILE *openfile(int a, unsigned char *s) +{ + register int i, m; + register FILE *fp = 0; + + if (*s == '\0') + error(MM_ERROR, ":75:Null file name in print or getline"); + for (i=0; i < fopen_max; i++) + if (files[i].fname && + strcmp((char*)s, (char*)files[i].fname) == 0) + if ((a == files[i].mode) || (a==APPEND && files[i].mode==GT)) + return files[i].fp; + for (i=0; i < fopen_max; i++) + if (files[i].fp == 0) + break; + if (i >= fopen_max) { + if ((files = realloc(files, sizeof *files * + (fopen_max = (i + 15))))==0) + error(MM_ERROR, ":76:%s makes too many open files", s); + memset(&files[i], 0, (fopen_max - i) * sizeof *files); + } + fflush(stdout); /* force a semblance of order */ + m = a; + if (a == GT) { + fp = fopen((char *)s, "w"); + } else if (a == APPEND) { + fp = 
fopen((char *)s, "a"); + m = GT; /* so can mix > and >> */ + } else if (a == '|') { /* output pipe */ + fp = popen((char *)s, "w"); + } else if (a == LE) { /* input pipe */ + fp = popen((char *)s, "r"); + } else if (a == LT) { /* getline <file */ + fp = strcmp((char *)s, "-") == 0 ? stdin : fopen((char *)s, "r"); /* "-" is stdin */ + } else /* can't happen */ + error(MM_ERROR, ":77:Illegal redirection"); + if (fp != NULL) { + files[i].fname = tostring(s); + files[i].fp = fp; + files[i].mode = m; + } + return fp; +} + +static int endfile(struct afile *afp) +{ + int ret; + + if (ferror(afp->fp)) { + clearerr(afp->fp); + error(MM_WARNING, ioerror, afp->fname); + errorflag = 1; + } + if (afp->mode == '|' || afp->mode == LE) + ret = pclose(afp->fp); + else + ret = fclose(afp->fp); + if (ret == EOF) { + error(MM_WARNING, ":79:I/O error occurred while closing %s", + afp->fname); + errorflag = 1; + } + if (afp->fp != stdout) { + xfree(afp->fname); + afp->fp = 0; + } + return ret; +} + +static int closefile(const char *a) +{ + int i, ret; + + ret = EOF; + for (i = 0; i < fopen_max; i++) + if (files[i].fname && strcmp(a, (char*)files[i].fname) == 0) + ret = endfile(&files[i]); + return(ret); +} + +static void closeall(void) +{ + struct afile std; + int i; + + for (i = 0; i < fopen_max; i++) + if (files[i].fp) + (void)endfile(&files[i]); + std.fp = stdout; + std.fname = (unsigned char *)"<stdout>"; + std.mode = GT; + (void)endfile(&std); +} + +Cell *sub(Node **a, int nnn) +{ + unsigned char *sptr, *pb, *q; + register Cell *x, *y, *result; + unsigned char *buf = NULL, *t; + int bufsize = 0; + fa *pfa; + + x = execute(a[3]); /* target string */ + t = getsval(x); + if (a[0] == 0) + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y, ""); + } + y = execute(a[2]); /* replacement string */ + result = false; + if (pmatch(pfa, t)) { + growbuf(&buf, &bufsize, CHUNK, NULL, "sub"); + pb = buf; + sptr = t; + while (sptr 
< patbeg) { + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, "sub"); + } + sptr = getsval(y); + while (*sptr != 0) { + if (*sptr == '\\' && *(sptr+1) == '&') { + sptr++; /* skip \, */ + *pb++ = *sptr++; /* add & */ + } else if (*sptr == '&') { + sptr++; + for (q = patbeg; q < patbeg+patlen; ) { + *pb++ = *q++; + growbuf(&buf, &bufsize, CHUNK, + &pb, "sub"); + } + } else + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, "sub"); + } + *pb = '\0'; + sptr = patbeg + patlen; + if ((patlen == 0 && *patbeg) || (patlen && *(sptr-1))) + while ((*pb++ = *sptr++)) { + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, + "sub"); + } + setsval(x, buf); + result = true;; + free(buf); + } + tempfree(x, ""); + tempfree(y, ""); + return result; +} + +Cell *gsub(Node **a, int nnn) +{ + register Cell *x, *y; + unsigned char *rptr, *sptr, *t, *pb; + unsigned char *buf = NULL; + int bufsize = 0; + register fa *pfa; + int mflag, num; + + mflag = 0; /* if mflag == 0, can replace empty string */ + num = 0; + x = execute(a[3]); /* target string */ + t = getsval(x); + if (a[0] == 0) + pfa = (fa *) a[1]; /* regular expression */ + else { + y = execute(a[1]); + pfa = makedfa(getsval(y), 1); + tempfree(y, ""); + } + y = execute(a[2]); /* replacement string */ + pfa->notbol = 0; + if (pmatch(pfa, t)) { + pfa->notbol = REG_NOTBOL; + growbuf(&buf, &bufsize, CHUNK, NULL, "gsub"); + pb = buf; + rptr = getsval(y); + do { + /* + unsigned char *p; + int i; + printf("target string: %s, *patbeg = %o, patlen = %d\n", + t, *patbeg, patlen); + printf(" match found: "); + p=patbeg; + for (i=0; i<patlen; i++) + printf("%c", *p++); + printf("\n"); + */ + if (patlen == 0 && *patbeg != 0) { /* matched empty string */ + if (mflag == 0) { /* can replace empty */ + num++; + sptr = rptr; + while (*sptr != 0) { + if (*sptr == '\\' && *(sptr+1) == '&') { + sptr++; + *pb++ = *sptr++; + } else if (*sptr == '&') { + unsigned char *q; + 
sptr++; + for (q = patbeg; q < patbeg+patlen; ) { + *pb++ = *q++; + if (pb >= &buf[bufsize]) + growbuf(&buf, + &bufsize, CHUNK, + &pb, "gsub"); + } + } else + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, + &bufsize, CHUNK, + &pb, "gsub"); + } + } + if (*t == 0) /* at end */ + goto done; + *pb++ = *t++; + mflag = 0; + } + else { /* matched nonempty string */ + num++; + sptr = t; + while (sptr < patbeg) { + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, + &pb, "gsub"); + *pb++ = *sptr++; + } + sptr = rptr; + while (*sptr != 0) { + if (*sptr == '\\' && *(sptr+1) == '&') { + sptr++; + *pb++ = *sptr++; + } else if (*sptr == '&') { + unsigned char *q; + sptr++; + for (q = patbeg; q < patbeg+patlen; ) { + *pb++ = *q++; + if (pb >= &buf[bufsize]) + growbuf(&buf, + &bufsize, CHUNK, + &pb, "gsub"); + } + } else + *pb++ = *sptr++; + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, + &pb, "gsub"); + } + t = patbeg + patlen; + if ((*(t-1) == 0) || (*t == 0)) + goto done; + mflag = 1; + } + } while (pmatch(pfa,t)); + sptr = t; + while ((*pb++ = *sptr++)) { + if (pb >= &buf[bufsize]) + growbuf(&buf, &bufsize, CHUNK, &pb, "gsub"); + } + done: *pb = '\0'; + setsval(x, buf); + pfa->notbol = 0; + free(buf); + } + tempfree(x, ""); + tempfree(y, ""); + x = gettemp(""); + x->tval = NUM; + x->fval = num; + return(x); +} +#include <stdarg.h> /* MR ul92-34309a2 */ +static int +growsprintf(unsigned char **whole, unsigned char **target, int *size, + const char *fmt, ...) 
+{ + va_list ap; + int ret; + size_t diff = 0, mx; + + if (*size == 0) { + if ((*whole = malloc(*size = CHUNK)) == NULL) + goto oflo; + *target = *whole; + } + diff = *target - *whole; +again: va_start(ap, fmt); + + mx = *size - diff - 8; + ret = vsnprintf((char *)*target, mx, fmt, ap); + va_end(ap); + + if (ret < 0 || ret >= mx) { + if (ret < 0) { + char dummy[2]; + va_start(ap, fmt); + ret = vsnprintf(dummy, sizeof dummy, fmt, ap); + va_end(ap); + if (ret < 0) + goto oflo; + } + if ((*whole = realloc(*whole, *size = ret + 1 + diff + 8)) == 0) + oflo: error(MM_ERROR, + ":103:Formatted result would be too long: %.20s ...", + fmt); + *target = &(*whole)[diff]; + goto again; + } + + while (**target) /* NUL characters might have been printed; */ + (*target)++; /* don't skip past them. */ + return ret; +} + +int chrlen(const unsigned char *s) +{ + wchar_t wc; + int m = 0, n; + + while (next(wc, s, n), wc != '\0') { + s += n; + m++; + } + return m; +} + +int chrdist(const unsigned char *s, const unsigned char *end) +{ + wchar_t wc; + int m = 0, n; + + while (next(wc, s, n), s <= end) { + s += n; + m++; + } + return m; +} + +static void caseconv(unsigned char *s, wint_t (*conv)(wint_t)) +{ + unsigned char *t = s; + wchar_t wc; + int len, nlen; + + while (*s) { + len = mbtowc(&wc, (char *)s, mb_cur_max); + if (len < 0) + *t++ = *s++; + else { + wc = conv(wc); + if ((nlen = wctomb((char *)t, wc)) <= len) { + t += nlen, s += len; + } else + *t++ = *s++; + } + } + *t = '\0'; +} + +static void growbuf(unsigned char **buf, int *bufsize, int incr, + unsigned char **ptr, const char *fn) +{ + unsigned char *op; + + op = *buf; + if ((*buf = realloc(*buf, *bufsize += incr)) == NULL) + error(MM_ERROR, outofspace, fn ? fn : ""); + if (ptr && *ptr) + *ptr = &(*buf)[*ptr - op]; +} diff --git a/nawk/tran.c b/nawk/tran.c @@ -0,0 +1,483 @@ +/* + Changes by Gunnar Ritter, Freiburg i. Br., Germany, December 2002. 
+ + Sccsid @(#)tran.c 1.16 (gritter) 2/4/05> + */ +/* UNIX(R) Regular Expression Tools + + Copyright (C) 2001 Caldera International, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to: + Free Software Foundation, Inc. + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +*/ +/* copyright "%c%" */ + +/* from unixsrc:usr/src/common/cmd/awk/tran.c /main/uw7_nj/1 */ +/* from RCS Header: tran.c 1.2 91/06/25 */ + + +#define DEBUG +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include "awk.h" +#include "y.tab.h" +#include <pfmt.h> + +#undef RS + +#define FULLTAB 2 /* rehash when table gets this x full */ +#define GROWTAB 4 /* grow table by this factor */ + +Array *symtab; /* main symbol table */ + +unsigned char **FS; /* initial field sep */ +unsigned char **RS; /* initial record sep */ +unsigned char **OFS; /* output field sep */ +unsigned char **ORS; /* output record sep */ +unsigned char **OFMT; /* output format for numbers */ +unsigned char **CONVFMT; /* generic format for numbers->strings */ +Awkfloat *NF; /* number of fields in current record */ +Awkfloat *NR; /* number of current record */ +Awkfloat *FNR; /* number of current record in current file */ +unsigned char **FILENAME; /* current filename argument */ +Awkfloat *ARGC; /* number of arguments from command line */ +unsigned char **SUBSEP; /* subscript separator for a[i,j,k]; default \034 */ +Awkfloat *RSTART; /* start of re matched with ~; 
origin 1 (!) */ +Awkfloat *RLENGTH; /* length of same */ + +Cell *recloc; /* location of record */ +Cell *nrloc; /* NR */ +Cell *nfloc; /* NF */ +Cell *fsloc; /* FS */ +Cell *fnrloc; /* FNR */ +Array *ARGVtab; /* symbol table containing ARGV[...] */ +Array *ENVtab; /* symbol table containing ENVIRON[...] */ +Cell *rstartloc; /* RSTART */ +Cell *rlengthloc; /* RLENGTH */ +Cell *symtabloc; /* SYMTAB */ + +Cell *nullloc; +Node *nullnode; /* zero&null, converted into a node for comparisons */ + +extern Cell **fldtab; +static int hash(register unsigned char *s, int n); +static void rehash(Array *tp); + +static const char + assigntovid[] = ":80", + assigntov[] = "assign to"; + +const char + readvofid[] = ":81", + readvof[] = "read value of", + outofspace[] = ":82:Out of space in %s", + nlstring[] = ":83:Newline in string %.10s ..."; + +void syminit(void) +{ + symtab = makesymtab(NSYMTAB); + setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab); + /* this is used for if(x)... tests: */ + nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab); + nullnode = valtonode(nullloc, CCON); + /* recloc = setsymtab("$0", record, 0.0, REC|STR|DONTFREE, symtab); */ + recloc = fldtab[0]; + fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab); + FS = &fsloc->sval; + RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval; + ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval; + OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval; + FILENAME = &setsymtab("FILENAME", "-", 0.0, STR|DONTFREE, symtab)->sval; + nfloc = setsymtab("NF", "", 0.0, NUM, symtab); + NF = &nfloc->fval; + nrloc = setsymtab("NR", "", 0.0, NUM, symtab); + NR = &nrloc->fval; + fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab); + FNR = &fnrloc->fval; + SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval; + 
rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab); + RSTART = &rstartloc->fval; + rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab); + RLENGTH = &rlengthloc->fval; + symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab); + symtabloc->sval = (unsigned char *) symtab; +} + +void arginit(int ac, unsigned char **av) +{ + Cell *cp; + int i; + unsigned char temp[25]; + + for (i = 1; i < ac; i++) /* first make FILENAME first real argument */ + if (!isclvar(av[i])) { + setsval(lookup("FILENAME", symtab), av[i]); + break; + } + ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval; + cp = setsymtab("ARGV", "", 0.0, ARR, symtab); + ARGVtab = makesymtab(NSYMTAB); /* could be (int) ARGC as well */ + cp->sval = (unsigned char *) ARGVtab; + for (i = 0; i < ac; i++) { + snprintf((char *)temp, sizeof temp, "%d", i); + setsymtab(temp, *av, 0.0, STR|CANBENUM, ARGVtab); + av++; + } +} + +void envinit(unsigned char **envp) +{ + Cell *cp; + unsigned char *p; + + cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab); + ENVtab = makesymtab(NSYMTAB); + cp->sval = (unsigned char *) ENVtab; + for ( ; *envp; envp++) { + if ((p = (unsigned char *) strchr((char *) *envp, '=')) == NULL) /* index() on bsd */ + continue; + *p++ = 0; /* split into two strings at = */ + setsymtab(*envp, p, 0.0, STR|CANBENUM, ENVtab); + p[-1] = '='; /* restore in case env is passed down to a shell */ + } +} + +Array *makesymtab(int n) +{ + Array *ap; + Cell **tp; + + ap = (Array *) malloc(sizeof(Array)); + tp = (Cell **) calloc(n, sizeof(Cell *)); + if (ap == NULL || tp == NULL) + error(MM_ERROR, outofspace, "makesymtab"); + ap->nelem = 0; + ap->size = n; + ap->tab = tp; + return(ap); +} + +void freesymtab(Cell *ap) /* free symbol table */ +{ + Cell *cp, *temp; + Array *tp; + int i; + + if (!isarr(ap)) + return; + tp = (Array *) ap->sval; + if (tp == NULL) + return; + for (i = 0; i < tp->size; i++) { + for (cp = tp->tab[i]; cp != NULL; cp = temp) { + xfree(cp->nval); + if (freeable(cp)) + 
xfree(cp->sval); + temp = cp->cnext; /* avoids freeing then using */ + free(cp); + } + } + free(tp->tab); + free(tp); +} + +void freeelem(Cell *ap, unsigned char *s) + /* free elem s from ap (i.e., ap["s"] */ +{ + Array *tp; + Cell *p, *prev = NULL; + int h; + + tp = (Array *) ap->sval; + h = hash(s, tp->size); + for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) + if (strcmp((char *) s, (char *) p->nval) == 0) { + if (prev == NULL) /* 1st one */ + tp->tab[h] = p->cnext; + else /* middle somewhere */ + prev->cnext = p->cnext; + if (freeable(p)) + xfree(p->sval); + free(p->nval); + free(p); + tp->nelem--; + return; + } +} + +Cell *ssetsymtab(unsigned char *n, unsigned char *s, Awkfloat f, + unsigned t, Array *tp) +{ + register int h; + register Cell *p; + + if (n != NULL && (p = lookup(n, tp)) != NULL) { + dprintf( ("setsymtab found %lo: n=%s", (long)p, p->nval) ); + dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? p->sval : tostring(""), p->fval, p->tval) ); + return(p); + } + p = (Cell *) malloc(sizeof(Cell)); + if (p == NULL) + error(MM_ERROR, ":84:Symbol table overflow at %s", n); + p->nval = tostring(n); + p->sval = s ? tostring(s) : tostring(""); + p->fval = f; + p->tval = t & ~CANBENUM; + p->csub = 0; + if (t & CANBENUM) + (void)is2number(0, p); + tp->nelem++; + if (tp->nelem > FULLTAB * tp->size) + rehash(tp); + h = hash(n, tp->size); + p->cnext = tp->tab[h]; + tp->tab[h] = p; + dprintf( ("setsymtab set %lo: n=%s", (long)p, p->nval) ); + dprintf( (" s=\"%s\" f=%g t=%o\n", p->sval? 
p->sval : tostring(""), p->fval, p->tval) ); + return(p); +} + +static int hash(register unsigned char *s, int n) + /* form hash value for string s */ +{ + register unsigned hashval; + + for (hashval = 0; *s != '\0'; s++) + hashval = (*s + 31 * hashval); + return hashval % n; +} + +static void rehash(Array *tp) /* rehash items in small table into big one */ +{ + int i, nh, nsz; + Cell *cp, *op, **np; + + nsz = GROWTAB * tp->size; + np = (Cell **) calloc(nsz, sizeof(Cell *)); + if (np == NULL) + error(MM_ERROR, outofspace, "rehash"); + for (i = 0; i < tp->size; i++) { + for (cp = tp->tab[i]; cp; cp = op) { + op = cp->cnext; + nh = hash(cp->nval, nsz); + cp->cnext = np[nh]; + np[nh] = cp; + } + } + free(tp->tab); + tp->tab = np; + tp->size = nsz; +} + +Cell *slookup(register unsigned char *s, Array *tp) /* look for s in tp */ +{ + register Cell *p, *prev = NULL; + int h; + + h = hash(s, tp->size); + for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext) + if (strcmp((char *) s, (char *) p->nval) == 0) + return(p); /* found it */ + return(NULL); /* not found */ +} + +Awkfloat setfval(register Cell *vp, Awkfloat f) +{ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(assigntovid, assigntov)); + if (vp->tval & FLD) { + int n; + donerec = 0; /* mark $0 invalid */ + for (n = 0; vp != fldtab[n]; n++); + if (n > *NF) + newfld(n); + dprintf( ("setting field %d to %g\n", n, f) ); + } else if (vp->tval & REC) { + donefld = 0; /* mark $1... invalid */ + donerec = 1; + } + vp->tval &= ~STR; /* mark string invalid */ + vp->tval |= NUM; /* mark number ok */ + dprintf( ("setfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval ? 
vp->nval : tostring(""), f, vp->tval) ); + return vp->fval = f; +} + +void funnyvar(Cell *vp, char *rw) +{ + if (vp->tval & ARR) + error(MM_ERROR, ":85:Cannot %s %s; it's an array name.", + rw, vp->nval); + if (vp->tval & FCN) + error(MM_ERROR, ":86:Cannot %s %s; it's a function.", + rw, vp->nval); + error(MM_ERROR, ":87:Funny variable %o: n=%s s=\"%s\" f=%g t=%o", + vp, vp->nval, vp->sval, vp->fval, vp->tval); +} + +unsigned char *setsval(register Cell *vp, unsigned char *s) +{ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(assigntovid, assigntov)); + if (vp->tval & FLD) { + int n; + donerec = 0; /* mark $0 invalid */ + for (n = 0; vp != fldtab[n]; n++); + if (n > *NF) + newfld(n); + dprintf( ("setting field %d to %s\n", n, s) ); + } else if (vp->tval & REC) { + donefld = 0; /* mark $1... invalid */ + donerec = 1; + } else if (vp == fsloc && donefld == 0) { + /* + * Because POSIX.2 requires that awk act as if it always + * splits the current input line immediately after reading, + * we force it to be split into fields just before a change + * to FS if we haven't needed to do so yet. + */ + fldbld(); + } + vp->tval &= ~NUM; + vp->tval |= STR; + s = tostring(s); /* moved to here since "s" can be "vp->sval" */ + if (freeable(vp)) + xfree(vp->sval); + if (vp->tval & REC) { + /* + * Make sure that recsize is large enough to build + * fields afterwards. 
+ */ + unsigned char *os = s; + + s = makerec(s, strlen((char *)s) + 1); + free(os); + } else + vp->tval &= ~DONTFREE; + dprintf( ("setsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval, s, vp->tval) ); + return(vp->sval = s); +} + +Awkfloat r_getfval(register Cell *vp) +{ + /* if (vp->tval & ARR) + ERROR "Illegal reference to array %s", vp->nval FATAL; + return 0.0; */ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(readvofid, readvof)); + if ((vp->tval & FLD) && donefld == 0) + fldbld(); + else if ((vp->tval & REC) && donerec == 0) + recbld(); + if (!isnum(vp)) { /* not marked as a number */ + vp->fval = awk_atof((char *)vp->sval); /* best guess */ + if (is2number(vp->sval, 0) && !(vp->tval&CON)) + vp->tval |= NUM; /* make NUM only sparingly */ + } + dprintf( ("getfval %lo: %s = %g, t=%o\n", (long)vp, vp->nval, vp->fval, vp->tval) ); + return(vp->fval); +} + +unsigned char *r_getsval(register Cell *vp) +{ + unsigned char s[100]; + + /* if (vp->tval & ARR) + ERROR "Illegal reference to array %s", + vp->nval FATAL; + return ""; */ + if ((vp->tval & (NUM | STR)) == 0) + funnyvar(vp, (char *)gettxt(readvofid, readvof)); + if ((vp->tval & FLD) && donefld == 0) + fldbld(); + else if ((vp->tval & REC) && donerec == 0) + recbld(); + if ((vp->tval & STR) == 0) { + if (!(vp->tval&DONTFREE)) + xfree(vp->sval); + if ((long)vp->fval == vp->fval) { + snprintf((char *)s, sizeof s, "%ld", (long)vp->fval); + vp->tval |= STR; + } else { + snprintf((char *)s, sizeof s, + (char *)(posix ? *CONVFMT : *OFMT), + vp->fval); + /* + * In case CONVFMT is changed by the program, + * we leave the string value uncached for non- + * integer numeric constants. Ugh. + */ + if (!(vp->tval & CON)) + vp->tval |= STR; + } + vp->sval = tostring(s); + vp->tval &= ~DONTFREE; + } + dprintf( ("getsval %lo: %s = \"%s\", t=%o\n", (long)vp, vp->nval ? vp->nval : tostring(""), vp->sval ? 
vp->sval : tostring(""), vp->tval) ); + return(vp->sval); +} + +unsigned char *stostring(register const unsigned char *s) +{ + register unsigned char *p; + + p = malloc(strlen((char *) s)+1); + if (p == NULL) + error(MM_ERROR, ":88:Out of space in tostring on %s", s); + strcpy((char *) p, (char *) s); + return(p); +} + +unsigned char *qstring(unsigned char *s, int delim) + /* collect string up to delim */ +{ + unsigned char *q; + int c, n; + + for (q = cbuf; (c = *s) != delim; s++) { + if (q >= cbuf + CBUFLEN - 1) + vyyerror(":89:String %.10s ... too long", cbuf); + else if (c == '\n') + vyyerror(nlstring, cbuf); + else if (c != '\\') + *q++ = c; + else /* \something */ + switch (c = *++s) { + case '\\': *q++ = '\\'; break; + case 'n': *q++ = '\n'; break; + case 't': *q++ = '\t'; break; + case 'b': *q++ = '\b'; break; + case 'f': *q++ = '\f'; break; + case 'r': *q++ = '\r'; break; + default: + if (!isdigit(c)) { + *q++ = c; + break; + } + n = c - '0'; + if (isdigit(s[1])) { + n = 8 * n + *++s - '0'; + if (isdigit(s[1])) + n = 8 * n + *++s - '0'; + } + *q++ = n; + break; + } + } + *q = '\0'; + return cbuf; +} diff --git a/nawk/version.c b/nawk/version.c @@ -0,0 +1,25 @@ +#include "awk.h" +#if defined (SU3) +const char version[] = "@(#)awk_su3.sl 1.51 (gritter) 12/25/06"; +int posix = 1; +#elif defined (SUS) +const char version[] = "@(#)awk_sus.sl 1.51 (gritter) 12/25/06"; +int posix = 1; +#else +const char version[] = "@(#)nawk.sl 1.51 (gritter) 12/25/06"; +int posix = 0; +#endif +/* SLIST */ +/* +awk.g.y: Sccsid @(#)awk.g.y 1.9 (gritter) 5/14/06> +awk.h: Sccsid @(#)awk.h 1.23 (gritter) 12/25/04> +awk.lx.l: Sccsid @(#)awk.lx.l 1.13 (gritter) 11/22/05> +b.c: Sccsid @(#)b.c 1.6 (gritter) 5/15/04> +lib.c: Sccsid @(#)lib.c 1.27 (gritter) 12/25/06> +main.c: Sccsid @(#)main.c 1.14 (gritter) 12/19/04> +maketab.c: Sccsid @(#)maketab.c 1.11 (gritter) 12/4/04> +parse.c: Sccsid @(#)parse.c 1.7 (gritter) 12/4/04> +run.c: Sccsid @(#)run.c 1.33 (gritter) 12/25/06> +tran.c: Sccsid 
@(#)tran.c 1.16 (gritter) 2/4/05> +rerule.sed:# Sccsid @(#)rerule.sed 1.1 (gritter) 2/6/05 +*/ diff --git a/od/mkfile b/od/mkfile @@ -0,0 +1,8 @@ +BIN = od +OBJ = od.o +LOCAL_CFLAGS = -DSUS +INSTALL_BIN = od +INSTALL_MAN1 = od.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/od/od.1 b/od/od.1 @@ -0,0 +1,291 @@ +.\" +.\" Sccsid @(#)od.1 1.10 (gritter) 2/6/05 +.\" Parts taken from od(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH OD 1 "2/6/05" "Heirloom Toolchest" "User Commands" +.SH NAME +od \- octal dump +.SH SYNOPSIS +.PD 0 +.HP +.ad l +\fBod\fR [\fB\-bcCdDfFoOsSvxX\fR] [\fIfile\fR] +[[\fB+\fR]\fIoffset\fR[\fB.\fR][\fBb\fR]] +.HP +.ad l +\fBod\fR [\fB\-v\fR] [\fB\-A\ \fIaddress_base\fR] [\fB\-j\ \fIskip\fR] +[\fB\-N\ \fIcount\fR] [\fB\-t\ \fItype_string\fR] ... [\fIfile\fR ...] +.br +.PD +.ad b +.SH DESCRIPTION +.I Od +dumps +.I file +in +one or more formats +as +selected by the option arguments. +If no format specification is present, +.B \-o +is default. +The meanings of the option arguments are: +.TP +.B \-b +Interpret bytes in octal. +.TP +.B \-c +Interpret characters. +Certain non-graphic characters appear as C escapes: +null=\e0, +backspace=\eb, +formfeed=\ef, +newline=\en, +return=\er, +tab=\et; +others appear as 3-digit octal numbers. +For a multibyte character, +the graphical representation is printed for its first byte, +remaining bytes are marked +.BR ** . +.TP +.B \-d +Interpret unsigned two-byte words in decimal. +.TP +.B \-D +Interpret unsigned four-byte words in decimal. +.TP +.B \-f +Interpret floating point format as single precision. +.TP +.B \-F +Interpret floating point format as double precision. +.TP +.B \-o +Interpret two-byte words in octal. +.TP +.B \-O +Interpret four-byte words in octal. +.TP +.B \-s +Interpret signed two-byte words in decimal. +.TP +.B \-S +Interpret signed four-byte words in decimal. 
+.TP +.B \-v +Print identical groups of output lines +that immediately follow each other +instead of abbreviating +all but the first one by printing a single +.B * +character. +.TP +.B \-x +Interpret two-byte words in hexadecimal. +.TP +.B \-X +Interpret four-byte words in hexadecimal. +.PP +The following option is supported as an extension: +.TP +.B \-C +Prints the input data interpreted as characters +as another column right next to the regular output. +Non-printable characters are replaced by periods. +.PP +The +.I file +argument specifies which file is to be dumped. +If no file argument is specified, +the standard input is used. +If more than one file argument is specified, +the concatenation of all files +without an intervening separator +is dumped. +.PP +The offset argument specifies the offset +in the file where dumping is to commence. +This argument is normally interpreted +as octal bytes. +If `\fB.\fR' is appended, the offset is interpreted in +decimal. +If `\fBb\fR' is appended, the offset is interpreted in +blocks of 512 bytes. +If the file argument is omitted, +the offset argument must be preceded by +.RB ` + ' +with +.BR /usr/5bin/posix/od . +.PP +The following options have been introduced by POSIX.2. +If any of these options is present, +an offset-like argument +is always interpreted as the name of a file to be dumped. +.TP +\fB\-A \fIaddress_base\fR +Sets the format of the file offset printed in the first column. +Valid values for \fIaddress_base\fR are: +.RS +.TP 3 +.B d +Print offset as decimal. +.TP 3 +.B n +Print no offset column. +.TP 3 +.B o +Print offset as octal (default). +.TP 3 +.B x +Print offset as hexadecimal. +.RE +.TP +\fB\-j \fIskip\fR +Skip +.I skip +bytes of input, +where +.I skip +may be either a decimal number, +an octal number preceded by +.BR 0 , +or a hexadecimal number preceded by +.B 0x +or +.BR 0X . 
+If the last character of +.I skip +is +.B b +(if not a hexadecimal number), +.BR k , +or +.BR m , +the value is multiplied by +512, 1024, or 1048576, respectively. +.TP +\fB\-N \fIcount\fR +Terminate processing after +.I count +bytes of input, +where +.I count +may be either a decimal number, +an octal number preceded by +.BR 0 , +or a hexadecimal number preceded by +.B 0x +or +.BR 0X . +.TP +\fB\-t \fItype_string\fR +Set output format. +\fItype_string\fR may consist of one or more specifications as follows: +.RS +.TP +\fBa\fR +Print characters, +ignoring the most significant bit. +ASCII control characters are printed as their names, +other nonprintable characters are printed as octal bytes. +.TP +\fBc\fR +Print characters. +Non-printable characters are either printed as escape sequences +`\e0', `\ea', `\eb', `\ef', `\en', `\er', `\et', `\ev' +or as octal bytes. +For a multibyte character, +the graphical representation is printed for its first byte, +remaining bytes are marked +.BR ** . +.TP +\fBd\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print signed decimal words, +with the word length indicated by the second character +(\fB1\fR one byte per word, +\fB2\fR two bytes per word, +\fB4\fR four bytes per word, +\fB8\fR eight bytes per word, +\fBC\fR the size of the C language `char' data type, +\fBS\fR the size of the C language `short' data type, +\fBI\fR the size of the C language `int' data type, +and \fBL\fR the size of the C language `long' data type). +The default is the machine word length. +.TP +\fBf\fR[\fB4\fR|\fB8\fR|\fBF\fR|\fBD\fR|\fBL\fR] +Print floating point format, +with the precision indicated by the second character +(\fB4\fR and \fBF\fR single precision, +\fB8\fR, \fBD\fR, and \fBL\fR double precision). +The default is double precision. +.TP +\fBo\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print octal words, +with the word length indicated by the second character +as described for +.B d +above. 
+.TP +\fBu\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print unsigned decimal words, +with the word length indicated by the second character +as described for +.B d +above. +.TP +\fBx\fR[\fB1\fR|\fB2\fR|\fB4\fR|\fB8\fR|\fBC\fR|\fBS\fR|\fBI\fR|\fBL\fR] +Print hexadecimal words, +with the word length indicated by the second character +as described for +.B d +above. +.RE +.PP +Unless the +.B \-N +option is given, +dumping continues until end-of-file. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +for the +.B \-c +and +.B \-tc +options. diff --git a/od/od.c b/od/od.c @@ -0,0 +1,1078 @@ +/* + * od - octal dump + * + * Gunnar Ritter, Freiburg i. Br., Germany, December 2002. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SUS) +static const char sccsid[] USED = "@(#)od_sus.sl 1.26 (gritter) 5/29/05"; +#else +static const char sccsid[] USED = "@(#)od.sl 1.26 (gritter) 5/29/05"; +#endif + +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <inttypes.h> +#include <locale.h> +#include <ctype.h> +#include <wctype.h> +#include <wchar.h> +#include <limits.h> +#include "asciitype.h" +#include "atoll.h" + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif /* _IO_getc_unlocked */ +#ifdef _IO_putc_unlocked +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif /* _IO_putc_unlocked */ +#endif /* __GLIBC__ */ + +enum { + BLOCK = 16 +}; + +/* + * An input block. + */ +union block { + char b_c[BLOCK]; + int16_t b_16[8]; + int32_t b_32[4]; + int64_t b_64[2]; + float b_f[4]; + double b_d[2]; +}; + +/* + * Format type as given with the -t option. + */ +struct type { + struct type *t_nxt; /* next type */ + const char *t_prf; /* format string */ + int t_rst; /* rest of multibyte character */ + char t_cnt; /* word size */ + char t_fmt; /* format character */ + char t_pad; /* space padding length */ + char t_000; /* currently unused */ +}; + +/* + * An input buffer. + */ +struct buffer { + union block bu_blk; /* input data */ + int bu_cnt; /* valid bytes in input data */ +}; + +/* + * Maps -t format to printf strings. 
+ */ +static const struct { + char p_cnt; + char p_fmt; + char p_pad; + char p_000; + const char *p_prf; +} prf[] = { + { 4, 'f', 1, 0, " %14.7e" }, + { 8, 'f', 10, 0, " %21.14le" }, + { 1, 'd', 0, 0, " %3d", }, + { 2, 'd', 0, 0, "\205\212", }, + { 4, 'd', 4, 0, "\12\212", }, + { 8, 'd', 10, 0, "\24\212", }, + { 1, 'o', 0, 0, "\3\10" }, + { 2, 'o', 1, 0, "\6\10" }, + { 4, 'o', 4, 0, "\13\10" }, + { 8, 'o', 9, 0, "\26\10" }, + { 1, 'u', 0, 0, "\3\12" }, + { 2, 'u', 2, 0, "\5\12" }, + { 4, 'u', 5, 0, "\12\12" }, + { 8, 'u', 11, 0, "\24\12" }, + { 1, 'x', 1, 0, "\2\20" }, + { 2, 'x', 3, 0, "\4\20" }, + { 4, 'x', 7, 0, "\10\20" }, + { 8, 'x', 15, 0, "\20\20" }, + { 1, 'a', 0, 0, "" }, + { 1, 'c', 0, 0, "" }, + { 1, '\0', 0, 0, "" }, + { 0, 0, 0, 0, NULL } +}; + +static unsigned errcnt; /* count of errors */ +static char *progname; /* argv[0] to main() */ +static int offset_base = 8;/* base of offset to be printed */ +static int offset_oflo = 07777777; /* max offs. in regular width */ +static long long skip; /* skip bytes of input */ +static long long limit = -1; /* print no more bytes than limit */ +static long long total; /* total bytes of input */ +static long long offset; /* offset to print */ +static int vflag; /* print all lines */ +static int Cflag; /* Cray -C option */ +static char **files; /* files to read */ +static const char *skipstr; /* skip format string for error msg */ +static FILE *curfile; /* current file */ +static struct type *types; /* output formats */ +static int mb_cur_max; /* MB_CUR_MAX */ +static int hadinput; /* did actually read from a file */ +static int stretch; /* stretch output columns */ +static int expensive; /* need to compare output lines */ + +/* + * For -t a. 
/*
 * Character names for -t a, indexed by the 7-bit character value 0..040.
 *
 * Every entry in the three tables below must be exactly three characters
 * long: format() stores one blank, copies the entry behind it and then
 * advances the output position by four columns.
 */
static const char *const ctab_a[] = {
	"nul", "soh", "stx", "etx", "eot", "enq", "ack", "bel",
	" bs", " ht", " nl", " vt", " ff", " cr", " so", " si",
	"dle", "dc1", "dc2", "dc3", "dc4", "nak", "syn", "etb",
	"can", " em", "sub", "esc", " fs", " gs", " rs", " us",
	" sp"
};

/*
 * Character representations for -c, indexed by the byte value 0..040.
 * The last entry (the space character) is three blanks; a shorter string
 * would leave a NUL inside the formatted line.
 */
static const char *const ctab_0[] = {
	" \\0", "001", "002", "003", "004", "005", "006", "007",
	" \\b", " \\t", " \\n", "013", " \\f", " \\r", "016", "017",
	"020", "021", "022", "023", "024", "025", "026", "027",
	"030", "031", "032", "033", "034", "035", "036", "037",
	"   "
};

/*
 * Character representations for -t c, indexed by the byte value 0..040.
 * Differs from the -c table only in the entries for \a and \v.
 */
static const char *const ctab_c[] = {
	" \\0", "001", "002", "003", "004", "005", "006", " \\a",
	" \\b", " \\t", " \\n", " \\v", " \\f", " \\r", "016", "017",
	"020", "021", "022", "023", "024", "025", "026", "027",
	"030", "031", "032", "033", "034", "035", "036", "037",
	"   "
};

/******************************* HELPERS ********************************/
/*
 * calloc() wrapper that never returns NULL: on failure it writes
 * "No storage" to file descriptor 2 and exits with status 077.
 */
static void *
scalloc(size_t nmemb, size_t size)
{
	void *p;

	if ((p = calloc(nmemb, size)) == NULL) {
		write(2, "No storage\n", 11);
		exit(077);
	}
	return p;
}

/*
 * realloc() wrapper that never returns NULL: on failure it writes
 * "No storage" to file descriptor 2 and exits with status 077.
 */
static void *
srealloc(void *vp, size_t nbytes)
{
	void *p;

	if ((p = realloc(vp, nbytes)) == NULL) {
		write(2, "No storage\n", 11);
		exit(077);
	}
	return p;
}

/*static void *
smalloc(size_t nbytes)
{
	return srealloc(NULL, nbytes);
}*/

/******************************* EXECUTION ********************************/
+ */ +static FILE * +nextfile(void) +{ + FILE *fp; + + if (curfile && curfile != stdin) + fclose(curfile); + do { + if (files == NULL || files[0] == NULL) + return NULL; + if (files[0][0] == '-' && files[0][1] == '\0') { + fp = stdin; + if (limit >= 0) + setvbuf(stdin, NULL, _IONBF, 0); + } else { + if ((fp = fopen(files[0], "r")) == NULL) { + fprintf(stderr, "%s: cannot open %s\n", + progname, files[0]); + errcnt |= 1; + } + } + files++; + } while (fp == NULL); + if (hadinput == 0 && fp != NULL) + hadinput++; + return fp; +} + +/* + * Skip bytes of input. + */ +static void +doskip(void) +{ + while (skip > 0) { + if (curfile == NULL || getc(curfile) == EOF) { + if ((curfile = nextfile()) == NULL) { + fprintf(stderr, "%s: %s is too large.\n", + progname, skipstr); + exit(2); + } + continue; + } + total++; + skip--; + } + if (limit >= 0) + limit += total; +} + +/* + * Fill an input buffer. + */ +static int +fill(struct buffer *bp) +{ + int c, i; + + i = 0; + while (i < sizeof bp->bu_blk && (limit <= 0 || total < limit)) { + if (curfile == NULL || (c = getc(curfile)) == EOF) { + if ((curfile = nextfile()) == NULL) + break; + continue; + } + bp->bu_blk.b_c[i++] = (char)c; + total++; + } + bp->bu_cnt = i; + while (i < sizeof bp->bu_blk) + bp->bu_blk.b_c[i++] = '\0'; + return bp->bu_cnt; +} + +/* + * Print a value to the passed buffer. As 64-bit arithmethics requires + * more than twice the time of 32-bit arithmetics on 32-bit platforms, + * generate different function sets for int, long, and long long. + */ +#define digit(T, type) static size_t \ +T ## digit(char *buf, int size, int base, unsigned type n) \ +{ \ + char *cp; \ + int d; \ +\ + if (size == 0) \ + return 0; \ + cp = buf + T ## digit(buf, size - 1, base, n / base); \ + *cp = (d = n % base) > 9 ? 
d - 10 + 'a' : d + '0'; \ + return cp - buf + 1; \ +} + +#define number(T, type) static size_t \ +T ## number(char *buf, const char *fmt, unsigned type n) \ +{ \ + int size = fmt[0] & 0377, base = fmt[1] & 0377; \ + int add = 1; \ +\ + buf[0] = ' '; \ + if (size & 0200) { \ + size &= 0177; \ + buf[add++] = ' '; \ + } \ + if (base & 0200) { \ + base &= 0177; \ + if ((type)n < 0) { \ + buf[add] = '-'; \ + n = 0 - (type)n; \ + } else \ + buf[add] = ' '; \ + add++; \ + } \ + return T ## digit(&buf[add], size, base, n) + add; \ +} + +#define mkfuncs(T, type) digit(T, type) number(T, type) + +mkfuncs(i, int) +mkfuncs(l, long) +mkfuncs(ll, long long) + +/* + * Print the offset at the start of each row. + */ +static void +prna(long long addr, int c) +{ + unsigned long long a; + char buf[30]; + int m, n, s; + + if (offset_base != 0) { + if (addr <= offset_oflo) { + /* + * Address fits in 7 characters and is preceded + * by '0' characters. + */ + if (addr > UINT_MAX) + n = lldigit(buf, 7, offset_base, addr); + else + n = idigit(buf, 7, offset_base, addr); + for (m = 0; m < n; m++) + putc(buf[m], stdout); + } else { + /* + * Precompute the length of the address in + * characters if possible (speed improvement). + */ + switch (offset_base) { + case 8: + a = addr; + for (s = 0; a != 0; s++) + a >>= 3; + break; + case 16: + a = addr; + for (s = 0; a != 0; s++) + a >>= 4; + break; + default: + s = sizeof buf; + } + if (addr > UINT_MAX) + n = lldigit(buf, s, offset_base, addr); + else + n = idigit(buf, s, offset_base, addr); + for (m = 0; buf[m] == '0'; m++); + while (m < n) { + putc(buf[m], stdout); + m++; + } + } + } + if (c != '\0') + putc(c, stdout); +} + +/* + * Print a number of output lines, each preceded by the offset column. 
+ */ +static void +prnt(long long addr, const char *s) +{ + int lc = 0; + + do { + if (lc++ == 0) + prna(addr, '\0'); + else + fputs(" ", stdout); + do + putc(*s, stdout); + while (*s++ != '\n'); + } while (*s != '\0'); +} + +/* + * Append a string to a group of output lines, or flush if s == NULL. + */ +static void +put(const char *s) +{ + static char *ob, *Ob; + static size_t os, Os, ol; + static int eq; + + if (s == NULL) { + if (Ob && !vflag && expensive && strcmp(ob, Ob) == 0) { + if (eq++ == 0) + printf("*\n"); + } else { + prnt(offset, ob); + if (ol + 1 > Os) + Ob = srealloc(Ob, Os = ol + 1); + strcpy(Ob, ob); + eq = 0; + } + ol = 0; + } else { + size_t l = strlen(s); + + if (ol + l + 1 >= os) + ob = srealloc(ob, os = ol + l + 1); + strcpy(&ob[ol], s); + ol += l; + } +} + +/* + * Format the data within the buffers according to tp. + */ +static void +format(struct type *tp, struct buffer *b1, struct buffer *b2) +{ + char buf[200]; + int i, j, n, l = 0; + + switch (tp->t_fmt) { + case 'a': + case '\0': + case 'c': + for (i = 0; i < b1->bu_cnt; i++) { + int c = b1->bu_blk.b_c[i] & 0377; + + if (tp->t_fmt == 'a') + c &= 0177; + if (tp->t_rst) { + strcpy(&buf[l], " **"); + tp->t_rst--; + l += 4; + } else if (tp->t_fmt != 'a' && c > 040 && + mb_cur_max > 1) { + char mb[MB_LEN_MAX]; + struct buffer *bp; + int m, n; + wchar_t wc; + + m = i; + bp = b1; + for (n = 0; n < mb_cur_max; n++) { + mb[n] = bp->bu_blk.b_c[m++]; + if (m >= bp->bu_cnt) { + if (bp == b1) { + bp = b2; + m = 0; + } else + break; + } + } + mb[n] = '\0'; + if ((n = mbtowc(&wc, mb, mb_cur_max)) <= 0 + || !iswprint(wc)) + goto spec; + m = wcwidth(wc); + do + buf[l++] = ' '; + while (++m < 4); + for (m = 0; m < n; m++) + buf[l++] = mb[m]; + if (n > 1) + tp->t_rst = n - 1; + } else if (c > 040 && isprint(c)) { + buf[l++] = ' '; + buf[l++] = ' '; + buf[l++] = ' '; + buf[l++] = c; + } else { + spec: if (c <= 040) { + buf[l] = ' '; + switch (tp->t_fmt) { + case 'a': + strcpy(&buf[l+1], ctab_a[c]); + break; 
+ case '\0': + strcpy(&buf[l+1], ctab_0[c]); + break; + case 'c': + strcpy(&buf[l+1], ctab_c[c]); + break; + } + l += 4; + } else if (tp->t_fmt == 'a' && c == '\177') { + strcpy(&buf[l], " del"); + l += 4; + } else + l += inumber(&buf[l], "\3\10", c); + } + } + break; + case 'f': + case 'd': + case 'o': + case 'u': + case 'x': + for (i = 0, n = 0; + i < BLOCK / tp->t_cnt && n < b1->bu_cnt; + i++, n += tp->t_cnt) { + if (stretch) { + for (j = 0; j < tp->t_pad + stretch - 1; j++) + buf[l++] = ' '; + } + if (tp->t_fmt == 'f') { + switch (tp->t_cnt) { + case 4: + l += sprintf(&buf[l], tp->t_prf, + b1->bu_blk.b_f[i]); + break; + case 8: + l += sprintf(&buf[l], tp->t_prf, + b1->bu_blk.b_d[i]); + break; + } + } else { + switch (tp->t_cnt) { + case 1: + if (tp->t_fmt == 'd') + l += sprintf(&buf[l], tp->t_prf, + b1->bu_blk.b_c[i]); + else + l += inumber(&buf[l], tp->t_prf, + b1->bu_blk.b_c[i]&0377); + break; + case 2: + if (tp->t_fmt == 'd') + l += inumber(&buf[l], tp->t_prf, + b1->bu_blk.b_16[i]); + else + l += inumber(&buf[l], tp->t_prf, + b1->bu_blk.b_16[i] & 0177777U); + break; + case 4: + if (tp->t_fmt == 'd') + l += lnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_32[i]); + else + l += lnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_32[i] & + 037777777777UL); + break; + case 8: + if (tp->t_fmt == 'd') + l+= llnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_64[i]); + else + l+= llnumber(&buf[l], tp->t_prf, + b1->bu_blk.b_64[i] & + 01777777777777777777777ULL); + break; + } + } + } + } + if (Cflag && b1->bu_cnt > 0) { + static int max; + int c; + if (max == 0) + max = l * (BLOCK/tp->t_cnt) / + ((b1->bu_cnt+tp->t_cnt-1) / tp->t_cnt); + while (l < max) + buf[l++] = ' '; + buf[l++] = ' '; + for (i = 0; i < b1->bu_cnt || i % 8; i++) { + c = i < b1->bu_cnt ? b1->bu_blk.b_c[i] & 0377 : '.'; + buf[l++] = isprint(c) ? c : '.'; + } + } + buf[l++] = '\n'; + buf[l] = '\0'; + put(buf); +} + +/* + * Main execution loop. 
Two input buffers are necessary because multibyte + * characters for the -c option do not always end at a buffer boundary. + */ +static void +od(void) +{ + struct buffer b1, b2, *bp, *bq; + struct type *tp; + int star = 0; + + offset = total; + fill(bp = &b1); + fill(bq = &b2); + if (hadinput == 0) + return; + do { + if (star == 0) { + for (tp = types; tp; tp = tp->t_nxt) + format(tp, bp, bq); + put(NULL); + } + offset += bp->bu_cnt; + bp = (bp == &b1 ? &b2 : &b1); + bq = (bq == &b1 ? &b2 : &b1); + /* + * If no multibyte characters are to be printed, identical + * input blocks always lead to identical output lines. It + * is thus not necessary to format them for comparison; + * comparing at this point saves a lot of time for files + * that contain many identical lines. + */ + if (!vflag && !expensive && bp->bu_cnt && + bp->bu_cnt == bq->bu_cnt && + memcmp(bp->bu_blk.b_c, bq->bu_blk.b_c, + bp->bu_cnt) == 0) { + if (star == 0) + printf("*\n"); + star = 1; + } else + star = 0; + } while (fill(bq) > 0 || bp->bu_cnt > 0); + if (total > 0) + prna(total, '\n'); +} + +/*************************** OPTION SCANNING *****************************/ +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-bcdDfFoOsSvxX] [file] [[+]offset[.][b]]\n", + progname); + exit(2); +} + +static void +setfiles(char **av) +{ + if (*av) + files = av; + else { + curfile = stdin; + hadinput = 1; + if (limit >= 0) + setvbuf(stdin, NULL, _IONBF, 0); + } +} + +static void +invarg(int c) +{ + fprintf(stderr, "%s: invalid argument to option -%c\n", progname, c); + usage(); +} + +/* + * Compute output column alignment. + */ +static void +align(void) +{ + struct type *tp, *tq; + + for (tp = types; tp && tp->t_nxt; tp = tp->t_nxt) { + tq = tp->t_nxt; + + if (tp->t_pad != tq->t_pad) { + stretch = 1; + break; + } + } +} + +/* + * Add an element to the list of types. 
+ */ +static void +addtype(char fmt, char cnt) +{ + struct type *tp, *tq; + int i; + + tp = scalloc(1, sizeof *tp); + tp->t_fmt = fmt; + tp->t_cnt = cnt; + for (i = 0; prf[i].p_prf; i++) { + if (prf[i].p_cnt == cnt && prf[i].p_fmt == fmt) { + tp->t_prf = prf[i].p_prf; + tp->t_pad = prf[i].p_pad; + tp->t_000 = prf[i].p_000; + break; + } + } + if (types) { + for (tq = types; tq->t_nxt; tq = tq->t_nxt); + tq->t_nxt = tp; + } else + types = tp; +} + +/* + * Handle the argument to -t. + */ +static int +settype(const char *s) +{ + char fmt, cnt; + + if (s == NULL) { + expensive = mb_cur_max > 1; + addtype('\0', 1); + return 0; + } + while (*s) { + switch (fmt = *s++) { + case 'c': + expensive = mb_cur_max > 1; + /*FALLTHRU*/ + case 'a': + addtype(fmt, 1); + break; + case 'f': + switch (*s) { + case 'F': + case '4': + cnt = 4; + s++; + break; + case 'D': + case 'L': + case '8': + cnt = 8; + s++; + break; + default: + cnt = 8; + } + addtype(fmt, cnt); + break; + case 'd': + case 'o': + case 'u': + case 'x': + switch (*s) { + case '1': + cnt = 1; + s++; + break; + case 'C': + cnt = sizeof (char); + s++; + break; + case '2': + cnt = 2; + s++; + break; + case 'S': + cnt = sizeof (short); + s++; + break; + case '4': + cnt = 4; + s++; + break; + case 'I': + cnt = sizeof (int); + s++; + break; + case '8': + cnt = 8; + s++; + break; + case 'L': + cnt = sizeof (long); + s++; + break; + default: + cnt = sizeof (int); + } + addtype(fmt, cnt); + break; + default: + return -1; + } + } + return 0; +} + +/* + * Handle a traditional offset argument. 
+ */ +static int +setoffset(const char *s) +{ + long long o; + const char *sp; + int base = 8; + int mult = 1; + + skipstr = s; + if (*s == '+') + s++; + for (sp = s; digitchar(*sp & 0377); sp++); + if (sp > s) { + if (*sp == '.') { + base = 10; + sp++; + } + if (*sp == 'b' || *sp == 'B') { + mult = 512; + sp++; + } + if (*sp != '\0') + return -1; + } else + return -1; + o = strtoll(s, NULL, base); + skip = o * mult; + return 0; +} + +/* + * Handle the argument to -j. + */ +static int +setskip(const char *s) +{ + const char *sp = NULL; + long long o; + int base = 10; + int mult = 1; + + skipstr = s; + if (s[0] == '0' && s[1]) { + s++; + if (*s == 'x' || *s == 'X') { + s++; + base = 16; + } else + base = 8; + } + switch (base) { + case 8: + for (sp = s; octalchar(*sp & 0377); sp++); + break; + case 10: + for (sp = s; digitchar(*sp & 0377); sp++); + break; + case 16: + for (sp = s; digitchar(*sp & 0377) || + *sp == 'a' || *sp == 'A' || + *sp == 'b' || *sp == 'B' || + *sp == 'c' || *sp == 'C' || + *sp == 'd' || *sp == 'D' || + *sp == 'e' || *sp == 'E' || + *sp == 'f' || *sp == 'F'; + sp++); + break; + } + if (sp > s) { + switch (*sp) { + case 'b': + mult = 512; + sp++; + break; + case 'k': + mult = 1024; + sp++; + break; + case 'm': + mult = 1048576; + sp++; + break; + case '\0': + break; + default: + return -1; + } + if (*sp != '\0') + return -1; + } else + return -1; + o = strtoull(s, NULL, base); + skip = o * mult; + return 0; +} + +/* + * Handle the argument to -N. 
+ */ +static int +setlimit(const char *s) +{ + long long o; + char *x; + int base = 10; + + if (*s == '0') { + s++; + if (*s == 'x' || *s == 'X') { + s++; + base = 16; + } else + base = 8; + } + o = strtoll(s, &x, base); + if (*x != '\0') + return -1; + limit = o; + return 0; +} + +int +main(int argc, char **argv) +{ + const char optstring[] = ":A:bcCdDfFj:N:oOsSt:vxX"; + int i, newopt = 0;; + + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + if (sizeof (union block) != BLOCK || mb_cur_max > BLOCK) + abort(); + progname = basename(argv[0]); +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + while ((i = getopt(argc, argv, optstring)) != EOF) { + switch (i) { + case 'A': + switch (optarg[0]) { + case 'd': + offset_base = 10; + offset_oflo = 9999999; + break; + case 'o': + offset_base = 8; + offset_oflo = 07777777; + break; + case 'x': + offset_base = 16; + offset_oflo = 0xfffffff; + break; + case 'n': + offset_base = 0; + break; + default: + invarg(i); + } + if (optarg[1] != '\0') + invarg(i); + newopt = 1; + break; + case 'b': + settype("o1"); + break; + case 'c': + settype(NULL); + break; + case 'd': + settype("u2"); + break; + case 'D': + settype("u4"); + break; + case 'f': + settype("fF"); + break; + case 'F': + settype("fD"); + break; + case 'j': + if (setskip(optarg) < 0) + invarg(i); + newopt = 1; + break; + case 'N': + if (setlimit(optarg) < 0) + invarg(i); + newopt = 1; + break; + case 'o': + settype("o2"); + break; + case 'O': + settype("o4"); + break; + case 's': + settype("d2"); + break; + case 'S': + settype("d4"); + break; + case 't': + if (settype(optarg) < 0) + invarg('t'); + newopt = 1; + break; + case 'v': + vflag = 1; + break; + case 'x': + settype("x2"); + break; + case 'X': + settype("x4"); + break; + case ':': + fprintf(stderr, + "%s: option requires an argument -- %c\n", + progname, optopt); + usage(); + case 'C': + Cflag = 1; + break; + case '?': + fprintf(stderr, "%s: bad flag -%c\n", + progname, optopt); + /*FALLTHRU*/ + 
default: + usage(); + } + } + if (newopt == 0 && ((optind>=argc-2 && argc &&argv[argc-1][0] == '+') || +#ifndef SUS + (optind>=argc-2 && argc && +#else /* SUS */ + (optind == argc-1 && +#endif /* SUS */ + digitchar(argv[argc-1][0] & 0377))) && + setoffset(argv[argc-1]) >= 0) { + argc--; + argv[argc] = NULL; + } + setfiles(argc ? &argv[optind] : &argv[0]); + if (types == NULL) + settype("oS"); + align(); + if (skip > 0) + doskip(); + od(); + return errcnt; +} diff --git a/patch/backupfile.c b/patch/backupfile.c @@ -0,0 +1,246 @@ +/*- + * Copyright (C) 1990 Free Software Foundation, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * without restriction. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * backupfile.c -- make Emacs style backup file names + * + * David MacKenzie <djm@ai.mit.edu>. Some algorithms adapted from GNU Emacs. + * + * $OpenBSD: backupfile.c,v 1.20 2009/10/27 23:59:41 deraadt Exp $ + * $FreeBSD$ + */ + +#include <ctype.h> +#include <dirent.h> +#include <libgen.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "backupfile.h" + + +#define ISDIGIT(c) (isascii ((unsigned char)c) && isdigit ((unsigned char)c)) + +/* Which type of backup file names are generated. */ +enum backup_type backup_type = none; + +/* + * The extension added to file names to produce a simple (as opposed to + * numbered) backup file name. 
+ */ +const char *simple_backup_suffix = "~"; + +static char *concat(const char *, const char *); +static char *make_version_name(const char *, int); +static int max_backup_version(const char *, const char *); +static int version_number(const char *, const char *, size_t); +static int argmatch(const char *, const char **); +static void invalid_arg(const char *, const char *, int); + +/* + * Return the name of the new backup file for file FILE, allocated with + * malloc. Return 0 if out of memory. FILE must not end with a '/' unless it + * is the root directory. Do not call this function if backup_type == none. + */ +char * +find_backup_file_name(const char *file) +{ + char *dir, *base_versions, *tmp_file; + int highest_backup; + + if (backup_type == simple) + return concat(file, simple_backup_suffix); + tmp_file = strdup(file); + if (tmp_file == NULL) + return NULL; + base_versions = concat(basename(tmp_file), ".~"); + free(tmp_file); + if (base_versions == NULL) + return NULL; + tmp_file = strdup(file); + if (tmp_file == NULL) { + free(base_versions); + return NULL; + } + dir = dirname(tmp_file); + if (dir == NULL) { + free(base_versions); + free(tmp_file); + return NULL; + } + highest_backup = max_backup_version(base_versions, dir); + free(base_versions); + free(tmp_file); + if (backup_type == numbered_existing && highest_backup == 0) + return concat(file, simple_backup_suffix); + return make_version_name(file, highest_backup + 1); +} + +/* + * Return the number of the highest-numbered backup file for file FILE in + * directory DIR. If there are no numbered backups of FILE in DIR, or an + * error occurs reading DIR, return 0. FILE should already have ".~" appended + * to it. 
+ */ +static int +max_backup_version(const char *file, const char *dir) +{ + DIR *dirp; + struct dirent *dp; + int highest_version, this_version; + size_t file_name_length; + + dirp = opendir(dir); + if (dirp == NULL) + return 0; + + highest_version = 0; + file_name_length = strlen(file); + + while ((dp = readdir(dirp)) != NULL) { + if (strlen(dp->d_name) <= file_name_length) + continue; + + this_version = version_number(file, dp->d_name, file_name_length); + if (this_version > highest_version) + highest_version = this_version; + } + closedir(dirp); + return highest_version; +} + +/* + * Return a string, allocated with malloc, containing "FILE.~VERSION~". + * Return 0 if out of memory. + */ +static char * +make_version_name(const char *file, int version) +{ + char *backup_name; + + if (asprintf(&backup_name, "%s.~%d~", file, version) == -1) + return NULL; + return backup_name; +} + +/* + * If BACKUP is a numbered backup of BASE, return its version number; + * otherwise return 0. BASE_LENGTH is the length of BASE. BASE should + * already have ".~" appended to it. + */ +static int +version_number(const char *base, const char *backup, size_t base_length) +{ + int version; + const char *p; + + version = 0; + if (!strncmp(base, backup, base_length) && ISDIGIT(backup[base_length])) { + for (p = &backup[base_length]; ISDIGIT(*p); ++p) + version = version * 10 + *p - '0'; + if (p[0] != '~' || p[1]) + version = 0; + } + return version; +} + +/* + * Return the newly-allocated concatenation of STR1 and STR2. If out of + * memory, return 0. + */ +static char * +concat(const char *str1, const char *str2) +{ + char *newstr; + + if (asprintf(&newstr, "%s%s", str1, str2) == -1) + return NULL; + return newstr; +} + +/* + * If ARG is an unambiguous match for an element of the null-terminated array + * OPTLIST, return the index in OPTLIST of the matched element, else -1 if it + * does not match any element or -2 if it is ambiguous (is a prefix of more + * than one element). 
+ */ +static int +argmatch(const char *arg, const char **optlist) +{ + int i; /* Temporary index in OPTLIST. */ + size_t arglen; /* Length of ARG. */ + int matchind = -1; /* Index of first nonexact match. */ + int ambiguous = 0; /* If nonzero, multiple nonexact match(es). */ + + arglen = strlen(arg); + + /* Test all elements for either exact match or abbreviated matches. */ + for (i = 0; optlist[i]; i++) { + if (!strncmp(optlist[i], arg, arglen)) { + if (strlen(optlist[i]) == arglen) + /* Exact match found. */ + return i; + else if (matchind == -1) + /* First nonexact match found. */ + matchind = i; + else + /* Second nonexact match found. */ + ambiguous = 1; + } + } + if (ambiguous) + return -2; + else + return matchind; +} + +/* + * Error reporting for argmatch. KIND is a description of the type of entity + * that was being matched. VALUE is the invalid value that was given. PROBLEM + * is the return value from argmatch. + */ +static void +invalid_arg(const char *kind, const char *value, int problem) +{ + fprintf(stderr, "patch: "); + if (problem == -1) + fprintf(stderr, "invalid"); + else /* Assume -2. */ + fprintf(stderr, "ambiguous"); + fprintf(stderr, " %s `%s'\n", kind, value); +} + +static const char *backup_args[] = { + "never", "simple", "nil", "existing", "t", "numbered", 0 +}; + +static enum backup_type backup_types[] = { + simple, simple, numbered_existing, + numbered_existing, numbered, numbered +}; + +/* + * Return the type of backup indicated by VERSION. Unique abbreviations are + * accepted. + */ +enum backup_type +get_version(const char *version) +{ + int i; + + if (version == NULL || *version == '\0') + return numbered_existing; + i = argmatch(version, backup_args); + if (i >= 0) + return backup_types[i]; + invalid_arg("version control type", version, i); + exit(2); +} diff --git a/patch/backupfile.h b/patch/backupfile.h @@ -0,0 +1,39 @@ +/*- + * Copyright (C) 1990 Free Software Foundation, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * without restriction. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. + * + * backupfile.h -- declarations for making Emacs style backup file names + * + * $OpenBSD: backupfile.h,v 1.6 2003/07/28 18:35:36 otto Exp $ + * $FreeBSD$ + */ + +/* When to make backup files. */ +enum backup_type { + /* Never make backups. */ + none, + + /* Make simple backups of every file. */ + simple, + + /* + * Make numbered backups of files that already have numbered backups, + * and simple backups of the others. + */ + numbered_existing, + + /* Make numbered backups of every file. */ + numbered +}; + +extern enum backup_type backup_type; +extern const char *simple_backup_suffix; + +char *find_backup_file_name(const char *file); +enum backup_type get_version(const char *version); diff --git a/patch/common.h b/patch/common.h @@ -0,0 +1,119 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: common.h,v 1.26 2006/03/11 19:41:30 otto Exp $ + * $FreeBSD$ + */ + +#include <sys/types.h> + +#include <stdbool.h> +#include <stdint.h> + +#define DEBUGGING + +/* constants */ + +#define MAXHUNKSIZE 200000 /* is this enough lines? */ +#define INITHUNKMAX 125 /* initial dynamic allocation size */ +#define INITLINELEN 4096 +#define BUFFERSIZE 4096 + +#define SCCSPREFIX "s." 
+#define GET "get -e %s" +#define SCCSDIFF "get -p %s | diff - %s >/dev/null" + +#define RCSSUFFIX ",v" +#define CHECKOUT "co -l %s" +#define RCSDIFF "rcsdiff %s > /dev/null" + +#define ORIGEXT ".orig" +#define REJEXT ".rej" + +/* handy definitions */ + +#define strNE(s1,s2) (strcmp(s1, s2)) +#define strEQ(s1,s2) (!strcmp(s1, s2)) +#define strnNE(s1,s2,l) (strncmp(s1, s2, l)) +#define strnEQ(s1,s2,l) (!strncmp(s1, s2, l)) + +/* typedefs */ + +typedef long LINENUM; /* must be signed */ + +/* globals */ + +extern mode_t filemode; + +extern char *buf; /* general purpose buffer */ +extern size_t buf_size; /* size of general purpose buffer */ + +extern bool using_plan_a; /* try to keep everything in memory */ +extern bool out_of_mem; /* ran out of memory in plan a */ + +#define MAXFILEC 2 + +extern char *filearg[MAXFILEC]; +extern bool ok_to_create_file; +extern char *outname; +extern char *origprae; + +extern char *TMPOUTNAME; +extern char *TMPINNAME; +extern char *TMPREJNAME; +extern char *TMPPATNAME; +extern bool toutkeep; +extern bool trejkeep; + +#ifdef DEBUGGING +extern int debug; +#endif + +extern bool force; +extern bool batch; +extern bool verbose; +extern bool reverse; +extern bool noreverse; +extern bool skip_rest_of_patch; +extern int strippath; +extern bool canonicalize; +/* TRUE if -C was specified on command line. */ +extern bool check_only; +extern bool warn_on_invalid_line; +extern bool last_line_missing_eol; + + +#define CONTEXT_DIFF 1 +#define NORMAL_DIFF 2 +#define ED_DIFF 3 +#define NEW_CONTEXT_DIFF 4 +#define UNI_DIFF 5 + +extern int diff_type; +extern char *revision; /* prerequisite revision, if any */ +extern LINENUM input_lines; /* how long is input file in lines */ + +extern int posix; + diff --git a/patch/inp.c b/patch/inp.c @@ -0,0 +1,485 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. 
Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: inp.c,v 1.36 2012/04/10 14:46:34 ajacoutot Exp $ + * $FreeBSD$ + */ + +#include <sys/types.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/mman.h> + +#include <ctype.h> +#include <libgen.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <fcntl.h> + +#include "common.h" +#include "util.h" +#include "pch.h" +#include "inp.h" + + +/* Input-file-with-indexable-lines abstract type */ + +static size_t i_size; /* size of the input file */ +static char *i_womp; /* plan a buffer for entire file */ +static char **i_ptr; /* pointers to lines in i_womp */ +static char empty_line[] = { '\0' }; + +static int tifd = -1; /* plan b virtual string array */ +static char *tibuf[2]; /* plan b buffers */ +static LINENUM tiline[2] = {-1, -1}; /* 1st line in each buffer */ +static LINENUM lines_per_buf; /* how many lines per buffer 
*/ +static int tireclen; /* length of records in tmp file */ + +static bool rev_in_string(const char *); +static bool reallocate_lines(size_t *); + +/* returns false if insufficient memory */ +static bool plan_a(const char *); + +static void plan_b(const char *); + +/* New patch--prepare to edit another file. */ + +void +re_input(void) +{ + if (using_plan_a) { + free(i_ptr); + i_ptr = NULL; + if (i_womp != NULL) { + munmap(i_womp, i_size); + i_womp = NULL; + } + i_size = 0; + } else { + using_plan_a = true; /* maybe the next one is smaller */ + close(tifd); + tifd = -1; + free(tibuf[0]); + free(tibuf[1]); + tibuf[0] = tibuf[1] = NULL; + tiline[0] = tiline[1] = -1; + tireclen = 0; + } +} + +/* Construct the line index, somehow or other. */ + +void +scan_input(const char *filename) +{ + if (!plan_a(filename)) + plan_b(filename); + if (verbose) { + say("Patching file %s using Plan %s...\n", filename, + (using_plan_a ? "A" : "B")); + } +} + +static bool +reallocate_lines(size_t *lines_allocated) +{ + char **p; + size_t new_size; + + new_size = *lines_allocated * 3 / 2; + p = realloc(i_ptr, (new_size + 2) * sizeof(char *)); + if (p == NULL) { /* shucks, it was a near thing */ + munmap(i_womp, i_size); + i_womp = NULL; + free(i_ptr); + i_ptr = NULL; + *lines_allocated = 0; + return false; + } + *lines_allocated = new_size; + i_ptr = p; + return true; +} + +/* Try keeping everything in memory. */ + +static bool +plan_a(const char *filename) +{ + int ifd, statfailed; + char *p, *s, lbuf[INITLINELEN]; + struct stat filestat; + ptrdiff_t sz; + size_t i; + size_t iline, lines_allocated; + +#ifdef DEBUGGING + if (debug & 8) + return false; +#endif + + if (filename == NULL || *filename == '\0') + return false; + + statfailed = stat(filename, &filestat); + if (statfailed && ok_to_create_file) { + if (verbose) + say("(Creating file %s...)\n", filename); + + /* + * in check_patch case, we still display `Creating file' even + * though we're not. 
The rule is that -C should be as similar + * to normal patch behavior as possible + */ + if (check_only) + return true; + makedirs(filename, true); + close(creat(filename, 0666)); + statfailed = stat(filename, &filestat); + } + if (statfailed && check_only) + fatal("%s not found, -C mode, can't probe further\n", filename); + /* For nonexistent or read-only files, look for RCS or SCCS versions. */ + if (statfailed || + /* No one can write to it. */ + (filestat.st_mode & 0222) == 0 || + /* I can't write to it. */ + ((filestat.st_mode & 0022) == 0 && filestat.st_uid != getuid())) { + const char *cs = NULL, *filebase, *filedir; + struct stat cstat; + char *tmp_filename1, *tmp_filename2; + + tmp_filename1 = strdup(filename); + tmp_filename2 = strdup(filename); + if (tmp_filename1 == NULL || tmp_filename2 == NULL) + fatal("strdupping filename"); + filebase = basename(tmp_filename1); + filedir = dirname(tmp_filename2); + + /* Leave room in lbuf for the diff command. */ + s = lbuf + 20; + +#define try(f, a1, a2, a3) \ + (snprintf(s, buf_size - 20, f, a1, a2, a3), stat(s, &cstat) == 0) + + if (try("%s/RCS/%s%s", filedir, filebase, RCSSUFFIX) || + try("%s/RCS/%s%s", filedir, filebase, "") || + try("%s/%s%s", filedir, filebase, RCSSUFFIX)) { + snprintf(buf, buf_size, CHECKOUT, filename); + snprintf(lbuf, sizeof lbuf, RCSDIFF, filename); + cs = "RCS"; + } else if (try("%s/SCCS/%s%s", filedir, SCCSPREFIX, filebase) || + try("%s/%s%s", filedir, SCCSPREFIX, filebase)) { + snprintf(buf, buf_size, GET, s); + snprintf(lbuf, sizeof lbuf, SCCSDIFF, s, filename); + cs = "SCCS"; + } else if (statfailed) + fatal("can't find %s\n", filename); + + free(tmp_filename1); + free(tmp_filename2); + + /* + * else we can't write to it but it's not under a version + * control system, so just proceed. + */ + if (cs) { + if (!statfailed) { + if ((filestat.st_mode & 0222) != 0) + /* The owner can write to it. 
*/ + fatal("file %s seems to be locked " + "by somebody else under %s\n", + filename, cs); + /* + * It might be checked out unlocked. See if + * it's safe to check out the default version + * locked. + */ + if (verbose) + say("Comparing file %s to default " + "%s version...\n", + filename, cs); + if (system(lbuf)) + fatal("can't check out file %s: " + "differs from default %s version\n", + filename, cs); + } + if (verbose) + say("Checking out file %s from %s...\n", + filename, cs); + if (system(buf) || stat(filename, &filestat)) + fatal("can't check out file %s from %s\n", + filename, cs); + } + } + filemode = filestat.st_mode; + if (!S_ISREG(filemode)) + fatal("%s is not a normal file--can't patch\n", filename); + if ((uint64_t)filestat.st_size > SIZE_MAX) { + say("block too large to mmap\n"); + return false; + } + i_size = (size_t)filestat.st_size; + if (out_of_mem) { + set_hunkmax(); /* make sure dynamic arrays are allocated */ + out_of_mem = false; + return false; /* force plan b because plan a bombed */ + } + if ((ifd = open(filename, O_RDONLY)) < 0) + pfatal("can't open file %s", filename); + + if (i_size) { + i_womp = mmap(NULL, i_size, PROT_READ, MAP_PRIVATE, ifd, 0); + if (i_womp == MAP_FAILED) { + perror("mmap failed"); + i_womp = NULL; + close(ifd); + return false; + } + } else { + i_womp = NULL; + } + + close(ifd); + if (i_size) + madvise(i_womp, i_size, MADV_SEQUENTIAL); + + /* estimate the number of lines */ + lines_allocated = i_size / 25; + if (lines_allocated < 100) + lines_allocated = 100; + + if (!reallocate_lines(&lines_allocated)) + return false; + + /* now scan the buffer and build pointer array */ + iline = 1; + i_ptr[iline] = i_womp; + /* test for NUL too, to maintain the behavior of the original code */ + for (s = i_womp, i = 0; i < i_size && *s != '\0'; s++, i++) { + if (*s == '\n') { + if (iline == lines_allocated) { + if (!reallocate_lines(&lines_allocated)) + return false; + } + /* these are NOT NUL terminated */ + i_ptr[++iline] = s + 
1; + } + } + /* if the last line contains no EOL, append one */ + if (i_size > 0 && i_womp[i_size - 1] != '\n') { + last_line_missing_eol = true; + /* fix last line */ + sz = s - i_ptr[iline]; + p = malloc(sz + 1); + if (p == NULL) { + free(i_ptr); + i_ptr = NULL; + munmap(i_womp, i_size); + i_womp = NULL; + return false; + } + + memcpy(p, i_ptr[iline], sz); + p[sz] = '\n'; + i_ptr[iline] = p; + /* count the extra line and make it point to some valid mem */ + i_ptr[++iline] = empty_line; + } else + last_line_missing_eol = false; + + input_lines = iline - 1; + + /* now check for revision, if any */ + + if (revision != NULL) { + if (!rev_in_string(i_womp)) { + if (force) { + if (verbose) + say("Warning: this file doesn't appear " + "to be the %s version--patching anyway.\n", + revision); + } else if (batch) { + fatal("this file doesn't appear to be the " + "%s version--aborting.\n", + revision); + } else { + ask("This file doesn't appear to be the " + "%s version--patch anyway? [n] ", + revision); + if (*buf != 'y') + fatal("aborted\n"); + } + } else if (verbose) + say("Good. This file appears to be the %s version.\n", + revision); + } + return true; /* plan a will work */ +} + +/* Keep (virtually) nothing in memory. 
*/ + +static void +plan_b(const char *filename) +{ + FILE *ifp; + size_t i = 0, j, maxlen = 1; + char *p; + bool found_revision = (revision == NULL); + + using_plan_a = false; + if ((ifp = fopen(filename, "r")) == NULL) + pfatal("can't open file %s", filename); + unlink(TMPINNAME); + if ((tifd = open(TMPINNAME, O_EXCL | O_CREAT | O_WRONLY, 0666)) < 0) + pfatal("can't open file %s", TMPINNAME); + while (fgets(buf, buf_size, ifp) != NULL) { + if (revision != NULL && !found_revision && rev_in_string(buf)) + found_revision = true; + if ((i = strlen(buf)) > maxlen) + maxlen = i; /* find longest line */ + } + last_line_missing_eol = i > 0 && buf[i - 1] != '\n'; + if (last_line_missing_eol && maxlen == i) + maxlen++; + + if (revision != NULL) { + if (!found_revision) { + if (force) { + if (verbose) + say("Warning: this file doesn't appear " + "to be the %s version--patching anyway.\n", + revision); + } else if (batch) { + fatal("this file doesn't appear to be the " + "%s version--aborting.\n", + revision); + } else { + ask("This file doesn't appear to be the %s " + "version--patch anyway? [n] ", + revision); + if (*buf != 'y') + fatal("aborted\n"); + } + } else if (verbose) + say("Good. 
This file appears to be the %s version.\n", + revision); + } + fseek(ifp, 0L, SEEK_SET); /* rewind file */ + lines_per_buf = BUFFERSIZE / maxlen; + tireclen = maxlen; + tibuf[0] = malloc(BUFFERSIZE + 1); + if (tibuf[0] == NULL) + fatal("out of memory\n"); + tibuf[1] = malloc(BUFFERSIZE + 1); + if (tibuf[1] == NULL) + fatal("out of memory\n"); + for (i = 1;; i++) { + p = tibuf[0] + maxlen * (i % lines_per_buf); + if (i % lines_per_buf == 0) /* new block */ + if (write(tifd, tibuf[0], BUFFERSIZE) < BUFFERSIZE) + pfatal("can't write temp file"); + if (fgets(p, maxlen + 1, ifp) == NULL) { + input_lines = i - 1; + if (i % lines_per_buf != 0) + if (write(tifd, tibuf[0], BUFFERSIZE) < BUFFERSIZE) + pfatal("can't write temp file"); + break; + } + j = strlen(p); + /* These are '\n' terminated strings, so no need to add a NUL */ + if (j == 0 || p[j - 1] != '\n') + p[j] = '\n'; + } + fclose(ifp); + close(tifd); + if ((tifd = open(TMPINNAME, O_RDONLY)) < 0) + pfatal("can't reopen file %s", TMPINNAME); +} + +/* + * Fetch a line from the input file, \n terminated, not necessarily \0. + */ +char * +ifetch(LINENUM line, int whichbuf) +{ + if (line < 1 || line > input_lines) { + if (warn_on_invalid_line) { + say("No such line %ld in input file, ignoring\n", line); + warn_on_invalid_line = false; + } + return NULL; + } + if (using_plan_a) + return i_ptr[line]; + else { + LINENUM offline = line % lines_per_buf; + LINENUM baseline = line - offline; + + if (tiline[0] == baseline) + whichbuf = 0; + else if (tiline[1] == baseline) + whichbuf = 1; + else { + tiline[whichbuf] = baseline; + + if (lseek(tifd, (off_t) (baseline / lines_per_buf * + BUFFERSIZE), SEEK_SET) < 0) + pfatal("cannot seek in the temporary input file"); + + if (read(tifd, tibuf[whichbuf], BUFFERSIZE) < 0) + pfatal("error reading tmp file %s", TMPINNAME); + } + return tibuf[whichbuf] + (tireclen * offline); + } +} + +/* + * True if the string argument contains the revision number we want. 
+ */ +static bool +rev_in_string(const char *string) +{ + const char *s; + size_t patlen; + + if (revision == NULL) + return true; + patlen = strlen(revision); + if (strnEQ(string, revision, patlen) && isspace((unsigned char)string[patlen])) + return true; + for (s = string; *s; s++) { + if (isspace((unsigned char)*s) && strnEQ(s + 1, revision, patlen) && + isspace((unsigned char)s[patlen + 1])) { + return true; + } + } + return false; +} diff --git a/patch/inp.h b/patch/inp.h @@ -0,0 +1,32 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: inp.h,v 1.8 2003/08/15 08:00:51 otto Exp $ + * $FreeBSD$ + */ + +void re_input(void); +void scan_input(const char *); +char *ifetch(LINENUM, int); diff --git a/patch/mkfile b/patch/mkfile @@ -0,0 +1,7 @@ +BIN = patch +OBJ = patch.o backupfile.o inp.o mkpath.o pch.o util.o +LOCAL_LDFLAGS = -pthread +INSTALL_BIN = patch +INSTALL_MAN1 = patch.1 + +<$mkbuild/mk.default diff --git a/patch/mkpath.c b/patch/mkpath.c @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 1983, 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
int	mkpath(char *);

/* Code taken directly from mkdir(1).

 * mkpath -- create directories.
 *	path     - path
 *
 * Walks path one component at a time, creating each directory that does
 * not exist yet (mode 0777).  The buffer is cut temporarily at each '/'
 * while that prefix is examined.
 *
 * Returns 0 on success with path restored to its original contents.
 * Returns -1 after printing a warning on failure; path is then left cut
 * at the component that could not be created or is not a directory.
 */
int
mkpath(char *path)
{
	struct stat sb;
	char *slash;
	int done = 0;

	slash = path;

	while (!done) {
		/* advance to the end of the next component */
		slash += strspn(slash, "/");
		slash += strcspn(slash, "/");

		done = (*slash == '\0');
		*slash = '\0';

		if (stat(path, &sb)) {
			if (errno != ENOENT || (mkdir(path, 0777) &&
			    errno != EEXIST)) {
				warn("%s", path);
				return (-1);
			}
		} else if (!S_ISDIR(sb.st_mode)) {
			warnx("%s: %s", path, strerror(ENOTDIR));
			return (-1);
		}

		/*
		 * Put the separator back, but only where one was removed:
		 * on the last component *slash is the terminating NUL and
		 * must stay that way.
		 */
		if (!done)
			*slash = '/';
	}

	return (0);
}
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $OpenBSD: patch.1,v 1.26 2010/09/03 11:09:29 jmc Exp $ +.\" $FreeBSD$ +.Dd January 29, 2013 +.Dt PATCH 1 +.Os +.Sh NAME +.Nm patch +.Nd apply a diff file to an original +.Sh SYNOPSIS +.Nm +.Bk -words +.Op Fl bCcEeflNnRstuv +.Op Fl B Ar backup-prefix +.Op Fl D Ar symbol +.Op Fl d Ar directory +.Op Fl F Ar max-fuzz +.Op Fl i Ar patchfile +.Op Fl o Ar out-file +.Op Fl p Ar strip-count +.Op Fl r Ar rej-name +.Op Fl V Cm t | nil | never +.Op Fl x Ar number +.Op Fl z Ar backup-ext +.Op Fl Fl posix +.Op Ar origfile Op Ar patchfile +.Ek +.Nm +.Pf \*(Lt Ar patchfile +.Sh DESCRIPTION +.Nm +will take a patch file containing any of the four forms of difference +listing produced by the +.Xr diff 1 +program and apply those differences to an original file, +producing a patched version. +If +.Ar patchfile +is omitted, or is a hyphen, the patch will be read from the standard input. +.Pp +.Nm +will attempt to determine the type of the diff listing, unless overruled by a +.Fl c , +.Fl e , +.Fl n , +or +.Fl u +option. +Context diffs (old-style, new-style, and unified) and +normal diffs are applied directly by the +.Nm +program itself, whereas ed diffs are simply fed to the +.Xr ed 1 +editor via a pipe. +.Pp +If the +.Ar patchfile +contains more than one patch, +.Nm +will try to apply each of them as if they came from separate patch files. 
+This means, among other things, that it is assumed that the name of the file +to patch must be determined for each diff listing, and that the garbage before +each diff listing will be examined for interesting things such as file names +and revision level (see the section on +.Sx Filename Determination +below). +.Pp +The options are as follows: +.Bl -tag -width Ds +.It Xo +.Fl B Ar backup-prefix , +.Fl Fl prefix Ar backup-prefix +.Xc +Causes the next argument to be interpreted as a prefix to the backup file +name. +If this argument is specified, any argument to +.Fl z +will be ignored. +.It Fl b , Fl Fl backup +Save a backup copy of the file before it is modified. +By default the original file is saved with a backup extension of +.Qq .orig +unless the file already has a numbered backup, in which case a numbered +backup is made. +This is equivalent to specifying +.Qo Fl V Cm existing Qc . +This option is currently the default, unless +.Fl -posix +is specified. +.It Fl C , Fl Fl check +Checks that the patch would apply cleanly, but does not modify anything. +.It Fl c , Fl Fl context +Forces +.Nm +to interpret the patch file as a context diff. +.It Xo +.Fl D Ar symbol , +.Fl Fl ifdef Ar symbol +.Xc +Causes +.Nm +to use the +.Qq #ifdef...#endif +construct to mark changes. +The argument following will be used as the differentiating symbol. +Note that, unlike the C compiler, there must be a space between the +.Fl D +and the argument. +.It Xo +.Fl d Ar directory , +.Fl Fl directory Ar directory +.Xc +Causes +.Nm +to interpret the next argument as a directory, +and change the working directory to it before doing anything else. +.It Fl E , Fl Fl remove-empty-files +Causes +.Nm +to remove output files that are empty after the patches have been applied. +This option is useful when applying patches that create or remove files. +.It Fl e , Fl Fl ed +Forces +.Nm +to interpret the patch file as an +.Xr ed 1 +script. 
+.It Xo +.Fl F Ar max-fuzz , +.Fl Fl fuzz Ar max-fuzz +.Xc +Sets the maximum fuzz factor. +This option only applies to context diffs, and causes +.Nm +to ignore up to that many lines in looking for places to install a hunk. +Note that a larger fuzz factor increases the odds of a faulty patch. +The default fuzz factor is 2, and it may not be set to more than +the number of lines of context in the context diff, ordinarily 3. +.It Fl f , Fl Fl force +Forces +.Nm +to assume that the user knows exactly what he or she is doing, and to not +ask any questions. +It assumes the following: +skip patches for which a file to patch cannot be found; +patch files even though they have the wrong version for the +.Qq Prereq: +line in the patch; +and assume that patches are not reversed even if they look like they are. +This option does not suppress commentary; use +.Fl s +for that. +.It Xo +.Fl i Ar patchfile , +.Fl Fl input Ar patchfile +.Xc +Causes the next argument to be interpreted as the input file name +(i.e. a patchfile). +This option may be specified multiple times. +.It Fl l , Fl Fl ignore-whitespace +Causes the pattern matching to be done loosely, in case the tabs and +spaces have been munged in your input file. +Any sequence of whitespace in the pattern line will match any sequence +in the input file. +Normal characters must still match exactly. +Each line of the context must still match a line in the input file. +.It Fl N , Fl Fl forward +Causes +.Nm +to ignore patches that it thinks are reversed or already applied. +See also +.Fl R . +.It Fl n , Fl Fl normal +Forces +.Nm +to interpret the patch file as a normal diff. +.It Xo +.Fl o Ar out-file , +.Fl Fl output Ar out-file +.Xc +Causes the next argument to be interpreted as the output file name. 
+.It Xo +.Fl p Ar strip-count , +.Fl Fl strip Ar strip-count +.Xc +Sets the pathname strip count, +which controls how pathnames found in the patch file are treated, +in case you keep your files in a different directory than the person who sent +out the patch. +The strip count specifies how many slashes are to be stripped from +the front of the pathname. +(Any intervening directory names also go away.) +For example, supposing the file name in the patch file was +.Pa /u/howard/src/blurfl/blurfl.c : +.Pp +Setting +.Fl p Ns Ar 0 +gives the entire pathname unmodified. +.Pp +.Fl p Ns Ar 1 +gives +.Pp +.D1 Pa u/howard/src/blurfl/blurfl.c +.Pp +without the leading slash. +.Pp +.Fl p Ns Ar 4 +gives +.Pp +.D1 Pa blurfl/blurfl.c +.Pp +Not specifying +.Fl p +at all just gives you +.Pa blurfl.c , +unless all of the directories in the leading path +.Pq Pa u/howard/src/blurfl +exist and that path is relative, +in which case you get the entire pathname unmodified. +Whatever you end up with is looked for either in the current directory, +or the directory specified by the +.Fl d +option. +.It Fl R , Fl Fl reverse +Tells +.Nm +that this patch was created with the old and new files swapped. +(Yes, I'm afraid that does happen occasionally, human nature being what it +is.) +.Nm +will attempt to swap each hunk around before applying it. +Rejects will come out in the swapped format. +The +.Fl R +option will not work with ed diff scripts because there is too little +information to reconstruct the reverse operation. +.Pp +If the first hunk of a patch fails, +.Nm +will reverse the hunk to see if it can be applied that way. +If it can, you will be asked if you want to have the +.Fl R +option set. +If it cannot, the patch will continue to be applied normally. +(Note: this method cannot detect a reversed patch if it is a normal diff +and if the first command is an append (i.e. it should have been a delete) +since appends always succeed, due to the fact that a null context will match +anywhere. 
+Luckily, most patches add or change lines rather than delete them, so most +reversed normal diffs will begin with a delete, which will fail, triggering +the heuristic.) +.It Xo +.Fl r Ar rej-name , +.Fl Fl reject-file Ar rej-name +.Xc +Causes the next argument to be interpreted as the reject file name. +.It Xo +.Fl s , Fl Fl quiet , +.Fl Fl silent +.Xc +Makes +.Nm +do its work silently, unless an error occurs. +.It Fl t , Fl Fl batch +Similar to +.Fl f , +in that it suppresses questions, but makes some different assumptions: +skip patches for which a file to patch cannot be found (the same as +.Fl f ) ; +skip patches for which the file has the wrong version for the +.Qq Prereq: +line in the patch; +and assume that patches are reversed if they look like they are. +.It Fl u , Fl Fl unified +Forces +.Nm +to interpret the patch file as a unified context diff (a unidiff). +.It Xo +.Fl V Cm t | nil | never , +.Fl Fl version-control Cm t | nil | never +.Xc +Causes the next argument to be interpreted as a method for creating +backup file names. +The type of backups made can also be given in the +.Ev PATCH_VERSION_CONTROL +or +.Ev VERSION_CONTROL +environment variables, which are overridden by this option. +The +.Fl B +option overrides this option, causing the prefix to always be used for +making backup file names. +The values of the +.Ev PATCH_VERSION_CONTROL +and +.Ev VERSION_CONTROL +environment variables and the argument to the +.Fl V +option are like the GNU Emacs +.Dq version-control +variable; they also recognize synonyms that are more descriptive. +The valid values are (unique abbreviations are accepted): +.Bl -tag -width Ds -offset indent +.It Cm t , numbered +Always make numbered backups. +.It Cm nil , existing +Make numbered backups of files that already have them, +simple backups of the others. +.It Cm never , simple +Always make simple backups. +.El +.It Fl v , Fl Fl version +Causes +.Nm +to print out its revision header and patch level. 
+.It Xo +.Fl x Ar number , +.Fl Fl debug Ar number +.Xc +Sets internal debugging flags, and is of interest only to +.Nm +patchers. +.It Xo +.Fl z Ar backup-ext , +.Fl Fl suffix Ar backup-ext +.Xc +Causes the next argument to be interpreted as the backup extension, to be +used in place of +.Qq .orig . +.It Fl Fl posix +Enables strict +.St -p1003.1-2008 +conformance, specifically: +.Bl -enum +.It +Backup files are not created unless the +.Fl b +option is specified. +.It +If unspecified, the file name used is the first of the old, new and +index files that exists. +.El +.El +.Ss Patch Application +.Nm +will try to skip any leading garbage, apply the diff, +and then skip any trailing garbage. +Thus you could feed an article or message containing a +diff listing to +.Nm , +and it should work. +If the entire diff is indented by a consistent amount, +this will be taken into account. +.Pp +With context diffs, and to a lesser extent with normal diffs, +.Nm +can detect when the line numbers mentioned in the patch are incorrect, +and will attempt to find the correct place to apply each hunk of the patch. +As a first guess, it takes the line number mentioned for the hunk, plus or +minus any offset used in applying the previous hunk. +If that is not the correct place, +.Nm +will scan both forwards and backwards for a set of lines matching the context +given in the hunk. +First +.Nm +looks for a place where all lines of the context match. +If no such place is found, and it's a context diff, and the maximum fuzz factor +is set to 1 or more, then another scan takes place ignoring the first and last +line of context. +If that fails, and the maximum fuzz factor is set to 2 or more, +the first two and last two lines of context are ignored, +and another scan is made. +.Pq The default maximum fuzz factor is 2. +.Pp +If +.Nm +cannot find a place to install that hunk of the patch, it will put the hunk +out to a reject file, which normally is the name of the output file plus +.Qq .rej . 
+(Note that the rejected hunk will come out in context diff form whether the +input patch was a context diff or a normal diff. +If the input was a normal diff, many of the contexts will simply be null.) +The line numbers on the hunks in the reject file may be different than +in the patch file: they reflect the approximate location patch thinks the +failed hunks belong in the new file rather than the old one. +.Pp +As each hunk is completed, you will be told whether the hunk succeeded or +failed, and which line (in the new file) +.Nm +thought the hunk should go on. +If this is different from the line number specified in the diff, +you will be told the offset. +A single large offset MAY be an indication that a hunk was installed in the +wrong place. +You will also be told if a fuzz factor was used to make the match, in which +case you should also be slightly suspicious. +.Ss Filename Determination +If no original file is specified on the command line, +.Nm +will try to figure out from the leading garbage what the name of the file +to edit is. +When checking a prospective file name, pathname components are stripped +as specified by the +.Fl p +option and the file's existence and writability are checked relative +to the current working directory (or the directory specified by the +.Fl d +option). +.Pp +If the diff is a context or unified diff, +.Nm +is able to determine the old and new file names from the diff header. +For context diffs, the +.Dq old +file is specified in the line beginning with +.Qq *** +and the +.Dq new +file is specified in the line beginning with +.Qq --- . +For a unified diff, the +.Dq old +file is specified in the line beginning with +.Qq --- +and the +.Dq new +file is specified in the line beginning with +.Qq +++ . +If there is an +.Qq Index: +line in the leading garbage (regardless of the diff type), +.Nm +will use the file name from that line as the +.Dq index +file. 
+.Pp +.Nm +will choose the file name by performing the following steps, with the first +match used: +.Bl -enum +.It +If +.Nm +is operating in strict +.St -p1003.1-2008 +mode, the first of the +.Dq old , +.Dq new +and +.Dq index +file names that exist is used. +Otherwise, +.Nm +will examine either the +.Dq old +and +.Dq new +file names or, for a non-context diff, the +.Dq index +file name, and choose the file name with the fewest path components, +the shortest basename, and the shortest total file name length (in that order). +.It +If no file exists, +.Nm +checks for the existence of the files in an SCCS or RCS directory +(using the appropriate prefix or suffix) using the criteria specified +above. +If found, +.Nm +will attempt to get or check out the file. +.It +If no suitable file was found to patch, the patch file is a context or +unified diff, and the old file was zero length, the new file name is +created and used. +.It +If the file name still cannot be determined, +.Nm +will prompt the user for the file name to use. +.El +.Pp +Additionally, if the leading garbage contains a +.Qq Prereq:\ \& +line, +.Nm +will take the first word from the prerequisites line (normally a version +number) and check the input file to see if that word can be found. +If not, +.Nm +will ask for confirmation before proceeding. +.Pp +The upshot of all this is that you should be able to say, while in a news +interface, the following: +.Pp +.Dl | patch -d /usr/src/local/blurfl +.Pp +and patch a file in the blurfl directory directly from the article containing +the patch. +.Ss Backup Files +By default, the patched version is put in place of the original, with +the original file backed up to the same name with the extension +.Qq .orig , +or as specified by the +.Fl B , +.Fl V , +or +.Fl z +options. +The extension used for making backup files may also be specified in the +.Ev SIMPLE_BACKUP_SUFFIX +environment variable, which is overridden by the options above. 
+.Pp +If the backup file is a symbolic or hard link to the original file, +.Nm +creates a new backup file name by changing the first lowercase letter +in the last component of the file's name into uppercase. +If there are no more lowercase letters in the name, +it removes the first character from the name. +It repeats this process until it comes up with a +backup file that does not already exist or is not linked to the original file. +.Pp +You may also specify where you want the output to go with the +.Fl o +option; if that file already exists, it is backed up first. +.Ss Notes For Patch Senders +There are several things you should bear in mind if you are going to +be sending out patches: +.Pp +First, you can save people a lot of grief by keeping a +.Pa patchlevel.h +file which is patched to increment the patch level as the first diff in the +patch file you send out. +If you put a +.Qq Prereq: +line in with the patch, it will not let them apply +patches out of order without some warning. +.Pp +Second, make sure you have specified the file names right, either in a +context diff header, or with an +.Qq Index: +line. +If you are patching something in a subdirectory, be sure to tell the patch +user to specify a +.Fl p +option as needed. +.Pp +Third, you can create a file by sending out a diff that compares a +null file to the file you want to create. +This will only work if the file you want to create does not exist already in +the target directory. +.Pp +Fourth, take care not to send out reversed patches, since it makes people wonder +whether they already applied the patch. +.Pp +Fifth, while you may be able to get away with putting 582 diff listings into +one file, it is probably wiser to group related patches into separate files in +case something goes haywire. +.Sh ENVIRONMENT +.Bl -tag -width "PATCH_VERSION_CONTROL" -compact +.It Ev POSIXLY_CORRECT +When set, +.Nm +behaves as if the +.Fl Fl posix +option has been specified. 
+.It Ev SIMPLE_BACKUP_SUFFIX +Extension to use for backup file names instead of +.Qq .orig . +.It Ev TMPDIR +Directory to put temporary files in; default is +.Pa /tmp . +.It Ev PATCH_VERSION_CONTROL +Selects when numbered backup files are made. +.It Ev VERSION_CONTROL +Same as +.Ev PATCH_VERSION_CONTROL . +.El +.Sh FILES +.Bl -tag -width "$TMPDIR/patch*" -compact +.It Pa $TMPDIR/patch* +.Nm +temporary files +.It Pa /dev/tty +used to read input when +.Nm +prompts the user +.El +.Sh EXIT STATUS +The +.Nm +utility exits with one of the following values: +.Pp +.Bl -tag -width Ds -offset indent -compact +.It 0 +Successful completion. +.It 1 +One or more lines were written to a reject file. +.It \*(Gt1 +An error occurred. +.El +.Pp +When applying a set of patches in a loop it behooves you to check this +exit status so you do not apply a later patch to a partially patched file. +.Sh DIAGNOSTICS +Too many to list here, but generally indicative that +.Nm +couldn't parse your patch file. +.Pp +The message +.Qq Hmm... +indicates that there is unprocessed text in the patch file and that +.Nm +is attempting to intuit whether there is a patch in that text and, if so, +what kind of patch it is. +.Sh SEE ALSO +.Xr diff 1 +.Sh STANDARDS +The +.Nm +utility is compliant with the +.St -p1003.1-2008 +specification +(except as detailed above for the +.Fl -posix +option), +though the presence of +.Nm +itself is optional. +.Pp +The flags +.Op Fl BCEFfstVvxz +and +.Op Fl -posix +are extensions to that specification. +.Sh AUTHORS +.An Larry Wall +with many other contributors. +.Sh CAVEATS +.Nm +cannot tell if the line numbers are off in an ed script, and can only detect +bad line numbers in a normal diff when it finds a +.Qq change +or a +.Qq delete +command. +A context diff using fuzz factor 3 may have the same problem. +Until a suitable interactive interface is added, you should probably do +a context diff in these cases to see if the changes made sense. 
+Of course, compiling without errors is a pretty good indication that the patch +worked, but not always. +.Pp +.Nm +usually produces the correct results, even when it has to do a lot of +guessing. +However, the results are guaranteed to be correct only when the patch is +applied to exactly the same version of the file that the patch was +generated from. +.Sh BUGS +Could be smarter about partial matches, excessively deviant offsets and +swapped code, but that would take an extra pass. +.Pp +Check patch mode +.Pq Fl C +will fail if you try to check several patches in succession that build on +each other. +The entire +.Nm +code would have to be restructured to keep temporary files around so that it +can handle this situation. +.Pp +If code has been duplicated (for instance with #ifdef OLDCODE ... #else ... +#endif), +.Nm +is incapable of patching both versions, and, if it works at all, will likely +patch the wrong one, and tell you that it succeeded to boot. +.Pp +If you apply a patch you have already applied, +.Nm +will think it is a reversed patch, and offer to un-apply the patch. +This could be construed as a feature. diff --git a/patch/patch.c b/patch/patch.c @@ -0,0 +1,1074 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: patch.c,v 1.50 2012/05/15 19:32:02 millert Exp $ + * $FreeBSD$ + * + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <getopt.h> +#include <limits.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> + +#include "common.h" +#include "util.h" +#include "pch.h" +#include "inp.h" +#include "backupfile.h" +#include "pathnames.h" + +mode_t filemode = 0644; + +char *buf; /* general purpose buffer */ +size_t buf_size; /* size of the general purpose buffer */ + +bool using_plan_a = true; /* try to keep everything in memory */ +bool out_of_mem = false; /* ran out of memory in plan a */ + +#define MAXFILEC 2 + +char *filearg[MAXFILEC]; +bool ok_to_create_file = false; +char *outname = NULL; +char *origprae = NULL; +char *TMPOUTNAME; +char *TMPINNAME; +char *TMPREJNAME; +char *TMPPATNAME; +bool toutkeep = false; +bool trejkeep = false; +bool warn_on_invalid_line; +bool last_line_missing_eol; + +#ifdef DEBUGGING +int debug = 0; +#endif + +bool force = false; +bool batch = false; +bool verbose = true; +bool reverse = false; +bool noreverse = false; +bool skip_rest_of_patch = false; +int strippath = 957; +bool canonicalize = false; +bool check_only = false; +int diff_type = 0; +char *revision = NULL; /* prerequisite 
revision, if any */ +LINENUM input_lines = 0; /* how long is input file in lines */ +int posix = 0; /* strict POSIX mode? */ + +static void reinitialize_almost_everything(void); +static void get_some_switches(void); +static LINENUM locate_hunk(LINENUM); +static void abort_context_hunk(void); +static void rej_line(int, LINENUM); +static void abort_hunk(void); +static void apply_hunk(LINENUM); +static void init_output(const char *); +static void init_reject(const char *); +static void copy_till(LINENUM, bool); +static bool spew_output(void); +static void dump_line(LINENUM, bool); +static bool patch_match(LINENUM, LINENUM, LINENUM); +static bool similar(const char *, const char *, int); +static void usage(void); + +/* true if -E was specified on command line. */ +static bool remove_empty_files = false; + +/* true if -R was specified on command line. */ +static bool reverse_flag_specified = false; + +/* buffer holding the name of the rejected patch file. */ +static char rejname[NAME_MAX + 1]; + +/* how many input lines have been irretractibly output */ +static LINENUM last_frozen_line = 0; + +static int Argc; /* guess */ +static char **Argv; +static int Argc_last; /* for restarting plan_b */ +static char **Argv_last; + +static FILE *ofp = NULL; /* output file pointer */ +static FILE *rejfp = NULL; /* reject file pointer */ + +static int filec = 0; /* how many file arguments? */ +static LINENUM last_offset = 0; +static LINENUM maxfuzz = 2; + +/* patch using ifdef, ifndef, etc. */ +static bool do_defines = false; +/* #ifdef xyzzy */ +static char if_defined[128]; +/* #ifndef xyzzy */ +static char not_defined[128]; +/* #else */ +static const char else_defined[] = "#else\n"; +/* #endif xyzzy */ +static char end_defined[128]; + + +/* Apply a set of diffs as appropriate. 
*/

/*
 * Program entry point.
 *
 * Creates the four temporary files, parses the command line and then
 * walks every patch found in the patch file.  For each patch every hunk
 * is located in the input (retrying with increasing fuzz and, for the
 * first hunk only, with the hunk swapped to detect a reversed patch),
 * then either applied or written to the reject file.  Finally the
 * patched output is moved into place.
 *
 * The exit status handed to my_exit() is 0 on success, 1 if a hunk was
 * rejected or an output file could not be written, and 2 if no patch
 * was seen at all.
 */
int
main(int argc, char *argv[])
{
	int	error = 0, hunk, failed, i, fd;
	bool	patch_seen, reverse_seen;
	LINENUM	where = 0, newwhere, fuzz, mymaxfuzz;
	const	char *tmpdir;
	char	*v;

	setlinebuf(stdout);
	setlinebuf(stderr);
	for (i = 0; i < MAXFILEC; i++)
		filearg[i] = NULL;

	buf_size = INITLINELEN;
	buf = malloc(buf_size);
	if (buf == NULL)
		fatal("out of memory\n");

	/* Cons up the names of the temporary files. */
	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
		tmpdir = _PATH_TMP;
	/* i becomes the length of tmpdir without its trailing slashes */
	for (i = strlen(tmpdir) - 1; i > 0 && tmpdir[i] == '/'; i--)
		;
	i++;
	if (asprintf(&TMPOUTNAME, "%.*s/patchoXXXXXXXXXX", i, tmpdir) == -1)
		fatal("cannot allocate memory");
	if ((fd = mkstemp(TMPOUTNAME)) < 0)
		pfatal("can't create %s", TMPOUTNAME);
	close(fd);

	if (asprintf(&TMPINNAME, "%.*s/patchiXXXXXXXXXX", i, tmpdir) == -1)
		fatal("cannot allocate memory");
	if ((fd = mkstemp(TMPINNAME)) < 0)
		pfatal("can't create %s", TMPINNAME);
	close(fd);

	if (asprintf(&TMPREJNAME, "%.*s/patchrXXXXXXXXXX", i, tmpdir) == -1)
		fatal("cannot allocate memory");
	if ((fd = mkstemp(TMPREJNAME)) < 0)
		pfatal("can't create %s", TMPREJNAME);
	close(fd);

	if (asprintf(&TMPPATNAME, "%.*s/patchpXXXXXXXXXX", i, tmpdir) == -1)
		fatal("cannot allocate memory");
	if ((fd = mkstemp(TMPPATNAME)) < 0)
		pfatal("can't create %s", TMPPATNAME);
	close(fd);

	v = getenv("SIMPLE_BACKUP_SUFFIX");
	if (v)
		simple_backup_suffix = v;
	else
		simple_backup_suffix = ORIGEXT;

	/* parse switches */
	Argc = argc;
	Argv = argv;
	get_some_switches();

	if (backup_type == none) {
		if ((v = getenv("PATCH_VERSION_CONTROL")) == NULL)
			v = getenv("VERSION_CONTROL");
		if (v != NULL || !posix)
			backup_type = get_version(v);	/* OK to pass NULL. */
	}

	/* make sure we clean up /tmp in case of disaster */
	set_signals(0);

	patch_seen = false;
	for (open_patch_file(filearg[1]); there_is_another_patch();
	    reinitialize_almost_everything()) {
		/* for each patch in patch file */

		patch_seen = true;

		warn_on_invalid_line = true;

		if (outname == NULL)
			outname = savestr(filearg[0]);

		/* for ed script just up and do it and exit */
		if (diff_type == ED_DIFF) {
			do_ed_script();
			continue;
		}
		/* initialize the patched file */
		if (!skip_rest_of_patch)
			init_output(TMPOUTNAME);

		/* initialize reject file */
		init_reject(TMPREJNAME);

		/* find out where all the lines are */
		if (!skip_rest_of_patch)
			scan_input(filearg[0]);

		/*
		 * from here on, open no standard i/o files, because
		 * malloc might misfire and we can't catch it easily
		 */

		/* apply each hunk of patch */
		hunk = 0;
		failed = 0;
		reverse_seen = false;
		out_of_mem = false;
		while (another_hunk()) {
			hunk++;
			fuzz = 0;
			/* never use more fuzz than the hunk has context */
			mymaxfuzz = pch_context();
			if (maxfuzz < mymaxfuzz)
				mymaxfuzz = maxfuzz;
			if (!skip_rest_of_patch) {
				do {
					where = locate_hunk(fuzz);
					if (hunk == 1 && where == 0 && !force && !reverse_seen) {
						/* dwim for reversed patch? */
						if (!pch_swap()) {
							if (fuzz == 0)
								say("Not enough memory to try swapped hunk! Assuming unswapped.\n");
							continue;
						}
						reverse = !reverse;
						/* try again */
						where = locate_hunk(fuzz);
						if (where == 0) {
							/* didn't find it swapped */
							if (!pch_swap())
								/* put it back to normal */
								fatal("lost hunk on alloc error!\n");
							reverse = !reverse;
						} else if (noreverse) {
							if (!pch_swap())
								/* put it back to normal */
								fatal("lost hunk on alloc error!\n");
							reverse = !reverse;
							say("Ignoring previously applied (or reversed) patch.\n");
							skip_rest_of_patch = true;
						} else if (batch) {
							if (verbose)
								say("%seversed (or previously applied) patch detected! %s -R.",
								    reverse ? "R" : "Unr",
								    reverse ? "Assuming" : "Ignoring");
						} else {
							ask("%seversed (or previously applied) patch detected! %s -R? [y] ",
							    reverse ? "R" : "Unr",
							    reverse ? "Assume" : "Ignore");
							if (*buf == 'n') {
								ask("Apply anyway? [n] ");
								if (*buf != 'y')
									skip_rest_of_patch = true;
								else
									reverse_seen = true;
								where = 0;
								reverse = !reverse;
								if (!pch_swap())
									/* put it back to normal */
									fatal("lost hunk on alloc error!\n");
							}
						}
					}
				} while (!skip_rest_of_patch && where == 0 &&
				    ++fuzz <= mymaxfuzz);

				if (skip_rest_of_patch) {	/* just got decided */
					if (ferror(ofp) || fclose(ofp)) {
						say("Error writing %s\n",
						    TMPOUTNAME);
						error = 1;
					}
					ofp = NULL;
				}
			}
			newwhere = pch_newfirst() + last_offset;
			if (skip_rest_of_patch) {
				abort_hunk();
				failed++;
				if (verbose)
					say("Hunk #%d ignored at %ld.\n",
					    hunk, newwhere);
			} else if (where == 0) {
				abort_hunk();
				failed++;
				if (verbose)
					say("Hunk #%d failed at %ld.\n",
					    hunk, newwhere);
			} else {
				apply_hunk(where);
				if (verbose) {
					say("Hunk #%d succeeded at %ld",
					    hunk, newwhere);
					if (fuzz != 0)
						say(" with fuzz %ld", fuzz);
					if (last_offset)
						say(" (offset %ld line%s)",
						    last_offset,
						    last_offset == 1L ? "" : "s");
					say(".\n");
				}
			}
		}

		if (out_of_mem && using_plan_a) {
			/* rewind the argument vector and redo this patch */
			Argc = Argc_last;
			Argv = Argv_last;
			say("\n\nRan out of memory using Plan A--trying again...\n\n");
			if (ofp)
				fclose(ofp);
			ofp = NULL;
			if (rejfp)
				fclose(rejfp);
			rejfp = NULL;
			continue;
		}
		if (hunk == 0)
			fatal("Internal error: hunk should not be 0\n");

		/* finish spewing out the new file */
		if (!skip_rest_of_patch && !spew_output()) {
			say("Can't write %s\n", TMPOUTNAME);
			error = 1;
		}

		/* and put the output where desired */
		ignore_signals();
		if (!skip_rest_of_patch) {
			struct stat	statbuf;
			char	*realout = outname;

			if (!check_only) {
				if (move_file(TMPOUTNAME, outname) < 0) {
					toutkeep = true;
					realout = TMPOUTNAME;
					chmod(TMPOUTNAME, filemode);
				} else
					chmod(outname, filemode);

				if (remove_empty_files &&
				    stat(realout, &statbuf) == 0 &&
				    statbuf.st_size == 0) {
					if (verbose)
						say("Removing %s (empty after patching).\n",
						    realout);
					unlink(realout);
				}
			}
		}
		if (ferror(rejfp) || fclose(rejfp)) {
			say("Error writing %s\n", rejname);
			error = 1;
		}
		rejfp = NULL;
		if (failed) {
			error = 1;
			if (*rejname == '\0') {
				/*
				 * No -r given: the reject file is named
				 * <outname><REJEXT>.  snprintf() always
				 * terminates the buffer and returns the
				 * length it needed, so an over-long name
				 * is detected instead of overflowing or
				 * leaving rejname unterminated.
				 */
				int	len;

				len = snprintf(rejname, sizeof(rejname),
				    "%s%s", outname, REJEXT);
				if (len < 0 || (size_t)len >= sizeof(rejname))
					fatal("filename %s is too long\n", outname);
			}
			if (!check_only)
				say("%d out of %d hunks %s--saving rejects to %s\n",
				    failed, hunk, skip_rest_of_patch ? "ignored" : "failed", rejname);
			else
				say("%d out of %d hunks %s while patching %s\n",
				    failed, hunk, skip_rest_of_patch ? "ignored" : "failed", filearg[0]);
			if (!check_only && move_file(TMPREJNAME, rejname) < 0)
				trejkeep = true;
		}
		set_signals(1);
	}

	if (!patch_seen)
		error = 2;

	my_exit(error);
	/* NOTREACHED */
}

/* Prepare to find the next patch to do in the patch file.
*/ + +static void +reinitialize_almost_everything(void) +{ + re_patch(); + re_input(); + + input_lines = 0; + last_frozen_line = 0; + + filec = 0; + if (!out_of_mem) { + free(filearg[0]); + filearg[0] = NULL; + } + + free(outname); + outname = NULL; + + last_offset = 0; + diff_type = 0; + + free(revision); + revision = NULL; + + reverse = reverse_flag_specified; + skip_rest_of_patch = false; + + get_some_switches(); +} + +/* Process switches and filenames. */ + +static void +get_some_switches(void) +{ + const char *options = "b::B:cCd:D:eEfF:i:lnNo:p:r:RstuvV:x:z:"; + static struct option longopts[] = { + {"backup", no_argument, 0, 'b'}, + {"batch", no_argument, 0, 't'}, + {"check", no_argument, 0, 'C'}, + {"context", no_argument, 0, 'c'}, + {"debug", required_argument, 0, 'x'}, + {"directory", required_argument, 0, 'd'}, + {"ed", no_argument, 0, 'e'}, + {"force", no_argument, 0, 'f'}, + {"forward", no_argument, 0, 'N'}, + {"fuzz", required_argument, 0, 'F'}, + {"ifdef", required_argument, 0, 'D'}, + {"input", required_argument, 0, 'i'}, + {"ignore-whitespace", no_argument, 0, 'l'}, + {"normal", no_argument, 0, 'n'}, + {"output", required_argument, 0, 'o'}, + {"prefix", required_argument, 0, 'B'}, + {"quiet", no_argument, 0, 's'}, + {"reject-file", required_argument, 0, 'r'}, + {"remove-empty-files", no_argument, 0, 'E'}, + {"reverse", no_argument, 0, 'R'}, + {"silent", no_argument, 0, 's'}, + {"strip", required_argument, 0, 'p'}, + {"suffix", required_argument, 0, 'z'}, + {"unified", no_argument, 0, 'u'}, + {"version", no_argument, 0, 'v'}, + {"version-control", required_argument, 0, 'V'}, + {"posix", no_argument, &posix, 1}, + {NULL, 0, 0, 0} + }; + int ch; + + rejname[0] = '\0'; + Argc_last = Argc; + Argv_last = Argv; + if (!Argc) + return; + optind = 1; + while ((ch = getopt_long(Argc, Argv, options, longopts, NULL)) != -1) { + switch (ch) { + case 'b': + if (backup_type == none) + backup_type = numbered_existing; + if (optarg == NULL) + break; + if (verbose) + 
say("Warning, the ``-b suffix'' option has been" + " obsoleted by the -z option.\n"); + /* FALLTHROUGH */ + case 'z': + /* must directly follow 'b' case for backwards compat */ + simple_backup_suffix = savestr(optarg); + break; + case 'B': + origprae = savestr(optarg); + break; + case 'c': + diff_type = CONTEXT_DIFF; + break; + case 'C': + check_only = true; + break; + case 'd': + if (chdir(optarg) < 0) + pfatal("can't cd to %s", optarg); + break; + case 'D': + do_defines = true; + if (!isalpha((unsigned char)*optarg) && *optarg != '_') + fatal("argument to -D is not an identifier\n"); + snprintf(if_defined, sizeof if_defined, + "#ifdef %s\n", optarg); + snprintf(not_defined, sizeof not_defined, + "#ifndef %s\n", optarg); + snprintf(end_defined, sizeof end_defined, + "#endif /* %s */\n", optarg); + break; + case 'e': + diff_type = ED_DIFF; + break; + case 'E': + remove_empty_files = true; + break; + case 'f': + force = true; + break; + case 'F': + maxfuzz = atoi(optarg); + break; + case 'i': + if (++filec == MAXFILEC) + fatal("too many file arguments\n"); + filearg[filec] = savestr(optarg); + break; + case 'l': + canonicalize = true; + break; + case 'n': + diff_type = NORMAL_DIFF; + break; + case 'N': + noreverse = true; + break; + case 'o': + outname = savestr(optarg); + break; + case 'p': + strippath = atoi(optarg); + break; + case 'r': + if ((*(char *)strncpy(rejname, optarg, + sizeof(rejname))) >= sizeof(rejname)) + fatal("argument for -r is too long\n"); + break; + case 'R': + reverse = true; + reverse_flag_specified = true; + break; + case 's': + verbose = false; + break; + case 't': + batch = true; + break; + case 'u': + diff_type = UNI_DIFF; + break; + case 'v': + version(); + break; + case 'V': + backup_type = get_version(optarg); + break; +#ifdef DEBUGGING + case 'x': + debug = atoi(optarg); + break; +#endif + default: + if (ch != '\0') + usage(); + break; + } + } + Argc -= optind; + Argv += optind; + + if (Argc > 0) { + filearg[0] = savestr(*Argv++); + 
Argc--; + while (Argc > 0) { + if (++filec == MAXFILEC) + fatal("too many file arguments\n"); + filearg[filec] = savestr(*Argv++); + Argc--; + } + } + + if (getenv("POSIXLY_CORRECT") != NULL) + posix = 1; +} + +static void +usage(void) +{ + fprintf(stderr, +"usage: patch [-bCcEeflNnRstuv] [-B backup-prefix] [-D symbol] [-d directory]\n" +" [-F max-fuzz] [-i patchfile] [-o out-file] [-p strip-count]\n" +" [-r rej-name] [-V t | nil | never] [-x number] [-z backup-ext]\n" +" [--posix] [origfile [patchfile]]\n" +" patch <patchfile\n"); + my_exit(EXIT_SUCCESS); +} + +/* + * Attempt to find the right place to apply this hunk of patch. + */ +static LINENUM +locate_hunk(LINENUM fuzz) +{ + LINENUM first_guess = pch_first() + last_offset; + LINENUM offset; + LINENUM pat_lines = pch_ptrn_lines(); + LINENUM max_pos_offset = input_lines - first_guess - pat_lines + 1; + LINENUM max_neg_offset = first_guess - last_frozen_line - 1 + pch_context(); + + if (pat_lines == 0) { /* null range matches always */ + if (verbose && fuzz == 0 && (diff_type == CONTEXT_DIFF + || diff_type == NEW_CONTEXT_DIFF + || diff_type == UNI_DIFF)) { + say("Empty context always matches.\n"); + } + return (first_guess); + } + if (max_neg_offset >= first_guess) /* do not try lines < 0 */ + max_neg_offset = first_guess - 1; + if (first_guess <= input_lines && patch_match(first_guess, 0, fuzz)) + return first_guess; + for (offset = 1; ; offset++) { + bool check_after = (offset <= max_pos_offset); + bool check_before = (offset <= max_neg_offset); + + if (check_after && patch_match(first_guess, offset, fuzz)) { +#ifdef DEBUGGING + if (debug & 1) + say("Offset changing from %ld to %ld\n", + last_offset, offset); +#endif + last_offset = offset; + return first_guess + offset; + } else if (check_before && patch_match(first_guess, -offset, fuzz)) { +#ifdef DEBUGGING + if (debug & 1) + say("Offset changing from %ld to %ld\n", + last_offset, -offset); +#endif + last_offset = -offset; + return first_guess - offset; + } 
else if (!check_before && !check_after) + return 0; + } +} + +/* We did not find the pattern, dump out the hunk so they can handle it. */ + +static void +abort_context_hunk(void) +{ + LINENUM i; + const LINENUM pat_end = pch_end(); + /* + * add in last_offset to guess the same as the previous successful + * hunk + */ + const LINENUM oldfirst = pch_first() + last_offset; + const LINENUM newfirst = pch_newfirst() + last_offset; + const LINENUM oldlast = oldfirst + pch_ptrn_lines() - 1; + const LINENUM newlast = newfirst + pch_repl_lines() - 1; + const char *stars = (diff_type >= NEW_CONTEXT_DIFF ? " ****" : ""); + const char *minuses = (diff_type >= NEW_CONTEXT_DIFF ? " ----" : " -----"); + + fprintf(rejfp, "***************\n"); + for (i = 0; i <= pat_end; i++) { + switch (pch_char(i)) { + case '*': + if (oldlast < oldfirst) + fprintf(rejfp, "*** 0%s\n", stars); + else if (oldlast == oldfirst) + fprintf(rejfp, "*** %ld%s\n", oldfirst, stars); + else + fprintf(rejfp, "*** %ld,%ld%s\n", oldfirst, + oldlast, stars); + break; + case '=': + if (newlast < newfirst) + fprintf(rejfp, "--- 0%s\n", minuses); + else if (newlast == newfirst) + fprintf(rejfp, "--- %ld%s\n", newfirst, minuses); + else + fprintf(rejfp, "--- %ld,%ld%s\n", newfirst, + newlast, minuses); + break; + case '\n': + fprintf(rejfp, "%s", pfetch(i)); + break; + case ' ': + case '-': + case '+': + case '!': + fprintf(rejfp, "%c %s", pch_char(i), pfetch(i)); + break; + default: + fatal("fatal internal error in abort_context_hunk\n"); + } + } +} + +static void +rej_line(int ch, LINENUM i) +{ + size_t len; + const char *line = pfetch(i); + + len = strlen(line); + + fprintf(rejfp, "%c%s", ch, line); + if (len == 0 || line[len-1] != '\n') + fprintf(rejfp, "\n\\ No newline at end of file\n"); +} + +static void +abort_hunk(void) +{ + LINENUM i, j, split; + int ch1, ch2; + const LINENUM pat_end = pch_end(); + const LINENUM oldfirst = pch_first() + last_offset; + const LINENUM newfirst = pch_newfirst() + last_offset; 
+ + if (diff_type != UNI_DIFF) { + abort_context_hunk(); + return; + } + split = -1; + for (i = 0; i <= pat_end; i++) { + if (pch_char(i) == '=') { + split = i; + break; + } + } + if (split == -1) { + fprintf(rejfp, "malformed hunk: no split found\n"); + return; + } + i = 0; + j = split + 1; + fprintf(rejfp, "@@ -%ld,%ld +%ld,%ld @@\n", + pch_ptrn_lines() ? oldfirst : 0, + pch_ptrn_lines(), newfirst, pch_repl_lines()); + while (i < split || j <= pat_end) { + ch1 = i < split ? pch_char(i) : -1; + ch2 = j <= pat_end ? pch_char(j) : -1; + if (ch1 == '-') { + rej_line('-', i); + i++; + } else if (ch1 == ' ' && ch2 == ' ') { + rej_line(' ', i); + i++; + j++; + } else if (ch1 == '!' && ch2 == '!') { + while (i < split && ch1 == '!') { + rej_line('-', i); + i++; + ch1 = i < split ? pch_char(i) : -1; + } + while (j <= pat_end && ch2 == '!') { + rej_line('+', j); + j++; + ch2 = j <= pat_end ? pch_char(j) : -1; + } + } else if (ch1 == '*') { + i++; + } else if (ch2 == '+' || ch2 == ' ') { + rej_line(ch2, j); + j++; + } else { + fprintf(rejfp, "internal error on (%ld %ld %ld)\n", + i, split, j); + rej_line(ch1, i); + rej_line(ch2, j); + return; + } + } +} + +/* We found where to apply it (we hope), so do it. 
*/ + +static void +apply_hunk(LINENUM where) +{ + LINENUM old = 1; + const LINENUM lastline = pch_ptrn_lines(); + LINENUM new = lastline + 1; +#define OUTSIDE 0 +#define IN_IFNDEF 1 +#define IN_IFDEF 2 +#define IN_ELSE 3 + int def_state = OUTSIDE; + const LINENUM pat_end = pch_end(); + + where--; + while (pch_char(new) == '=' || pch_char(new) == '\n') + new++; + + while (old <= lastline) { + if (pch_char(old) == '-') { + copy_till(where + old - 1, false); + if (do_defines) { + if (def_state == OUTSIDE) { + fputs(not_defined, ofp); + def_state = IN_IFNDEF; + } else if (def_state == IN_IFDEF) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } + fputs(pfetch(old), ofp); + } + last_frozen_line++; + old++; + } else if (new > pat_end) { + break; + } else if (pch_char(new) == '+') { + copy_till(where + old - 1, false); + if (do_defines) { + if (def_state == IN_IFNDEF) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } else if (def_state == OUTSIDE) { + fputs(if_defined, ofp); + def_state = IN_IFDEF; + } + } + fputs(pfetch(new), ofp); + new++; + } else if (pch_char(new) != pch_char(old)) { + say("Out-of-sync patch, lines %ld,%ld--mangled text or line numbers, maybe?\n", + pch_hunk_beg() + old, + pch_hunk_beg() + new); +#ifdef DEBUGGING + say("oldchar = '%c', newchar = '%c'\n", + pch_char(old), pch_char(new)); +#endif + my_exit(2); + } else if (pch_char(new) == '!') { + copy_till(where + old - 1, false); + if (do_defines) { + fputs(not_defined, ofp); + def_state = IN_IFNDEF; + } + while (pch_char(old) == '!') { + if (do_defines) { + fputs(pfetch(old), ofp); + } + last_frozen_line++; + old++; + } + if (do_defines) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } + while (pch_char(new) == '!') { + fputs(pfetch(new), ofp); + new++; + } + } else { + if (pch_char(new) != ' ') + fatal("Internal error: expected ' '\n"); + old++; + new++; + if (do_defines && def_state != OUTSIDE) { + fputs(end_defined, ofp); + def_state = OUTSIDE; + } + } + } + if (new <= 
pat_end && pch_char(new) == '+') { + copy_till(where + old - 1, false); + if (do_defines) { + if (def_state == OUTSIDE) { + fputs(if_defined, ofp); + def_state = IN_IFDEF; + } else if (def_state == IN_IFNDEF) { + fputs(else_defined, ofp); + def_state = IN_ELSE; + } + } + while (new <= pat_end && pch_char(new) == '+') { + fputs(pfetch(new), ofp); + new++; + } + } + if (do_defines && def_state != OUTSIDE) { + fputs(end_defined, ofp); + } +} + +/* + * Open the new file. + */ +static void +init_output(const char *name) +{ + ofp = fopen(name, "w"); + if (ofp == NULL) + pfatal("can't create %s", name); +} + +/* + * Open a file to put hunks we can't locate. + */ +static void +init_reject(const char *name) +{ + rejfp = fopen(name, "w"); + if (rejfp == NULL) + pfatal("can't create %s", name); +} + +/* + * Copy input file to output, up to wherever hunk is to be applied. + * If endoffile is true, treat the last line specially since it may + * lack a newline. + */ +static void +copy_till(LINENUM lastline, bool endoffile) +{ + if (last_frozen_line > lastline) + fatal("misordered hunks! output would be garbled\n"); + while (last_frozen_line < lastline) { + if (++last_frozen_line == lastline && endoffile) + dump_line(last_frozen_line, !last_line_missing_eol); + else + dump_line(last_frozen_line, true); + } +} + +/* + * Finish copying the input file to the output file. + */ +static bool +spew_output(void) +{ + int rv; + +#ifdef DEBUGGING + if (debug & 256) + say("il=%ld lfl=%ld\n", input_lines, last_frozen_line); +#endif + if (input_lines) + copy_till(input_lines, true); /* dump remainder of file */ + rv = ferror(ofp) == 0 && fclose(ofp) == 0; + ofp = NULL; + return rv; +} + +/* + * Copy one line from input to output. + */ +static void +dump_line(LINENUM line, bool write_newline) +{ + char *s; + + s = ifetch(line, 0); + if (s == NULL) + return; + /* Note: string is not NUL terminated. 
*/ + for (; *s != '\n'; s++) + putc(*s, ofp); + if (write_newline) + putc('\n', ofp); +} + +/* + * Does the patch pattern match at line base+offset? + */ +static bool +patch_match(LINENUM base, LINENUM offset, LINENUM fuzz) +{ + LINENUM pline = 1 + fuzz; + LINENUM iline; + LINENUM pat_lines = pch_ptrn_lines() - fuzz; + const char *ilineptr; + const char *plineptr; + short plinelen; + + for (iline = base + offset + fuzz; pline <= pat_lines; pline++, iline++) { + ilineptr = ifetch(iline, offset >= 0); + if (ilineptr == NULL) + return false; + plineptr = pfetch(pline); + plinelen = pch_line_len(pline); + if (canonicalize) { + if (!similar(ilineptr, plineptr, plinelen)) + return false; + } else if (strnNE(ilineptr, plineptr, plinelen)) + return false; + if (iline == input_lines) { + /* + * We are looking at the last line of the file. + * If the file has no eol, the patch line should + * not have one either and vice-versa. Note that + * plinelen > 0. + */ + if (last_line_missing_eol) { + if (plineptr[plinelen - 1] == '\n') + return false; + } else { + if (plineptr[plinelen - 1] != '\n') + return false; + } + } + } + return true; +} + +/* + * Do two lines match with canonicalized white space? + */ +static bool +similar(const char *a, const char *b, int len) +{ + while (len) { + if (isspace((unsigned char)*b)) { /* whitespace (or \n) to match? */ + if (!isspace((unsigned char)*a)) /* no corresponding whitespace? 
*/ + return false; + while (len && isspace((unsigned char)*b) && *b != '\n') + b++, len--; /* skip pattern whitespace */ + while (isspace((unsigned char)*a) && *a != '\n') + a++; /* skip target whitespace */ + if (*a == '\n' || *b == '\n') + return (*a == *b); /* should end in sync */ + } else if (*a++ != *b++) /* match non-whitespace chars */ + return false; + else + len--; /* probably not necessary */ + } + return true; /* actually, this is not reached */ + /* since there is always a \n */ +} diff --git a/patch/pathnames.h b/patch/pathnames.h @@ -0,0 +1,12 @@ +/*- + * Placed in the public domain by Todd C. Miller <Todd.Miller@courtesan.com> + * on July 29, 2003. + * + * $OpenBSD: pathnames.h,v 1.1 2003/07/29 20:10:17 millert Exp $ + * $FreeBSD$ + */ + + +#include <paths.h> + +#define _PATH_ED "/bin/ed" diff --git a/patch/pch.c b/patch/pch.c @@ -0,0 +1,1596 @@ + +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: pch.c,v 1.39 2012/04/11 08:07:13 ajacoutot Exp $ + * $FreeBSD$ + */ + +#include <sys/types.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <libgen.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include "common.h" +#include "util.h" +#include "pch.h" +#include "pathnames.h" + +/* Patch (diff listing) abstract type. */ + +static long p_filesize; /* size of the patch file */ +static LINENUM p_first; /* 1st line number */ +static LINENUM p_newfirst; /* 1st line number of replacement */ +static LINENUM p_ptrn_lines; /* # lines in pattern */ +static LINENUM p_repl_lines; /* # lines in replacement text */ +static LINENUM p_end = -1; /* last line in hunk */ +static LINENUM p_max; /* max allowed value of p_end */ +static LINENUM p_context = 3; /* # of context lines */ +static LINENUM p_input_line = 0; /* current line # from patch file */ +static char **p_line = NULL;/* the text of the hunk */ +static short *p_len = NULL; /* length of each line */ +static char *p_char = NULL; /* +, -, and ! 
*/ +static int hunkmax = INITHUNKMAX; /* size of above arrays to begin with */ +static int p_indent; /* indent to patch */ +static LINENUM p_base; /* where to intuit this time */ +static LINENUM p_bline; /* line # of p_base */ +static LINENUM p_start; /* where intuit found a patch */ +static LINENUM p_sline; /* and the line number for it */ +static LINENUM p_hunk_beg; /* line number of current hunk */ +static LINENUM p_efake = -1; /* end of faked up lines--don't free */ +static LINENUM p_bfake = -1; /* beg of faked up lines */ +static FILE *pfp = NULL; /* patch file pointer */ +static char *bestguess = NULL; /* guess at correct filename */ + +static void grow_hunkmax(void); +static int intuit_diff_type(void); +static void next_intuit_at(LINENUM, LINENUM); +static void skip_to(LINENUM, LINENUM); +static size_t pgets(bool _do_indent); +static char *best_name(const struct file_name *, bool); +static char *posix_name(const struct file_name *, bool); +static size_t num_components(const char *); + +/* + * Prepare to look for the next patch in the patch file. + */ +void +re_patch(void) +{ + p_first = 0; + p_newfirst = 0; + p_ptrn_lines = 0; + p_repl_lines = 0; + p_end = (LINENUM) - 1; + p_max = 0; + p_indent = 0; +} + +/* + * Open the patch file at the beginning of time. 
+ */ +void +open_patch_file(const char *filename) +{ + struct stat filestat; + int nr, nw; + + if (filename == NULL || *filename == '\0' || strEQ(filename, "-")) { + pfp = fopen(TMPPATNAME, "w"); + if (pfp == NULL) + pfatal("can't create %s", TMPPATNAME); + while ((nr = fread(buf, 1, buf_size, stdin)) > 0) { + nw = fwrite(buf, 1, nr, pfp); + if (nr != nw) + pfatal("write error to %s", TMPPATNAME); + } + if (ferror(pfp) || fclose(pfp)) + pfatal("can't write %s", TMPPATNAME); + filename = TMPPATNAME; + } + pfp = fopen(filename, "r"); + if (pfp == NULL) + pfatal("patch file %s not found", filename); + fstat(fileno(pfp), &filestat); + p_filesize = filestat.st_size; + next_intuit_at(0L, 1L); /* start at the beginning */ + set_hunkmax(); +} + +/* + * Make sure our dynamically realloced tables are malloced to begin with. + */ +void +set_hunkmax(void) +{ + if (p_line == NULL) + p_line = calloc((size_t) hunkmax, sizeof(char *)); + if (p_len == NULL) + p_len = calloc((size_t) hunkmax, sizeof(short)); + if (p_char == NULL) + p_char = calloc((size_t) hunkmax, sizeof(char)); +} + +/* + * Enlarge the arrays containing the current hunk of patch. 
+ */ +static void +grow_hunkmax(void) +{ + int new_hunkmax; + char **new_p_line; + short *new_p_len; + char *new_p_char; + + new_hunkmax = hunkmax * 2; + + if (p_line == NULL || p_len == NULL || p_char == NULL) + fatal("Internal memory allocation error\n"); + + new_p_line = realloc(p_line, new_hunkmax * sizeof(char *)); + if (new_p_line == NULL) + free(p_line); + + new_p_len = realloc(p_len, new_hunkmax * sizeof(short)); + if (new_p_len == NULL) + free(p_len); + + new_p_char = realloc(p_char, new_hunkmax * sizeof(char)); + if (new_p_char == NULL) + free(p_char); + + p_char = new_p_char; + p_len = new_p_len; + p_line = new_p_line; + + if (p_line != NULL && p_len != NULL && p_char != NULL) { + hunkmax = new_hunkmax; + return; + } + + if (!using_plan_a) + fatal("out of memory\n"); + out_of_mem = true; /* whatever is null will be allocated again */ + /* from within plan_a(), of all places */ +} + +/* True if the remainder of the patch file contains a diff of some sort. */ + +bool +there_is_another_patch(void) +{ + bool exists = false; + + if (p_base != 0L && p_base >= p_filesize) { + if (verbose) + say("done\n"); + return false; + } + if (verbose) + say("Hmm..."); + diff_type = intuit_diff_type(); + if (!diff_type) { + if (p_base != 0L) { + if (verbose) + say(" Ignoring the trailing garbage.\ndone\n"); + } else + say(" I can't seem to find a patch in there anywhere.\n"); + return false; + } + if (verbose) + say(" %sooks like %s to me...\n", + (p_base == 0L ? "L" : "The next patch l"), + diff_type == UNI_DIFF ? "a unified diff" : + diff_type == CONTEXT_DIFF ? "a context diff" : + diff_type == NEW_CONTEXT_DIFF ? "a new-style context diff" : + diff_type == NORMAL_DIFF ? "a normal diff" : + "an ed script"); + if (p_indent && verbose) + say("(Patch is indented %d space%s.)\n", p_indent, + p_indent == 1 ? "" : "s"); + skip_to(p_start, p_sline); + while (filearg[0] == NULL) { + if (force || batch) { + say("No file to patch. 
Skipping...\n"); + filearg[0] = savestr(bestguess); + skip_rest_of_patch = true; + return true; + } + ask("File to patch: "); + if (*buf != '\n') { + free(bestguess); + bestguess = savestr(buf); + filearg[0] = fetchname(buf, &exists, 0); + } + if (!exists) { + ask("No file found--skip this patch? [n] "); + if (*buf != 'y') + continue; + if (verbose) + say("Skipping patch...\n"); + free(filearg[0]); + filearg[0] = fetchname(bestguess, &exists, 0); + skip_rest_of_patch = true; + return true; + } + } + return true; +} + +static void +p4_fetchname(struct file_name *name, char *str) +{ + char *t, *h; + + /* Skip leading whitespace. */ + while (isspace((unsigned char)*str)) + str++; + + /* Remove the file revision number. */ + for (t = str, h = NULL; *t != '\0' && !isspace((unsigned char)*t); t++) + if (*t == '#') + h = t; + if (h != NULL) + *h = '\0'; + + name->path = fetchname(str, &name->exists, strippath); +} + +/* Determine what kind of diff is in the remaining part of the patch file. */ + +static int +intuit_diff_type(void) +{ + long this_line = 0, previous_line; + long first_command_line = -1; + LINENUM fcl_line = -1; + bool last_line_was_command = false, this_is_a_command = false; + bool stars_last_line = false, stars_this_line = false; + char *s, *t; + int indent, retval; + struct file_name names[MAX_FILE]; + + memset(names, 0, sizeof(names)); + ok_to_create_file = false; + fseek(pfp, p_base, SEEK_SET); + p_input_line = p_bline - 1; + for (;;) { + previous_line = this_line; + last_line_was_command = this_is_a_command; + stars_last_line = stars_this_line; + this_line = ftell(pfp); + indent = 0; + p_input_line++; + if (pgets(false) == 0) { + if (first_command_line >= 0L) { + /* nothing but deletes!? 
*/ + p_start = first_command_line; + p_sline = fcl_line; + retval = ED_DIFF; + goto scan_exit; + } else { + p_start = this_line; + p_sline = p_input_line; + retval = 0; + goto scan_exit; + } + } + for (s = buf; *s == ' ' || *s == '\t' || *s == 'X'; s++) { + if (*s == '\t') + indent += 8 - (indent % 8); + else + indent++; + } + for (t = s; isdigit((unsigned char)*t) || *t == ','; t++) + ; + this_is_a_command = (isdigit((unsigned char)*s) && + (*t == 'd' || *t == 'c' || *t == 'a')); + if (first_command_line < 0L && this_is_a_command) { + first_command_line = this_line; + fcl_line = p_input_line; + p_indent = indent; /* assume this for now */ + } + if (!stars_last_line && strnEQ(s, "*** ", 4)) + names[OLD_FILE].path = fetchname(s + 4, + &names[OLD_FILE].exists, strippath); + else if (strnEQ(s, "--- ", 4)) + names[NEW_FILE].path = fetchname(s + 4, + &names[NEW_FILE].exists, strippath); + else if (strnEQ(s, "+++ ", 4)) + /* pretend it is the old name */ + names[OLD_FILE].path = fetchname(s + 4, + &names[OLD_FILE].exists, strippath); + else if (strnEQ(s, "Index:", 6)) + names[INDEX_FILE].path = fetchname(s + 6, + &names[INDEX_FILE].exists, strippath); + else if (strnEQ(s, "Prereq:", 7)) { + for (t = s + 7; isspace((unsigned char)*t); t++) + ; + revision = savestr(t); + for (t = revision; *t && !isspace((unsigned char)*t); t++) + ; + *t = '\0'; + if (*revision == '\0') { + free(revision); + revision = NULL; + } + } else if (strnEQ(s, "==== ", 5)) { + /* Perforce-style diffs. 
*/ + if ((t = strstr(s + 5, " - ")) != NULL) + p4_fetchname(&names[NEW_FILE], t + 3); + p4_fetchname(&names[OLD_FILE], s + 5); + } + if ((!diff_type || diff_type == ED_DIFF) && + first_command_line >= 0L && + strEQ(s, ".\n")) { + p_indent = indent; + p_start = first_command_line; + p_sline = fcl_line; + retval = ED_DIFF; + goto scan_exit; + } + if ((!diff_type || diff_type == UNI_DIFF) && strnEQ(s, "@@ -", 4)) { + if (strnEQ(s + 4, "0,0", 3)) + ok_to_create_file = true; + p_indent = indent; + p_start = this_line; + p_sline = p_input_line; + retval = UNI_DIFF; + goto scan_exit; + } + stars_this_line = strnEQ(s, "********", 8); + if ((!diff_type || diff_type == CONTEXT_DIFF) && stars_last_line && + strnEQ(s, "*** ", 4)) { + if (atol(s + 4) == 0) + ok_to_create_file = true; + /* + * If this is a new context diff the character just + * before the newline is a '*'. + */ + while (*s != '\n') + s++; + p_indent = indent; + p_start = previous_line; + p_sline = p_input_line - 1; + retval = (*(s - 1) == '*' ? 
NEW_CONTEXT_DIFF : CONTEXT_DIFF); + goto scan_exit; + } + if ((!diff_type || diff_type == NORMAL_DIFF) && + last_line_was_command && + (strnEQ(s, "< ", 2) || strnEQ(s, "> ", 2))) { + p_start = previous_line; + p_sline = p_input_line - 1; + p_indent = indent; + retval = NORMAL_DIFF; + goto scan_exit; + } + } +scan_exit: + if (retval == UNI_DIFF) { + /* unswap old and new */ + struct file_name tmp = names[OLD_FILE]; + names[OLD_FILE] = names[NEW_FILE]; + names[NEW_FILE] = tmp; + } + if (filearg[0] == NULL) { + if (posix) + filearg[0] = posix_name(names, ok_to_create_file); + else { + /* Ignore the Index: name for context diffs, like GNU */ + if (names[OLD_FILE].path != NULL || + names[NEW_FILE].path != NULL) { + free(names[INDEX_FILE].path); + names[INDEX_FILE].path = NULL; + } + filearg[0] = best_name(names, ok_to_create_file); + } + } + + free(bestguess); + bestguess = NULL; + if (filearg[0] != NULL) + bestguess = savestr(filearg[0]); + else if (!ok_to_create_file) { + /* + * We don't want to create a new file but we need a + * filename to set bestguess. Avoid setting filearg[0] + * so the file is not created automatically. + */ + if (posix) + bestguess = posix_name(names, true); + else + bestguess = best_name(names, true); + } + free(names[OLD_FILE].path); + free(names[NEW_FILE].path); + free(names[INDEX_FILE].path); + return retval; +} + +/* + * Remember where this patch ends so we know where to start up again. + */ +static void +next_intuit_at(LINENUM file_pos, LINENUM file_line) +{ + p_base = file_pos; + p_bline = file_line; +} + +/* + * Basically a verbose fseek() to the actual diff listing. 
+ */ +static void +skip_to(LINENUM file_pos, LINENUM file_line) +{ + size_t len; + + if (p_base > file_pos) + fatal("Internal error: seek %ld>%ld\n", p_base, file_pos); + if (verbose && p_base < file_pos) { + fseek(pfp, p_base, SEEK_SET); + say("The text leading up to this was:\n--------------------------\n"); + while (ftell(pfp) < file_pos) { + len = pgets(false); + if (len == 0) + fatal("Unexpected end of file\n"); + say("|%s", buf); + } + say("--------------------------\n"); + } else + fseek(pfp, file_pos, SEEK_SET); + p_input_line = file_line - 1; +} + +/* Make this a function for better debugging. */ +static void +malformed(void) +{ + fatal("malformed patch at line %ld: %s", p_input_line, buf); + /* about as informative as "Syntax error" in C */ +} + +/* + * True if the line has been discarded (i.e. it is a line saying + * "\ No newline at end of file".) + */ +static bool +remove_special_line(void) +{ + int c; + + c = fgetc(pfp); + if (c == '\\') { + do { + c = fgetc(pfp); + } while (c != EOF && c != '\n'); + + return true; + } + if (c != EOF) + fseek(pfp, -1L, SEEK_CUR); + + return false; +} + +/* + * True if there is more of the current diff listing to process. + */ +bool +another_hunk(void) +{ + long line_beginning; /* file pos of the current line */ + LINENUM repl_beginning; /* index of --- line */ + LINENUM fillcnt; /* #lines of missing ptrn or repl */ + LINENUM fillsrc; /* index of first line to copy */ + LINENUM filldst; /* index of first missing line */ + bool ptrn_spaces_eaten; /* ptrn was slightly misformed */ + bool repl_could_be_missing; /* no + or ! 
lines in this hunk */ + bool repl_missing; /* we are now backtracking */ + long repl_backtrack_position; /* file pos of first repl line */ + LINENUM repl_patch_line; /* input line number for same */ + LINENUM ptrn_copiable; /* # of copiable lines in ptrn */ + char *s; + size_t len; + int context = 0; + + while (p_end >= 0) { + if (p_end == p_efake) + p_end = p_bfake; /* don't free twice */ + else + free(p_line[p_end]); + p_end--; + } + p_efake = -1; + + p_max = hunkmax; /* gets reduced when --- found */ + if (diff_type == CONTEXT_DIFF || diff_type == NEW_CONTEXT_DIFF) { + line_beginning = ftell(pfp); + repl_beginning = 0; + fillcnt = 0; + fillsrc = 0; + filldst = 0; + ptrn_spaces_eaten = false; + repl_could_be_missing = true; + repl_missing = false; + repl_backtrack_position = 0; + repl_patch_line = 0; + ptrn_copiable = 0; + + len = pgets(true); + p_input_line++; + if (len == 0 || strnNE(buf, "********", 8)) { + next_intuit_at(line_beginning, p_input_line); + return false; + } + p_context = 100; + p_hunk_beg = p_input_line + 1; + while (p_end < p_max) { + line_beginning = ftell(pfp); + len = pgets(true); + p_input_line++; + if (len == 0) { + if (p_max - p_end < 4) { + /* assume blank lines got chopped */ + (*(char *)strncpy(buf, " \n", buf_size)); + } else { + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + fatal("unexpected end of file in patch\n"); + } + } + p_end++; + if (p_end >= hunkmax) + fatal("Internal error: hunk larger than hunk " + "buffer size"); + p_char[p_end] = *buf; + p_line[p_end] = NULL; + switch (*buf) { + case '*': + if (strnEQ(buf, "********", 8)) { + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } else + fatal("unexpected end of hunk " + "at line %ld\n", + p_input_line); + } + if (p_end != 0) { + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + fatal("unexpected *** at line %ld: %s", + p_input_line, buf); + } + 
context = 0; + p_line[p_end] = savestr(buf); + if (out_of_mem) { + p_end--; + return false; + } + for (s = buf; *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + if (strnEQ(s, "0,0", 3)) + memmove(s, s + 2, strlen(s + 2) + 1); + p_first = (LINENUM) atol(s); + while (isdigit((unsigned char)*s)) + s++; + if (*s == ',') { + for (; *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + p_ptrn_lines = ((LINENUM) atol(s)) - p_first + 1; + } else if (p_first) + p_ptrn_lines = 1; + else { + p_ptrn_lines = 0; + p_first = 1; + } + + /* we need this much at least */ + p_max = p_ptrn_lines + 6; + while (p_max >= hunkmax) + grow_hunkmax(); + p_max = hunkmax; + break; + case '-': + if (buf[1] == '-') { + if (repl_beginning || + (p_end != p_ptrn_lines + 1 + + (p_char[p_end - 1] == '\n'))) { + if (p_end == 1) { + /* + * `old' lines were omitted; + * set up to fill them in + * from 'new' context lines. + */ + p_end = p_ptrn_lines + 1; + fillsrc = p_end + 1; + filldst = 1; + fillcnt = p_ptrn_lines; + } else { + if (repl_beginning) { + if (repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + fatal("duplicate \"---\" at line %ld--check line numbers at line %ld\n", + p_input_line, p_hunk_beg + repl_beginning); + } else { + fatal("%s \"---\" at line %ld--check line numbers at line %ld\n", + (p_end <= p_ptrn_lines + ? 
"Premature" + : "Overdue"), + p_input_line, p_hunk_beg); + } + } + } + repl_beginning = p_end; + repl_backtrack_position = ftell(pfp); + repl_patch_line = p_input_line; + p_line[p_end] = savestr(buf); + if (out_of_mem) { + p_end--; + return false; + } + p_char[p_end] = '='; + for (s = buf; *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + p_newfirst = (LINENUM) atol(s); + while (isdigit((unsigned char)*s)) + s++; + if (*s == ',') { + for (; *s && !isdigit((unsigned char)*s); s++) + ; + if (!*s) + malformed(); + p_repl_lines = ((LINENUM) atol(s)) - + p_newfirst + 1; + } else if (p_newfirst) + p_repl_lines = 1; + else { + p_repl_lines = 0; + p_newfirst = 1; + } + p_max = p_repl_lines + p_end; + if (p_max > MAXHUNKSIZE) + fatal("hunk too large (%ld lines) at line %ld: %s", + p_max, p_input_line, buf); + while (p_max >= hunkmax) + grow_hunkmax(); + if (p_repl_lines != ptrn_copiable && + (p_context != 0 || p_repl_lines != 1)) + repl_could_be_missing = false; + break; + } + goto change_line; + case '+': + case '!': + repl_could_be_missing = false; + change_line: + if (buf[1] == '\n' && canonicalize) + (*(char *)strncpy(buf + 1, " \n", buf_size - 1)); + if (!isspace((unsigned char)buf[1]) && buf[1] != '>' && + buf[1] != '<' && + repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + if (context >= 0) { + if (context < p_context) + p_context = context; + context = -1000; + } + p_line[p_end] = savestr(buf + 2); + if (out_of_mem) { + p_end--; + return false; + } + if (p_end == p_ptrn_lines) { + if (remove_special_line()) { + int l; + + l = strlen(p_line[p_end]) - 1; + (p_line[p_end])[l] = 0; + } + } + break; + case '\t': + case '\n': /* assume the 2 spaces got eaten */ + if (repl_beginning && repl_could_be_missing && + (!ptrn_spaces_eaten || + diff_type == NEW_CONTEXT_DIFF)) { + repl_missing = true; + goto hunk_done; + } + p_line[p_end] = savestr(buf); + if (out_of_mem) { + p_end--; + return false; + } + if (p_end != 
p_ptrn_lines + 1) { + ptrn_spaces_eaten |= (repl_beginning != 0); + context++; + if (!repl_beginning) + ptrn_copiable++; + p_char[p_end] = ' '; + } + break; + case ' ': + if (!isspace((unsigned char)buf[1]) && + repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + context++; + if (!repl_beginning) + ptrn_copiable++; + p_line[p_end] = savestr(buf + 2); + if (out_of_mem) { + p_end--; + return false; + } + break; + default: + if (repl_beginning && repl_could_be_missing) { + repl_missing = true; + goto hunk_done; + } + malformed(); + } + /* set up p_len for strncmp() so we don't have to */ + /* assume null termination */ + if (p_line[p_end]) + p_len[p_end] = strlen(p_line[p_end]); + else + p_len[p_end] = 0; + } + +hunk_done: + if (p_end >= 0 && !repl_beginning) + fatal("no --- found in patch at line %ld\n", pch_hunk_beg()); + + if (repl_missing) { + + /* reset state back to just after --- */ + p_input_line = repl_patch_line; + for (p_end--; p_end > repl_beginning; p_end--) + free(p_line[p_end]); + fseek(pfp, repl_backtrack_position, SEEK_SET); + + /* redundant 'new' context lines were omitted - set */ + /* up to fill them in from the old file context */ + if (!p_context && p_repl_lines == 1) { + p_repl_lines = 0; + p_max--; + } + fillsrc = 1; + filldst = repl_beginning + 1; + fillcnt = p_repl_lines; + p_end = p_max; + } else if (!p_context && fillcnt == 1) { + /* the first hunk was a null hunk with no context */ + /* and we were expecting one line -- fix it up. 
*/ + while (filldst < p_end) { + p_line[filldst] = p_line[filldst + 1]; + p_char[filldst] = p_char[filldst + 1]; + p_len[filldst] = p_len[filldst + 1]; + filldst++; + } +#if 0 + repl_beginning--; /* this doesn't need to be fixed */ +#endif + p_end--; + p_first++; /* do append rather than insert */ + fillcnt = 0; + p_ptrn_lines = 0; + } + if (diff_type == CONTEXT_DIFF && + (fillcnt || (p_first > 1 && ptrn_copiable > 2 * p_context))) { + if (verbose) + say("%s\n%s\n%s\n", + "(Fascinating--this is really a new-style context diff but without", + "the telltale extra asterisks on the *** line that usually indicate", + "the new style...)"); + diff_type = NEW_CONTEXT_DIFF; + } + /* if there were omitted context lines, fill them in now */ + if (fillcnt) { + p_bfake = filldst; /* remember where not to free() */ + p_efake = filldst + fillcnt - 1; + while (fillcnt-- > 0) { + while (fillsrc <= p_end && p_char[fillsrc] != ' ') + fillsrc++; + if (fillsrc > p_end) + fatal("replacement text or line numbers mangled in hunk at line %ld\n", + p_hunk_beg); + p_line[filldst] = p_line[fillsrc]; + p_char[filldst] = p_char[fillsrc]; + p_len[filldst] = p_len[fillsrc]; + fillsrc++; + filldst++; + } + while (fillsrc <= p_end && fillsrc != repl_beginning && + p_char[fillsrc] != ' ') + fillsrc++; +#ifdef DEBUGGING + if (debug & 64) + printf("fillsrc %ld, filldst %ld, rb %ld, e+1 %ld\n", + fillsrc, filldst, repl_beginning, p_end + 1); +#endif + if (fillsrc != p_end + 1 && fillsrc != repl_beginning) + malformed(); + if (filldst != p_end + 1 && filldst != repl_beginning) + malformed(); + } + if (p_line[p_end] != NULL) { + if (remove_special_line()) { + p_len[p_end] -= 1; + (p_line[p_end])[p_len[p_end]] = 0; + } + } + } else if (diff_type == UNI_DIFF) { + LINENUM fillold; /* index of old lines */ + LINENUM fillnew; /* index of new lines */ + char ch; + + line_beginning = ftell(pfp); /* file pos of the current line */ + len = pgets(true); + p_input_line++; + if (len == 0 || strnNE(buf, "@@ -", 4)) { 
+ next_intuit_at(line_beginning, p_input_line); + return false; + } + s = buf + 4; + if (!*s) + malformed(); + p_first = (LINENUM) atol(s); + while (isdigit((unsigned char)*s)) + s++; + if (*s == ',') { + p_ptrn_lines = (LINENUM) atol(++s); + while (isdigit((unsigned char)*s)) + s++; + } else + p_ptrn_lines = 1; + if (*s == ' ') + s++; + if (*s != '+' || !*++s) + malformed(); + p_newfirst = (LINENUM) atol(s); + while (isdigit((unsigned char)*s)) + s++; + if (*s == ',') { + p_repl_lines = (LINENUM) atol(++s); + while (isdigit((unsigned char)*s)) + s++; + } else + p_repl_lines = 1; + if (*s == ' ') + s++; + if (*s != '@') + malformed(); + if (!p_ptrn_lines) + p_first++; /* do append rather than insert */ + p_max = p_ptrn_lines + p_repl_lines + 1; + while (p_max >= hunkmax) + grow_hunkmax(); + fillold = 1; + fillnew = fillold + p_ptrn_lines; + p_end = fillnew + p_repl_lines; + snprintf(buf, buf_size, "*** %ld,%ld ****\n", p_first, + p_first + p_ptrn_lines - 1); + p_line[0] = savestr(buf); + if (out_of_mem) { + p_end = -1; + return false; + } + p_char[0] = '*'; + snprintf(buf, buf_size, "--- %ld,%ld ----\n", p_newfirst, + p_newfirst + p_repl_lines - 1); + p_line[fillnew] = savestr(buf); + if (out_of_mem) { + p_end = 0; + return false; + } + p_char[fillnew++] = '='; + p_context = 100; + context = 0; + p_hunk_beg = p_input_line + 1; + while (fillold <= p_ptrn_lines || fillnew <= p_end) { + line_beginning = ftell(pfp); + len = pgets(true); + p_input_line++; + if (len == 0) { + if (p_max - fillnew < 3) { + /* assume blank lines got chopped */ + (*(char *)strncpy(buf, " \n", buf_size)); + } else { + fatal("unexpected end of file in patch\n"); + } + } + if (*buf == '\t' || *buf == '\n') { + ch = ' '; /* assume the space got eaten */ + s = savestr(buf); + } else { + ch = *buf; + s = savestr(buf + 1); + } + if (out_of_mem) { + while (--fillnew > p_ptrn_lines) + free(p_line[fillnew]); + p_end = fillold - 1; + return false; + } + switch (ch) { + case '-': + if (fillold > 
p_ptrn_lines) { + free(s); + p_end = fillnew - 1; + malformed(); + } + p_char[fillold] = ch; + p_line[fillold] = s; + p_len[fillold++] = strlen(s); + if (fillold > p_ptrn_lines) { + if (remove_special_line()) { + p_len[fillold - 1] -= 1; + s[p_len[fillold - 1]] = 0; + } + } + break; + case '=': + ch = ' '; + /* FALL THROUGH */ + case ' ': + if (fillold > p_ptrn_lines) { + free(s); + while (--fillnew > p_ptrn_lines) + free(p_line[fillnew]); + p_end = fillold - 1; + malformed(); + } + context++; + p_char[fillold] = ch; + p_line[fillold] = s; + p_len[fillold++] = strlen(s); + s = savestr(s); + if (out_of_mem) { + while (--fillnew > p_ptrn_lines) + free(p_line[fillnew]); + p_end = fillold - 1; + return false; + } + if (fillold > p_ptrn_lines) { + if (remove_special_line()) { + p_len[fillold - 1] -= 1; + s[p_len[fillold - 1]] = 0; + } + } + /* FALL THROUGH */ + case '+': + if (fillnew > p_end) { + free(s); + while (--fillnew > p_ptrn_lines) + free(p_line[fillnew]); + p_end = fillold - 1; + malformed(); + } + p_char[fillnew] = ch; + p_line[fillnew] = s; + p_len[fillnew++] = strlen(s); + if (fillold > p_ptrn_lines) { + if (remove_special_line()) { + p_len[fillnew - 1] -= 1; + s[p_len[fillnew - 1]] = 0; + } + } + break; + default: + p_end = fillnew; + malformed(); + } + if (ch != ' ' && context > 0) { + if (context < p_context) + p_context = context; + context = -1000; + } + } /* while */ + } else { /* normal diff--fake it up */ + char hunk_type; + int i; + LINENUM min, max; + + line_beginning = ftell(pfp); + p_context = 0; + len = pgets(true); + p_input_line++; + if (len == 0 || !isdigit((unsigned char)*buf)) { + next_intuit_at(line_beginning, p_input_line); + return false; + } + p_first = (LINENUM) atol(buf); + for (s = buf; isdigit((unsigned char)*s); s++) + ; + if (*s == ',') { + p_ptrn_lines = (LINENUM) atol(++s) - p_first + 1; + while (isdigit((unsigned char)*s)) + s++; + } else + p_ptrn_lines = (*s != 'a'); + hunk_type = *s; + if (hunk_type == 'a') + p_first++; /* 
do append rather than insert */ + min = (LINENUM) atol(++s); + for (; isdigit((unsigned char)*s); s++) + ; + if (*s == ',') + max = (LINENUM) atol(++s); + else + max = min; + if (hunk_type == 'd') + min++; + p_end = p_ptrn_lines + 1 + max - min + 1; + if (p_end > MAXHUNKSIZE) + fatal("hunk too large (%ld lines) at line %ld: %s", + p_end, p_input_line, buf); + while (p_end >= hunkmax) + grow_hunkmax(); + p_newfirst = min; + p_repl_lines = max - min + 1; + snprintf(buf, buf_size, "*** %ld,%ld\n", p_first, + p_first + p_ptrn_lines - 1); + p_line[0] = savestr(buf); + if (out_of_mem) { + p_end = -1; + return false; + } + p_char[0] = '*'; + for (i = 1; i <= p_ptrn_lines; i++) { + len = pgets(true); + p_input_line++; + if (len == 0) + fatal("unexpected end of file in patch at line %ld\n", + p_input_line); + if (*buf != '<') + fatal("< expected at line %ld of patch\n", + p_input_line); + p_line[i] = savestr(buf + 2); + if (out_of_mem) { + p_end = i - 1; + return false; + } + p_len[i] = strlen(p_line[i]); + p_char[i] = '-'; + } + + if (remove_special_line()) { + p_len[i - 1] -= 1; + (p_line[i - 1])[p_len[i - 1]] = 0; + } + if (hunk_type == 'c') { + len = pgets(true); + p_input_line++; + if (len == 0) + fatal("unexpected end of file in patch at line %ld\n", + p_input_line); + if (*buf != '-') + fatal("--- expected at line %ld of patch\n", + p_input_line); + } + snprintf(buf, buf_size, "--- %ld,%ld\n", min, max); + p_line[i] = savestr(buf); + if (out_of_mem) { + p_end = i - 1; + return false; + } + p_char[i] = '='; + for (i++; i <= p_end; i++) { + len = pgets(true); + p_input_line++; + if (len == 0) + fatal("unexpected end of file in patch at line %ld\n", + p_input_line); + if (*buf != '>') + fatal("> expected at line %ld of patch\n", + p_input_line); + p_line[i] = savestr(buf + 2); + if (out_of_mem) { + p_end = i - 1; + return false; + } + p_len[i] = strlen(p_line[i]); + p_char[i] = '+'; + } + + if (remove_special_line()) { + p_len[i - 1] -= 1; + (p_line[i - 1])[p_len[i - 
1]] = 0; + } + } + if (reverse) /* backwards patch? */ + if (!pch_swap()) + say("Not enough memory to swap next hunk!\n"); +#ifdef DEBUGGING + if (debug & 2) { + int i; + char special; + + for (i = 0; i <= p_end; i++) { + if (i == p_ptrn_lines) + special = '^'; + else + special = ' '; + fprintf(stderr, "%3d %c %c %s", i, p_char[i], + special, p_line[i]); + fflush(stderr); + } + } +#endif + if (p_end + 1 < hunkmax)/* paranoia reigns supreme... */ + p_char[p_end + 1] = '^'; /* add a stopper for apply_hunk */ + return true; +} + +/* + * Input a line from the patch file. + * Worry about indentation if do_indent is true. + * The line is read directly into the buf global variable which + * is resized if necessary in order to hold the complete line. + * Returns the number of characters read including the terminating + * '\n', if any. + */ +size_t +pgets(bool do_indent) +{ + char *line; + size_t len; + int indent = 0, skipped = 0; + + line = fgetln(pfp, &len); + if (line != NULL) { + if (len + 1 > buf_size) { + while (len + 1 > buf_size) + buf_size *= 2; + free(buf); + buf = malloc(buf_size); + if (buf == NULL) + fatal("out of memory\n"); + } + if (do_indent == 1 && p_indent) { + for (; + indent < p_indent && (*line == ' ' || *line == '\t' || *line == 'X'); + line++, skipped++) { + if (*line == '\t') + indent += 8 - (indent %7); + else + indent++; + } + } + memcpy(buf, line, len - skipped); + buf[len - skipped] = '\0'; + } + return len; +} + + +/* + * Reverse the old and new portions of the current hunk. + */ +bool +pch_swap(void) +{ + char **tp_line; /* the text of the hunk */ + short *tp_len; /* length of each line */ + char *tp_char; /* +, -, and ! 
*/ + LINENUM i; + LINENUM n; + bool blankline = false; + char *s; + + i = p_first; + p_first = p_newfirst; + p_newfirst = i; + + /* make a scratch copy */ + + tp_line = p_line; + tp_len = p_len; + tp_char = p_char; + p_line = NULL; /* force set_hunkmax to allocate again */ + p_len = NULL; + p_char = NULL; + set_hunkmax(); + if (p_line == NULL || p_len == NULL || p_char == NULL) { + + free(p_line); + p_line = tp_line; + free(p_len); + p_len = tp_len; + free(p_char); + p_char = tp_char; + return false; /* not enough memory to swap hunk! */ + } + /* now turn the new into the old */ + + i = p_ptrn_lines + 1; + if (tp_char[i] == '\n') { /* account for possible blank line */ + blankline = true; + i++; + } + if (p_efake >= 0) { /* fix non-freeable ptr range */ + if (p_efake <= i) + n = p_end - i + 1; + else + n = -i; + p_efake += n; + p_bfake += n; + } + for (n = 0; i <= p_end; i++, n++) { + p_line[n] = tp_line[i]; + p_char[n] = tp_char[i]; + if (p_char[n] == '+') + p_char[n] = '-'; + p_len[n] = tp_len[i]; + } + if (blankline) { + i = p_ptrn_lines + 1; + p_line[n] = tp_line[i]; + p_char[n] = tp_char[i]; + p_len[n] = tp_len[i]; + n++; + } + if (p_char[0] != '=') + fatal("Malformed patch at line %ld: expected '=' found '%c'\n", + p_input_line, p_char[0]); + p_char[0] = '*'; + for (s = p_line[0]; *s; s++) + if (*s == '-') + *s = '*'; + + /* now turn the old into the new */ + + if (p_char[0] != '*') + fatal("Malformed patch at line %ld: expected '*' found '%c'\n", + p_input_line, p_char[0]); + tp_char[0] = '='; + for (s = tp_line[0]; *s; s++) + if (*s == '*') + *s = '-'; + for (i = 0; n <= p_end; i++, n++) { + p_line[n] = tp_line[i]; + p_char[n] = tp_char[i]; + if (p_char[n] == '-') + p_char[n] = '+'; + p_len[n] = tp_len[i]; + } + + if (i != p_ptrn_lines + 1) + fatal("Malformed patch at line %ld: expected %ld lines, " + "got %ld\n", + p_input_line, p_ptrn_lines + 1, i); + + i = p_ptrn_lines; + p_ptrn_lines = p_repl_lines; + p_repl_lines = i; + + free(tp_line); + free(tp_len); 
+ free(tp_char); + + return true; +} + +/* + * Return the specified line position in the old file of the old context. + */ +LINENUM +pch_first(void) +{ + return p_first; +} + +/* + * Return the number of lines of old context. + */ +LINENUM +pch_ptrn_lines(void) +{ + return p_ptrn_lines; +} + +/* + * Return the probable line position in the new file of the first line. + */ +LINENUM +pch_newfirst(void) +{ + return p_newfirst; +} + +/* + * Return the number of lines in the replacement text including context. + */ +LINENUM +pch_repl_lines(void) +{ + return p_repl_lines; +} + +/* + * Return the number of lines in the whole hunk. + */ +LINENUM +pch_end(void) +{ + return p_end; +} + +/* + * Return the number of context lines before the first changed line. + */ +LINENUM +pch_context(void) +{ + return p_context; +} + +/* + * Return the length of a particular patch line. + */ +short +pch_line_len(LINENUM line) +{ + return p_len[line]; +} + +/* + * Return the control character (+, -, *, !, etc) for a patch line. + */ +char +pch_char(LINENUM line) +{ + return p_char[line]; +} + +/* + * Return a pointer to a particular patch line. + */ +char * +pfetch(LINENUM line) +{ + return p_line[line]; +} + +/* + * Return where in the patch file this hunk began, for error messages. + */ +LINENUM +pch_hunk_beg(void) +{ + return p_hunk_beg; +} + +/* + * Apply an ed script by feeding ed itself. + */ +void +do_ed_script(void) +{ + char *t; + long beginning_of_this_line; + FILE *pipefp = NULL; + + if (!skip_rest_of_patch) { + if (copy_file(filearg[0], TMPOUTNAME) < 0) { + unlink(TMPOUTNAME); + fatal("can't create temp file %s", TMPOUTNAME); + } + snprintf(buf, buf_size, "%s%s%s", _PATH_ED, + verbose ? 
" " : " -s ", TMPOUTNAME); + pipefp = popen(buf, "w"); + } + for (;;) { + beginning_of_this_line = ftell(pfp); + if (pgets(true) == 0) { + next_intuit_at(beginning_of_this_line, p_input_line); + break; + } + p_input_line++; + for (t = buf; isdigit((unsigned char)*t) || *t == ','; t++) + ; + /* POSIX defines allowed commands as {a,c,d,i,s} */ + if (isdigit((unsigned char)*buf) && (*t == 'a' || *t == 'c' || + *t == 'd' || *t == 'i' || *t == 's')) { + if (pipefp != NULL) + fputs(buf, pipefp); + if (*t != 'd') { + while (pgets(true)) { + p_input_line++; + if (pipefp != NULL) + fputs(buf, pipefp); + if (strEQ(buf, ".\n")) + break; + } + } + } else { + next_intuit_at(beginning_of_this_line, p_input_line); + break; + } + } + if (pipefp == NULL) + return; + fprintf(pipefp, "w\n"); + fprintf(pipefp, "q\n"); + fflush(pipefp); + pclose(pipefp); + ignore_signals(); + if (!check_only) { + if (move_file(TMPOUTNAME, outname) < 0) { + toutkeep = true; + chmod(TMPOUTNAME, filemode); + } else + chmod(outname, filemode); + } + set_signals(1); +} + +/* + * Choose the name of the file to be patched based on POSIX rules. + * NOTE: the POSIX rules are amazingly stupid and we only follow them + * if the user specified --posix or set POSIXLY_CORRECT. + */ +static char * +posix_name(const struct file_name *names, bool assume_exists) +{ + char *path = NULL; + int i; + + /* + * POSIX states that the filename will be chosen from one + * of the old, new and index names (in that order) if + * the file exists relative to CWD after -p stripping. + */ + for (i = 0; i < MAX_FILE; i++) { + if (names[i].path != NULL && names[i].exists) { + path = names[i].path; + break; + } + } + if (path == NULL && !assume_exists) { + /* + * No files found, look for something we can checkout from + * RCS/SCCS dirs. Same order as above. + */ + for (i = 0; i < MAX_FILE; i++) { + if (names[i].path != NULL && + (path = checked_in(names[i].path)) != NULL) + break; + } + /* + * Still no match? 
Check to see if the diff could be creating + * a new file. + */ + if (path == NULL && ok_to_create_file && + names[NEW_FILE].path != NULL) + path = names[NEW_FILE].path; + } + + return path ? savestr(path) : NULL; +} + +static char * +compare_names(const struct file_name *names, bool assume_exists, int phase) +{ + size_t min_components, min_baselen, min_len, tmp; + char *best = NULL; + char *path; + int i; + + /* + * The "best" name is the one with the fewest number of path + * components, the shortest basename length, and the shortest + * overall length (in that order). We only use the Index: file + * if neither of the old or new files could be intuited from + * the diff header. + */ + min_components = min_baselen = min_len = SIZE_MAX; + for (i = INDEX_FILE; i >= OLD_FILE; i--) { + path = names[i].path; + if (path == NULL || + (phase == 1 && !names[i].exists && !assume_exists) || + (phase == 2 && checked_in(path) == NULL)) + continue; + if ((tmp = num_components(path)) > min_components) + continue; + if (tmp < min_components) { + min_components = tmp; + best = path; + } + if ((tmp = strlen(basename(path))) > min_baselen) + continue; + if (tmp < min_baselen) { + min_baselen = tmp; + best = path; + } + if ((tmp = strlen(path)) > min_len) + continue; + min_len = tmp; + best = path; + } + return best; +} + +/* + * Choose the name of the file to be patched based the "best" one + * available. + */ +static char * +best_name(const struct file_name *names, bool assume_exists) +{ + char *best; + + best = compare_names(names, assume_exists, 1); + if (best == NULL) { + best = compare_names(names, assume_exists, 2); + /* + * Still no match? Check to see if the diff could be creating + * a new file. + */ + if (best == NULL && ok_to_create_file && + names[NEW_FILE].path != NULL) + best = names[NEW_FILE].path; + } + + return best ? 
savestr(best) : NULL; +} + +static size_t +num_components(const char *path) +{ + size_t n; + const char *cp; + + for (n = 0, cp = path; (cp = strchr(cp, '/')) != NULL; n++, cp++) { + while (*cp == '/') + cp++; /* skip consecutive slashes */ + } + return n; +} diff --git a/patch/pch.h b/patch/pch.h @@ -0,0 +1,56 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: pch.h,v 1.9 2003/10/31 20:20:45 millert Exp $ + * $FreeBSD$ + */ + +#define OLD_FILE 0 +#define NEW_FILE 1 +#define INDEX_FILE 2 +#define MAX_FILE 3 + +struct file_name { + char *path; + bool exists; +}; + +void re_patch(void); +void open_patch_file(const char *); +void set_hunkmax(void); +bool there_is_another_patch(void); +bool another_hunk(void); +bool pch_swap(void); +char *pfetch(LINENUM); +short pch_line_len(LINENUM); +LINENUM pch_first(void); +LINENUM pch_ptrn_lines(void); +LINENUM pch_newfirst(void); +LINENUM pch_repl_lines(void); +LINENUM pch_end(void); +LINENUM pch_context(void); +LINENUM pch_hunk_beg(void); +char pch_char(LINENUM); +void do_ed_script(void); diff --git a/patch/util.c b/patch/util.c @@ -0,0 +1,432 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: util.c,v 1.35 2010/07/24 01:10:12 ray Exp $ + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <libgen.h> +#include <paths.h> +#include <signal.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> + +#include "common.h" +#include "util.h" +#include "backupfile.h" +#include "pathnames.h" + +/* Rename a file, copying it if necessary. */ + +int +move_file(const char *from, const char *to) +{ + int fromfd; + ssize_t i; + + /* to stdout? 
*/ + + if (strEQ(to, "-")) { +#ifdef DEBUGGING + if (debug & 4) + say("Moving %s to stdout.\n", from); +#endif + fromfd = open(from, O_RDONLY); + if (fromfd < 0) + pfatal("internal error, can't reopen %s", from); + while ((i = read(fromfd, buf, buf_size)) > 0) + if (write(STDOUT_FILENO, buf, i) != i) + pfatal("write failed"); + close(fromfd); + return 0; + } + if (backup_file(to) < 0) { + say("Can't backup %s, output is in %s: %s\n", to, from, + strerror(errno)); + return -1; + } +#ifdef DEBUGGING + if (debug & 4) + say("Moving %s to %s.\n", from, to); +#endif + if (rename(from, to) < 0) { + if (errno != EXDEV || copy_file(from, to) < 0) { + say("Can't create %s, output is in %s: %s\n", + to, from, strerror(errno)); + return -1; + } + } + return 0; +} + +/* Backup the original file. */ + +int +backup_file(const char *orig) +{ + struct stat filestat; + char bakname[MAXPATHLEN], *s, *simplename; + dev_t orig_device; + ino_t orig_inode; + + if (backup_type == none || stat(orig, &filestat) != 0) + return 0; /* nothing to do */ + /* + * If the user used zero prefixes or suffixes, then + * he doesn't want backups. Yet we have to remove + * orig to break possible hardlinks. + */ + if ((origprae && *origprae == 0) || *simple_backup_suffix == 0) { + unlink(orig); + return 0; + } + orig_device = filestat.st_dev; + orig_inode = filestat.st_ino; + + if (origprae) { + if ((*(char *)strncpy(bakname, origprae, sizeof(bakname))) >= sizeof(bakname) || + (*(char *)strncat(bakname, orig, sizeof(bakname))) >= sizeof(bakname)) + fatal("filename %s too long for buffer\n", origprae); + } else { + if ((s = find_backup_file_name(orig)) == NULL) + fatal("out of memory\n"); + if ((*(char *)strncpy(bakname, s, sizeof(bakname))) >= sizeof(bakname)) + fatal("filename %s too long for buffer\n", s); + free(s); + } + + if ((simplename = strrchr(bakname, '/')) != NULL) + simplename = simplename + 1; + else + simplename = bakname; + + /* + * Find a backup name that is not the same file. 
Change the + * first lowercase char into uppercase; if that isn't + * sufficient, chop off the first char and try again. + */ + while (stat(bakname, &filestat) == 0 && + orig_device == filestat.st_dev && orig_inode == filestat.st_ino) { + /* Skip initial non-lowercase chars. */ + for (s = simplename; *s && !islower((unsigned char)*s); s++) + ; + if (*s) + *s = toupper((unsigned char)*s); + else + memmove(simplename, simplename + 1, + strlen(simplename + 1) + 1); + } +#ifdef DEBUGGING + if (debug & 4) + say("Moving %s to %s.\n", orig, bakname); +#endif + if (rename(orig, bakname) < 0) { + if (errno != EXDEV || copy_file(orig, bakname) < 0) + return -1; + } + return 0; +} + +/* + * Copy a file. + */ +int +copy_file(const char *from, const char *to) +{ + int tofd, fromfd; + ssize_t i; + + tofd = open(to, O_CREAT|O_TRUNC|O_WRONLY, 0666); + if (tofd < 0) + return -1; + fromfd = open(from, O_RDONLY, 0); + if (fromfd < 0) + pfatal("internal error, can't reopen %s", from); + while ((i = read(fromfd, buf, buf_size)) > 0) + if (write(tofd, buf, i) != i) + pfatal("write to %s failed", to); + close(fromfd); + close(tofd); + return 0; +} + +/* + * Allocate a unique area for a string. + */ +char * +savestr(const char *s) +{ + char *rv; + + if (!s) + s = "Oops"; + rv = strdup(s); + if (rv == NULL) { + if (using_plan_a) + out_of_mem = true; + else + fatal("out of memory\n"); + } + return rv; +} + +/* + * Vanilla terminal output (buffered). + */ +void +say(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + fflush(stdout); +} + +/* + * Terminal output, pun intended. + */ +void +fatal(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + fprintf(stderr, "patch: **** "); + vfprintf(stderr, fmt, ap); + va_end(ap); + my_exit(2); +} + +/* + * Say something from patch, something from the system, then silence . . . + */ +void +pfatal(const char *fmt, ...) 
+{ + va_list ap; + int errnum = errno; + + fprintf(stderr, "patch: **** "); + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, ": %s\n", strerror(errnum)); + my_exit(2); +} + +/* + * Get a response from the user via /dev/tty + */ +void +ask(const char *fmt, ...) +{ + va_list ap; + ssize_t nr = 0; + static int ttyfd = -1; + + va_start(ap, fmt); + vfprintf(stdout, fmt, ap); + va_end(ap); + fflush(stdout); + if (ttyfd < 0) + ttyfd = open(_PATH_TTY, O_RDONLY); + if (ttyfd >= 0) { + if ((nr = read(ttyfd, buf, buf_size)) > 0 && + buf[nr - 1] == '\n') + buf[nr - 1] = '\0'; + } + if (ttyfd < 0 || nr <= 0) { + /* no tty or error reading, pretend user entered 'return' */ + putchar('\n'); + buf[0] = '\0'; + } +} + +/* + * How to handle certain events when not in a critical region. + */ +void +set_signals(int reset) +{ + static sig_t hupval, intval; + + if (!reset) { + hupval = signal(SIGHUP, SIG_IGN); + if (hupval != SIG_IGN) + hupval = my_exit; + intval = signal(SIGINT, SIG_IGN); + if (intval != SIG_IGN) + intval = my_exit; + } + signal(SIGHUP, hupval); + signal(SIGINT, intval); +} + +/* + * How to handle certain events when in a critical region. + */ +void +ignore_signals(void) +{ + signal(SIGHUP, SIG_IGN); + signal(SIGINT, SIG_IGN); +} + +/* + * Make sure we'll have the directories to create a file. If `striplast' is + * true, ignore the last element of `filename'. + */ + +void +makedirs(const char *filename, bool striplast) +{ + char *tmpbuf; + + if ((tmpbuf = strdup(filename)) == NULL) + fatal("out of memory\n"); + + if (striplast) { + char *s = strrchr(tmpbuf, '/'); + if (s == NULL) { + free(tmpbuf); + return; /* nothing to be done */ + } + *s = '\0'; + } + if (mkpath(tmpbuf) != 0) + pfatal("creation of %s failed", tmpbuf); + free(tmpbuf); +} + +/* + * Make filenames more reasonable. 
+ */ +char * +fetchname(const char *at, bool *exists, int strip_leading) +{ + char *fullname, *name, *t; + int sleading, tab; + struct stat filestat; + + if (at == NULL || *at == '\0') + return NULL; + while (isspace((unsigned char)*at)) + at++; +#ifdef DEBUGGING + if (debug & 128) + say("fetchname %s %d\n", at, strip_leading); +#endif + /* So files can be created by diffing against /dev/null. */ + if (strnEQ(at, _PATH_DEVNULL, sizeof(_PATH_DEVNULL) - 1)) + return NULL; + name = fullname = t = savestr(at); + + tab = strchr(t, '\t') != NULL; + /* Strip off up to `strip_leading' path components and NUL terminate. */ + for (sleading = strip_leading; *t != '\0' && ((tab && *t != '\t') || + !isspace((unsigned char)*t)); t++) { + if (t[0] == '/' && t[1] != '/' && t[1] != '\0') + if (--sleading >= 0) + name = t + 1; + } + *t = '\0'; + + /* + * If no -p option was given (957 is the default value!), we were + * given a relative pathname, and the leading directories that we + * just stripped off all exist, put them back on. + */ + if (strip_leading == 957 && name != fullname && *fullname != '/') { + name[-1] = '\0'; + if (stat(fullname, &filestat) == 0 && S_ISDIR(filestat.st_mode)) { + name[-1] = '/'; + name = fullname; + } + } + name = savestr(name); + free(fullname); + + *exists = stat(name, &filestat) == 0; + return name; +} + +/* + * Takes the name returned by fetchname and looks in RCS/SCCS directories + * for a checked in version. 
+ */ +char * +checked_in(char *file) +{ + char *filebase, *filedir, tmpbuf[MAXPATHLEN]; + struct stat filestat; + + filebase = basename(file); + filedir = dirname(file); + +#define try(f, a1, a2, a3) \ +(snprintf(tmpbuf, sizeof tmpbuf, f, a1, a2, a3), stat(tmpbuf, &filestat) == 0) + + if (try("%s/RCS/%s%s", filedir, filebase, RCSSUFFIX) || + try("%s/RCS/%s%s", filedir, filebase, "") || + try("%s/%s%s", filedir, filebase, RCSSUFFIX) || + try("%s/SCCS/%s%s", filedir, SCCSPREFIX, filebase) || + try("%s/%s%s", filedir, SCCSPREFIX, filebase)) + return file; + + return NULL; +} + +void +version(void) +{ + fprintf(stderr, "patch 2.0-12u10 FreeBSD\n"); + my_exit(EXIT_SUCCESS); +} + +/* + * Exit with cleanup. + */ +void +my_exit(int status) +{ + unlink(TMPINNAME); + if (!toutkeep) + unlink(TMPOUTNAME); + if (!trejkeep) + unlink(TMPREJNAME); + unlink(TMPPATNAME); + exit(status); +} diff --git a/patch/util.h b/patch/util.h @@ -0,0 +1,51 @@ +/*- + * Copyright 1986, Larry Wall + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following condition is met: + * 1. Redistributions of source code must retain the above copyright notice, + * this condition and the following disclaimer. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * patch - a program to apply diffs to original files + * + * -C option added in 1998, original code by Marc Espie, based on FreeBSD + * behaviour + * + * $OpenBSD: util.h,v 1.15 2005/06/20 07:14:06 otto Exp $ + * $FreeBSD$ + */ + +char *fetchname(const char *, bool *, int); +char *checked_in(char *); +int backup_file(const char *); +int move_file(const char *, const char *); +int copy_file(const char *, const char *); +void say(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +void fatal(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +void pfatal(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +void ask(const char *, ...) + __attribute__((__format__(__printf__, 1, 2))); +char *savestr(const char *); +void set_signals(int); +void ignore_signals(void); +void makedirs(const char *, bool); +void version(void); +void my_exit(int) __attribute__((noreturn)); + +/* in mkpath.c */ +extern int mkpath(char *); diff --git a/pgrep/mkfile b/pgrep/mkfile @@ -0,0 +1,11 @@ +BIN = pgrep +OBJ = pgrep.o +DEPS = libcommon + +<$mkbuild/mk.default + +INSTALL_BIN = pgrep +INSTALL_MAN1 = pgrep.1 +INSTALL_SYMLINK = \ + pgrep $BINDIR/pkill \ + pgrep.1 $MANDIR/man1/pkill.1 diff --git a/pgrep/pgrep.1 b/pgrep/pgrep.1 @@ -0,0 +1,258 @@ +.\" +.\" Copyright (c) 2003 Gunnar Ritter +.\" +.\" This software is provided 'as-is', without any express or implied +.\" warranty. 
In no event will the authors be held liable for any damages +.\" arising from the use of this software. +.\" +.\" Permission is granted to anyone to use this software for any purpose, +.\" including commercial applications, and to alter it and redistribute +.\" it freely, subject to the following restrictions: +.\" +.\" 1. The origin of this software must not be misrepresented; you must not +.\" claim that you wrote the original software. If you use this software +.\" in a product, an acknowledgment in the product documentation would be +.\" appreciated but is not required. +.\" +.\" 2. Altered source versions must be plainly marked as such, and must not be +.\" misrepresented as being the original software. +.\" +.\" 3. This notice may not be removed or altered from any source distribution. +.\" +.\" Sccsid @(#)pgrep.1 1.9 (gritter) 8/14/05 +.TH PGREP 1 "8/14/05" "Heirloom Toolchest" "User Commands" +.SH NAME +pgrep, pkill \- find or signal processes by name and other attributes +.SH SYNOPSIS +.HP +.ad l +\fBpgrep\fR [\fB\-flnovx\fR] [\fB\-d\ \fIdelim\fR] +[\fB\-P\ \fIppidlist\fR] [\fB\-g\ \fIpgrplist\fR] [\fB\-s\ \fIsidlist\fR] +[\fB\-u\ \fIeuidlist\fR] [\fB\-U\ \fIuidlist\fR] +[\fB\-G\ \fIgidlist\fR] [\fB\-t\ \fItermlist\fR] [\fIpattern\fR] +.HP +.ad l +\fBpkill\fR [\fB\-\fIsignal\fR] [\fB\-fnovx\fR] +[\fB\-P\ \fIppidlist\fR] [\fB\-g\ \fIpgrplist\fR] [\fB\-s\ \fIsidlist\fR] +[\fB\-u\ \fIeuidlist\fR] [\fB\-U\ \fIuidlist\fR] +[\fB\-G\ \fIgidlist\fR] [\fB\-t\ \fItermlist\fR] [\fIpattern\fR] +.br +.ad b +.SH DESCRIPTION +.I Pgrep +searches the currently active processes +for occurrences of the specified +.I pattern +and prints the process IDs of the matching ones. +.I Pattern +is treated as an extended regular expression +as described in +.IR egrep (1). +A number of options can be used in addition to the +.I pattern +(or without specifying +.IR pattern ) +to further restrict the set of matching processes. 
+Multiple criteria can be specified for each of these options, +separated by commas or blanks, +or by giving the option more than once. +In this case, +all processes that match any of the given criteria +are considered matches. +If more than one kind of criterion is specified, +a process must match each kind of criterion. +.PP +.I Pkill +is similar to +.IR pgrep , +but a signal (SIGTERM by default) +is sent to matching processes +instead of printing their process IDs. +The signal can be changed with the +\fB\-\fIsignal\fR +argument; +this argument must appear before all options +to be recognized. +.I Signal +can be either numeric +or symbolic with the SIG prefix omitted +(as in QUIT for SIGQUIT). +.PP +Zombie processes and the current +.I pgrep +or +.I pkill +process are never included. +.PP +Both commands accept the following options: +.TP 12 +\fB\-d \fIdelim\fR +Use the specified +.I delimiter +string to separate process IDs in output. +By default, a newline character is used. +This option is accepted by +.I pgrep +only. +.TP 12 +.B \-f +Use the command line arguments of each process +instead of the name of its executable file +for matching, +and, if +.B \-l +is also specified, +for printing. +.TP 12 +\fB\-g\fI pgrplist\fR +Restrict matches to processes +whose process group ID +appears in +.IR pgrplist . +If an ID is `0', +the process group ID of the current process is used. +.TP 12 +\fB\-G\fI gidlist\fR +Restrict matches to processes +whose real group ID +appears in +.IR gidlist . +.TP 12 +.B \-l +Print the command name in addition to the process ID. +This option is accepted by +.I pgrep +only. +.TP 12 +.B \-n +Select only the newest (most recently created) process +of all processes that matched the other criteria. +Cannot be combined with +.IR \-o . +.TP 12 +.B \-o +Select only the oldest (least recently created) process +of all processes that matched the other criteria. +Cannot be combined with +.IR \-n . 
+.TP 12 +\fB\-P\fI ppidlist\fR +Restrict matches to processes +whose parent ID appears in +.IR ppidlist . +.TP 12 +\fB\-s\fI sidlist\fR +Restrict matches to processes +that are members of a session given in +.IR sidlist . +If an ID is `0', +the session ID of the current process is used. +.TP 12 +\fB\-t\fI termlist\fR +Restrict matches to processes +that run on any controlling terminal given in +.IR termlist . +Terminal specifications have the format described in +.IR ps (1). +.TP 12 +\fB\-u\fI uidlist\fR +Restrict matches to processes +whose effective user ID +appears in +.IR uidlist . +.TP 12 +\fB\-U\fI uidlist\fR +Restrict matches to processes +whose real user ID +appears in +.IR uidlist . +.TP 12 +.B \-v +Reverse the match, +that is, +select all processes that fail to fulfill the given criteria. +.TP 12 +.B \-x +Require the entire process name to be matched, +as if +.I pattern +was surrounded by `^( )$'. +.SH EXAMPLES +.PP +Determine the process ID of the +.I inetd +process: +.RS +.sp +pgrep \-x inetd +.sp +.RE +Print all processes that have a first argument starting with +.I /bin/ +along with their arguments: +.RS +.sp +pgrep \-l \-f \'^/bin/\' +.sp +.RE +Send a SIGHUP signal to all processes that are owned by either the +.I root +or the +.I daemon +user and are children of the +.I init +process +(process ID 1): +.RS +.sp +pkill \-HUP \-u root,daemon \-P 1 +.sp +.RE +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in extended regular expressions. +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters and +the availability and composition of character classes +in extended regular expressions. 
+.SH "SEE ALSO" +egrep(1), +ps(1), +locale(7) +.SH DIAGNOSTICS +.I Pgrep +and +.I Pkill +exit with +.TP +0 +if matching processes were found, +and the \fI\-v\fR option was not specified; +.br +.ns +.TP +1 +if no matching processes were found, +or the \fI\-v\fR option was specified without any matches; +.br +.ns +.TP +2 +if an invalid command line argument was found; +.br +.ns +.TP +3 +on fatal errors. +.SH NOTES +Command names are limited to 18 characters, +process arguments to 80 characters; +excess characters are stripped. diff --git a/pgrep/pgrep.c b/pgrep/pgrep.c @@ -0,0 +1,1748 @@ +/* + * pgrep, pkill - find or signal processes by name and other attributes + * + * Gunnar Ritter, Freiburg i. Br., Germany, March 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)pgrep.sl 1.25 (gritter) 12/16/07"; + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <alloca.h> +#include <dirent.h> +#include <limits.h> +#include <pwd.h> +#include <signal.h> +#include <grp.h> +#include <locale.h> +#include <ctype.h> +#include <regex.h> + +#if !defined (__linux__) && !defined (__NetBSD__) && !defined (__OpenBSD__) \ + && !defined (__APPLE__) +#if defined (__hpux) +#include <sys/param.h> +#include <sys/pstat.h> +#elif defined (_AIX) +#include <procinfo.h> +#define proc process +#else /* !__hpux, !_AIX */ +#ifdef sun +#define _STRUCTURED_PROC 1 +#endif /* sun */ +#include <sys/procfs.h> +#endif /* !__hpux, !_AIX */ +#endif /* !__linux__, !__NetBSD__, !__OpenBSD__ */ + +#if defined (__NetBSD__) || defined (__OpenBSD__) || defined (__APPLE__) +#ifndef __APPLE__ +#include <kvm.h> +#endif +#include <sys/param.h> +#include <sys/sysctl.h> +#if defined (__APPLE__) +#include <mach/mach_types.h> +#include <mach/task_info.h> +#endif /* __APPLE__ */ +#define proc process +#undef p_pgid +#define p_pgid p__pgid +#endif /* __NetBSD__, __OpenBSD__, __APPLE__ */ + +#ifndef PRNODEV +#define PRNODEV 0 +#endif + +#include <blank.h> + +#define PROCDIR "/proc" +#define eq(a, b) (strcmp(a, b) == 0) + +enum okay { + OKAY, + STOP +}; + +enum valtype { + VT_CHAR, + VT_INT, + VT_UINT, + VT_LONG, + VT_ULONG +}; + +union value { + char v_char; + int v_int; + unsigned int v_uint; + long v_long; + unsigned long v_ulong; +}; + +struct proc { + struct proc *p_nxt; /* next proc structure */ + pid_t p_pid; /* process id */ + char p_fname[19]; /* executable name */ + pid_t p_ppid; /* parent process id 
*/ + pid_t p_pgid; /* process group id */ + pid_t p_sid; /* session id */ + int p_ttydev; /* controlling terminal */ + char p_psargs[80]; /* process arguments */ + uid_t p_uid; /* real user id */ + uid_t p_euid; /* effective user id */ + gid_t p_gid; /* real group id */ + gid_t p_egid; /* effective group id */ + unsigned long p_start; /* start time (in jiffies except BSD) */ + unsigned long p_size; /* size in kilobytes */ + int p_match; /* matched this process */ +}; + +enum attype { + ATT_PPID, /* parent process id */ + ATT_PGRP, /* process group id */ + ATT_SID, /* sessiond id */ + ATT_EUID, /* effective user id */ + ATT_UID, /* real user id */ + ATT_GID, /* real group id */ + ATT_TTY, /* controlling terminal */ + ATT_ALL +}; + +struct attrib { + struct attrib *a_nxt; /* next element of list */ + enum attype a_type; /* type of attribute */ + long a_val; /* value of attribute */ +}; + +struct attlist { + struct attlist *al_nxt; /* next element of list */ + struct attrib *al_att; /* this attribute */ +}; + +static const char *progname; +static pid_t mypid; /* this instance's pid */ +static unsigned errcnt; /* error count */ +static int matched; /* a process matched */ +static int pkill; /* this is the pkill command */ +static int fflag; /* match against full command line */ +static int lflag; /* long output format */ +static int nflag; /* match newest process only */ +static int oflag; /* match oldest process only */ +static int vflag; /* reverse matching */ +static int xflag; /* match exact string */ +static int signo = SIGTERM; /* signal to send */ +static int need_euid_egid; /* need euid or egid */ +static struct attlist *attributes; /* required attributes */ +static struct proc *processes; /* collected processes */ +static regex_t *expression; /* regular expression to match */ +static const char *delimiter; /* delimiter string */ +static int prdelim; /* print a delimiter (not first proc) */ + +static int str_2_sig(const char *, int *); + +static void * 
+srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) { + write(2, "no memory\n", 10); + exit(3); + } + return p; +} + +static void * +smalloc(size_t nbytes) +{ + return srealloc(NULL, nbytes); +} + +static void * +scalloc(size_t nmemb, size_t size) +{ + void *p; + + if ((p = (void *)calloc(nmemb, size)) == NULL) { + write(2, "no memory\n", 10); + exit(3); + } + return p; +} + +#if !defined (__hpux) && !defined (_AIX) && !defined (__NetBSD__) \ + && !defined (__OpenBSD__) && !defined (__APPLE__) +static void +chdir_to_proc(void) +{ + static int fd = -1; + + if (fd == -1 && (fd = open(PROCDIR, O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s\n", progname, PROCDIR); + exit(3); + } + if (fchdir(fd) < 0) { + fprintf(stderr, "%s: cannot chdir to %s\n", progname, PROCDIR); + exit(3); + } +} + +static union value * +getval(char **listp, enum valtype type, int separator, int sep2) +{ + char *buf; + static union value v; + const char *cp, *op; + char *cq, *x; + + if (**listp == '\0') + return NULL; + op = *listp; + while (**listp != '\0') { + if ((separator==' ' ? isspace(**listp) : **listp == separator) + || **listp == sep2) + break; + (*listp)++; + } + buf = alloca(*listp - op + 1); + for (cp = op, cq = buf; cp < *listp; cp++, cq++) + *cq = *cp; + *cq = '\0'; + if (**listp) { + while ((separator == ' ' ? 
+ isspace(**listp) : **listp == separator) || + **listp == sep2) + (*listp)++; + } + switch (type) { + case VT_CHAR: + if (buf[1] != '\0') + return NULL; + v.v_char = buf[0]; + break; + case VT_INT: + v.v_int = strtol(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + case VT_UINT: + v.v_uint = strtoul(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + case VT_LONG: + v.v_long = strtol(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + case VT_ULONG: + v.v_ulong = strtoul(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + } + return &v; +} +#endif /* !__hpux, !_AIX, !__NetBSD__, !__OpenBSD__, !__APPLE__ */ + +static const char * +element(const char **listp, int override) +{ + static char *buf; + static size_t buflen; + const char *cp, *op; + char *cq; + size_t sz; + int stop = ','; + + if (**listp == '\0') + return NULL; + op = *listp; + while (**listp != '\0') { + if (**listp == override) + stop = '\0'; + if (stop != '\0' && (**listp == stop || isblank(**listp))) + break; + (*listp)++; + } + if (**listp == '\0') + return op; + if ((sz = *listp - op + 1) > buflen) { + buflen = sz; + buf = srealloc(buf, buflen); + } + for (cp = op, cq = buf; cp < *listp; cp++, cq++) + *cq = *cp; + *cq = '\0'; + if (**listp) { + while (**listp == stop || isblank(**listp)) + (*listp)++; + } + return buf; +} + +#if !defined (__hpux) && !defined (_AIX) && !defined (__NetBSD__) && \ + !defined (__OpenBSD__) && !defined (__APPLE__) + +#if defined (__linux__) || defined (__FreeBSD__) || defined (__DragonFly__) + +#define GETVAL_REQ(a) if ((v = getval(&cp, (a), ' ', 0)) == NULL) \ + return STOP + +#define GETVAL_OPT(a) if ((v = getval(&cp, (a), ' ', 0)) == NULL) \ + goto complete + +#define GETVAL_COMMA(a) if ((v = getval(&cp, (a), ' ', ',')) == NULL) \ + return STOP + +#endif /* __linux__ || __FreeBSD__ || __DragonFly__ */ + + +#if defined (__linux__) +static enum okay +getproc_stat(struct proc *p, pid_t expected_pid) +{ + static char *buf; + static size_t 
buflen; + union value *v; + FILE *fp; + char *cp, *cq, *ce; + size_t sz, sc; + + if ((fp = fopen("stat", "r")) == NULL) + return STOP; + for (cp = buf; ;) { + const unsigned chunk = 32; + + if (buflen < (sz = cp - buf + chunk)) { + sc = cp - buf; + buf = srealloc(buf, buflen = sz); + cp = &buf[sc]; + } + if ((sz = fread(cp, 1, chunk, fp)) < chunk) { + ce = &cp[sz - 1]; + break; + } + cp += chunk; + } + fclose(fp); + if (*ce != '\n') + return STOP; + *ce-- = '\0'; + cp = buf; + GETVAL_REQ(VT_INT); + if ((p->p_pid = v->v_int) != expected_pid) + return STOP; + if (*cp++ != '(') + return STOP; + for (cq = ce; cq >= cp && *cq != ')'; cq--); + if (cq < cp) + return STOP; + *cq = '\0'; + strncpy(p->p_fname, cp, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + cp = &cq[1]; + while (isspace(*cp)) + cp++; + GETVAL_REQ(VT_CHAR); + if (v->v_char == 'Z') + return STOP; + GETVAL_REQ(VT_INT); + p->p_ppid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_pgid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_sid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_ttydev = v->v_int; + GETVAL_REQ(VT_INT); + /* tty_pgrp not used */ + GETVAL_REQ(VT_ULONG); + /* flag not used */ + GETVAL_REQ(VT_ULONG); + /* min_flt */ + GETVAL_REQ(VT_ULONG); + /* cmin_flt */ + GETVAL_REQ(VT_ULONG); + /* maj_flt */ + GETVAL_REQ(VT_ULONG); + /* cmaj_flt */ + GETVAL_REQ(VT_ULONG); + /* time */ + GETVAL_REQ(VT_ULONG); + /* stime */ + GETVAL_REQ(VT_LONG); + /* ctime */ + GETVAL_REQ(VT_LONG); + /* cstime */ + GETVAL_REQ(VT_LONG); + /* priority */ + GETVAL_REQ(VT_LONG); + /* nice */ + GETVAL_REQ(VT_LONG); + /* timeout not used */ + GETVAL_REQ(VT_LONG); + /* it_real_value not used */ + GETVAL_REQ(VT_ULONG); + p->p_start = v->v_ulong; + GETVAL_REQ(VT_ULONG); + p->p_size = (v->v_ulong >> 10); + return OKAY; +} + +static enum okay +getproc_cmdline(struct proc *p) +{ + FILE *fp; + char *cp, *ce; + int hadzero = 0, c; + + if ((fp = fopen("cmdline", "r")) != NULL) { + cp = p->p_psargs; + ce = cp + sizeof p->p_psargs - 1; + 
while (cp < ce && (c = getc(fp)) != EOF) { + if (c != '\0') { + if (hadzero) { + *cp++ = ' '; + if (cp == ce) + break; + hadzero = 0; + } + *cp++ = c; + } else { + hadzero = 1; + } + } + *cp = '\0'; + fclose(fp); + } + if (*p->p_psargs == '\0' && p->p_size == 0) + strcpy(p->p_psargs, p->p_fname); + return OKAY; +} + +static enum okay +getproc_status(struct proc *p) +{ + char line[LINE_MAX]; + union value *v; + FILE *fp; + char *cp; + int scanr; + + if ((fp = fopen("status", "r")) == NULL) + return STOP; + scanr = 0; + while (fgets(line, sizeof line, fp) != NULL) { + if (strncmp(line, "Uid:", 4) == 0) { + cp = &line[4]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_uid = v->v_int; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_euid = v->v_int; + scanr++; + } else if (strncmp(line, "Gid:", 4) == 0) { + cp = &line[4]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_gid = v->v_int; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_egid = v->v_int; + scanr++; + } + } + fclose(fp); + if (scanr != 2) + return STOP; + return OKAY; +} + +static struct proc * +getproc(const char *dir, pid_t expected_pid) +{ + struct proc *p; + enum okay result; + + p = scalloc(1, sizeof *p); + if (chdir(dir) == 0) { + if ((result = getproc_stat(p, expected_pid)) == OKAY) + if ((result = getproc_cmdline(p)) == OKAY) + result = getproc_status(p); + chdir_to_proc(); + } else + result = STOP; + if (result == STOP) { + free(p); + return NULL; + } + return p; +} + +#elif defined (__FreeBSD__) || defined (__DragonFly__) + +static enum okay +getproc_status(struct proc *p, pid_t expected_pid) +{ + static char *buf; + static size_t buflen; + union value *v; + FILE *fp; + char *cp, *ce, *cq; + size_t sz, sc; + int mj, mi; + + if ((fp = fopen("status", "r")) == NULL) + 
return STOP; + for (cp = buf; ;) { + const unsigned chunk = 32; + + if (buflen < (sz = cp - buf + chunk)) { + sc = cp - buf; + buf = srealloc(buf, buflen = sz); + cp = &buf[sc]; + } + if ((sz = fread(cp, 1, chunk, fp)) < chunk) { + ce = &cp[sz - 1]; + break; + } + cp += chunk; + } + fclose(fp); + if (*ce != '\n') + return STOP; + *ce-- = '\0'; + cp = buf; + while (*cp != ' ') { + if (cp - buf < sizeof p->p_fname - 2) + p->p_fname[cp-buf] = *cp; + cp++; + } + if (cp - buf < sizeof p->p_fname - 1) + p->p_fname[cp-buf] = '\0'; + else + p->p_fname[sizeof p->p_fname - 1] = '\0'; + while (*cp == ' ') + cp++; + GETVAL_REQ(VT_INT); + p->p_pid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_ppid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_pgid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_sid = v->v_int; + if (isdigit(*cp)) { + GETVAL_COMMA(VT_INT); + mj = v->v_int; + GETVAL_REQ(VT_INT); + mi = v->v_int; + if (mj != -1 || mi != -1) + p->p_ttydev = makedev(mj, mi); + } else { + struct stat st; + char *dev; + cq = cp; + while (*cp != ' ') cp++; + *cp = '\0'; + dev = smalloc(cp - cq + 8); + strcpy(dev, "/dev/"); + strcpy(&dev[5], cq); + if (stat(dev, &st) < 0) + p->p_ttydev = PRNODEV; + else + p->p_ttydev = st.st_rdev; + free(dev); + *cp = ' '; + while (*cp == ' ') cp++; + } + while (*cp != ' ') cp++; while (*cp == ' ') cp++; + /* skip flags */ + GETVAL_COMMA(VT_LONG); + p->p_start = v->v_long; + GETVAL_REQ(VT_LONG); + /* skip microseconds */ + while (*cp != ' ') cp++; while (*cp == ' ') cp++; + /* skip user time */ + while (*cp != ' ') cp++; while (*cp == ' ') cp++; + /* skip system time */ + while (*cp != ' ') cp++; while (*cp == ' ') cp++; + /* skip wchan message */ + GETVAL_REQ(VT_INT); + p->p_euid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_uid = v->v_int; + GETVAL_COMMA(VT_INT); + p->p_gid = v->v_int; + GETVAL_COMMA(VT_INT); + p->p_egid = v->v_int; + return OKAY; +} + +static enum okay +getproc_cmdline(struct proc *p) +{ + FILE *fp; + char *cp, *ce; + int hadzero = 0, c; + + if ((fp = 
fopen("cmdline", "r")) != NULL) { + cp = p->p_psargs; + ce = cp + sizeof p->p_psargs - 1; + while (cp < ce && (c = getc(fp)) != EOF) { + if (c != '\0') { + if (hadzero) { + *cp++ = ' '; + if (cp == ce) + break; + hadzero = 0; + } + *cp++ = c; + } else { + hadzero = 1; + } + } + *cp = '\0'; + fclose(fp); + } + if (*p->p_psargs == '\0' && p->p_size == 0) + strcpy(p->p_psargs, p->p_fname); + return OKAY; +} + +static struct proc * +getproc(const char *dir, pid_t expected_pid) +{ + struct proc *p; + enum okay result; + + p = scalloc(1, sizeof *p); + if (chdir(dir) == 0) { + if ((result = getproc_status(p, expected_pid)) == OKAY) + result = getproc_cmdline(p); + chdir_to_proc(); + } else + result = STOP; + if (result == STOP) { + free(p); + return NULL; + } + return p; +} + +#else /* !__linux__, !__FreeBSD__, !__DragonFly__ */ + +static const char * +concat(const char *dir, const char *base) +{ + static char *name; + static long size; + long length; + char *np; + const char *cp; + + if ((length = strlen(dir) + strlen(base) + 2) > size) + name = srealloc(name, size = length); + np = name; + for (cp = dir; *cp; cp++) + *np++ = *cp; + *np++ = '/'; + for (cp = base; *cp; cp++) + *np++ = *cp; + *np = '\0'; + return name; +} + +static enum okay +getproc_psinfo(const char *dir, struct proc *p, pid_t expected_pid) +{ + FILE *fp; + struct psinfo pi; + + if ((fp = fopen(concat(dir, "psinfo"), "r")) == NULL) + return STOP; + if (fread(&pi, 1, sizeof pi, fp) != sizeof pi || + pi.pr_pid != expected_pid) { + fclose(fp); + return STOP; + } + fclose(fp); + p->p_pid = pi.pr_pid; + strncpy(p->p_fname, pi.pr_fname, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_ppid = pi.pr_ppid; + p->p_pgid = pi.pr_pgid; + p->p_sid = pi.pr_sid; + p->p_ttydev = pi.pr_ttydev; + strncpy(p->p_psargs, pi.pr_psargs, sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + p->p_uid = pi.pr_uid; + p->p_gid = pi.pr_gid; +#ifdef __sun + p->p_euid = pi.pr_euid; + p->p_egid = 
pi.pr_egid; +#endif /* __sun */ + p->p_start = pi.pr_start.tv_sec; + p->p_size = pi.pr_size; + return OKAY; +} + +#ifndef __sun +static enum okay +getproc_cred(const char *dir, struct proc *p) +{ + FILE *fp; + struct prcred pc; + + if ((fp = fopen(concat(dir, "cred"), "r")) == NULL) + return need_euid_egid ? STOP : OKAY; + if (fread(&pc, 1, sizeof pc, fp) != sizeof pc) { + fclose(fp); + return STOP; + } + fclose(fp); + p->p_euid = pc.pr_euid; + p->p_egid = pc.pr_egid; + return OKAY; +} +#endif /* !__sun */ + +static struct proc * +getproc(const char *dir, pid_t expected_pid) +{ + struct proc *p; + enum okay result; + + p = scalloc(1, sizeof *p); + result = getproc_psinfo(dir, p, expected_pid); +#ifndef __sun + if (result == OKAY) + result = getproc_cred(dir, p); +#endif /* !__sun */ + if (result == STOP) { + free(p); + return NULL; + } + return p; +} +#endif /* !__linux__ */ + +static void +collectprocs(void) +{ + struct proc *p, *pq = NULL; + DIR *Dp; + struct dirent *dp; + unsigned long val; + char *x; + + if ((Dp = opendir(".")) != NULL) { + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' 
&& + dp->d_name[2] == '\0'))) + continue; + val = strtoul(dp->d_name, &x, 10); + if (*x != 0) + continue; + if ((p = getproc(dp->d_name, val)) != NULL) { + if (pq) + pq->p_nxt = p; + else + processes = p; + pq = p; + } + } + closedir(Dp); + } +} + +#elif defined (__hpux) +static void +collectprocs(void) +{ +#define burst ((size_t)10) + struct proc *p, *pq = NULL; + struct pst_status pst[burst]; + int i, count; + int idx = 0; + + while ((count = pstat_getproc(pst, sizeof *pst, burst, idx)) > 0) { + for (i = 0; i < count; i++) { + p = scalloc(sizeof *p, 1); + if (pq) + pq->p_nxt = p; + else + processes = p; + pq = p; + p->p_pid = pst[i].pst_pid; + strncpy(p->p_fname, pst[i].pst_ucomm, + sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_ppid = pst[i].pst_ppid; + p->p_pgid = pst[i].pst_pgrp; + p->p_sid = pst[i].pst_sid; + if (pst[i].pst_term.psd_major != -1 || + pst[i].pst_term.psd_minor != -1) + p->p_ttydev = makedev(pst[i].pst_term.psd_major, + pst[i].pst_term.psd_minor); + strncpy(p->p_psargs, pst[i].pst_cmd, + sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + p->p_uid = pst[i].pst_uid; + p->p_euid = pst[i].pst_euid; + p->p_gid = pst[i].pst_gid; + p->p_egid = pst[i].pst_egid; + p->p_start = pst[i].pst_start; + p->p_size = pst[i].pst_dsize + pst[i].pst_tsize + + pst[i].pst_ssize; + } + idx = pst[count-1].pst_idx + 1; + } +} +#elif defined (_AIX) +static void +oneproc(struct proc *p, struct procentry64 *pi) +{ + char args[100], *ap, *cp; + + p->p_pid = pi->pi_pid; + strncpy(p->p_fname, pi->pi_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_ppid = pi->pi_ppid; + p->p_pgid = pi->pi_pgrp; + p->p_sid = pi->pi_sid; + p->p_ttydev = pi->pi_ttyp ? 
pi->pi_ttyd : PRNODEV; + p->p_uid = pi->pi_uid; + p->p_euid = pi->pi_cred.crx_uid; + p->p_gid = pi->pi_cred.crx_rgid; + p->p_egid = pi->pi_cred.crx_gid; + p->p_start = pi->pi_start; + p->p_size = pi->pi_size; + if (getargs(pi, sizeof *pi, args, sizeof args) == 0) { + ap = args; + cp = p->p_psargs; + while (cp < &p->p_psargs[sizeof p->p_psargs - 1]) { + if (ap[0] == '\0') { + if (ap[1] == '\0') + break; + *cp++ = ' '; + } else + *cp++ = *ap; + ap++; + } + *cp = '\0'; + } +} + +static void +collectprocs(void) +{ +#define burst ((size_t)10) + struct proc *p, *pq = NULL; + struct procentry64 pi[burst]; + pid_t idx = 0; + int i, count; + + while ((count = getprocs64(pi, sizeof *pi, NULL, 0, &idx, burst)) > 0) { + for (i = 0; i < count; i++) { + p = scalloc(sizeof *p, 1); + if (pq) + pq->p_nxt = p; + else + processes = p; + pq = p; + oneproc(p, &pi[i]); + } + if (count < burst) + break; + } +} +#elif defined (__OpenBSD__) +#include <uvm/uvm_extern.h> +static void +oneproc(struct proc *p, struct kinfo_proc *kp) +{ + p->p_pid = kp->kp_proc.p_pid; + strncpy(p->p_fname, kp->kp_proc.p_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_ppid = kp->kp_eproc.e_ppid; + p->p_pgid = kp->kp_eproc.e_pgid; + p->p_sid = kp->kp_eproc.e_tpgid; /* ? 
*/ + p->p_ttydev = kp->kp_eproc.e_tdev; + p->p_uid = kp->kp_eproc.e_pcred.p_ruid; + p->p_euid = kp->kp_eproc.e_ucred.cr_uid; + p->p_gid = kp->kp_eproc.e_pcred.p_rgid; + p->p_egid = kp->kp_eproc.e_ucred.cr_gid; + p->p_start = kp->kp_eproc.e_pstats.p_start.tv_sec; + p->p_size = kp->kp_eproc.e_vm.vm_tsize + + kp->kp_eproc.e_vm.vm_dsize + + kp->kp_eproc.e_vm.vm_ssize; +} +static void +argproc(struct proc *p, struct kinfo_proc *kp, kvm_t *kt) +{ + char **args; + char *ap, *pp; + + if ((args = kvm_getargv(kt, kp, sizeof p->p_psargs)) == NULL) + return; + ap = args[0]; + for (pp = p->p_psargs; pp < &p->p_psargs[sizeof p->p_psargs-1]; pp++) { + if (*ap == '\0') { + *pp = ' '; + ap = *++args; + if (ap == NULL) + break; + } else + *pp = *ap++; + } +} + +static void +collectprocs(void) +{ + struct proc *p, *pq = NULL; + kvm_t *kt; + struct kinfo_proc *kp; + int i, cnt; + + if ((kt = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, "kvm_open")) == NULL) + exit(1); + kp = kvm_getprocs(kt, KERN_PROC_ALL, 0, &cnt); + for (i = 0; i < cnt; i++) { + p = scalloc(sizeof *p, 1); + if (pq) + pq->p_nxt = p; + else + processes = p; + pq = p; + oneproc(p, &kp[i]); + argproc(p, &kp[i], kt); + } + kvm_close(kt); +} +#elif defined (__NetBSD__) +static void +oneproc(struct proc *p, struct kinfo_proc2 *kp) +{ + p->p_pid = kp->p_pid; + strncpy(p->p_fname, kp->p_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_ppid = kp->p_ppid; + p->p_pgid = kp->p__pgid; + p->p_sid = kp->p_sid; + p->p_ttydev = kp->p_tdev; + p->p_uid = kp->p_ruid; + p->p_euid = kp->p_uid; + p->p_gid = kp->p_rgid; + p->p_egid = kp->p_gid; + p->p_start = kp->p_ustart_sec; + p->p_size = kp->p_vm_tsize + kp->p_vm_dsize + kp->p_vm_ssize; +} + +static void +argproc(struct proc *p, struct kinfo_proc2 *kp, kvm_t *kt) +{ + char **args; + char *ap, *pp; + + if ((args = kvm_getargv2(kt, kp, sizeof p->p_psargs)) == NULL) + return; + ap = args[0]; + for (pp = p->p_psargs; pp < &p->p_psargs[sizeof p->p_psargs-1]; pp++) { + 
if (*ap == '\0') { + *pp = ' '; + ap = *++args; + if (ap == NULL) + break; + } else + *pp = *ap++; + } +} + +static void +collectprocs(void) +{ + struct proc *p, *pq = NULL; + kvm_t *kt; + struct kinfo_proc2 *kp; + int i, cnt; + + if ((kt = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, "kvm_open")) == NULL) + exit(1); + kp = kvm_getproc2(kt, KERN_PROC_ALL, 0, sizeof *kp, &cnt); + for (i = 0; i < cnt; i++) { + p = scalloc(sizeof *p, 1); + if (pq) + pq->p_nxt = p; + else + processes = p; + pq = p; + oneproc(p, &kp[i]); + argproc(p, &kp[i], kt); + } + kvm_close(kt); +} +#elif defined (__APPLE__) + +static int +GetBSDProcessList(pid_t thepid, struct kinfo_proc **procList, size_t *procCount) + /* derived from http://developer.apple.com/qa/qa2001/qa1123.html */ + /* Returns a list of all BSD processes on the system. This routine + allocates the list and puts it in *procList and a count of the + number of entries in *procCount. You are responsible for freeing + this list (use "free" from System framework). + all classic apps run in one process + On success, the function returns 0. + On error, the function returns a BSD errno value. + Preconditions: + assert( procList != NULL); + assert(*procList == NULL); + assert(procCount != NULL); + Postconditions: + assert( (err == 0) == (*procList != NULL) ); + */ +{ + int err; + struct kinfo_proc *result; + int mib[4]; + size_t length; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + if (thepid == 0) { + mib[2] = KERN_PROC_ALL; + mib[3] = 0; + } else { + mib[2] = KERN_PROC_PID; + mib[3] = thepid; + } + /* We start by calling sysctl with result == NULL and length == 0. + That will succeed, and set length to the appropriate length. + We then allocate a buffer of that size and call sysctl again + with that buffer. 
+ */ + length = 0; + err = sysctl(mib, 4, NULL, &length, NULL, 0); + if (err == -1) + err = errno; + if (err == 0) { + result = smalloc(length); + err = sysctl(mib, 4, result, &length, NULL, 0); + if (err == -1) + err = errno; + if (err == ENOMEM) { + free(result); /* clean up */ + result = NULL; + } + } + *procList = result; + *procCount = err == 0 ? length / sizeof **procList : 0; + return err; +} + +extern kern_return_t task_for_pid(task_port_t task, pid_t pid, task_port_t *target); + +static void +oneproc(struct proc *p, struct kinfo_proc *kp) +{ + task_port_t task; + kern_return_t error; + struct task_basic_info task_binfo; + unsigned int info_count = TASK_BASIC_INFO_COUNT; + + p->p_pid = kp->kp_proc.p_pid; + strncpy(p->p_fname, kp->kp_proc.p_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_ppid = kp->kp_eproc.e_ppid; + p->p_pgid = kp->kp_eproc.e_pgid; + p->p_sid = kp->kp_eproc.e_tpgid; + p->p_ttydev = kp->kp_eproc.e_tdev == -1 ? PRNODEV : kp->kp_eproc.e_tdev;; + p->p_uid = kp->kp_eproc.e_pcred.p_ruid; + p->p_euid = kp->kp_eproc.e_ucred.cr_uid; + p->p_gid = kp->kp_eproc.e_pcred.p_rgid; + p->p_egid = kp->kp_eproc.e_ucred.cr_gid; + p->p_start = kp->kp_proc.p_starttime.tv_sec + + (kp->kp_proc.p_starttime.tv_usec >= 500000); + + error = task_for_pid(mach_task_self(), p->p_pid, &task); + if (error != KERN_SUCCESS) { + return; /* no process, nothing to show/kill */ + } + + info_count = TASK_BASIC_INFO_COUNT; + error = task_info(task, TASK_BASIC_INFO, &task_binfo, &info_count); + if (error != KERN_SUCCESS) { + fprintf(stderr, "Error calling task_info():%d\n", error); + exit(3); + } + + p->p_size = task_binfo.virtual_size / 1024; /* in kilobytes */ +} + +static void +argproc(struct proc *p, struct kinfo_proc *kp) +{ + size_t size, argsz; + char *argbuf; + int mib[3]; + long nargs; + char *ap, *pp; + + /* allocate a procargs space per process */ + mib[0] = CTL_KERN; + mib[1] = KERN_ARGMAX; + size = sizeof argsz; + if (sysctl(mib, 2, &argsz, 
&size, NULL, 0) == -1) { + fprintf(stderr, "error in sysctl(): %s\n", strerror(errno)); + exit(3); + } + argbuf = (char *)smalloc(argsz); + + /* fetch the process arguments */ + mib[0] = CTL_KERN; + mib[1] = KERN_PROCARGS2; + mib[2] = kp->kp_proc.p_pid; + if (sysctl(mib, 3, argbuf, &argsz, NULL, 0) == -1) + goto DONE; /* process has no args or already left the system */ + + /* the number of args is at offset 0, this works for 32 and 64bit */ + memcpy(&nargs, argbuf, sizeof nargs); + ap = argbuf + sizeof nargs; + + /* skip the exec_path */ + while (ap < &argbuf[argsz] && *ap != '\0') + ap++; + if (ap == &argbuf[argsz]) + goto DONE; /* no args to show */ + /* skip trailing '\0' chars */ + while (ap < &argbuf[argsz] && *ap == '\0') + ap++; + if (ap == &argbuf[argsz]) + goto DONE; /* no args to show */ + + /* now concat copy the arguments */ + for (pp = p->p_psargs; pp < &p->p_psargs[sizeof p->p_psargs-1]; pp++) { + if (*ap == '\0') { + if (--nargs == 0) + break; + *pp = ' '; + ++ap; + } else + *pp = *ap++; + } + *pp = '\0'; + +DONE: free(argbuf); + return; +} + +static void +collectprocs(void) +{ + int mib[2]; + struct proc *p, *pq = NULL; + struct kinfo_proc *kp = NULL; + size_t i, cnt; + int err; + + if ((err = GetBSDProcessList(0, &kp, &cnt)) != 0) { + fprintf(stderr, "error getting proc list: %s\n", strerror(err)); + exit(3); + } + for (i = 0; i < cnt; i++) { + p = smalloc(sizeof *p); + if (pq) + pq->p_nxt = p; + else + processes = p; + pq = p; + oneproc(p, &kp[i]); + argproc(p, &kp[i]); + } + /* free the memory allocated by GetBSDProcessList */ + free(kp); +} +#endif /* all */ + +static enum okay +hasattr(struct proc *p, struct attrib *a) +{ + long val = 0; + + switch (a->a_type) { + case ATT_ALL: + return OKAY; + case ATT_PPID: + val = p->p_ppid; + break; + case ATT_PGRP: + val = p->p_pgid; + break; + case ATT_SID: + val = p->p_sid; + break; + case ATT_EUID: + val = p->p_euid; + break; + case ATT_UID: + val = p->p_uid; + break; + case ATT_GID: + val = p->p_gid; 
+ break; + case ATT_TTY: + /* + * Never matches processes without controlling tty. + */ + if (p->p_ttydev == PRNODEV) + return STOP; + val = p->p_ttydev; + break; + } + return val == a->a_val ? OKAY : STOP; +} + +static void +tryproc(struct proc *p) +{ + struct attlist *alp; + struct attrib *ap; + const char *line; + regmatch_t where; + + for (alp = attributes; alp; alp = alp->al_nxt) { + for (ap = alp->al_att; ap; ap = ap->a_nxt) + if (hasattr(p, ap) == OKAY) + break; + if (ap == NULL) + return; + } + if (expression) { + line = fflag ? p->p_psargs : p->p_fname; + if (regexec(expression, line, 1, &where, 0) != 0) + return; + if (xflag && (where.rm_so != 0 || where.rm_eo == -1 || + line[where.rm_eo] != '\0')) + return; + } + p->p_match = 1; +} + +static void +selectprocs(void) +{ + struct proc *p; + + for (p = processes; p; p = p->p_nxt) + tryproc(p); +} + +static void +outproc(struct proc *p) +{ + if (pkill) { + if (kill(p->p_pid, signo) < 0) + fprintf(stderr, + "%s: Failed to signal pid %ld: %s\n", + progname, (long)p->p_pid, strerror(errno)); + } else { + if (delimiter && prdelim++) + printf("%s", delimiter); + if (lflag) + printf("%5ld %s", (long)p->p_pid, + fflag ? 
p->p_psargs : p->p_fname); + else + printf("%ld", (long)p->p_pid); + if (delimiter == NULL) + printf("\n"); + } +} + +static void +handleprocs(void) +{ + struct proc *p, *selected = NULL; + + for (p = processes; p; p = p->p_nxt) { + if (p->p_pid != mypid && p->p_match ^ vflag) { + matched = 1; + if (nflag) { + if (selected == NULL || + p->p_start >= selected->p_start) + selected = p; + } else if (oflag) { + if (selected == NULL || + p->p_start < selected->p_start) + selected = p; + } else + outproc(p); + } + } + if ((nflag || oflag) && selected) + outproc(selected); + if (prdelim && delimiter) + printf("\n"); +} + +static long +getrdev(const char *device) +{ + struct stat st; + long id = 0; + char *file; + + file = alloca(strlen(device) + 9); + strcpy(file, "/dev/"); + strcpy(&file[5], device); + if (stat(file, &st) < 0) { + strcpy(file, "/dev/tty/"); + strcpy(&file[8], device); + if (stat(file, &st) == 0) + id = st.st_rdev; + else { + fprintf(stderr, "%s: unknown terminal name -- %s\n", + progname, device); + exit(2); + } + } else + id = st.st_rdev; + return id; +} + +static struct attrib * +makatt(enum attype at, const char *string, int optc, struct attrib *aq) +{ + struct attrib *ap; + struct passwd *pwd; + struct group *grp; + char *x; + long val = 0; + + if (*string == '\0') + at = ATT_ALL; + else switch (at) { + case ATT_PPID: + case ATT_PGRP: + case ATT_SID: + val = strtol(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, + "%s: invalid argument for option '%c' -- %s\n", + progname, optc, string); + exit(2); + } + if (val == 0) switch (at) { + case ATT_PGRP: + val = getpgid(0); + break; + case ATT_SID: + val = getsid(0); + break; + } + break; + case ATT_EUID: + need_euid_egid = 1; + /*FALLTHRU*/ + case ATT_UID: + if ((pwd = getpwnam(string)) != NULL) + val = pwd->pw_uid; + else { + val = strtol(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, + "%s: invalid user name -- %s\n", 
+ progname, string); + exit(2); + } + } + break; + case ATT_GID: + if ((grp = getgrnam(string)) != NULL) + val = grp->gr_gid; + else { + val = strtol(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-') { + fprintf(stderr, + "%s: invalid group name -- %s\n", + progname, string); + exit(2); + } + } + break; + case ATT_TTY: + val = getrdev(string); + break; + } + ap = scalloc(1, sizeof *ap); + ap->a_type = at; + ap->a_val = val; + ap->a_nxt = aq; + return ap; +} + +static void +addattribs(enum attype at, const char *list, int optc) +{ + struct attlist *al = NULL; + const char *cp; + + for (al = attributes; al; al = al->al_nxt) + if (al->al_att && al->al_att->a_type == at) + break; + if (al == NULL) { + al = scalloc(1, sizeof *al); + al->al_nxt = attributes; + attributes = al; + } + while (*list == ',' || isblank(*list&0377)) + list++; + if (*list) + while ((cp = element(&list, '\0')) != NULL) + al->al_att = makatt(at, cp, optc, al->al_att); + else + al->al_att = makatt(at, "", optc, al->al_att); +} + +static enum okay +getsig(const char *str) +{ + char *x; + int val; + + if ((val = strtol(str, &x, 10)) >= 0 && *x == '\0' && + *str != '-' && *str != '+') { + signo = val; + return OKAY; + } + if (str_2_sig(str, &val) == OKAY) { + signo = val; + return OKAY; + } + return STOP; +} + +static void +usage(void) +{ + if (pkill) + fprintf(stderr, "\ +Usage: %s [-signal] [-fnovx] [-P ppidlist] [-g pgrplist] [-s sidlist]\n\ +\t[-u euidlist] [-U uidlist] [-G gidlist] [-t termlist] [pattern]\n", + progname); + else + fprintf(stderr, "\ +Usage: %s [-flnovx] [-d delim] [-P ppidlist] [-g pgrplist] [-s sidlist]\n\ +\t[-u euidlist] [-U uidlist] [-G gidlist] [-t termlist] [pattern]\n", + progname); + exit(2); +} + +int +main(int argc, char **argv) +{ + int i, flags; + + progname = basename(argv[0]); + if (strncmp(progname, "pkill", 5) == 0) + pkill = 1; + setlocale(LC_COLLATE, ""); + setlocale(LC_CTYPE, ""); + if (pkill && argc > 1 && argv[1][0] == '-' && + 
getsig(&argv[1][1]) == OKAY) + optind = 2; + while ((i = getopt(argc, argv, pkill ? "fnovxP:g:s:u:U:G:t:" : + "flnovxd:P:g:s:u:U:G:t:")) != EOF) { + switch (i) { + case 'f': + fflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'o': + oflag = 1; + break; + case 'v': + vflag = 1; + break; + case 'x': + xflag = 1; + break; + case 'd': + delimiter = optarg; + break; + case 'P': + addattribs(ATT_PPID, optarg, i); + break; + case 'g': + addattribs(ATT_PGRP, optarg, i); + break; + case 's': + addattribs(ATT_SID, optarg, i); + break; + case 'u': + addattribs(ATT_EUID, optarg, i); + break; + case 'U': + addattribs(ATT_UID, optarg, i); + break; + case 'G': + addattribs(ATT_GID, optarg, i); + break; + case 't': + addattribs(ATT_TTY, optarg, i); + break; + default: + usage(); + } + } + if (nflag && oflag) { + fprintf(stderr, "%s: -n and -o are mutually exclusive\n", + progname); + usage(); + } + if (argv[optind]) { + if (argv[optind+1]) { + fprintf(stderr, "%s: illegal argument -- %s\n", + progname, argv[optind + 1]); + usage(); + } + flags = REG_EXTENDED; +#ifdef REG_MTPARENBAD + flags |= REG_MTPARENBAD; +#endif + if (!xflag) + flags |= REG_NOSUB; +#ifdef REG_ONESUB + else + flags |= REG_ONESUB; +#endif + expression = scalloc(1, sizeof *expression); + if ((i = regcomp(expression, argv[optind], flags)) != 0) { + char *errst; + size_t errsz; + + errsz = regerror(i, expression, NULL, 0) + 1; + errst = smalloc(errsz); + regerror(i, expression, errst, errsz); + fprintf(stderr, "%s: %s\n", progname, errst); + exit(2); + } + } else if (attributes == NULL) { + fprintf(stderr, "%s: No matching criteria specified\n", + progname); + usage(); + } + mypid = getpid(); +#if !defined (__hpux) && !defined (_AIX) && !defined (__NetBSD__) && \ + !defined (__OpenBSD__) && !defined (__APPLE__) + chdir_to_proc(); +#endif /* !__hpux, !_AIX, !__NetBSD__, !__OpenBSD__, !__APPLE__ */ + collectprocs(); + selectprocs(); + handleprocs(); + return errcnt ? 
errcnt : matched == 0; +} + +struct sig_strlist +{ + const int sig_num; + const char *sig_str; +}; + +static const struct sig_strlist sig_strs[] = { + { 0, "EXIT" }, + { SIGHUP, "HUP" }, + { SIGINT, "INT" }, + { SIGQUIT, "QUIT" }, + { SIGILL, "ILL" }, + { SIGTRAP, "TRAP" }, + { SIGABRT, "ABRT" }, +#ifdef SIGIOT + { SIGIOT, "IOT" }, +#endif +#ifdef SIGEMT + { SIGEMT, "EMT" }, +#endif +#ifdef SIGFPE + { SIGFPE, "FPE" }, +#endif +#ifdef SIGKILL + { SIGKILL, "KILL" }, +#endif +#ifdef SIGBUS + { SIGBUS, "BUS" }, +#endif +#ifdef SIGSEGV + { SIGSEGV, "SEGV" }, +#endif +#ifdef SIGSYS + { SIGSYS, "SYS" }, +#endif +#ifdef SIGPIPE + { SIGPIPE, "PIPE" }, +#endif +#ifdef SIGALRM + { SIGALRM, "ALRM" }, +#endif +#ifdef SIGTERM + { SIGTERM, "TERM" }, +#endif +#ifdef SIGUSR1 + { SIGUSR1, "USR1" }, +#endif +#ifdef SIGUSR2 + { SIGUSR2, "USR2" }, +#endif +#ifdef SIGCLD + { SIGCLD, "CLD" }, +#endif +#ifdef SIGCHLD + { SIGCHLD, "CHLD" }, +#endif +#ifdef SIGPWR + { SIGPWR, "PWR" }, +#endif +#ifdef SIGWINCH + { SIGWINCH, "WINCH" }, +#endif +#ifdef SIGURG + { SIGURG, "URG" }, +#endif +#ifdef SIGPOLL + { SIGPOLL, "POLL" }, +#endif +#ifdef SIGIO + { SIGIO, "IO" }, +#endif +#ifdef SIGSTOP + { SIGSTOP, "STOP" }, +#endif +#ifdef SIGTSTP + { SIGTSTP, "TSTP" }, +#endif +#ifdef SIGCONT + { SIGCONT, "CONT" }, +#endif +#ifdef SIGTTIN + { SIGTTIN, "TTIN" }, +#endif +#ifdef SIGTTOU + { SIGTTOU, "TTOU" }, +#endif +#ifdef SIGVTALRM + { SIGVTALRM, "VTALRM" }, +#endif +#ifdef SIGPROF + { SIGPROF, "PROF" }, +#endif +#ifdef SIGXCPU + { SIGXCPU, "XCPU" }, +#endif +#ifdef SIGXFSZ + { SIGXFSZ, "XFSZ" }, +#endif +#ifdef SIGWAITING + { SIGWAITING, "WAITING" }, +#endif +#ifdef SIGLWP + { SIGLWP, "LWP" }, +#endif +#ifdef SIGFREEZE + { SIGFREEZE, "FREEZE" }, +#endif +#ifdef SIGTHAW + { SIGTHAW, "THAW" }, +#endif +#ifdef SIGCANCEL + { SIGCANCEL, "CANCEL" }, +#endif +#ifdef SIGLOST + { SIGLOST, "LOST" }, +#endif +#ifdef SIGSTKFLT + { SIGSTKFLT, "STKFLT" }, +#endif +#ifdef SIGINFO + { SIGINFO, "INFO" }, +#endif 
+#ifdef SIG_2_STR_WITH_RT_SIGNALS + { SIGRTMIN, "RTMIN" }, + { SIGRTMIN+1, "RTMIN+1" }, + { SIGRTMIN+2, "RTMIN+2" }, + { SIGRTMIN+3, "RTMIN+3" }, + { SIGRTMAX-3, "RTMAX-3" }, + { SIGRTMAX-2, "RTMAX-2" }, + { SIGRTMAX-1, "RTMAX-1" }, + { SIGRTMAX, "RTMAX" }, +#endif /* SIG_2_STR_WITH_RT_SIGNALS */ + { -1, NULL } +}; + +static int +str_2_sig(const char *str, int *signum) +{ + int i; + + for (i = 0; sig_strs[i].sig_str; i++) + if (eq(str, sig_strs[i].sig_str)) + break; + if (sig_strs[i].sig_str == NULL) + return STOP; + *signum = sig_strs[i].sig_num; + return OKAY; +} diff --git a/printf/mkfile b/printf/mkfile @@ -0,0 +1,7 @@ +BIN = printf +OBJ = printf.o +INSTALL_BIN = printf +INSTALL_MAN1 = printf.1 +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/printf/printf.1 b/printf/printf.1 @@ -0,0 +1,254 @@ +.\" +.\" Copyright (c) 2005 Gunnar Ritter +.\" +.\" This software is provided 'as-is', without any express or implied +.\" warranty. In no event will the authors be held liable for any damages +.\" arising from the use of this software. +.\" +.\" Permission is granted to anyone to use this software for any purpose, +.\" including commercial applications, and to alter it and redistribute +.\" it freely, subject to the following restrictions: +.\" +.\" 1. The origin of this software must not be misrepresented; you must not +.\" claim that you wrote the original software. If you use this software +.\" in a product, an acknowledgment in the product documentation would be +.\" appreciated but is not required. +.\" +.\" 2. Altered source versions must be plainly marked as such, and must not be +.\" misrepresented as being the original software. +.\" +.\" 3. This notice may not be removed or altered from any source distribution. +.\" +.\" Sccsid @(#)printf.1 1.2 (gritter) 7/17/05 +.TH PRINTF 1 "7/17/05" "Heirloom Toolchest" "User Commands" +.SH NAME +printf \- print a text string +.SH SYNOPSIS +\fBprintf\fR \fIformat\fR [\fIargument\fR ...] 
+.SH DESCRIPTION +The +.I printf +command writes a string to standard output +with a controlled +.IR format . +It is essentially a utility variant of the `C' language function +.IR printf (3). +.PP +Most characters in the +.I format +string +are simply copied. +The only exceptions are the backslash `\e' and percent `%' characters. +.PP +The backslash `\e' character in the +.I format +string +serves as an escape for the following character. +The sequences +`\ea' (alert), +`\eb' (backspace), +`\ef' (formfeed), +`\en' (newline), +`\er' (carriage return), +`\et' (tabulator), +and `\ev' (vertical tabulator) +cause the corresponding control characters to be printed. +The sequence `\e\fInnn\fR', +where +.I nnn +is a one-, two-, or three-digit octal number, +causes the character (byte) with the corresponding number +to be printed. +`\e\e' prints the backslash character itself. +.PP +The percent `%' character in the +.I format +string introduces a conversion specification +consisting of up to five parts, +of which the first four are optional: +.TP 14 +.I position +A positive decimal integer followed by a dollar `$' character +that specifies the +.I argument +to be used instead of the next unused one +for the following conversions. +.TP 14 +.I flags +Zero or more of +.RS +.PD 0 +.TP 8 +.B \- +left-justify the field +.TP 8 +.B + +always include a sign when printing a number +.TP +<space> +prefix the result by a space if there is no sign +.TP 8 +.B # +alternate format +.TP 8 +.B 0 +pad numbers with leading zeros +.PD +.RE +.TP 14 +.I field width +A decimal number giving the minimum width of the output printed. +The output is padded if necessary, +as controlled by the flags described above. +By default, no padding is performed. +If the field width is `*', +it is taken from the next unused +.IR argument . 
+.TP 14 +.I precision +A dot `.', +followed by a decimal number giving +the minimum digits written for integer numeric conversions, +the minimum digits after the radix character +for floating-point numeric conversions, +or maximum bytes written for string conversions. +If the precision is `.*', +it is taken from the next unused +.IR argument . +.TP 14 +.I specifier +A character controlling the type of the conversion performed: +.RS +.PD 0 +.TP 5 +.B s +The next unused +.I argument +is written as a plain string. +.TP 5 +.B b +The next unused +.I argument +is written as a string containing escape sequences +similar to those of +.IR echo (1). +The backslash sequences described above are supported, +except that the octal number must be prefixed by a zero +as in `\e0\fInnn\fR'. +A `\ec' sequence causes +.I printf +to exit immediately when it is encountered. +.TP 5 +.B c +The first character (byte) of the next unused +.I argument +is written. +Any following characters in this argument are ignored. +.TP 5 +.BR d ", i +The next unused +.I argument +is written as a signed decimal number. +.TP 5 +.B o +The next unused +.I argument +is written as an octal number. +With the `#' flag, +it is prefixed by `0'. +.TP 5 +.B u +The next unused +.I argument +is written as an unsigned decimal number. +.TP 5 +.B x +The next unused +.I argument +is written as a hexadecimal number, +using lowercase characters. +With the `#' flag, +it is prefixed by `0x'. +.TP 5 +.B X +The next unused +.I argument +is written as a hexadecimal number, +using uppercase characters. +With the `#' flag, +it is prefixed by `0X'. +.TP 5 +.B f +The next unused +.I argument +is written as a floating-point number +in the style `[\-]\fInnn.nnn\fR'. +.TP 5 +.B e +The next unused +.I argument +is written as a floating-point number +in the style `[\-]\fIn.nnn\fRe[+|\-]\fInn\fR'. +.TP 5 +.B E +The next unused +.I argument +is written as a floating-point number +in the style `[\-]\fIn.nnn\fRE[+|\-]\fInn\fR'. 
+.TP 5 +.B g +The next unused +.I argument +is written as a floating-point number +like an integer if there is no fractional part, +as described for `f' if the exponent is small, +or as described for `e' if the exponent is large. +.TP 5 +.B G +The next unused +.I argument +is written as a floating-point number +like an integer if there is no fractional part, +as described for `f' if the exponent is small, +or as described for `E' if the exponent is large. +.TP 5 +.B % +A percent character is printed. +No +.I argument +is consumed. +.PD +.RE +.PP +If the argument for the numeric specifiers starts with a +single or double quote (`'\fIc\fR' or `"\fIc\fR'), +the numeric value of the following character (byte sequence) +in the current character encoding is used. +.PP +If the +.I format +string consumes at least one +.IR argument , +no format specification contains a +.I position +part, +but there are still unused +.I arguments +after the entire format string has been evaluated once, +it is evaluated repeatedly until all arguments are consumed. +Missing +.I arguments +default to the empty string for string conversions, +and to zero if a numeric value is expected. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +for `'\fIc\fR' and `"\fIc\fR'. +.SH "SEE ALSO" +echo(1), +printf(3) diff --git a/printf/printf.c b/printf/printf.c @@ -0,0 +1,402 @@ +/* + * printf - print a text string + * + * Gunnar Ritter, Freiburg i. Br., Germany, June 2005. + */ +/* + * Copyright (c) 2005 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)printf.c 1.7 (gritter) 7/17/05"; + +#include <stdio.h> +#include <stdlib.h> +#include <libgen.h> +#include <locale.h> +#include <wchar.h> +#include <limits.h> +#include <errno.h> +#include "asciitype.h" + +#if defined (__GLIBC__) && defined (_IO_getc_unlocked) +#undef putchar +#define putchar(c) _IO_putc_unlocked(c, stdout) +#endif + +static char *fp; /* format pointer */ +static int a; /* current argument index */ +static int ab; /* beginning of arguments */ +static int ac; /* argc to main() */ +static char **av; /* argv to main() */ +static int c; /* last character (byte) read */ +static const char *progname; /* argv[0] to main() */ +static int status; /* exit status */ +static int mb_cur_max; /* MB_CUR_MAX */ +static int dolflag; /* n$ field encountered */ +/* Print the synopsis to stderr and exit with status 2. */ +static void +usage(void) +{ + fprintf(stderr, "Usage: %s format [[[arg1] arg2] ... 
argn]\n", + progname); + exit(2); +} +/* getnum(T, type, func) defines T(cp): if cp starts with a quote character it returns the value of the character that follows; otherwise it converts cp with func and, on overflow or unconverted input, prints a diagnostic and sets bit 0 of status. */ +#define getnum(T, type, func) static type \ +T(const char *cp) \ +{ \ + char *xp; \ + wchar_t wc; \ + int i; \ + type n; \ +\ + if (*cp == '"' || *cp == '\'') { \ + if (mb_cur_max > 1 && cp[1] & 0200) { \ + if ((i = mbtowc(&wc, &cp[1], mb_cur_max)) < 0) \ + return WEOF; \ + return wc; \ + } else \ + return cp[1] & 0377; \ + } \ + errno = 0; \ + n = func(cp, &xp); \ + if (errno) { \ + fprintf(stderr, "%s: \"%s\" arithmetic overflow\n", \ + progname, cp); \ + status |= 1; \ + xp = ""; \ + } \ + if (*xp) { \ + fprintf(stderr, "%s: \"%s\" %s\n", progname, cp, \ + xp > cp ? "not completely converted" : \ + "expected numeric value"); \ + status |= 1; \ + } \ + return n; \ +} + +#define getint(a, b) strtol(a, b, 0) +#define getuns(a, b) strtoul(a, b, 0) +#define getdouble(a, b) strtod(a, b) + +getnum(integer, int, getint) +getnum(unsgned, unsigned, getuns) +getnum(floating, double, getdouble) +/* Interpret the escape sequence whose backslash fp points at, advancing fp over it. Output is produced only if really is nonzero; bflag selects the %b rules (an octal number needs a leading 0, backslash-c ends output). Returns the byte count that bconv() adds to its running length. */ +static int +backslash(int bflag, int really) +{ + int c, i, n, z = 1; + + fp++; + if (mb_cur_max > 1 && *fp & 0200) { + if ((n = mblen(fp, mb_cur_max)) < 0) + n = 1; + } else + n = 1; + switch (*fp) { + case '\0': + n = 0; + /*FALLTHRU*/ + case '\\': + if (really) putchar('\\'); + break; + case 'a': + if (really) putchar('\a'); + break; + case 'b': + if (really) putchar('\b'); + break; + case 'c': + if (bflag) { + if (really) + exit(status); + else { + while (*fp) + fp++; + return 0; + } + } + goto dfl; + case 'f': + if (really) putchar('\f'); + break; + case 'n': + if (really) putchar('\n'); + break; + case 'r': + if (really) putchar('\r'); + break; + case 't': + if (really) putchar('\t'); + break; + case 'v': + if (really) putchar('\v'); + break; + case '0': + if (bflag) { + if (fp[1]) { + fp++; + goto digit; + } + if (really) putchar('\0'); + break; + } + /*FALLTHRU*/ + case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + if (bflag) + goto dfl; + digit: c = 0; + for (i = 0; i < 3 && octalchar(fp[i] & 0377); i++) + c = 
c << 3 | (fp[i] - '0'); + if (really) putchar(c); + n = i; + break; + default: + dfl: for (i = 0; i < n; i++) + if (really) putchar(fp[i] & 0377); + z = n; + } + fp += n - 1; + return z; +} +/* Write sp as a %b argument: at most prec bytes, with escapes expanded by backslash(). For width > 0 a first pass with really == 0 only measures the length so that the blanks can be written in front of the text; for width < 0 the blanks follow the text. */ +static void +bconv(int width, int prec, char *sp) +{ + char *ofp = fp; + int i, n, really = 1; + + fp = sp; + if (width > 0) { + really = 0; + goto try; + prt: really = 1; + fp = sp; + for (i = 0; i < width - n && i + n < prec; i++) + putchar(' '); + } +try: for (n = 0; *fp && n < prec; fp++) { + switch (*fp) { + case '\\': + n += backslash(1, really); + break; + default: + if (really) putchar(*fp & 0377); + n++; + } + } + if (width > 0 && really == 0) + goto prt; + fp = ofp; + if (width < 0) { + while (n < prec && n++ < -width) + putchar(' '); + } +} +/* nextarg() yields the next unused argument, or an empty string when none is left. percent() handles the conversion specification that starts at fp: it parses an optional n$ position, flags, width and precision, then either converts the argument itself (b, %) or hands the specification to printf(3). */ +#define nextarg() (a < ac ? av[a++] : "") +static void +percent(void) +{ + char *fmt = fp, *sp; + int width = 0, prec = INT_MAX; /* altered from the original LONG_MAX: prec is an int, and LONG_MAX does not fit where long is wider than int, so the n < prec loop in bconv() could print nothing */ + int n; + double f; + int c; + int star = 0; + int sign = 1; + + if (*++fp == '\0') { + fp--; + return; + } + if (digitchar(*fp&0377)) { + n = 0; + for (sp = fp; digitchar(*sp&0377); sp++) + n = n * 10 + *sp - '0'; + if (*sp == '$') { + a = n + ab - 1; + dolflag = 1; + fmt = sp; + fmt[0] = '%'; + fp = &sp[1]; + } + } +loop: switch (*fp) { + case '-': + sign = -1; + /*FALLTHRU*/ + case '+': + case '#': + case '0': + case ' ': + fp++; + goto loop; + } + if (digitchar(*fp&0377)) { + do + width = width * 10 + *fp++ - '0'; + while (digitchar(*fp&0377)); + } else if (*fp == '*') { + width = a < ac ? integer(av[a++]) : 0; + fp++; + star |= 1; + } + width *= sign; + if (*fp == '.') { + fp++; + if (digitchar(*fp&0377)) { + prec = 0; + do + prec = prec * 10 + *fp++ - '0'; + while (digitchar(*fp&0377)); + } else if (*fp == '*') { + prec = a < ac ? integer(av[a++]) : 0; + fp++; + star |= 2; + } + } + switch (*fp) { + case 'b': + bconv(width, prec, nextarg()); + return; + case '%': + putchar('%'); + return; + case 'd': + case 'i': + case 'o': + case 'u': + case 'x': + case 'X': + case 'c': + n = *fp == 'c' ? 
*(nextarg()) & 0377 : + *fp == 'd' || *fp == 'i' ? + integer(nextarg()) : + unsgned(nextarg()); + c = fp[1]; + fp[1] = '\0'; + switch (star) { + case 3: + printf(fmt, width, prec, n); + break; + case 2: + printf(fmt, prec, n); + break; + case 1: + printf(fmt, width, n); + break; + default: + printf(fmt, n); + } + fp[1] = c; + break; + case 'f': + case 'e': + case 'E': + case 'g': + case 'G': + f = floating(nextarg()); + c = fp[1]; + fp[1] = '\0'; + switch (star) { + case 3: + printf(fmt, width, prec, f); + break; + case 2: + printf(fmt, prec, f); + break; + case 1: + printf(fmt, width, f); + break; + default: + printf(fmt, f); + } + fp[1] = c; + break; + case 's': + c = fp[1]; + fp[1] = '\0'; + sp = nextarg(); + switch (star) { + case 3: + printf(fmt, width, prec, sp); + break; + case 2: + printf(fmt, prec, sp); + break; + case 1: + printf(fmt, width, sp); + break; + default: + printf(fmt, sp); + } + fp[1] = c; + break; + default: + putchar(*fp & 0377); + return; + } +} +/* Apply the format in av[1] to the remaining arguments, re-running it while arguments are left over (unless a pass consumed none or an n$ position was used), and return the accumulated status. */ +int +main(int argc, char **argv) +{ + setlocale(LC_CTYPE, ""); + mb_cur_max = MB_CUR_MAX; + progname = basename(argv[0]); + if (argc > 1 && argv[1][0] == '-' && argv[1][1] == '-' && + argv[1][2] == '\0') { + argv++; + argc--; + } + if (argc <= 1) + usage(); + ac = argc; + av = argv; + a = ab = 2; + do { + for (fp = av[1]; *fp; fp++) { + switch (c = *fp & 0377) { + case '\\': + backslash(0, 1); + break; + case '%': + percent(); + break; + default: + putchar(c); + } + } + } while (a > ab && a < ac && dolflag == 0); + if (ferror(stdout)) + status |= 1; + return status; +} diff --git a/ps/NOTES b/ps/NOTES @@ -0,0 +1,62 @@ +Notes for the ps utility +======================== + +ps needs to associate /dev device names with dev_t device numbers in order +to print the controlling terminal devices. It does this by examining files +in /dev with stat(2). 
To accelerate this process, ps normally keeps a cache +file; the data in this file is not secret (it can be collected by any user +by examining /dev himself), but its integrity must be protected. ps thus +normally runs set-user-ID and creates the cache file with its effective +user ID. ps should be run with the credentials of either root or another +trusted user ID; it does not need privilege otherwise on Linux or Solaris +and can be configured in /etc/default/ps to switch to the real user ID +after creating the cache file. The location, permission mode, and group +ownership of the cache file can also be changed in /etc/default/ps (see +the text of that file for details). + +It is also possible to disable the cache file by removing -DUSE_PS_CACHE +from Makefile.mk, and regenerating the Makefile and rebuilding ps after +doing so. If no cache file is used, ps does not need to run set-user-ID +on Linux and Solaris. + +On (native) Open UNIX, ps needs root privileges to read execution times +of child processes, and effective user and group IDs. It will thus be +limited in functionality unless it runs set-user-ID to root, especially +for the SUSv2 version which uses effective user IDs for the -u option +and prints them in the UID column. With the Open UNIX LKP, no privileges +are necessary, just as on Linux, but some files in LKP /proc contain +weird values, causing ps to print weird output. + +On FreeBSD, ps needs privileges for completely correct operation because +some information it has to read (e. g. process flags, priority) is not +available with /proc, but only by examining /dev/kmem with the kvm library. + +The System V-style ps utility and the BSD-style version are two separate +binaries with this toolchest. Moreover, the BSD-style version does not +accept options without a preceding '-' (original BSD implementations do, +but not /usr/ucb/ps on System V which was the model implementation for +this version). 
If you like the ps utility to switch to BSD-style mode +if the first argument does not start with '-', as procps v2 does, use +a shell script like the following: + + case $1 in + -*|'') + PATH=/usr/5bin:$PATH + exec ps ${@+"$@"} + ;; + [0-9]*) + PATH=/usr/ucb:$PATH + exec ps "$@" + ;; + *) + opt=$1 + shift + PATH=/usr/ucb:$PATH + exec ps "-$opt" ${@+"$@"} + ;; + esac + +Of course, you will have to change the PATH assignments if you installed +the tools in a different location. + + Gunnar Ritter 4/6/04 diff --git a/ps/mkfile b/ps/mkfile @@ -0,0 +1,8 @@ +BIN = ps +OBJ = ps.o +LOCAL_CFLAGS = -DDEFAULT=\"$DFLDIR/ps\" -DUCB +INSTALL_BIN = ps +INSTALL_MAN1 = ps.1b +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/ps/ps.1 b/ps/ps.1 @@ -0,0 +1,488 @@ +'\" t +.\" Sccsid @(#)ps.1 1.48 (gritter) 8/19/08 +.\" Parts taken from ps(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. 
AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH PS 1 "8/19/08" "Heirloom Toolchest" "User Commands" +.SH NAME +ps \- process status +.SH SYNOPSIS +.HP +.ad l +.nh +\fBps\fR [\-\fBaAcdefjlLPRy\fR] +[\fB\-o\fI property\fR[\fB=\fR[\fItitle\fR]], ...\fR]\ ... +[\fB\-gGpstuU\fI\ criteria\fR, ...]\ ... +[\fB\-r \fIsysname\fR] +.br +.hy 1 +.ad b +.SH DESCRIPTION +.B Ps +prints certain indicia about active processes. +If no options are specified, +all processes on the current terminal are selected; +.B /usr/5bin/posix/ps +and +.B /usr/5bin/posix2001/ps +further restrict this to processes owned by the invoking user. +The selection can be changed +by adding +.I criteria +with the options below; +when a process satisfies any criterion, +it is selected. +A +.I criteria +string can consist +of multiple criteria +separated by blanks or commas. +.PP +By default, +.B ps +prints the process id, +controlling terminal device, +cumulative execution time +and command of processes. +.PP +The +.B ps +command accepts the following options: +.TP 12 +.B \-a +Selects all processes with a controlling terminal device, +except for session leaders. +.TP 12 +.B \-c +Adds scheduling class and priority to the output. +This is the default with +.BR /usr/5bin/s42/ps . +See below for the meaning of columns. 
+.TP 12 +.B \-d +Selects all processes except session leaders. +.TP 12 +.B \-e +Selects all processes. +.TP 12 +.B \-f +Full listing; +adds user name, parent process id, +processor utilization, +and the time when the process was started. +See below for the meaning of columns. +.TP 12 +\fB\-g\fI pgrplist\fR +For +.B /usr/5bin/ps +and +.BR /usr/5bin/s42/ps , +all processes that belong to one of the process groups ids in +.I pgrplist +are selected; +for +.B /usr/5bin/posix/ps +and +.BR /usr/5bin/posix2001/ps , +all processes that belong to one of the session ids in +.IR group . +.TP 12 +.B \-j +Adds process group id and session id to the output. +See below for the meaning of columns. +.TP 12 +.B \-l +Long listing; +adds process flags, process state, +numeric user id, +parent process id, +processor utilization, +priority, nice value, +core address, +memory size in pages and the event waited for. +See below for the meaning of columns. +.TP 12 +\fB\-p\fI pidlist\fR +Selects all processes with one of the given \fIprocess ids\fR. +.TP 12 +\fB\-r\fI sysname\fR +Change the root directory to +.IR sysname , +which may be either an absolute pathname +or the basename component of a mounted file system. +Requires presence of a selection criterion. +.TP 12 +\fB\-s\fI sidlist\fR +Selects all processes that belong to one of the session ids in +.IR sidlist . +.TP 12 +\fB\-t\fI device\fR +Selects all processes with the current terminal +.IR device . +This may be the device name with either +.B /dev/ +or +.B /dev/tty +omitted, +as in +.I ttyS2 +or +.I S2 +for +.IR /dev/ttyS2 . +.TP 12 +\fB\-u\fI uidlist\fR +Selects all processes with one of the given \fIuser ids\fR, +which may be symbolic or numeric. +The real user id is used with +.B /usr/5bin/ps +and +.BR /usr/5bin/s42/ps , +the effective user id with +.B /usr/5bin/posix/ps +and +.BR /usr/5bin/posix2001/ps . +.PP +The following options have been introduced by POSIX.2: +.TP 12 +.B \-A +Selects all processes. 
+.TP 12 +\fB\-G\fI gidlist\fR +Selects all processes that have one of the specified real \fIgroup ids\fR, +which may be symbolic or numeric. +.TP 12 +\fB\-o\fI property\fR[\fB=\fR[\fItitle\fR]],... +The output is changed to reflect the named +.IR property . +Multiple properties can be given, +separated by blanks or commas; +it is also possible to specify multiple +.B \-o +options. +Normally, the default property description is +written in the first output line. +If the +.B = +character is present, but the +.I title +is missing in all format specifications, +no descriptions are printed; +if a +.I title +is given, it is used instead of the default. +See below for valid +.I property +strings. +.TP 12 +\fB\-U\fI uidlist\fR +Selects all processes with one of the given real \fIuser ids\fR, +which may be symbolic or numeric. +.PP +The following options are extensions: +.TP 12 +.B \-L +.hw LWPs +Prints information on lightweight processes (LWPs); +adds lightweight process id, +lightweight process time, +and, if \fI\-f\fR is also present, +the number of lightweight processes. +See below for the meaning of columns. +.TP 12 +.B \-P +Prints the processor on which the process is currently running. +Disables printing of flags and memory address. +.TP 12 +.B \-R +Resource usage format; +prints process id, +memory and resident set size in pages, +buffer reads and writes, +messages sent and received, +user and system time, +and command. +See below for the meaning of columns. +.TP 12 +.B \-y +Modifies the +.B \-l +output format; +process flags, address and size in pages are omitted, +and resident set size +and memory size in kilobytes are printed instead. +.PP +The meaning of columns and column headings +for the +.BR \-c , +.BR \-f , +.BR \-j , +.BR \-l +and +.B \-P +options are as follows +(the letters given in parentheses specify which option +causes the column to appear; +\fIall\fR means that the column is always printed): +.PP +.TS +l2 l2 l s s +l2 l2 l2 l4 l. 
+F (l) T{ +Flags associated with the process +(octal and additive): +T} + 01 in core; + 02 system process; + 04 T{ +locked in core (e.g. for physical I/O); +T} + 10 being swapped; + 20 being traced by another process. +.T& +l2 l2 l s s +l2 l2 l2 l4 l. +S (l) The state of the process: + O running on a processor; + R runnable (on run queue); + S sleeping; + I intermediate; + Z terminated; + T stopped; + X allocating memory. +.T& +l2 l2 l s s. +UID (f,l) T{ +The real user ID of the process owner, +or, for \fB/usr/5bin/posix/ps\fR +and \fB/usr/5bin/posix2001/ps\fR, +the effective user ID. +With the \fB\-l\fR +option, +a numeric ID is printed, +otherwise the user name. +T} +PID (all) T{ +The process id of the process; +as in certain cults +it is possible to kill a process +if you know its true name. +T} +PPID (f,j,l) The process ID of the parent process. +PGID (j) The process group ID of the process. +SID (j) The session ID of the process. +LWP (L) The lightweight process ID of the process. +NLWP (fL) T{ +The number of lightweight processes +in the process. +T} +PSR (P) T{ +The processor on which the process is currently running. +T} +C (f,l) Processor utilization for scheduling. +CLS (c) Scheduling class. +PRI (c,l) T{ +Priority. +With \fB\-l\fR, high numbers mean low priority. +With \fB\-c\fR, high numbers mean high priority; +time-sharing processes have priorities below 60; +for real-time processes, +the priority is computed as \fI100 + scheduling priority.\fR +T} +NI (l) Nice value, used in priority computation. +ADDR (l) The core address of the process. +RSS (ly) T{ +The amount of memory in kilobytes +currently present in core. +T} +SZ (l,R) T{ +The size in pages of the core image of the process. +If the \fB\-y\fR option is also given, +the size is printed in kilobytes. +T} +MRSZ (R) T{ +The amount of memory in pages +currently present in core. +T} +PFLTS (R) T{ +The number of major page faults +that have occurred with the process. 
+T} +BUFR (R) T{ +Buffer reads performed on behalf of the process. +T} +BUFW (R) T{ +Buffer writes performed on behalf of the process. +T} +MRCV (R) T{ +Messages received by the process. +T} +MSND (R) T{ +Messages sent by the process. +T} +WCHAN (l) T{ +The event for which the process is waiting or sleeping; +if blank, the process is running. +T} +.\" Trailing no-break-spaces guarantee a minimum table width for nroff +.\" without restricting troff to select the same. +STIME (f) The time when the process was started.\ \ \ \ \ \ +TTY (all) T{ +The controlling tty for the process. +T} +TIME (all) T{ +The cumulative execution time for the process. +T} +LTIME (L) T{ +The cumulative execution time for the lightweight process. +T} +UTIME (R) T{ +The cumulative time the process spent in user mode. +T} +KTIME (R) T{ +The cumulative time the process spent in system (kernel) mode. +T} +COMD (all) T{ +The command name; +with the \fB\-f\fR option, +the command line. +The heading `CMD' is printed +for \fB/usr/5bin/posix/ps\fR +and \fB/usr/5bin/posix2001/ps\fR; +the heading `COMMAND' is printed +if the \fBSYSV3\fR +environment variable is set +and the \fI\-l\fR option is not present. +T} +.TE +.PP +A process that has exited and has a parent, +but has not yet been waited for by the parent +is marked +.IR <defunct> . +.PP +For the +.B \-o +option, the following properties +(listed with their default column headings) +can be given: +.PP +.TS +l2fB l2 l. +user USER Effective user name. +ruser RUSER Real user name. +group GROUP Effective group name. +rgroup RGROUP Real group name. +pid PID Process id. +ppid PPID Parent process id. +pgid PGID Process group id. +sid SID Session id. +class CLASS Scheduling class. +pcpu %CPU Processor usage in percent. +vsz VSZ Memory usage in kilobytes. +nice NI Nice value. +etime ELAPSED Time elapsed since the process was started. +time TIME Cumulative execution time. +tty TTY Controlling terminal device. +comm COMMAND The first command line argument. 
+args COMMAND Command line arguments separated by spaces. +f F Process flags. +s S Process state. +c C Processor utilization for scheduling. +uid UID Numeric effective user id. +ruid RUID Numeric real user id. +gid GID Numeric effective group id. +rgid RGID Numeric real group id. +pri PRI Priority; high numbers mean high priority. +opri PRI Priority; high numbers mean low priority. +psr PSR Processor. +addr ADDR Core address. +osz SZ Memory size in pages. +wchan WCHAN Event for which the process is waiting. +stime STIME T{ +Start time of the process (may contain whitespace). +T} +rss RSS Resident set size in kilobytes. +pmem %MEM Memory usage in percent. +fname COMMAND T{ +.ad l +.nr ol \n(.l +.ll 39n +The first eight characters of the executable file for the process. +.br +.ll \n(olu +.ad b +T} +.TE +.PP +For those properties that correspond to user or group names, +the numeric id is printed +if the name does not fit into the column width. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the set of printable characters +and the character width. +Non-printable characters in arguments and file names +are dropped if writing to a terminal. +.TP +.B LC_TIME +Affects the format of date and time strings printed. +.TP +.B SYSV3 +Changes the text of some headings as described above. +.SH FILES +.TP +.B /etc/passwd +Used for converting numeric and symbolic user ids. +.TP +.B /etc/group +Used for converting numeric and symbolic group ids. +.TP +.B /etc/default/ps +.TP +.B /proc/ +.TP +.B /dev/ +.SH "SEE ALSO" +nice(1), +kill(1), +priocntl(1), +proc(5), +locale(7) +.SH NOTES +Things can change while ps is running; +the picture it gives is only a close approximation to reality. diff --git a/ps/ps.1b b/ps/ps.1b @@ -0,0 +1,421 @@ +'\" t +.\" Sccsid @(#)ps.1b 1.19 (gritter) 9/5/05 +.\" Parts taken from ps(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" Parts taken from ps(1), 4.3BSD: +.\" Copyright (c) 1980, 1990 +.\" The Regents of the University of California. All rights reserved. 
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 3. All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed by the University of +.\" California, Berkeley and its contributors. +.\" This product includes software developed by Gunnar Ritter +.\" and his contributors. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS '\fIAS IS\fR' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. 
+.TH PS 1B "9/5/05" "Heirloom Toolchest" "BSD System Compatibility" +.SH NAME +ps \- (BSD) process status +.SH SYNOPSIS +.HP +.ad l +.nh +\fB/usr/ucb/ps\fR +[\-\fBaAcglnrSuvwx\fR] +[\fB\-o\fI property\fR[\fB=\fR[\fItitle\fR]], ...\fR]\ ... +[\fB\-GptU\fI\ criteria\fR, ...]\ ... +[\fIpid\fR] +.br +.hy 1 +.ad b +.SH DESCRIPTION +.B Ps +prints certain indicia about active processes. +If no options are specified, +all processes on the current terminal +that are owned by the invoking user +and are not session leaders +are selected. +Specifying +.B \-a +selects other users' processes; +specifying +.B \-x +selects processes without controlling terminals. +The selection can further be changed +by adding +.I criteria +with the options below; +when a process satisfies any criterion, +it is selected. +A +.I criteria +string can consist +of multiple criteria +separated by blanks or commas. +If the +.I pid +argument is present, +output is restricted to the selected process. +.PP +By default, +.B ps +prints the process id, +controlling terminal device, +process status, +cumulative execution time +and command of processes. +.PP +The +.B ps +command accepts the following options: +.TP 12 +.B \-a +Selects all processes with a controlling terminal device +(including session leaders). +.TP 12 +.B \-c +Prints the command name, +as stored internally in the system for purposes of accounting, +rather than the command arguments, +which are kept in the process' address space. +This is more reliable, if less informative, +since the process is free to destroy the latter information. +.TP 12 +.B \-g +Includes session leaders in the output. +Without this option, +.B ps +only prints ``interesting'' processes. +Processes are deemed to be uninteresting +if they are session leaders. +This normally eliminates top-level command interpreters +and processes waiting for users to log in on free terminals. 
+.TP 12 +.B \-l +Long listing; +adds process flags, +numeric user id, +parent process id, cpu usage, +priority, nice value, +memory and resident set size in pages and the event waited for. +See below for the meaning of columns. +.TP 12 +.B \-n +For a user listing as selected with +.BR \-u , +print the numeric user ID instead of the user name. +.TP 12 +.B \-r +Causes only running processes to be printed. +.TP 12 +.B \-S +Includes the time used by children that have been waited for. +.TP 12 +\fB\-t\fI device\fR +Selects all processes with the current terminal +.IR device . +This may be the device name with either +.B /dev/ +or +.B /dev/tty +omitted, +as in +.I ttyS2 +or +.I S2 +for +.IR /dev/ttyS2 . +Giving +.B ? +as a device name selects processes with no controlling terminal. +.TP 12 +.B \-u +A user oriented output is produced; adds user name, +processor usage, memory and resident set size, +and the time when the process was started. +.TP 12 +.B \-v +A version of the output +containing virtual memory statistics is output; +adds memory and resident set size. +.TP 12 +.B \-w +Increase output width. +The default width of 80 columns +is set to 132 if this option is given once; +if given more than once, +the output width is unlimited. +.TP 12 +.B \-x +Include processes that have no controlling terminal. +.PP +The following options have been introduced by POSIX.2: +.TP 12 +.B \-A +Selects all processes. +.TP 12 +\fB\-G\fI gidlist\fR +Selects all processes that have one of the specified real \fIgroup ids\fR, +which may be symbolic or numeric. +.TP 12 +\fB\-o\fI property\fR[\fB=\fR[\fItitle\fR]],... +The output is changed to reflect the named +.IR property . +Multiple properties can be given, +separated by blanks or commas; +it is also possible to specify multiple +.B \-o +options. +Normally, the default property description is +written in the first output line. 
+If the +.B = +character is present, but the +.I title +is missing in all format specifications, +no descriptions are printed; +if a +.I title +is given, it is used instead of the default. +See below for valid +.I property +strings. +.TP 12 +\fB\-p\fI pidlist\fR +Selects all processes with one of the given \fIprocess ids\fR. +.TP 12 +\fB\-U\fI uidlist\fR +Selects all processes with one of the given real \fIuser ids\fR, +which may be symbolic or numeric. +.PP +The meaning of columns and column headings +are as follows: +.PP +.TS +l2 l s s +l2 l2 l4 l. +F T{ +Flags associated with the process +(octal and additive): +T} + 01 in core; + 02 system process; + 04 T{ +locked in core (e.g. for physical I/O); +T} + 10 being swapped; + 20 being traced by another process. +.T& +l2 l s s +l2 l2 l4 l. +S The state of the process: + R running; + S sleeping; + I intermediate; + Z terminated; + T stopped; + X allocating memory. +.T& +l2 l s s. +UID T{ +The effective user ID of the process owner. +T} +USER T{ +The name of the process owner, +based on the effective user ID. +T} +PID T{ +The process id of the process; +as in certain cults +it is possible to kill a process +if you know its true name. +T} +PPID The process ID of the parent process. +CP Processor utilization for scheduling. +PRI T{ +Priority. +High numbers mean low priority. +T} +NI Nice value, used in priority computation. +ADDR The core address of the process. +RSS T{ +The amount of memory in pages +currently present in core. +T} +SZ T{ +The size in pages of the core image of the process. +T} +WCHAN T{ +The event for which the process is waiting or sleeping; +if blank, the process is running. +T} +.\" Trailing no-break-spaces guarantee a minimum table width for nroff +.\" without restricting troff to select the same. +START The time when the process was started.\ \ \ \ \ \ \ \ \ \ \ \ \ +TT T{ +The controlling tty for the process. 
+T} +TIME T{ +The cumulative execution time for the process, +including its terminated children if \fB\-S\fR is present. +T} +COMMAND T{ +The command line; +with the \fB\-c\fR option, +the command name. +T} +.TE +.PP +A process that has exited and has a parent, +but has not yet been waited for by the parent +is marked +.IR <defunct> . +.PP +For the +.B \-o +option, the following properties +(listed with their default column headings) +can be given: +.PP +.TS +l2fB l2 l. +user USER Effective user name. +ruser RUSER Real user name. +group GROUP Effective group name. +rgroup RGROUP Real group name. +pid PID Process id. +ppid PPID Parent process id. +pgid PGID Process group id. +sid SID Session id. +class CLASS Scheduling class. +pcpu %CPU Processor usage in percent. +vsz VSZ Memory usage in kilobytes. +nice NI Nice value. +etime ELAPSED Time elapsed since the process was started. +time TIME Cumulative execution time. +tty TTY Controlling terminal device. +comm COMMAND The first command line argument. +args COMMAND Command line arguments separated by spaces. +f F Process flags. +s S Process state. +c C Processor utilization for scheduling. +uid UID Numeric effective user id. +ruid RUID Numeric real user id. +gid GID Numeric effective group id. +rgid RGID Numeric real group id. +pri PRI Priority; high numbers mean high priority. +opri PRI Priority; high numbers mean low priority. +psr PSR Processor. +addr ADDR Core address. +osz SZ Memory size in pages. +wchan WCHAN Event for which the process is waiting. +stime STIME Start time of the process. +rss RSS Resident set size in kilobytes. +pmem %MEM Memory usage in percent. +fname COMMAND T{ +.ad l +.nr ol \n(.l +.ll 39n +The first 16 characters of the executable file for the process. +.br +.ll \n(olu +.ad b +T} +.TE +.PP +For those properties that correspond to user or group names, +the numeric id is printed +if the name does not fit into the column width. 
+.SH "ENVIRONMENT VARIABLES" +.TP +.B COLUMNS +Overrides the default output width. +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_CTYPE +Determines the set of printable characters +and the character width. +Non-printable characters in arguments and file names +are dropped if writing to a terminal. +.TP +.B LC_TIME +Affects the format of date and time strings printed. +.SH FILES +.TP +.B /etc/passwd +Used for converting numeric and symbolic user ids. +.TP +.B /etc/group +Used for converting numeric and symbolic group ids. +.TP +.B /etc/default/ps +.TP +.B /proc/ +.TP +.B /dev/ +.SH "SEE ALSO" +nice(1), +priocntl(1), +kill(1), +proc(5), +locale(7) +.SH NOTES +Things can change while ps is running; +the picture it gives is only a close approximation to reality. diff --git a/ps/ps.c b/ps/ps.c @@ -0,0 +1,5043 @@ +/* + * ps - process status + * + * Gunnar Ritter, Freiburg i. Br., Germany, August 2002. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (S42) +static const char sccsid[] USED = "@(#)ps_s42.sl 2.115 (gritter) 12/16/07"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)ps_sus.sl 2.115 (gritter) 12/16/07"; +#elif defined (UCB) +static const char sccsid[] USED = "@(#)/usr/ucb/ps.sl 2.115 (gritter) 12/16/07"; +#else +static const char sccsid[] USED = "@(#)ps.sl 2.115 (gritter) 12/16/07"; +#endif + +static const char cacheid[] = "@(#)/tmp/ps_cache 2.115 (gritter) 12/16/07"; + +#if !defined (__linux__) && !defined (__sun) && !defined (__FreeBSD__) \ + && !defined (__DragonFly__) +#define _KMEMUSER +#endif /* !__linux__, !__sun, !__FreeBSD__, !__DragonFly__ */ +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/utsname.h> +#ifdef __GLIBC__ +#include <sys/sysmacros.h> +#endif +#include <fcntl.h> +#include <time.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <errno.h> +#include <libgen.h> +#include <alloca.h> +#include <dirent.h> +#include <limits.h> +#include <sched.h> +#include <pwd.h> +#include <grp.h> +#include <langinfo.h> +#include <locale.h> +#include <ctype.h> +#include <blank.h> +#include <inttypes.h> +#include <termios.h> +#if defined (__linux__) +#include <mntent.h> +#elif defined (__FreeBSD__) || defined (__DragonFly__) +#include <kvm.h> +#include <sys/param.h> +#include <sys/ucred.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <sys/resource.h> +#include <sys/sysctl.h> +#include <sys/user.h> +#define proc process +#undef p_pgid +#undef p_pctcpu +#if defined (__DragonFly__) +#endif /* __DragonFly__ */ +#elif defined (__hpux) +#include <mntent.h> +#include <sys/param.h> +#include <sys/pstat.h> +#elif defined (_AIX) +#include <mntent.h> +#include <procinfo.h> +#define proc process +#ifndef MNTTYPE_IGNORE +#define 
MNTTYPE_IGNORE "" +#endif +#elif defined (__NetBSD__) || defined (__OpenBSD__) +#include <kvm.h> +#include <sys/param.h> +#include <sys/sysctl.h> +#include <sys/mount.h> +#define proc process +#undef p_pgid +#if !defined (SRUN) && defined (LSRUN) +#define SRUN LSRUN +#endif +#if !defined (SSLEEP) && defined (LSSLEEP) +#define SSLEEP LSSLEEP +#endif +#if !defined (SDEAD) && defined (LSDEAD) +#define SDEAD LSDEAD +#endif +#if !defined (SONPROC) && defined (LSONPROC) +#define SONPROC LSONPROC +#endif +#if !defined (P_INMEM) && defined (L_INMEM) +#define P_INMEM L_INMEM +#endif +#if !defined (P_SINTR) && defined (L_SINTR) +#define P_SINTR L_SINTR +#endif +#ifndef SCHED_OTHER +#define SCHED_OTHER 1 +#endif +#elif defined (__APPLE__) +#include <sys/time.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/mount.h> +#include <sys/resource.h> +#include <mach/mach_types.h> +#include <mach/task_info.h> +#include <mach/shared_memory_server.h> +#define proc process +#undef p_pgid +#else /* SVR4 */ +#include <sys/mnttab.h> +#ifdef __sun +#define _STRUCTURED_PROC 1 +#endif /* __sun */ +#include <sys/procfs.h> +#include <sys/proc.h> +#undef p_pid +#undef p_wchan +#define proc process +#endif /* SVR4 */ +#include <wchar.h> +#include <wctype.h> +#ifndef TIOCGWINSZ +#include <sys/ioctl.h> +#endif + +#if __NetBSD_Version__ >= 300000000 +#include <sys/statvfs.h> +#define statfs statvfs +#endif + +#include <mbtowi.h> + +#ifdef __linux__ +#ifndef SCHED_BATCH +#define SCHED_BATCH 3 +#endif +#ifndef SCHED_ISO +#define SCHED_ISO 4 +#endif +#endif /* __linux__ */ + +#define PROCDIR "/proc" +#ifndef UCB +#define DEFUNCT "<defunct>" +#else /* UCB */ +#define DEFUNCT " <defunct>" +#endif /* UCB */ +#ifndef PRNODEV +#define PRNODEV 0 +#endif /* !PRNODEV */ +#define eq(a, b) (strcmp(a, b) == 0) + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif /* _IO_getc_unlocked */ +#ifdef _IO_putc_unlocked +#undef putchar +#define putchar(c) 
_IO_putc_unlocked(c, stdout) +#endif /* _IO_putc_unlocked */ +#endif /* __GLIBC__ */ + +#define next(wc, s, n) (mb_cur_max > 1 && *(s) & 0200 ? \ + ((n) = mbtowi(&(wc), (s), mb_cur_max), \ + (n) = ((n) > 0 ? (n) : (n) < 0 ? (wc=WEOF, 1) : 1)) :\ + ((wc) = *(s) & 0377, (n) = 1)) + +#ifndef _AIX +typedef uint32_t dev_type; +#else +typedef uint64_t dev_type; +#endif + +enum okay { + OKAY, + STOP +}; + +enum crtype { + CR_ALL, /* -e, -A */ + CR_ALL_WITH_TTY, /* -a */ + CR_ALL_BUT_SESSION_LEADERS, /* -d */ + CR_WITHOUT_TTY, /* UCB -gx */ + CR_NO_TTY_NO_SESSION_LEADER, /* UCB -x */ + CR_PROCESS_GROUP, /* traditional -g ... */ + CR_REAL_GID, /* -G ... */ + CR_PROCESS_ID, /* -p ... */ + CR_TTY_DEVICE, /* -t ... */ + CR_SESSION_LEADER, /* -s ..., SUS -g ... */ + CR_EFF_UID, /* SUS -u ... */ + CR_REAL_UID, /* -U ..., traditional -u ... */ + CR_ADD_UNINTERESTING, /* UCB -g */ + CR_INVALID_EFF_UID, /* invalid eff. uid but look for more */ + CR_INVALID_REAL_UID, /* invalid real uid but look for more */ + CR_INVALID_REAL_GID, /* invalid group but look for more */ + CR_INVALID_TTY_DEVICE, /* invalid tty, ignore */ + CR_INVALID_STOP, /* invalid criterion but stop later */ + CR_DEFAULT +}; + +enum outype { + OU_USER, + OU_RUSER, + OU_GROUP, + OU_RGROUP, + OU_PID, + OU_PPID, + OU_PGID, + OU_PCPU, + OU_VSZ, + OU_NICE, + OU_ETIME, + OU_OTIME, + OU_TIME, + OU_ACCUTIME, + OU_TTY, + OU_COMM, + OU_ARGS, + OU_F, + OU_S, + OU_C, + OU_UID, + OU_RUID, + OU_GID, + OU_RGID, + OU_SID, + OU_CLASS, + OU_PRI, + OU_OPRI, + OU_PSR, + OU_ADDR, + OU_OSZ, + OU_WCHAN, + OU_STIME, + OU_RSS, + OU_ORSS, + OU_PMEM, + OU_FNAME, + OU_LWP, + OU_NLWP, + OU_LTIME, + OU_STID, + OU_TID, + OU_NTP, + OU_MRSZ, + OU_PFLTS, + OU_BUFR, + OU_BUFW, + OU_MRCV, + OU_MSND, + OU_UTIME, + OU_KTIME, + OU_SPACE +}; + +enum { + FL_LOAD = 001, + FL_SYS = 002, + FL_LOCK = 004, + FL_SWAP = 010, + FL_TRC = 020, + FL_WTED = 040 +}; + +enum valtype { + VT_CHAR, + VT_INT, + VT_UINT, + VT_LONG, + VT_ULONG +}; + +union value { + char 
v_char; + int v_int; + unsigned int v_uint; + long v_long; + unsigned long v_ulong; +}; + +struct trenod { + struct trenod *t_lln; + struct trenod *t_rln; + char *t_str; + unsigned long t_num; +}; + +struct ditem { + struct ditem *d_lnk; + char *d_str; + dev_type d_rdev; +}; + +struct criterion { + struct criterion *c_nxt; + enum crtype c_typ; + unsigned long c_val; +}; + +struct output { + struct output *o_nxt; + enum outype o_typ; + char *o_nam; + int o_len; +}; + +static const struct { + enum outype os_typ; + char *os_fmt; + char *os_def; + enum { + OS_Lflag = 01 + } os_flags; +} outspec[] = { + { OU_USER, "user", " USER", 0 }, + { OU_RUSER, "ruser", " RUSER", 0 }, + { OU_GROUP, "group", " GROUP", 0 }, + { OU_RGROUP, "rgroup", " RGROUP", 0 }, + { OU_PID, "pid", " PID", 0 }, + { OU_PPID, "ppid", " PPID", 0 }, + { OU_PGID, "pgid", " PGID", 0 }, + { OU_PCPU, "pcpu", "%CPU", 0 }, + { OU_VSZ, "vsz", " VSZ", 0 }, + { OU_NICE, "nice", "NI", 0 }, + { OU_ETIME, "etime", " ELAPSED", 0 }, + { OU_TIME, "time", " TIME", 0 }, + { OU_ACCUTIME, "accutime", " TIME", 0 }, + { OU_OTIME, "otime", " TIME", 0 }, + { OU_TTY, "tty", "TT ", 0 }, + { OU_COMM, "comm", "COMMAND", 0 }, + { OU_ARGS, "args", "COMMAND", 0 }, + { OU_F, "f", " F", 0 }, + { OU_S, "s", "S", 0 }, + { OU_C, "c", " C", 0 }, + { OU_UID, "uid", " UID", 0 }, + { OU_RUID, "ruid", " RUID", 0 }, + { OU_GID, "gid", " GID", 0 }, + { OU_RGID, "rgid", " RGID", 0 }, + { OU_SID, "sid", " SID", 0 }, + { OU_CLASS, "class", " CLS", 0 }, + { OU_PRI, "pri", "PRI", 0 }, + { OU_OPRI, "opri", "PRI", 0 }, + { OU_PSR, "psr", "PSR", 0 }, + { OU_ADDR, "addr", " ADDR", 0 }, + { OU_OSZ, "osz", " SZ", 0 }, + { OU_WCHAN, "wchan", " WCHAN", 0 }, + { OU_STIME, "stime", " STIME", 0 }, + { OU_RSS, "rss", " RSS", 0 }, + { OU_ORSS, "orss", " RSS", 0 }, + { OU_PMEM, "pmem", "%MEM", 0 }, + { OU_FNAME, "fname", "COMMAND", 0 }, + { OU_LWP, "lwp", " LWP", OS_Lflag }, + { OU_NLWP, "nlwp", " NLWP", 0 }, + { OU_LTIME, "ltime", "LTIME", OS_Lflag }, + { 
OU_STID, "stid", " STID", OS_Lflag }, + { OU_TID, "tid", "TID", OS_Lflag }, + { OU_NTP, "ntp", "NTP", 0 }, + { OU_MRSZ, "mrsz", " MRSZ", 0 }, + { OU_PFLTS, "pflts", "PFLTS", 0 }, + { OU_BUFR, "bufr", " BUFR", 0 }, + { OU_BUFW, "bufw", " BUFW", 0 }, + { OU_MRCV, "mrcv", " MRCV", 0 }, + { OU_MSND, "msnd", " MSND", 0 }, + { OU_UTIME, "utime", " UTIME", 0 }, + { OU_KTIME, "ktime", " KTIME", 0 }, + { OU_SPACE, NULL, " ", 0 } +}; + +struct proc { + pid_t p_pid; /* process id */ + char p_fname[19]; /* executable name */ + char p_state[2]; /* process state */ + char p_lstate[2]; /* linux state */ + pid_t p_ppid; /* parent process id */ + pid_t p_pgid; /* process group */ + pid_t p_sid; /* session */ + pid_t p_lwp; /* LWP id */ + dev_type p_ttydev; /* tty device */ + unsigned long p_flag; /* process flags */ + unsigned long p_lflag; /* linux flags */ + time_t p_time; /* cpu time */ + time_t p_accutime; /* accumulated cpu time */ + time_t p_utime; /* user time */ + time_t p_ktime; /* kernel time */ + long p_intpri; /* priority value from /proc */ + long p_rtpri; /* rt_priority value from /proc */ + long p_policy; /* scheduling policy */ + int p_c; /* cpu usage for scheduling */ + int p_oldpri; /* old priority */ + int p_pri; /* new priority */ + int p_nice; /* nice value */ + int p_nlwp; /* number of LWPs */ + time_t p_start; /* start time */ + unsigned long p_size; /* size in kilobytes */ + unsigned long p_osz; /* size in pages */ + unsigned long p_rssize; /* rss size in kbytes */ + unsigned long p_orss; /* rss size in pages */ + unsigned long p_pflts; /* page faults */ + unsigned long p_bufr; /* buffer reads */ + unsigned long p_bufw; /* buffer writes */ + unsigned long p_mrcv; /* messages received */ + unsigned long p_msnd; /* messages sent */ + unsigned long p_addr; /* address */ + unsigned long p_wchan; /* wait channel */ + int p_psr; /* processor */ + double p_pctcpu; /* cpu percent */ + double p_pctmem; /* mem percent */ + char *p_clname; /* scheduling class */ + char 
p_comm[80]; /* first argument */ + char p_psargs[80]; /* process arguments */ + uid_t p_uid; /* real uid */ + uid_t p_euid; /* effective uid */ + gid_t p_gid; /* real gid */ + gid_t p_egid; /* effective gid */ +}; + +static unsigned errcnt; /* count of errors */ +static int Lflag; /* show LWPs */ +static int oflag; /* had -o switch */ +static const char *rflag; /* change root directory */ +static int ucb_rflag; /* running processes only */ +static int dohdr; /* output header */ +#undef hz +static long hz; /* clock ticks per second */ +static time_t now; /* current time */ +#ifdef __linux__ +static time_t uptime; +#endif /* __linux__ */ +#ifndef __sun +static unsigned long totalmem; +#endif /* !__sun */ +static unsigned long kbytes_per_page; +static unsigned long pagesize; +static uid_t myuid; /* real uid of ps */ +static uid_t myeuid; /* effective uid of ps */ +static int sched_selection; +static int maxcolumn; /* maximum terminal size */ +static int mb_cur_max; /* MB_CUR_MAX acceleration */ +static int ontty; /* running on a tty */ +static char *progname; /* argv[0] to main() */ +static struct proc myproc; /* struct proc for this ps instance */ + +static struct ditem **d0; /* dev_t to device name mapping */ +static struct criterion *c0; /* criteria list */ +static struct output *o0; /* output field list */ + +#ifdef __linux__ +static int linux_version[3] = { 2, 4, 0 }; +#endif /* !__linux__ */ + +#ifdef USE_PS_CACHE +static FILE *devfp; +static char *ps_cache_file = "/tmp/ps_cache"; +static mode_t ps_cache_mode = 0664; +static gid_t ps_cache_gid = 3; +#endif /* USE_PS_CACHE */ +static int dropprivs; + +static void postproc(struct proc *); +static enum okay selectproc(struct proc *); + +/************************************************************************ + * Utility functions * + ************************************************************************/ + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) 
{ + write(2, "no memory\n", 10); + exit(077); + } + return p; +} + +static void * +smalloc(size_t nbytes) +{ + return srealloc(NULL, nbytes); +} + +static char * +sstrdup(const char *op) +{ + char *np; + + np = smalloc(strlen(op) + 1); + strcpy(np, op); + return np; +} + +static void * +scalloc(size_t nmemb, size_t size) +{ + void *p; + + if ((p = (void *)calloc(nmemb, size)) == NULL) { + write(2, "no memory\n", 10); + exit(077); + } + return p; +} + +static FILE * +wopen(const char *fn) +{ + int fd; + char *tl, *dn, *fc; + + dn = dirname(fc = sstrdup(fn)); + tl = smalloc(strlen(dn) + 10); + strcpy(tl, dn); + strcat(tl, "/psXXXXXX"); + free(fc); + if ((fd = mkstemp(tl)) < 0) + return NULL; + if (rename(tl, fn) < 0) { + unlink(tl); + free(tl); + close(fd); + return NULL; + } + free(tl); + return fdopen(fd, "w"); +} + +static struct trenod * +treget(unsigned long num, struct trenod **troot) +{ + long long c; + struct trenod *tp = *troot; + + while (tp != NULL) { + if ((c = num - tp->t_num) == 0) + break; + else if (c < 0) + tp = tp->t_lln; + else + tp = tp->t_rln; + } + return tp; +} + +static void +treput(struct trenod *tk, struct trenod **troot) +{ + if (*troot) { + long long c; + struct trenod *tp = *troot, *tq = NULL; + + while (tp != NULL) { + tq = tp; + if ((c = tk->t_num - tp->t_num) == 0) + return; + else if (c < 0) + tp = tp->t_lln; + else + tp = tp->t_rln; + } + if (tq != NULL) { + if ((c = tk->t_num - tq->t_num) < 0) + tq->t_lln = tk; + else + tq->t_rln = tk; + } + } else + *troot = tk; +} + +#define dhash(c) ((uint32_t)(2654435769U * (uint32_t)(c) >> 24)) + +static struct ditem * +dlook(dev_type rdev, struct ditem **dt, char *str) +{ + struct ditem *dp; + int h; + + dp = dt[h = dhash(rdev)]; + while (dp != NULL) { + if (dp->d_rdev == rdev) + break; + dp = dp->d_lnk; + } + if (str != NULL && dp == NULL) { + dp = scalloc(1, sizeof *dp); + dp->d_rdev = rdev; + dp->d_str = str; + dp->d_lnk = dt[h]; + dt[h] = dp; + } + return dp; +} + +#if !defined (__hpux) && 
!defined (_AIX) && !defined (__NetBSD__) && \ + !defined (__OpenBSD__) && !defined (__APPLE__) +static void +chdir_to_proc(void) +{ + static int fd = -1; + + if (fd == -1 && (fd = open(PROCDIR, O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s\n", progname, PROCDIR); + exit(075); + } + if (fchdir(fd) < 0) { + fprintf(stderr, "%s: cannot chdir to %s\n", progname, PROCDIR); + exit(074); + } +} +#endif /* !__hpux, !_AIX, !__NetBSD__, !__OpenBSD__, !__APPLE__ */ + +static union value * +getval(char **listp, enum valtype type, int separator, int sep2) +{ + char *buf; + static union value v; + const char *cp, *op; + char *cq, *x; + + if (**listp == '\0') + return NULL; + op = *listp; + while (**listp != '\0') { + if ((separator == ' ' ? isspace(**listp) : **listp == separator) + || **listp == sep2) + break; + (*listp)++; + } + buf = alloca(*listp - op + 1); + for (cp = op, cq = buf; cp < *listp; cp++, cq++) + *cq = *cp; + *cq = '\0'; + if (**listp) { + while ((separator == ' ' ? + isspace(**listp) : **listp == separator) || + **listp == sep2) + (*listp)++; + } + switch (type) { + case VT_CHAR: + if (buf[1] != '\0') + return NULL; + v.v_char = buf[0]; + break; + case VT_INT: + v.v_int = strtol(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + case VT_UINT: + v.v_uint = strtoul(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + case VT_LONG: + v.v_long = strtol(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + case VT_ULONG: + v.v_ulong = strtoul(buf, &x, 10); + if (*x != '\0') + return NULL; + break; + } + return &v; +} + +#ifdef __linux__ +static int +linux_version_lt(int version, int patchlevel, int sublevel) +{ + if (linux_version[0] < version) + return 1; + if (linux_version[0] == version) { + if (linux_version[1] < patchlevel) + return 1; + if (patchlevel == linux_version[1] && + linux_version[2] < sublevel) + return 1; + } + return 0; +} + +static int +has_o1_sched(void) +{ + struct stat st; + + if (sched_selection) + return 
sched_selection > 0; + return stat("/proc/sys/sched", &st) == 0; +} +#endif /* __linux__ */ + +static int +hasnonprint(const char *s) +{ + wint_t wc; + int n; + + while (*s) { + next(wc, s, n); + if (mb_cur_max > 1 ? !iswprint(wc) : !isprint(wc)) + return 1; + s += n; + } + return 0; +} + +static int +colwidth(const char *s) +{ + wint_t wc; + int i, n, w = 0; + + while (*s) { + next(wc, s, n); + s += n; + if (mb_cur_max > 1) + i = iswprint(wc) ? wcwidth(wc) : 0; + else + i = isprint(wc) != 0; + w += i; + } + return w; +} + +static void +cleanline(struct proc *p) +{ + /* + * If the argument list contains a nonprintable character, + * replace it with the file name even if output is not a + * terminal. + */ + if (*p->p_psargs == '\0' || hasnonprint(p->p_psargs)) { + if (p->p_size == 0 && *p->p_psargs == '\0') + strcpy(p->p_psargs, p->p_fname); + else + snprintf(p->p_psargs, sizeof p->p_psargs, "[ %.8s ]", + p->p_fname); + strcpy(p->p_comm, p->p_psargs); + } +} + +/************************************************************************ + * Execution * + ************************************************************************/ + +static void +putheader(void) +{ + struct output *o; + unsigned i; + + for (o = o0; o; o = o->o_nxt) { + if (*o->o_nam == '\0') { + for (i = 0; i < o->o_len; i++) + putchar(' '); + } else + fputs(o->o_nam, stdout); + if (o->o_nxt && o->o_typ != OU_SPACE) + putchar(' '); + } + putchar('\n'); +} + +/* + * Print a string, not exceeding the maximum output width, but with at least + * minimum columns. Drop nonprintable characters if printing to a terminal. + */ +static int +putstr(int width, int minimum, int maximum, const char *s) +{ + wint_t wc; + int written = 0, n, cw; + + while (next(wc, s, n), cw = wcwidth(wc), cw = cw >= 0 ? cw : 1, + wc != '\0' && + (maxcolumn == 0 || width + cw <= maxcolumn) && + (maximum == 0 || written + cw <= maximum)) { + if (!ontty || (mb_cur_max > 1 ? 
iswprint(wc) : isprint(wc))) { + while (n--) { + putchar(*s); + s++; + } + written += cw; + width += cw; + } else + s += n; + } + while ((maxcolumn == 0 || width < maxcolumn) && written < minimum && + (maximum == 0 || written < maximum)) { + putchar(' '); + written++; + } + return written; +} + +/* + * Print a hexadecimal value with a maximum width, preceded by spaces + * if it is short. + * + * This is used for ADDR and WCHAN. Truncating the addresses to keep + * the display columns in order makes sense here since ADDR serves no + * known purpose anymore, and for WCHAN only the lower part of the + * address is relevant. + */ +static int +putxd(int width, unsigned long val) +{ + const char digits[] = "0123456789abcdef"; + char *buf = alloca(width); + int m, n = width; + + do { + buf[--n] = digits[val & 0xf]; + val >>= 4; + } while (val != 0 && n > 0); + for (m = 0; m < n; m++) + putchar(' '); + do + putchar(buf[n]); + while (++n < width); + return width; +} + +static int +putid(unsigned long val, unsigned len, struct trenod **troot, + char *(*func)(unsigned long)) +{ + struct trenod *tp; + char *str; + + if ((tp = treget(val, troot)) == NULL) { + if ((str = func(val)) != NULL) { + tp = scalloc(1, sizeof *tp); + tp->t_str = smalloc(strlen(str) + 1); + strcpy(tp->t_str, str); + tp->t_num = val; + treput(tp, troot); + } else + numeric: +#ifdef UCB + return printf("%-*lu", len, val); +#else + return printf("%*lu", len, val); +#endif + } + if (oflag && colwidth(tp->t_str) > len) + goto numeric; +#ifdef UCB + return printf("%-*s", len, tp->t_str); +#else + return printf("%*s", len, tp->t_str); +#endif +} + +static char * +get_username_from_pwd(unsigned long uid) +{ + struct passwd *pwd; + + if ((pwd = getpwuid(uid)) != NULL) + return pwd->pw_name; + return NULL; +} + +static char * +get_groupname_from_grp(unsigned long gid) +{ + struct group *grp; + + if ((grp = getgrgid(gid)) != NULL) + return grp->gr_name; + return NULL; +} + +static int +putuser(uid_t uid, unsigned 
len) +{ + static struct trenod *u0; + + return putid(uid, len, &u0, get_username_from_pwd); +} + +static int +putgroup(gid_t gid, unsigned len) +{ + static struct trenod *g0; + + return putid(gid, len, &g0, get_groupname_from_grp); +} + +static int +putdev(dev_type dev, unsigned len) +{ + struct ditem *d; + char *nam; + + if (dev != (dev_type)PRNODEV) { + if ((d = dlook(dev, d0, NULL)) != NULL) + nam = d->d_str; + else + nam = "??"; + } else + nam = "?"; + return printf("%-*s", len, nam); +} + +static int +time2(long t, unsigned len, int format) +{ + char buf[40]; + int days, hours, minutes, seconds; + + if (t < 0) + t = 0; + if (format == 2) + snprintf(buf, sizeof buf, "%2lu:%02lu.%ld", t / 600, + (t/10) % 60, + t % 10); + else if (format == 1) + snprintf(buf, sizeof buf, "%2lu:%02lu", t / 60, t % 60); + else { + days = t / 86400; + t %= 86400; + hours = t / 3600; + t %= 3600; + minutes = t / 60; + t %= 60; + seconds = t; + if (days) + snprintf(buf, sizeof buf, "%02u-:%02u:%02u:%02u", + days, hours, minutes, seconds); + else + snprintf(buf, sizeof buf, "%02u:%02u:%02u", + hours, minutes, seconds); + } + return printf("%*s", len, buf); +} + +static int +time3(time_t t, unsigned len) +{ + struct tm *tp; + int sz = 8, width = 0; + + while (sz++ < len) { + putchar(' '); + width++; + } + tp = localtime(&t); + if (now > t && now - t > 86400) { + nl_item val; + + switch (tp->tm_mon) { + case 0: val = ABMON_1; break; + case 1: val = ABMON_2; break; + case 2: val = ABMON_3; break; + case 3: val = ABMON_4; break; + case 4: val = ABMON_5; break; + case 5: val = ABMON_6; break; + case 6: val = ABMON_7; break; + case 7: val = ABMON_8; break; + case 8: val = ABMON_9; break; + case 9: val = ABMON_10; break; + case 10: val = ABMON_11; break; + case 11: val = ABMON_12; break; + default: val = ABMON_12; /* won't happen anyway */ + } + width += printf(" %s %02u", nl_langinfo(val), tp->tm_mday); + } else + width += printf("%02u:%02u:%02u", + tp->tm_hour, tp->tm_min, tp->tm_sec); + 
return width; +} + +#define ZOMBIE(a) (p->p_lstate[0] != 'Z' ? (a) : \ + printf("%-*s", o->o_len, oflag ? "-" : " ")) + +static void +outproc(struct proc *p) +{ + struct output *o; + int width = 0; + + for (o = o0; o; o = o->o_nxt) { + switch (o->o_typ) { + case OU_USER: + width += putuser(p->p_euid, o->o_len); + break; + case OU_RUSER: + width += putuser(p->p_uid, o->o_len); + break; + case OU_RGROUP: + width += putgroup(p->p_gid, o->o_len); + break; + case OU_GROUP: + width += putgroup(p->p_egid, o->o_len); + break; + case OU_PID: + width += printf("%*u", o->o_len, (int)p->p_pid); + break; + case OU_PPID: + width += printf("%*u", o->o_len, (int)p->p_ppid); + break; + case OU_PGID: + width += printf("%*u", o->o_len, (int)p->p_pgid); + break; + case OU_LWP: + case OU_STID: + width += ZOMBIE(printf("%*u", o->o_len, (int)p->p_lwp)); + break; + case OU_PCPU: + width += printf("%*.1f", o->o_len, p->p_pctcpu); + break; + case OU_VSZ: + width += ZOMBIE(printf("%*lu", o->o_len, + (long)p->p_size)); + break; + case OU_NICE: + if (p->p_policy == SCHED_OTHER && p->p_pid != 0) { + width += ZOMBIE(printf("%*d", o->o_len, + (int)p->p_nice)); + } else { + width += ZOMBIE(printf("%*.*s", + o->o_len, o->o_len, + p->p_clname)); + } + break; + case OU_NLWP: + width += ZOMBIE(printf("%*u", o->o_len, p->p_nlwp)); + break; + case OU_NTP: + width += ZOMBIE(printf("%*u", o->o_len, + p->p_nlwp > 1 ? 
p->p_nlwp : 0)); + break; + case OU_TID: + width += ZOMBIE(printf("%*s", o->o_len, "-")); + break; + case OU_ETIME: + width += time2(now - p->p_start, o->o_len, 0); + break; + case OU_TTY: + width += ZOMBIE(putdev(p->p_ttydev, o->o_len)); + break; + case OU_LTIME: + case OU_OTIME: + width += time2(p->p_time, o->o_len, 1); + break; + case OU_TIME: + width += time2(p->p_time, o->o_len, 0); + break; + case OU_ACCUTIME: + width += time2(p->p_accutime, o->o_len, 1); + break; + case OU_UTIME: + width += time2(p->p_utime, o->o_len, 2); + break; + case OU_KTIME: + width += time2(p->p_ktime, o->o_len, 2); + break; + case OU_COMM: + width += putstr(width, o->o_nxt ? o->o_len : 0, 0, + p->p_lstate[0] != 'Z' ? + p->p_comm : DEFUNCT); + break; + case OU_ARGS: + width += putstr(width, o->o_nxt ? o->o_len : 0, 0, + p->p_lstate[0] != 'Z' ? + p->p_psargs : DEFUNCT); + break; + case OU_F: + width += printf("%*o", o->o_len, + (int)(p->p_flag & 077)); + break; + case OU_S: + width += printf("%*s", o->o_len, p->p_state); + break; + case OU_C: + width += printf("%*d", o->o_len, p->p_c); + break; + case OU_UID: + width += printf("%*u", o->o_len, (int)p->p_euid); + break; + case OU_RUID: + width += printf("%*u", o->o_len, (int)p->p_uid); + break; + case OU_GID: + width += printf("%*u", o->o_len, (int)p->p_egid); + break; + case OU_RGID: + width += printf("%*u", o->o_len, (int)p->p_gid); + break; + case OU_SID: + width += printf("%*u", o->o_len, (int)p->p_sid); + break; + case OU_CLASS: + width += ZOMBIE(printf("%*s", o->o_len, p->p_clname)); + break; + case OU_PRI: + width += ZOMBIE(printf("%*d", o->o_len, (int)p->p_pri)); + break; + case OU_OPRI: + width += ZOMBIE(printf("%*d", o->o_len, + (int)p->p_oldpri)); + break; + case OU_PSR: + width += printf("%*d", o->o_len, (int)p->p_psr); + break; + case OU_ADDR: + width += ZOMBIE(putxd(o->o_len, (long)p->p_addr)); + break; + case OU_OSZ: + width += ZOMBIE(printf("%*lu", o->o_len, + (long)p->p_osz)); + break; + case OU_WCHAN: + if 
(p->p_lstate[0] == 'S' || p->p_lstate[0] == 'X' || + p->p_lstate[0] == 'D') + width += putxd(o->o_len, (long)p->p_wchan); + else + width += printf("%*s", o->o_len, " "); + break; + case OU_STIME: + width += ZOMBIE(time3(p->p_start, o->o_len)); + break; + case OU_RSS: + width += ZOMBIE(printf("%*lu", o->o_len, + (long)p->p_rssize)); + break; + case OU_ORSS: + case OU_MRSZ: + width += ZOMBIE(printf("%*lu", o->o_len, + (long)p->p_orss)); + break; + case OU_PMEM: + width += printf("%*.1f", o->o_len, p->p_pctmem); + break; + case OU_PFLTS: + width += printf("%*lu", o->o_len, p->p_pflts); + break; + case OU_BUFW: + width += printf("%*lu", o->o_len, p->p_bufw); + break; + case OU_BUFR: + width += printf("%*lu", o->o_len, p->p_bufr); + break; + case OU_MRCV: + width += printf("%*lu", o->o_len, p->p_mrcv); + break; + case OU_MSND: + width += printf("%*lu", o->o_len, p->p_msnd); + break; + case OU_FNAME: + width += putstr(width, o->o_nxt ? o->o_len : 0, +#ifndef UCB + p->p_lstate[0] != 'Z' ? 8 : 9, +#else /* UCB */ + 16, +#endif /* UCB */ + p->p_lstate[0] != 'Z' ? + p->p_fname : DEFUNCT); + break; + case OU_SPACE: + if (o->o_len > 1) + width += printf("%*s", o->o_len - 1, ""); + break; + } + if (o->o_nxt) { + putchar(' '); + width++; + } + } + putchar('\n'); +} + +#if !defined (__hpux) && !defined (_AIX) && !defined (__NetBSD__) && \ + !defined (__OpenBSD__) && !defined (__APPLE__) + +#if defined (__linux__) || defined (__FreeBSD__) || defined (__DragonFly__) +#define GETVAL_REQ(a) if ((v = getval(&cp, (a), ' ', 0)) == NULL) \ + return STOP + +#define GETVAL_OPT(a) if ((v = getval(&cp, (a), ' ', 0)) == NULL) \ + goto complete + +#define GETVAL_COMMA(a) if ((v = getval(&cp, (a), ' ', ',')) == NULL) \ + return STOP +#endif /* __linux__ || __FreeBSD__ || __DragonFly__ */ + +#if defined (__linux__) +static void +get_linux_version(void) +{ + struct utsname ut; + char *x; + long val; + + if (uname(&ut) == 0) { + if ((val = strtol(ut.release, &x, 10)) > 0 && + (*x == '.' 
|| *x == '\0')) { + linux_version[0] = val; + if (*x && (val = strtol(&x[1], &x, 10)) >= 0 && + (*x == '.' || *x == '\0')) { + linux_version[1] = val; + if (*x && (val = strtol(&x[1], &x, 10)) >= 0) + linux_version[2] = val; + } + } + } +} + +static time_t +sysup(void) +{ + FILE *fp; + char buf[32]; + char *cp; + union value *v; + time_t s = 0; + + if ((fp = fopen("uptime", "r")) == NULL) + return 0; + if (fread(buf, 1, sizeof buf, fp) > 0) { + cp = buf; + if ((v = getval(&cp, VT_ULONG, '.', 0)) != NULL) + s = v->v_ulong; + } + fclose(fp); + return s; +} + +static unsigned long +getmem(void) +{ + FILE *fp; + char line[LINE_MAX]; + char *cp; + union value *v; + unsigned long mem = 1; + + if ((fp = fopen("meminfo", "r")) == NULL) + return 0; + while (fgets(line, sizeof line, fp) != NULL) { + if (strncmp(line, "MemTotal:", 9) == 0) { + cp = &line[9]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_ULONG, ' ', 0)) != NULL) + mem = v->v_ulong; + break; + } + } + fclose(fp); + return mem; +} + +static time_t +hz2time(long val, int mult) +{ + long long t; + + t = val * mult / hz; + if ((val * mult) % hz >= (hz >> 1)) + t++; + return t; +} + +static void (*compute_priority)(struct proc *); + +/* + * Calculate reasonable values for priority fields using all we can get + * from /proc in Linux 2.4: a crippled counter (in p->intpri) and the + * nice value. + */ +static void +compute_priority_old(struct proc *p) +{ + static int def_counter, scale, max_goodness; + int full_counter, counter, goodness; + + /* + * This is based on the computations in linux/sched.c, 2.4.19. + */ + if (def_counter == 0) { + def_counter = 10 * hz / 100; + if (hz < 200) + scale = 4; + else if (hz < 400) + scale = 3; + else if (hz < 800) + scale = 2; + else if (hz < 1600) + scale = 1; + else + scale = 0; + max_goodness = (((40 << 3) >> scale) + 2) + 40; + } + full_counter = (((40 - p->p_nice) << 3) >> scale) + 2; + /* + * Try to reverse the computation in linux/fs/proc/array.c, + * 2.4.19. 
+ */ + counter = (def_counter * (20 - p->p_intpri)) / 10; + /* + * This can apparently happen if the command is in its first + * timeslice after a lower nice value has been set. + */ + if (counter > full_counter) + counter = full_counter; + /* + * This approximation is even worse, as we cannot know about + * PROC_CHANGE_PENALTY and MM. + */ + if ((goodness = counter) > 0) + goodness += 40 - p->p_nice; + /* + * Keep all priorities for -c below 60 and with higher + * priorities for higher numbers. + */ + p->p_pri = goodness * 59 / max_goodness; + /* + * Old-style priorities start at 60 and have lower numbers + * for higher priorities. + */ + p->p_oldpri = 119 - p->p_pri; + /* + * Our counter emulation can be wrong by 2 in the worst + * case. If the process is not currently on a run queue, + * assume it did not use the CPU at all. + */ + p->p_c = full_counter - counter; + if (p->p_lstate[0] != 'R' && p->p_c <= 2) + p->p_c = 0; + /* + * The value for C still depends on the nice value. Make 80 + * the highest possible C value for all nice values. + */ + p->p_c *= 80 / full_counter; +} + +/* + * Priority calculation for Linux 2.5 and (hopefully) above, based + * on 2.5.31. This supplies a sensible priority value, but originally + * nothing we could use to compute "CPU usage for scheduling". More + * recent 2.6 versions have a SleepAVG field in the "status" file. + */ +static void +compute_priority_new(struct proc *p) +{ + if (p->p_rtpri) { + p->p_pri = 100 + p->p_rtpri; + p->p_oldpri = 60 - (p->p_rtpri >> 1); + } else { + p->p_pri = 40 - p->p_intpri; + p->p_oldpri = 60 + p->p_intpri + (p->p_intpri >> 1); + } +} + +static void +compute_various(struct proc *p) +{ + /* + * All dead processes are considered zombies by us. + */ + if (p->p_lstate[0] == 'X') + p->p_lstate[0] = 'Z'; + /* + * Set System V style status. There seems no method to + * determine 'O' (not only on run queue, but actually + * running). 
+ */ + if (p->p_lstate[0] == 'D' || p->p_lstate[0] == 'W') + p->p_state[0] = 'S'; + else + p->p_state[0] = p->p_lstate[0]; +#ifdef notdef + /* + * Process flags vary too much between real and vendor kernels + * and there's no method to distinguish them - don't use. + */ + if (p->p_lflag & 0x00000002) /* PF_STARTING */ + p->p_state[0] = 'I'; + else if (p->p_lflag & 0x00000800) /* PF_MEMALLOC */ + p->p_state[0] = 'X'; +#endif /* notdef */ + /* + * Set v7 / System III style flags. + */ + if (p->p_lstate[0] != 'Z') { + if (p->p_flag & FL_SYS || p->p_rssize != 0) + p->p_flag |= FL_LOAD; /* cf. statm processing */ + else + p->p_flag |= FL_SWAP; /* no rss -> swapped */ + if (p->p_lstate[0] == 'D') { + p->p_flag |= FL_LOCK; + p->p_flag &= ~FL_SWAP; + } else if (p->p_lstate[0] == 'W') + p->p_flag |= FL_SWAP; + /*if (p->p_lflag & 0x10) obsolete, doesn't work + p->p_flag |= FL_TRC;*/ + } +} + +static enum okay +getproc_stat(struct proc *p, pid_t expected_pid) +{ + static char *buf; + static size_t buflen; + union value *v; + FILE *fp; + char *cp, *cq, *ce; + size_t sz, sc; + unsigned long lval; + /* + * There is no direct method to determine if something is a system + * process. We consider a process a system process if a certain set + * of criteria is entirely zero. 
+ */ + unsigned long sysfl = 0; + + if ((fp = fopen("stat", "r")) == NULL) + return STOP; + for (cp = buf; ;) { + const unsigned chunk = 32; + + if (buflen < (sz = cp - buf + chunk)) { + sc = cp - buf; + buf = srealloc(buf, buflen = sz); + cp = &buf[sc]; + } + if ((sz = fread(cp, 1, chunk, fp)) < chunk) { + ce = &cp[sz - 1]; + break; + } + cp += chunk; + } + fclose(fp); + if (*ce != '\n') + return STOP; + *ce-- = '\0'; + cp = buf; + /* pid */ + GETVAL_REQ(VT_INT); + if ((p->p_pid = v->v_int) != expected_pid) + return STOP; + if (*cp++ != '(') + return STOP; + for (cq = ce; cq >= cp && *cq != ')'; cq--); + if (cq < cp) + return STOP; + *cq = '\0'; + strncpy(p->p_fname, cp, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + cp = &cq[1]; + while (isspace(*cp)) + cp++; + /* state */ + GETVAL_REQ(VT_CHAR); + p->p_lstate[0] = v->v_char; + sysfl |= v->v_char == 'Z'; + /* ppid */ + GETVAL_REQ(VT_INT); + p->p_ppid = v->v_int; + /* pgrp */ + GETVAL_REQ(VT_INT); + p->p_pgid = v->v_int; + /* session */ + GETVAL_REQ(VT_INT); + p->p_sid = v->v_int; + /* tty_nr */ + GETVAL_REQ(VT_INT); + p->p_ttydev = v->v_int; + sysfl |= v->v_int; + /* tty_pgrp */ + GETVAL_REQ(VT_INT); + /* flags */ + GETVAL_REQ(VT_ULONG); + p->p_lflag = v->v_ulong; + /* minflt */ + GETVAL_REQ(VT_ULONG); + /* cminflt */ + GETVAL_REQ(VT_ULONG); + /* majflt */ + GETVAL_REQ(VT_ULONG); + p->p_pflts = v->v_ulong; + /* cmajflt */ + GETVAL_REQ(VT_ULONG); + /* utime */ + GETVAL_REQ(VT_ULONG); + lval = v->v_ulong; + p->p_utime = hz2time(lval, 10); + sysfl |= v->v_ulong; + /* stime */ + GETVAL_REQ(VT_ULONG); + p->p_ktime = hz2time(v->v_ulong, 10); + lval += v->v_ulong; + p->p_time = hz2time(lval, 1); + /* cutime */ + GETVAL_REQ(VT_LONG); + lval += v->v_ulong; + /* cstime */ + GETVAL_REQ(VT_LONG); + lval += v->v_ulong; + p->p_accutime += hz2time(lval, 1); + /* priority */ + GETVAL_REQ(VT_LONG); + p->p_intpri = v->v_long; + /* nice */ + GETVAL_REQ(VT_LONG); + p->p_nice = v->v_long + 20; + /* timeout */ + 
GETVAL_REQ(VT_LONG); + /* itrealvalue */ + GETVAL_REQ(VT_LONG); + /* starttime */ + GETVAL_REQ(VT_ULONG); + p->p_start = hz2time(v->v_ulong, 1) + now - uptime; + /* vsize */ + GETVAL_REQ(VT_ULONG); + p->p_size = (v->v_ulong >> 10); + p->p_osz = v->v_ulong / pagesize; + sysfl |= v->v_ulong; + /* rss */ + GETVAL_REQ(VT_LONG); + p->p_orss = v->v_long; + p->p_rssize = v->v_long * kbytes_per_page; + sysfl |= v->v_ulong; + /* rlim */ + GETVAL_REQ(VT_ULONG); + /* startcode */ + GETVAL_REQ(VT_ULONG); + p->p_addr = v->v_ulong; + sysfl |= v->v_ulong; + /* endcode */ + GETVAL_REQ(VT_ULONG); + sysfl |= v->v_ulong; + /* startstack */ + GETVAL_REQ(VT_ULONG); + sysfl |= v->v_ulong; + /* kstkesp */ + GETVAL_REQ(VT_ULONG); + /* kstkeip */ + GETVAL_REQ(VT_ULONG); + /* signal */ + GETVAL_REQ(VT_ULONG); + /* blocked */ + GETVAL_REQ(VT_ULONG); + /* sigignore */ + GETVAL_REQ(VT_ULONG); + /* sigcatch */ + GETVAL_REQ(VT_ULONG); + /* wchan */ + GETVAL_REQ(VT_ULONG); + p->p_wchan = v->v_ulong; + /* + * These appeared in later Linux versions, so they are not + * required to be present. 
+ */ + p->p_policy = -1; /* initialize to invalid values */ + /* nswap */ + GETVAL_OPT(VT_ULONG); + /* cnswap */ + GETVAL_OPT(VT_ULONG); + /* exit_signal */ + GETVAL_OPT(VT_INT); + /* processor */ + GETVAL_OPT(VT_INT); + p->p_psr = v->v_int; + /* rt_priority */ + GETVAL_OPT(VT_ULONG); + p->p_rtpri = v->v_ulong; + /* policy */ + GETVAL_OPT(VT_ULONG); + p->p_policy = v->v_ulong; +complete: + if (sysfl == 0) + p->p_flag |= FL_SYS; + compute_various(p); + return OKAY; +} + +static enum okay +getproc_scheduler(struct proc *p) +{ + struct sched_param s; + + if (p->p_policy == -1) /* Linux 2.4 and below */ + p->p_policy = sched_getscheduler(p->p_pid); + switch (p->p_policy) { + case SCHED_FIFO: + case SCHED_RR: + switch (p->p_policy) { + case SCHED_FIFO: p->p_clname = "FF"; break; +#ifdef S42 + case SCHED_RR: p->p_clname = "FP"; break; +#else + case SCHED_RR: p->p_clname = "RT"; break; +#endif + } + if (p->p_rtpri == 0 && sched_getparam(p->p_pid, &s) == 0) { + p->p_rtpri = s.sched_priority; + /* Linux 2.4 and below */ + p->p_pri = 100 + s.sched_priority; + } + break; + case SCHED_OTHER: + p->p_clname = "TS"; + break; +#ifdef SCHED_BATCH + case SCHED_BATCH: + p->p_clname = "B"; + break; +#endif /* SCHED_BATCH */ +#ifdef SCHED_ISO + case SCHED_ISO: + p->p_clname = "ISO"; + break; +#endif /* SCHED_ISO */ + default: + p->p_clname = "??"; + } + compute_priority(p); + return OKAY; +} + +static enum okay +getproc_cmdline(struct proc *p) +{ + FILE *fp; + char *cp, *cq, *ce; + int hadzero = 0, c; + + if ((fp = fopen("cmdline", "r")) != NULL) { + cp = p->p_psargs; + cq = p->p_comm; + ce = cp + sizeof p->p_psargs - 1; + while (cp < ce && (c = getc(fp)) != EOF) { + if (c != '\0') { + if (hadzero) { + *cp++ = ' '; + if (cp == ce) + break; + hadzero = 0; + } + *cp++ = c; + if (cq) + *cq++ = c; + } else { + hadzero = 1; + if (cq) { + *cq = c; + cq = NULL; + } + } + } + *cp = '\0'; + if (cq) + *cq = '\0'; + fclose(fp); + } + return OKAY; +} + +static enum okay +getproc_status(struct proc 
*p) +{ + char line[LINE_MAX]; + union value *v; + FILE *fp; + char *cp; + int scanr; + + if ((fp = fopen("status", "r")) == NULL) + return STOP; + scanr = 0; + while (fgets(line, sizeof line, fp) != NULL) { + if (strncmp(line, "Uid:", 4) == 0) { + cp = &line[4]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_uid = v->v_int; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_euid = v->v_int; + scanr++; + } else if (strncmp(line, "Gid:", 4) == 0) { + cp = &line[4]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_gid = v->v_int; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_egid = v->v_int; + scanr++; + } else if (strncmp(line, "Threads:", 8) == 0) { + cp = &line[8]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_nlwp = v->v_int; + } else if (strncmp(line, "Pid:", 4) == 0) { + cp = &line[4]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, ' ', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_lwp = v->v_int; + } else if (strncmp(line, "SleepAVG:", 9) == 0) { + cp = &line[9]; + while (isspace(*cp)) + cp++; + if ((v = getval(&cp, VT_INT, '%', 0)) == NULL) { + fclose(fp); + return STOP; + } + p->p_c = (100 - v->v_int) * 80 / 100; + } + } + fclose(fp); + if (scanr != 2) + return STOP; + return OKAY; +} + +static enum okay +getproc_statm(struct proc *p) +{ + char line[LINE_MAX]; + union value *v; + FILE *fp; + char *cp; + unsigned long trs, drs, dt; + + if ((fp = fopen("statm", "r")) == NULL) + return OKAY; /* not crucial */ + if (fgets(line, sizeof line, fp) != NULL) { + cp = line; + if ((v = getval(&cp, VT_LONG, ' ', 0)) == NULL) /* size */ + goto out; + if ((v = getval(&cp, VT_LONG, ' ', 0)) == NULL) /* resident */ + goto out; + if ((v = 
getval(&cp, VT_LONG, ' ', 0)) == NULL) /* share */ + goto out; + if ((v = getval(&cp, VT_LONG, ' ', 0)) == NULL) /* trs */ + goto out; + trs = v->v_long; + if ((v = getval(&cp, VT_LONG, ' ', 0)) == NULL) /* drs */ + goto out; + drs = v->v_long; + if ((v = getval(&cp, VT_LONG, ' ', 0)) == NULL) /* lrs */ + goto out; + if ((v = getval(&cp, VT_LONG, ' ', 0)) == NULL) /* dt */ + goto out; + dt = v->v_long; + /* + * A process is considered to be swapped out if it has + * neither resident non-library text, data, nor dirty + * pages. A system process is always considered to be + * in core. + */ + if (trs + drs + dt == 0 && + (p->p_flag&(FL_LOAD|FL_SYS|FL_LOCK))==FL_LOAD) { + p->p_flag &= ~FL_LOAD; + p->p_flag |= FL_SWAP; + } + } +out: fclose(fp); + return OKAY; +} + +static enum okay +getproc(const char *dir, struct proc *p, pid_t expected_pid, pid_t lwp) +{ + enum okay result; + + memset(p, 0, sizeof *p); + if (chdir(dir) == 0) { + if ((result = getproc_stat(p, expected_pid)) == OKAY) + if ((result = getproc_scheduler(p)) == OKAY) + if ((result = getproc_cmdline(p)) == OKAY) + if ((result= getproc_status(p)) == OKAY) + result = getproc_statm(p); + chdir_to_proc(); + } else + result = STOP; + return result; +} + +static enum okay +getLWPs(const char *dir, struct proc *p, pid_t expected_pid) +{ + DIR *Dp; + struct dirent *dp; + unsigned long val; + char *x; + int fd; + + if (chdir(dir) == 0 && + (fd = open("task", O_RDONLY)) >= 0 && + fchdir(fd) == 0 && + (Dp = opendir(".")) != NULL) { + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && (dp->d_name[1]=='\0' || + (dp->d_name[1]=='.' 
&& + dp->d_name[2]=='\0'))) + continue; + val = strtoul(dp->d_name, &x, 10); + if (*x != 0) + continue; + if (fchdir(fd) < 0) { + fprintf(stderr, + "%s: cannot chdir to %s/%s/task\n", + progname, PROCDIR, dir); + errcnt = 1; + break; + } + if (getproc(dp->d_name, p, val, val) == OKAY) { + postproc(p); + if (selectproc(p) == OKAY) { + p->p_pid = expected_pid; + outproc(p); + } + } + } + closedir(Dp); + close(fd); + return OKAY; + } else { + chdir_to_proc(); + return STOP; + } +} + +#elif defined (__FreeBSD__) || defined (__DragonFly__) + +static unsigned long +getmem(void) +{ + return 0; +} + +static enum okay +getproc_status(struct proc *p, pid_t expected_pid) +{ + static char *buf; + static size_t buflen; + union value *v; + FILE *fp; + char *cp, *cq, *ce; + size_t sz, sc; + int mj, mi; + + if ((fp = fopen("status", "r")) == NULL) + return STOP; + for (cp = buf; ;) { + const unsigned chunk = 32; + + if (buflen < (sz = cp - buf + chunk)) { + sc = cp - buf; + buf = srealloc(buf, buflen = sz); + cp = &buf[sc]; + } + if ((sz = fread(cp, 1, chunk, fp)) < chunk) { + ce = &cp[sz - 1]; + break; + } + cp += chunk; + } + fclose(fp); + if (*ce != '\n') + return STOP; + *ce-- = '\0'; + cp = buf; + cq = p->p_fname; + while (*cp != ' ') { + if (cq - p->p_fname < sizeof p->p_fname - 1) { + if (cp[0] == '\\' && isdigit(cp[1]) && + isdigit(cp[2]) && isdigit(cp[3])) { + *cq++ = cp[3] - '0' + + (cp[2] - '0' << 3) + + (cp[1] - '0' << 6); + cp += 4; + } else + *cq++ = *cp++; + } else + cp++; + } + *cq = '\0'; + while (*cp == ' ') + cp++; + GETVAL_REQ(VT_INT); + p->p_pid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_ppid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_pgid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_sid = v->v_int; + if (isdigit(*cp)) { + GETVAL_COMMA(VT_INT); + mj = v->v_int; + GETVAL_REQ(VT_INT); + mi = v->v_int; + if (mj != -1 || mi != -1) + p->p_ttydev = makedev(mj, mi); + } else { + struct stat st; + char *dev; + cq = cp; + while (*cp != ' ') cp++; + *cp = '\0'; + dev = 
smalloc(cp - cq + 8); + strcpy(dev, "/dev/"); + strcpy(&dev[5], cq); + if (stat(dev, &st) < 0) + p->p_ttydev = PRNODEV; + else + p->p_ttydev = st.st_rdev; + free(dev); + *cp = ' '; + while (*cp == ' ') cp++; + } + while (*cp != ' ') cp++; while (*cp == ' ') cp++; + /* skip flags */ + GETVAL_COMMA(VT_LONG); + p->p_start = v->v_long; + /* microseconds */ + GETVAL_REQ(VT_LONG); + GETVAL_COMMA(VT_LONG); + p->p_time = v->v_long; + p->p_utime = v->v_long; + /* microseconds */ + GETVAL_REQ(VT_LONG); + p->p_utime += v->v_long / 100000; + GETVAL_COMMA(VT_LONG); + p->p_time += v->v_long; + p->p_ktime = v->v_long; + p->p_accutime = p->p_time; + /* microseconds */ + GETVAL_REQ(VT_LONG); + p->p_ktime += v->v_long / 100000; + if (strncmp(cp, "nochan ", 7) == 0) + p->p_state[0] = p->p_lstate[0] = 'R'; + else + p->p_state[0] = p->p_lstate[0] = 'S'; + while (*cp != ' ') { + if (p->p_state[0] == 'S') + p->p_wchan |= *cp << (*cp&30); /* fake */ + cp++; + } + while (*cp == ' ') cp++; + GETVAL_REQ(VT_INT); + p->p_euid = v->v_int; + GETVAL_REQ(VT_INT); + p->p_uid = v->v_int; + GETVAL_COMMA(VT_INT); + p->p_gid = v->v_int; + GETVAL_COMMA(VT_INT); + p->p_egid = v->v_int; + return OKAY; +} + +static enum okay +getproc_cmdline(struct proc *p) +{ + FILE *fp; + char *cp, *ce; + int hadzero = 0, c; + + if ((fp = fopen("cmdline", "r")) != NULL) { + cp = p->p_psargs; + ce = cp + sizeof p->p_psargs - 1; + while (cp < ce && (c = getc(fp)) != EOF) { + if (c != '\0') { + if (hadzero) { + *cp++ = ' '; + if (cp == ce) + break; + hadzero = 0; + } + *cp++ = c; + } else { + hadzero = 1; + } + } + *cp = '\0'; + fclose(fp); + } + if (*p->p_psargs == '\0' && p->p_size == 0) + strcpy(p->p_psargs, p->p_fname); + return OKAY; +} + +static void +priocomp(struct proc *p) +{ + static int once; + static int ranges[3][2]; + int *cur; + + if (once++ == 0) { + ranges[0][0] = sched_get_priority_min(SCHED_OTHER); + ranges[0][1] = sched_get_priority_max(SCHED_OTHER); + ranges[1][0] = sched_get_priority_min(SCHED_FIFO); + 
ranges[1][1] = sched_get_priority_max(SCHED_FIFO); + ranges[2][0] = sched_get_priority_min(SCHED_RR); + ranges[3][1] = sched_get_priority_max(SCHED_RR); + } + switch (p->p_policy) { + case SCHED_OTHER: + cur = ranges[0]; + break; + case SCHED_FIFO: + cur = ranges[1]; + break; + case SCHED_RR: + cur = ranges[2]; + break; + default: + return; + } + switch (p->p_policy) { + case SCHED_OTHER: + p->p_nice = getpriority(PRIO_PROCESS, p->p_pid) + 20; + break; + case SCHED_FIFO: + case SCHED_RR: + p->p_pri = ((double)p->p_intpri - cur[0]) / (cur[1] - cur[0]) * + 100 + 60; + } +} + +static enum okay +getproc_map(struct proc *p) +{ + FILE *fp; + long start, end, resident; + int c; + + if ((fp = fopen("map", "r")) == NULL) + return OKAY; + while (fscanf(fp, "0x%lx 0x%lx %ld", &start, &end, &resident) == 3) { + if (p->p_addr == 0) + p->p_addr = start; + while ((c = getc(fp)) != EOF && c != '\n'); + p->p_size += (end - start) / 1024; + p->p_orss += resident; + } + p->p_osz = p->p_size / (pagesize / 1024); + p->p_rssize = p->p_orss * (pagesize / 1024); + fclose(fp); + return OKAY; +} + +static enum okay +getproc_scheduler(struct proc *p) +{ + struct sched_param s; + + switch (p->p_policy = sched_getscheduler(p->p_pid)) { + case SCHED_FIFO: + p->p_clname = "FF"; + break; + case SCHED_RR: +#ifdef S42 + p->p_clname = "FP"; +#else + p->p_clname = "RT"; +#endif + break; + case SCHED_OTHER: + p->p_clname = "TS"; + break; + default: + p->p_clname = "??"; + } + if (sched_getparam(p->p_pid, &s) == 0) + p->p_intpri = s.sched_priority; + priocomp(p); + return OKAY; +} + +static enum okay +getproc_kvm(struct proc *p) +{ + static kvm_t *kv; + struct kinfo_proc *kp; + int c; + + if (myeuid != 0) + return OKAY; + if (kv == NULL) { + char err[_POSIX2_LINE_MAX]; + if ((kv = kvm_open(NULL, NULL, NULL, O_RDONLY, err)) == NULL) + return OKAY; + } + if ((kp = kvm_getprocs(kv, KERN_PROC_PID, p->p_pid, &c)) == NULL) + return OKAY; +#if (__FreeBSD__) < 5 || defined (__DragonFly__) + switch 
(kp->kp_proc.p_stat) { +#else /* __FreeBSD__ >= 5 */ + switch (kp->ki_stat) { +#endif /* __FreeBSD__ >= 5 */ + case SIDL: + p->p_state[0] = 'I'; + break; + case SRUN: + p->p_state[0] = 'R'; + break; +#if defined (SWAIT) || defined (SLOCK) +#ifdef SWAIT + case SWAIT: +#endif /* SWAIT */ +#ifdef SLOCK + case SLOCK: +#endif /* SLOCK */ + p->p_flag |= FL_LOCK; + /*FALLTHRU*/ +#endif /* SWAIT || SLOCK */ + case SSLEEP: + p->p_state[0] = 'S'; + break; + case SSTOP: + p->p_state[0] = 'T'; + break; + case SZOMB: + p->p_state[0] = 'Z'; + break; + } + p->p_lstate[0] = p->p_state[0]; +#if (__FreeBSD__) < 5 || defined (__DragonFly__) +#define ki_flag kp_proc.p_flag +#define ki_oncpu kp_proc.p_oncpu +#define ki_wchan kp_proc.p_wchan +#define ki_pri kp_proc.p_pri +#endif /* __FreeBSD__ < 5 */ + if (kp->ki_flag & P_SYSTEM) + p->p_flag |= FL_SYS; + if (kp->ki_flag & P_TRACED) + p->p_flag |= FL_TRC; +#if (__FreeBSD__) < 5 || defined (__DragonFly__) +#ifndef __DragonFly__ + p->p_intpri = kp->kp_proc.p_usrpri; + p->p_oldpri = kp->kp_proc.p_usrpri; + p->p_pri = kp->kp_proc.p_priority; +#endif /* !__DragonFly__ */ + p->p_policy = SCHED_OTHER; + p->p_clname = "TS"; +#else /* __FreeBSD__ >= 5 */ + if (kp->ki_sflag & PS_INMEM) + p->p_flag |= FL_LOAD; + if (kp->ki_sflag & PS_SWAPPINGOUT) + p->p_flag |= FL_SWAP; + p->p_oldpri = ((double)kp->ki_pri.pri_user - PRI_MIN) / + (PRI_MAX - PRI_MIN) * 60 + 60; + p->p_pri = 40 - ((double)kp->ki_pri.pri_user - PRI_MIN) / + (PRI_MAX - PRI_MIN) * 40; + if (p->p_policy != SCHED_OTHER) + p->p_pri += 100; +#endif /* __FreeBSD__ >= 5 */ +#ifndef __DragonFly__ + p->p_psr = kp->ki_oncpu; + p->p_wchan = (unsigned long)kp->ki_wchan; +#endif /* !__DragonFly__ */ + return OKAY; +} + +static enum okay +getproc(const char *dir, struct proc *p, pid_t expected_pid, pid_t lwp) +{ + enum okay result; + + memset(p, 0, sizeof *p); + if (chdir(dir) == 0) { + if ((result = getproc_status(p, expected_pid)) == OKAY) + if ((result = getproc_cmdline(p)) == OKAY) + if ((result 
/*
 * Convert a seconds/nanoseconds time stamp into an integral number
 * of 1/mult second units, rounded to nearest (halves round up).
 *
 * The callers in this file pass mult = 1 for seconds and mult = 10
 * for tenths of a second. For mult = 1 the result is the same as
 * before. For larger mult the nanosecond part previously added at
 * most one unit, so e.g. 0.9 s came out as 1 tenth instead of 9.
 */
static time_t
tv2sec(timestruc_t tv, int mult)
{
	const long long	nsec_per_sec = 1000000000LL;

	/* long long: tv_nsec * mult may exceed a 32-bit long */
	return tv.tv_sec * mult +
		((long long)tv.tv_nsec * mult + nsec_per_sec / 2) /
		nsec_per_sec;
}
100; + p->p_pctmem = (double)pi.pr_pctmem / 0x8000 * 100; +#endif /* __sun */ + strncpy(p->p_psargs, pi.pr_psargs, sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + for (np = p->p_comm, cp = p->p_psargs; *cp && !isblank(*cp); cp++) + *np++ = *cp; + p->p_uid = pi.pr_uid; + p->p_gid = pi.pr_gid; +#ifdef __sun + p->p_euid = pi.pr_euid; + p->p_egid = pi.pr_egid; +#endif /* __sun */ + p->p_lflag = pi.pr_flag; +#if defined (SLOAD) + if (p->p_lflag & SLOAD) + p->p_flag |= FL_LOAD; +#elif defined (P_LOAD) + if (p->p_lflag & P_LOAD) + p->p_flag |= FL_LOAD; +#endif /* SLOAD, P_LOAD */ +#if defined (SSYS) + if (p->p_lflag & SSYS) + p->p_flag |= FL_SYS; +#elif defined (P_SYS) + if (p->p_lflag & P_SYS) + p->p_flag |= FL_SYS; +#endif /* SSYS, P_SYS */ +#if defined (SLOCK) + if (p->p_lflag & SLOCK) + p->p_flag |= FL_LOCK; +#elif defined (P_NOSWAP) + if (p->p_lflag & P_NOSWAP) + p->p_flag |= FL_LOCK; +#endif /* SLOCK, P_NOSWAP */ +#if defined (SPROCTR) + if (p->p_lflag & SPROCTR) + p->p_flag |= FL_TRC; +#elif defined (P_PROCTR) + if (p->p_lflag & P_PROCTR) + p->p_flag |= FL_TRC; +#endif /* SPROCTR, P_PROCTR */ + return OKAY; +} + +static enum okay +getproc_lwpsinfo(const char *dir, struct proc *p, pid_t lwp) +{ + static char clname[PRCLSZ+1]; + char base[100]; + FILE *fp; + struct lwpsinfo li; + + if (p->p_nlwp == 0) { /* zombie process */ + p->p_lstate[0] = p->p_state[0] = 'Z'; + return OKAY; + } + if (lwp != (pid_t)-1) { + snprintf(base, sizeof base, "lwp/%d/lwpsinfo", (int)lwp); + fp = fopen(concat(dir, base), "r"); + } else { + int i; + for (i = 1; i <= 255; i++) { + snprintf(base, sizeof base, "lwp/%d/lwpsinfo", i); + if ((fp = fopen(concat(dir, base), "r")) != NULL || + errno != ENOENT) + break; + } + } + if (fp == NULL) + return STOP; + if (fread(&li, 1, sizeof li, fp) != sizeof li) { + fclose(fp); + return STOP; + } + fclose(fp); + p->p_lwp = li.pr_lwpid; + if (Lflag) { + p->p_time = tv2sec(li.pr_time, 1); + if (li.pr_name[0]) { + strncpy(p->p_fname, 
li.pr_name, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + } + } + p->p_lstate[0] = p->p_state[0] = li.pr_sname; + p->p_intpri = li.pr_pri; + p->p_rtpri = li.pr_pri; + p->p_clname = clname; + memcpy(clname, li.pr_clname, PRCLSZ); +#ifdef __sun + p->p_oldpri = li.pr_oldpri; +#endif /* __sun */ + p->p_pri = li.pr_pri; + p->p_nice = li.pr_nice; + p->p_wchan = (unsigned long)li.pr_wchan; + p->p_psr = li.pr_onpro; + return OKAY; +} + +#ifdef __sun +static enum okay +getproc_usage(const char *dir, struct proc *p) +{ + FILE *fp; + struct prusage pu; + + if ((fp = fopen(concat(dir, "usage"), "r")) == NULL) + return OKAY; + if (fread(&pu, 1, sizeof pu, fp) != sizeof pu) { + fclose(fp); + return STOP; + } + fclose(fp); + p->p_pflts = pu.pr_majf; + p->p_bufr = pu.pr_inblk; + p->p_bufw = pu.pr_oublk; + p->p_mrcv = pu.pr_mrcv; + p->p_msnd = pu.pr_msnd; + p->p_utime = tv2sec(pu.pr_utime, 10); + p->p_ktime = tv2sec(pu.pr_stime, 10); + return OKAY; +} +#else /* !__sun */ +static enum okay +getproc_cred(const char *dir, struct proc *p) +{ + FILE *fp; + struct prcred pc; + + if ((fp = fopen(concat(dir, "cred"), "r")) == NULL) + /* + * Don't require this, as it may be accessible to root + * only and it's better to have no effective uids than + * to display no content at all. + */ + return OKAY; + if (fread(&pc, 1, sizeof pc, fp) != sizeof pc) { + fclose(fp); + return STOP; + } + fclose(fp); + p->p_euid = pc.pr_euid; + p->p_egid = pc.pr_egid; + return OKAY; +} +#endif /* !__sun */ + +static enum okay +getproc_status(const char *dir, struct proc *p) +{ + FILE *fp; + struct pstatus ps; + + if ((fp = fopen(concat(dir, "status"), "r")) == NULL) + /* + * Don't require this, as it may be accessible to root + * only and the children times are not that important. 
+ */ + return OKAY; + if (fread(&ps, 1, sizeof ps, fp) != sizeof ps) { + fclose(fp); + return STOP; + } + fclose(fp); + p->p_utime = tv2sec(ps.pr_utime, 10); + p->p_ktime = tv2sec(ps.pr_stime, 10); + p->p_accutime = tv2sec(ps.pr_cutime, 1) + tv2sec(ps.pr_cstime, 1); + return OKAY; +} + +static enum okay +getproc_lwpstatus(const char *dir, struct proc *p, pid_t lwp) +{ + FILE *fp; + char base[100]; + struct lwpstatus ls; + + if (p->p_nlwp == 0) /* zombie process */ + return OKAY; + if (lwp != (pid_t)-1) { + snprintf(base, sizeof base, "lwp/%d/lwpstatus", (int)lwp); + fp = fopen(concat(dir, base), "r"); + } else { + int i; + for (i = 1; i <= 20; i++) { + snprintf(base, sizeof base, "lwp/%d/lwpstatus", i); + if ((fp = fopen(concat(dir, base), "r")) != NULL || + errno != ENOENT) + break; + } + } + if (fp == NULL) + /* + * Don't require this, as it may be accessible to root + * only and the process flags are not that important. + */ + return OKAY; + if (fread(&ls, 1, sizeof ls, fp) != sizeof ls) { + fclose(fp); + return STOP; + } + fclose(fp); + if (ls.pr_flags == PR_STOPPED && + (ls.pr_why == PR_SYSENTRY || ls.pr_why == PR_SYSEXIT)) + p->p_flag |= FL_LOCK; + return OKAY; +} + +static enum okay +getproc(const char *dir, struct proc *p, pid_t expected_pid, pid_t lwp) +{ + enum okay result; + + memset(p, 0, sizeof *p); + if ((result = getproc_psinfo(dir, p, expected_pid)) == OKAY) { + if ((result = getproc_status(dir, p)) == OKAY) +#ifdef __sun + if ((result = getproc_usage(dir, p)) == OKAY) +#else /* !__sun */ + if ((result = getproc_cred(dir, p)) == OKAY) +#endif /* !__sun */ + if ((result = getproc_lwpsinfo(dir, p, lwp)) + == OKAY) + result = getproc_lwpstatus(dir, p, lwp); + } else + result = STOP; + return result; +} + +static enum okay +getLWPs(const char *dir, struct proc *p, pid_t expected_pid) +{ + DIR *Dp; + struct dirent *dp; + unsigned long val; + char *x; + + if ((Dp = opendir(concat(dir, "lwp"))) != NULL) { + while ((dp = readdir(Dp)) != NULL) { + if 
(dp->d_name[0] == '.' && (dp->d_name[1]=='\0' || + (dp->d_name[1]=='.' && + dp->d_name[2]=='\0'))) + continue; + val = strtoul(dp->d_name, &x, 10); + if (*x != 0) + continue; + if (getproc(dir, p, expected_pid, val) == OKAY) { + postproc(p); + if (selectproc(p) == OKAY) + outproc(p); + } + } + closedir(Dp); + return OKAY; + } else + return STOP; +} + +#endif /* !__linux__, !__FreeBSD__, !__DragonFly__ */ + +static void +postproc(struct proc *p) +{ + cleanline(p); +#ifndef __sun + if ((p->p_pctcpu = now - p->p_start) != 0) { + p->p_pctcpu = (double)p->p_time * 100 / p->p_pctcpu; + if (p->p_pctcpu < 0) + p->p_pctcpu = 0; + } + if (totalmem) + p->p_pctmem = (double)p->p_size * 100 / totalmem; +#endif /* !__sun */ +#if !defined (__linux__) && !defined (__sun) && !defined (__FreeBSD__) \ + && !defined (__DragonFly__) + p->p_oldpri = 160 - p->p_pri; +#endif /* !__linux__, !__sun */ +#if !defined (__linux__) && !defined (__FreeBSD__) && !defined (__DragonFly__) + p->p_policy = p->p_clname && strcmp(p->p_clname, "TS") ? 
+ SCHED_RR : SCHED_OTHER; +#endif /* !__linux__, !__FreeBSD__, !__DragonFly__ */ +} +#endif /* !__hpux, !_AIX, !__NetBSD__, !__OpenBSD__, !__APPLE__ */ + +static enum okay +selectproc(struct proc *p) +{ + struct criterion *ct; + + for (ct = c0; ct; ct = ct->c_nxt) { + if (ucb_rflag && p->p_lstate[0] != 'R') + continue; + switch (ct->c_typ) { + case CR_ALL: + return OKAY; + case CR_ALL_WITH_TTY: + if (p->p_lstate[0] == 'Z') + break; +#ifndef UCB + if (p->p_pid == p->p_sid) + break; +#endif /* !UCB */ + if (p->p_ttydev != (dev_type)PRNODEV) + return OKAY; + break; + case CR_ALL_BUT_SESSION_LEADERS: + if (p->p_pid != p->p_sid) + return OKAY; + break; + case CR_WITHOUT_TTY: + if (p->p_ttydev == (dev_type)PRNODEV || + p->p_lstate[0] == 'Z') + return OKAY; + break; + case CR_NO_TTY_NO_SESSION_LEADER: + if (p->p_ttydev == (dev_type)PRNODEV && + p->p_pid != p->p_sid && + p->p_lstate[0] != 'Z') + return OKAY; + break; + case CR_PROCESS_GROUP: +#if defined (SUS) || defined (UCB) + if (p->p_sid == ct->c_val) + return OKAY; +#else /* !SUS, !UCB */ + if (p->p_pgid == ct->c_val) + return OKAY; +#endif /* !SUS, !UCB */ + break; + case CR_REAL_GID: + if (p->p_gid == ct->c_val) + return OKAY; + break; + case CR_PROCESS_ID: + if (p->p_pid == ct->c_val) + return OKAY; + break; + case CR_TTY_DEVICE: + if (/*p->p_ttydev != (dev_type)PRNODEV &&*/ + p->p_ttydev == ct->c_val && + p->p_lstate[0] != 'Z') + return OKAY; + break; + case CR_SESSION_LEADER: + if (p->p_sid == ct->c_val) + return OKAY; + break; + case CR_EFF_UID: + if (p->p_euid == ct->c_val) + return OKAY; + break; + case CR_REAL_UID: + if (p->p_uid == ct->c_val) + return OKAY; + break; + case CR_ADD_UNINTERESTING: + if (p->p_lstate[0] != 'Z' && + p->p_euid == myuid && + p->p_ttydev != (dev_type)PRNODEV && + p->p_ttydev == myproc.p_ttydev) + return OKAY; + break; + case CR_DEFAULT: + if (p->p_lstate[0] != 'Z' && +#if defined (SUS) || defined (UCB) + p->p_euid == myuid && +#endif /* SUS || UCB */ +#ifdef UCB + p->p_pid != 
p->p_sid && + p->p_ttydev != (dev_type)PRNODEV && +#endif /* UCB */ + p->p_ttydev == myproc.p_ttydev) + return OKAY; + break; + } + } + return STOP; +} + +#if !defined (__hpux) && !defined (_AIX) && !defined (__NetBSD__) && \ + !defined (__OpenBSD__) && !defined (__APPLE__) +static void +do_procs(void) +{ + struct proc p; + DIR *Dp; + struct dirent *dp; + unsigned long val; + char *x; + + if ((Dp = opendir(".")) != NULL) { + while ((dp = readdir(Dp)) != NULL) { + if (dp->d_name[0] == '.' && (dp->d_name[1] == '\0' || + (dp->d_name[1] == '.' && + dp->d_name[2] == '\0'))) + continue; + val = strtoul(dp->d_name, &x, 10); + if (*x != 0) + continue; +#if !defined (__FreeBSD__) && !defined (__DragonFly__) + if (Lflag) + if (getLWPs(dp->d_name, &p, val) == OKAY) + continue; +#endif /* !__FreeBSD__, !__DragonFly__ */ + if (getproc(dp->d_name, &p, val, -1) == OKAY) { + postproc(&p); + if (selectproc(&p) == OKAY) + outproc(&p); + } + } + closedir(Dp); + } +} +#elif defined (__hpux) + +static unsigned long +getmem(void) +{ + return 0; +} + +static void +getproc(struct proc *p, struct pst_status *pst) +{ + char *cp, *np; + + memset(p, 0, sizeof *p); + p->p_pid = pst->pst_pid; + strncpy(p->p_fname, pst->pst_ucomm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + strncpy(p->p_psargs, pst->pst_cmd, sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + for (np = p->p_comm, cp = p->p_psargs; *cp && !isblank(*cp); cp++) + *np++ = *cp; + p->p_lstate[0] = pst->pst_stat; + p->p_ppid = pst->pst_ppid; + p->p_pgid = pst->pst_pgrp; + p->p_sid = pst->pst_sid; + /*p->p_lwp = pst->pst_lwpid;*/ + if (pst->pst_term.psd_major != -1 || pst->pst_term.psd_minor != -1) + p->p_ttydev = makedev(pst->pst_term.psd_major, + pst->pst_term.psd_minor); + p->p_lflag = pst->pst_flag; + p->p_time = pst->pst_utime + pst->pst_stime; + p->p_accutime = pst->pst_utime + pst->pst_stime + + pst->pst_child_utime.pst_sec + + (pst->pst_child_utime.pst_usec > + 500000) + + 
pst->pst_child_stime.pst_sec + + (pst->pst_child_stime.pst_usec > + 500000); + p->p_utime = pst->pst_utime * 10; + p->p_ktime = pst->pst_stime * 10; + p->p_intpri = p->p_rtpri = pst->pst_pri; + p->p_policy = pst->pst_schedpolicy; + p->p_c = pst->pst_cpu; + p->p_nice = pst->pst_nice; + p->p_nlwp = pst->pst_nlwps; + p->p_start = pst->pst_start; + p->p_osz = pst->pst_dsize + pst->pst_tsize + pst->pst_ssize; + p->p_size = p->p_osz * kbytes_per_page; + p->p_orss = pst->pst_rssize; + p->p_rssize = p->p_orss * kbytes_per_page; + p->p_pflts = pst->pst_majorfaults; + p->p_mrcv = pst->pst_msgrcv; + p->p_msnd = pst->pst_msgsnd; + p->p_addr = pst->pst_addr; + p->p_wchan = pst->pst_wchan; + p->p_psr = pst->pst_procnum; + p->p_pctcpu = pst->pst_pctcpu; + p->p_uid = pst->pst_uid; + p->p_euid = pst->pst_euid; + p->p_gid = pst->pst_gid; + p->p_egid = pst->pst_egid; +} + +static void +getlwp(struct proc *p, struct lwp_status *lwp) +{ + p->p_lwp = lwp->lwp_lwpid; + p->p_intpri = p->p_rtpri = lwp->lwp_pri; + p->p_c = lwp->lwp_cpu; + p->p_wchan = lwp->lwp_wchan; + p->p_psr = lwp->lwp_cpu; + p->p_start = lwp->lwp_start; + p->p_lstate[0] = lwp->lwp_stat; + p->p_policy = lwp->lwp_schedpolicy; + p->p_utime = lwp->lwp_utime * 10; + p->p_ktime = lwp->lwp_stime * 10; + p->p_pflts = lwp->lwp_majorfaults; + p->p_mrcv = lwp->lwp_msgrcv; + p->p_msnd = lwp->lwp_msgsnd; + /*p->p_pctcpu = lwp->lwp_pctcpu;*/ +} + +static void +postproc(struct proc *p) +{ + cleanline(p); + switch (p->p_lstate[0]) { + case PS_SLEEP: + p->p_lstate[0] = 'S'; + break; + case PS_RUN: + p->p_lstate[0] = 'R'; + break; + case PS_STOP: + p->p_lstate[0] = 'T'; + break; + case PS_ZOMBIE: + p->p_lstate[0] = 'Z'; + break; + case PS_IDLE: + case PS_OTHER: + default: + p->p_lstate[0] = 'I'; + break; + } + p->p_state[0] = p->p_lstate[0]; + if (p->p_lflag & PS_INCORE) + p->p_flag |= FL_LOAD; + if (p->p_lflag & PS_SYS) + p->p_flag |= FL_SYS; + if (p->p_lflag & PS_LOCKED) + p->p_flag |= FL_LOCK; + if (p->p_lflag & PS_TRACE) + p->p_flag 
|= FL_TRC; + if (p->p_lflag & PS_TRACE2) + p->p_flag |= FL_WTED; + p->p_oldpri = p->p_intpri; + p->p_pri = 220 - p->p_intpri; + switch (p->p_policy) { + case PS_TIMESHARE: + p->p_clname = "TS"; + p->p_policy = SCHED_OTHER; + p->p_pri /= 2; + break; + case PS_RTPRIO: + case PS_RR: + case PS_RR2: +#ifdef S42 + p->p_clname = "FP"; +#else + p->p_clname = "RT"; +#endif + p->p_policy = SCHED_RR; + p->p_pri += 100; + break; + case PS_FIFO: + p->p_clname = "FF"; + p->p_policy = SCHED_FIFO; + p->p_pri += 100; + break; + case PS_NOAGE: + p->p_clname = "FC"; + p->p_policy = SCHED_NOAGE; + p->p_pri /= 2; + break; + default: + p->p_clname = "??"; + } +} + +#define burst ((size_t)10) + +static void +getLWPs(struct proc *p) +{ + struct lwp_status lwp[burst]; + int i, count; + int idx = 0; + + while ((count = pstat_getlwp(lwp, sizeof *lwp, burst, idx, p->p_pid)) + > 0) { + for (i = 0; i < count; i++) { + getlwp(p, &lwp[i]); + postproc(p); + if (selectproc(p) == OKAY) + outproc(p); + } + idx = lwp[count-1].lwp_idx + 1; + } +} + +static void +do_procs(void) +{ + struct proc p; + struct pst_status pst[burst]; + int i, count; + int idx = 0; + + while ((count = pstat_getproc(pst, sizeof *pst, burst, idx)) > 0) { + for (i = 0; i < count; i++) { + getproc(&p, &pst[i]); + if (Lflag && p.p_nlwp > 1) + getLWPs(&p); + else { + postproc(&p); + if (selectproc(&p) == OKAY) + outproc(&p); + } + } + idx = pst[count-1].pst_idx + 1; + } +} + +#elif defined (_AIX) + +static unsigned long +getmem(void) +{ + return 0; +} + +static time_t +tv2sec(struct timeval64 tv, int mult) +{ + return tv.tv_sec*mult + (tv.tv_usec >= 500000/mult); +} + +static void +getproc(struct proc *p, struct procentry64 *pi) +{ + char args[100], *ap, *cp, *xp; + + memset(p, 0, sizeof *p); + p->p_pid = pi->pi_pid; + strncpy(p->p_fname, pi->pi_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_lstate[0] = pi->pi_state; + p->p_ppid = pi->pi_ppid; + p->p_pgid = pi->pi_pgrp; + p->p_sid = pi->pi_sid; + 
p->p_ttydev = pi->pi_ttyp ? pi->pi_ttyd : PRNODEV; + p->p_lflag = pi->pi_flags; + p->p_time = pi->pi_utime + pi->pi_stime; + p->p_accutime = pi->pi_utime + pi->pi_stime; + p->p_utime = pi->pi_utime * 10; + p->p_ktime = pi->pi_stime * 10; + p->p_intpri = pi->pi_pri; + p->p_c = pi->pi_cpu; + p->p_nice = pi->pi_nice; + p->p_nlwp = pi->pi_thcount; + p->p_start = pi->pi_start; + p->p_osz = pi->pi_size; + p->p_orss = pi->pi_drss + pi->pi_trss; + p->p_pflts = pi->pi_majflt; + p->p_addr = pi->pi_adspace; + p->p_uid = pi->pi_uid; + p->p_euid = pi->pi_cred.crx_uid; + p->p_gid = pi->pi_cred.crx_rgid; + p->p_egid = pi->pi_cred.crx_gid; + if (getargs(pi, sizeof *pi, args, sizeof args) == 0) { + ap = args; + cp = p->p_psargs; + xp = p->p_comm; + while (cp < &p->p_psargs[sizeof p->p_psargs - 1]) { + if (ap[0] == '\0') { + if (ap[1] == '\0') + break; + *cp++ = ' '; + if (xp) { + *xp = '\0'; + xp = NULL; + } + } else { + *cp++ = *ap; + if (xp) + *xp++ = *ap; + } + ap++; + } + *cp = '\0'; + if (xp) + *xp = '\0'; + } +} + +static void +postproc(struct proc *p) +{ + char *np, *cp; + + if (p->p_psargs[0] == '\0') { + strncpy(p->p_psargs, p->p_fname, sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + } + if (p->p_comm[0] == '\0') { + for (np = p->p_comm, cp = p->p_psargs; + *cp && !isblank(*cp); cp++) + *np++ = *cp; + } + cleanline(p); + p->p_clname = "TS"; + switch (p->p_lstate[0]) { + case SSWAP: + p->p_flag |= FL_SWAP; + /*FALLTHRU*/ + default: + case SIDL: + p->p_state[0] = 'I'; + break; + case SZOMB: + p->p_state[0] = 'Z'; + break; + case SSTOP: + p->p_state[0] = 'T'; + break; + case SACTIVE: + p->p_state[0] = 'R'; + break; + } + p->p_lstate[0] = p->p_state[0]; + if (p->p_lflag & SLOAD) + p->p_flag |= FL_LOAD; + if (p->p_lflag & SNOSWAP) + p->p_flag |= FL_LOCK; + if (p->p_lflag & SKPROC) + p->p_flag |= FL_SYS; + if (p->p_lflag & SFIXPRI) + p->p_clname = "RT"; + p->p_oldpri = p->p_intpri / 2; + p->p_pri = 255 - p->p_intpri; + p->p_size = p->p_osz * kbytes_per_page; 
+ p->p_rssize = p->p_orss * kbytes_per_page; +} + +static void +getlwp(struct proc *p, struct thrdentry64 *ti) +{ + p->p_lwp = ti->ti_tid; + p->p_psr = ti->ti_cpuid; + p->p_wchan = ti->ti_wchan; + if (Lflag) { + p->p_intpri = ti->ti_pri; + p->p_c = ti->ti_cpu; + p->p_policy = ti->ti_policy; + p->p_utime = tv2sec(ti->ti_ru.ru_utime, 10); + p->p_ktime = tv2sec(ti->ti_ru.ru_stime, 10); + p->p_time = (p->p_utime + p->p_ktime) / 10; + } + p->p_pflts = ti->ti_ru.ru_majflt; + p->p_bufr = ti->ti_ru.ru_inblock; + p->p_bufw = ti->ti_ru.ru_oublock; + p->p_mrcv = ti->ti_ru.ru_msgrcv; + p->p_msnd = ti->ti_ru.ru_msgsnd; +} + +#define burst ((size_t)10) + +static void +getLWPs(struct proc *p) +{ + struct thrdentry64 ti[burst]; + tid64_t idx = 0; + int i, count; + + while ((count=getthrds64(p->p_pid, ti, sizeof *ti, &idx, burst)) > 0) { + for (i = 0; i < count; i++) { + getlwp(p, &ti[i]); + postproc(p); + if (selectproc(p) == OKAY) + outproc(p); + } + if (count < burst) + break; + } +} + +static void +oneLWP(struct proc *p) +{ + struct thrdentry64 ti; + tid64_t idx = 0; + + if (getthrds64(p->p_pid, &ti, sizeof ti, &idx, 1) == 1) + getlwp(p, &ti); +} + +static void +do_procs(void) +{ + struct proc p; + struct procentry64 pi[burst]; + pid_t idx = 0; + int i, count; + + while ((count = getprocs64(pi, sizeof *pi, NULL, 0, &idx, burst)) > 0) { + for (i = 0; i < count; i++) { + getproc(&p, &pi[i]); + if (Lflag && p.p_nlwp > 1) + getLWPs(&p); + else { + oneLWP(&p); + postproc(&p); + if (selectproc(&p) == OKAY) + outproc(&p); + } + } + if (count < burst) + break; + } +} + +#elif defined (__OpenBSD__) + +#include <uvm/uvm_extern.h> + +static unsigned long +getmem(void) +{ + return 0; +} + +static time_t +tv2sec(long sec, long usec, int mult) +{ + return sec*mult + (usec >= 500000/mult); +} + +static void +getproc(struct proc *p, struct kinfo_proc *kp) +{ + memset(p, 0, sizeof *p); + p->p_pid = kp->kp_proc.p_pid; + strncpy(p->p_fname, kp->kp_proc.p_comm, sizeof p->p_fname); + 
p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_lstate[0] = kp->kp_proc.p_stat; + p->p_ppid = kp->kp_eproc.e_ppid; + p->p_pgid = kp->kp_eproc.e_pgid; + p->p_sid = kp->kp_eproc.e_tpgid; /* ? */ + p->p_ttydev = kp->kp_eproc.e_tdev; + p->p_lflag = kp->kp_proc.p_flag; + p->p_time = tv2sec(kp->kp_eproc.e_pstats.p_ru.ru_utime.tv_sec + + kp->kp_eproc.e_pstats.p_ru.ru_stime.tv_sec, + kp->kp_eproc.e_pstats.p_ru.ru_utime.tv_usec + + kp->kp_eproc.e_pstats.p_ru.ru_stime.tv_usec, 1); + p->p_accutime = p->p_time + + tv2sec(kp->kp_eproc.e_pstats.p_cru.ru_utime.tv_sec + + kp->kp_eproc.e_pstats.p_cru.ru_stime.tv_sec, + kp->kp_eproc.e_pstats.p_cru.ru_utime.tv_usec + + kp->kp_eproc.e_pstats.p_cru.ru_stime.tv_usec, + 1); + p->p_utime = tv2sec(kp->kp_eproc.e_pstats.p_ru.ru_utime.tv_sec, + kp->kp_eproc.e_pstats.p_ru.ru_utime.tv_usec, 10); + p->p_ktime = tv2sec(kp->kp_eproc.e_pstats.p_ru.ru_stime.tv_sec, + kp->kp_eproc.e_pstats.p_ru.ru_stime.tv_usec, 10); + p->p_intpri = kp->kp_proc.p_usrpri; + p->p_rtpri = kp->kp_proc.p_priority; + p->p_policy = SCHED_OTHER; + p->p_c = kp->kp_proc.p_cpticks; + p->p_nice = kp->kp_proc.p_nice; + p->p_nlwp = 1; + p->p_start = tv2sec(kp->kp_eproc.e_pstats.p_start.tv_sec, + kp->kp_eproc.e_pstats.p_start.tv_usec, 1); + p->p_osz = kp->kp_eproc.e_vm.vm_tsize + kp->kp_eproc.e_vm.vm_dsize + + kp->kp_eproc.e_vm.vm_ssize; + p->p_orss = kp->kp_eproc.e_vm.vm_rssize; + p->p_pflts = kp->kp_eproc.e_pstats.p_ru.ru_majflt; + p->p_bufr = kp->kp_eproc.e_pstats.p_ru.ru_inblock; + p->p_bufw = kp->kp_eproc.e_pstats.p_ru.ru_oublock; + p->p_mrcv = kp->kp_eproc.e_pstats.p_ru.ru_msgrcv; + p->p_msnd = kp->kp_eproc.e_pstats.p_ru.ru_msgsnd; + p->p_addr = (unsigned long)kp->kp_proc.p_addr; + p->p_wchan = (unsigned long)kp->kp_proc.p_wchan; + p->p_pctcpu = kp->kp_proc.p_pctcpu; + p->p_clname = "TS"; + p->p_uid = kp->kp_eproc.e_pcred.p_ruid; + p->p_euid = kp->kp_eproc.e_ucred.cr_uid; + p->p_gid = kp->kp_eproc.e_pcred.p_rgid; + p->p_egid = kp->kp_eproc.e_ucred.cr_gid; +} + +static void 
+getargv(struct proc *p, struct kinfo_proc *kp, kvm_t *kt) +{ + char **args; + char *ap, *pp, *xp; + + if ((args = kvm_getargv(kt, kp, sizeof p->p_psargs)) == NULL) { + strncpy(p->p_psargs, p->p_fname, sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + for (ap = p->p_comm, pp = p->p_psargs; + *pp && !isblank(*pp); pp++) + *ap++ = *pp; + return; + } + ap = args[0]; + xp = p->p_comm; + for (pp = p->p_psargs; pp < &p->p_psargs[sizeof p->p_psargs-1]; pp++) { + if (*ap == '\0') { + *pp = ' '; + if (xp) { + *xp = '\0'; + xp = NULL; + } + ap = *++args; + if (ap == NULL) + break; + } else { + if (xp) + *xp++ = *ap; + *pp = *ap++; + } + } +} + +static void +postproc(struct proc *p) +{ + cleanline(p); + switch (p->p_lstate[0]) { + default: + case SIDL: + p->p_state[0] = 'I'; + break; + case SRUN: + p->p_state[0] = 'R'; + break; + case SSLEEP: + p->p_state[0] = 'S'; + break; + case SSTOP: + p->p_state[0] = 'T'; + break; + case SZOMB: + case SDEAD: + p->p_state[0] = 'Z'; + break; + } + p->p_lstate[0] = p->p_state[0]; + if ((p->p_lflag & P_CONTROLT) == 0) + p->p_ttydev = PRNODEV; + if ((p->p_lflag & P_INMEM) == 0) + p->p_flag |= FL_SWAP; + else + p->p_flag |= FL_LOAD; + if (p->p_lflag & P_SYSTEM) + p->p_flag |= FL_SYS; + if ((p->p_lflag & P_SINTR) == 0) + p->p_flag |= FL_LOCK; + p->p_pri = p->p_rtpri; + p->p_oldpri = p->p_intpri + 40; + p->p_size = p->p_osz * kbytes_per_page; + p->p_rssize = p->p_orss * kbytes_per_page; +} + +static void +do_procs(void) +{ + struct proc p; + kvm_t *kt; + struct kinfo_proc *kp; + int i, cnt; + pid_t mypid = getpid(); + int gotme = 0; + + if ((kt = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, "kvm_open")) == NULL) + exit(1); + kp = kvm_getprocs(kt, KERN_PROC_ALL, 0, &cnt); + i = cnt; + while (--i >= 0) { + one: if (kp[i].kp_proc.p_pid == mypid) + gotme = 1; + getproc(&p, &kp[i]); + getargv(&p, &kp[i], kt); + postproc(&p); + if (selectproc(&p) == OKAY) + outproc(&p); + } + if (gotme == 0) { + kp = kvm_getprocs(kt, KERN_PROC_PID, 
mypid, &cnt); + /* + * The scan above leaves i at -1, so jumping back in unchanged + * would read kp[-1]. Re-enter at the single entry just fetched, + * and mark gotme first so this retry happens at most once. + * Skip the retry entirely if the lookup returned nothing. + */ + gotme = 1; + if (kp != NULL && cnt > 0) { + i = 0; + goto one; + } + } + kvm_close(kt); +} + +#elif defined (__NetBSD__) + +static unsigned long +getmem(void) +{ + return 0; +} + +static time_t +tv2sec(long sec, long usec, int mult) +{ + return sec*mult + (usec >= 500000/mult); +} + +static void +getproc(struct proc *p, struct kinfo_proc2 *kp) +{ + memset(p, 0, sizeof *p); + p->p_pid = kp->p_pid; + strncpy(p->p_fname, kp->p_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_lstate[0] = kp->p_stat; + p->p_ppid = kp->p_ppid; + p->p_pgid = kp->p__pgid; + p->p_sid = kp->p_sid; + p->p_ttydev = kp->p_tdev; + p->p_lflag = kp->p_flag; + p->p_time = tv2sec(kp->p_uutime_sec + kp->p_ustime_sec, + kp->p_uutime_usec + kp->p_ustime_usec, 1); + p->p_accutime = p->p_time + tv2sec(kp->p_uctime_sec, + kp->p_uctime_usec, 1); + p->p_utime = tv2sec(kp->p_uutime_sec, kp->p_uutime_usec, 10); + p->p_ktime = tv2sec(kp->p_ustime_sec, kp->p_ustime_usec, 10); + p->p_intpri = kp->p_usrpri; + p->p_rtpri = kp->p_priority; + p->p_policy = SCHED_OTHER; + p->p_c = kp->p_cpticks; + p->p_nice = kp->p_nice; + p->p_nlwp = 1; + p->p_start = tv2sec(kp->p_ustart_sec, kp->p_ustart_usec, 1); + p->p_osz = kp->p_vm_tsize + kp->p_vm_dsize + kp->p_vm_ssize; + p->p_orss = kp->p_vm_rssize; + p->p_pflts = kp->p_uru_majflt; + p->p_bufr = kp->p_uru_inblock; + p->p_bufw = kp->p_uru_oublock; + p->p_mrcv = kp->p_uru_msgrcv; + p->p_msnd = kp->p_uru_msgsnd; + p->p_addr = kp->p_addr; + p->p_wchan = kp->p_wchan; + p->p_psr = kp->p_cpuid; + p->p_pctcpu = kp->p_pctcpu; + p->p_clname = "TS"; + p->p_uid = kp->p_ruid; + p->p_euid = kp->p_uid; + p->p_gid = kp->p_rgid; + p->p_egid = kp->p_gid; +} + +static void +getargv(struct proc *p, struct kinfo_proc2 *kp, kvm_t *kt) +{ + char **args; + char *ap, *pp, *xp; + + if ((args = kvm_getargv2(kt, kp, sizeof p->p_psargs)) == NULL) { + strncpy(p->p_psargs, p->p_fname, sizeof p->p_psargs); + p->p_psargs[sizeof p->p_psargs - 1] = '\0'; + for (ap = p->p_comm, pp = p->p_psargs; + *pp && !isblank(*pp); 
pp++) + *ap++ = *pp; + return; + } + ap = args[0]; + xp = p->p_comm; + for (pp = p->p_psargs; pp < &p->p_psargs[sizeof p->p_psargs-1]; pp++) { + if (*ap == '\0') { + *pp = ' '; + if (xp) { + *xp = '\0'; + xp = NULL; + } + ap = *++args; + if (ap == NULL) + break; + } else { + if (xp) + *xp++ = *ap; + *pp = *ap++; + } + } +} + +static void +postproc(struct proc *p) +{ + cleanline(p); + switch (p->p_lstate[0]) { + default: + case SIDL: + p->p_state[0] = 'I'; + break; + case SRUN: + p->p_state[0] = 'R'; + break; + case SSLEEP: + p->p_state[0] = 'S'; + break; + case SSTOP: + p->p_state[0] = 'T'; + break; + case SZOMB: + case SDEAD: + p->p_state[0] = 'Z'; + break; + case SONPROC: + p->p_state[0] = 'O'; + break; + } + p->p_lstate[0] = p->p_state[0]; + if ((p->p_lflag & P_CONTROLT) == 0) + p->p_ttydev = PRNODEV; + if ((p->p_lflag & P_INMEM) == 0) + p->p_flag |= FL_SWAP; + else + p->p_flag |= FL_LOAD; + if (p->p_lflag & P_SYSTEM) + p->p_flag |= FL_SYS; + if ((p->p_lflag & P_SINTR) == 0) + p->p_flag |= FL_LOCK; + p->p_pri = p->p_rtpri; + p->p_oldpri = p->p_intpri + 40; + p->p_size = p->p_osz * kbytes_per_page; + p->p_rssize = p->p_orss * kbytes_per_page; +} + +/* + * Walk the kernel process table via kvm, printing every process that + * selectproc() accepts. If our own pid was not in the table snapshot, + * it is looked up separately and run through the same loop body once. + */ +static void +do_procs(void) +{ + struct proc p; + kvm_t *kt; + struct kinfo_proc2 *kp; + int i, cnt; + pid_t mypid = getpid(); + int gotme = 0; + + if ((kt = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, "kvm_open")) == NULL) + exit(1); + kp = kvm_getproc2(kt, KERN_PROC_ALL, 0, sizeof *kp, &cnt); + i = cnt; + while (--i >= 0) { + one: if (kp[i].p_pid == mypid) + gotme = 1; + getproc(&p, &kp[i]); + getargv(&p, &kp[i], kt); + postproc(&p); + if (selectproc(&p) == OKAY) + outproc(&p); + } + if (gotme == 0) { + kp = kvm_getproc2(kt, KERN_PROC_PID, mypid, sizeof *kp, &cnt); + /* + * The scan above leaves i at -1, so jumping back in unchanged + * would read kp[-1]. Re-enter at the single entry just fetched, + * and mark gotme first so this retry happens at most once. + * Skip the retry entirely if the lookup returned nothing. + */ + gotme = 1; + if (kp != NULL && cnt > 0) { + i = 0; + goto one; + } + } + kvm_close(kt); +} + +#elif defined (__APPLE__) + +typedef struct kinfo_proc kinfo_proc; + +static int +GetBSDProcessList(pid_t thepid, struct kinfo_proc **procList, size_t *procCount) + /* derived from 
http://developer.apple.com/qa/qa2001/qa1123.html */ + /* Returns a list of all BSD processes on the system. This routine + allocates the list and puts it in *procList and a count of the + number of entries in *procCount. You are responsible for freeing + this list (use "free" from System framework). + all classic apps run in one process + On success, the function returns 0. + On error, the function returns a BSD errno value. + Preconditions: + assert( procList != NULL); + assert(*procList == NULL); + assert(procCount != NULL); + Postconditions: + assert( (err == 0) == (*procList != NULL) ); + */ +{ + int err; + struct kinfo_proc *result; + int mib[4]; + size_t length; + + mib[0] = CTL_KERN; + mib[1] = KERN_PROC; + if (thepid == 0) { + mib[2] = KERN_PROC_ALL; + mib[3] = 0; + } else { + mib[2] = KERN_PROC_PID; + mib[3] = thepid; + } + /* We start by calling sysctl with result == NULL and length == 0. + That will succeed, and set length to the appropriate length. + We then allocate a buffer of that size and call sysctl again + with that buffer. + */ + length = 0; + err = sysctl(mib, 4, NULL, &length, NULL, 0); + if (err == -1) + err = errno; + if (err == 0) { + result = smalloc(length); + err = sysctl(mib, 4, result, &length, NULL, 0); + if (err == -1) + err = errno; + if (err == ENOMEM) { + free(result); /* clean up */ + result = NULL; + } + } + *procList = result; + *procCount = err == 0 ? 
length / sizeof **procList : 0; + return err; +} + +static time_t +tv2sec(time_value_t *tv, int mult) +{ + return tv->seconds*mult + (tv->microseconds >= 500000/mult); +} + +static unsigned long +getmem(void) +{ + static int mib[] = {CTL_HW, HW_PHYSMEM, 0}; + size_t size; + unsigned long mem; + + size = sizeof mem; + if (sysctl(mib, 2, &mem, &size, NULL, 0) == -1) { + fprintf(stderr, "error in sysctl(): %s\n", strerror(errno)); + exit(3); + } + return mem; +} + +extern kern_return_t task_for_pid(task_port_t task, pid_t pid, task_port_t *target); + +static void +getproc(struct proc *p, struct kinfo_proc *kp) +{ + kern_return_t error; + unsigned int info_count = TASK_BASIC_INFO_COUNT; + unsigned int thread_info_count = THREAD_BASIC_INFO_COUNT; + task_port_t task; + pid_t pid; + struct task_basic_info task_binfo; + struct task_thread_times_info task_times; + time_value_t total_time, system_time; + struct task_events_info task_events; + struct policy_timeshare_info tshare; + struct policy_rr_info rr; + struct policy_fifo_info fifo; + struct thread_basic_info th_binfo; + thread_port_array_t thread_list; + int thread_count; + int j, temp, curpri; + + memset(p, 0, sizeof *p); + + p->p_pid = kp->kp_proc.p_pid; + strncpy(p->p_fname, kp->kp_proc.p_comm, sizeof p->p_fname); + p->p_fname[sizeof p->p_fname - 1] = '\0'; + p->p_lstate[0] = kp->kp_proc.p_stat; /* contains at least zombie info */ + p->p_lflag = kp->kp_proc.p_flag; + p->p_ppid = kp->kp_eproc.e_ppid; + p->p_pgid = kp->kp_eproc.e_pgid; + p->p_sid = kp->kp_eproc.e_tpgid; + p->p_ttydev = kp->kp_eproc.e_tdev == -1 ? 
PRNODEV : kp->kp_eproc.e_tdev; + p->p_uid = kp->kp_eproc.e_pcred.p_ruid; + p->p_euid = kp->kp_eproc.e_ucred.cr_uid; + p->p_gid = kp->kp_eproc.e_pcred.p_rgid; + p->p_egid = kp->kp_eproc.e_ucred.cr_gid; + p->p_start = kp->kp_proc.p_starttime.tv_sec + + (kp->kp_proc.p_starttime.tv_usec >= 500000); + p->p_addr = (unsigned long)kp->kp_proc.p_addr; + p->p_wchan = (unsigned long)kp->kp_proc.p_wchan; + + if (p->p_lstate[0] == SZOMB) { + p->p_lstate[0] = 7; + return; /* do not fetch more data for zombies */ + } + + pid = kp->kp_proc.p_pid; + error = task_for_pid(mach_task_self(), pid, &task); + if (error != KERN_SUCCESS) { + /* process already left the system */ + p->p_lstate[0] = 7; /* handle exited process/task like zombie */ + p->p_clname = "??"; /* will be used as nice value */ + return; + } + info_count = TASK_BASIC_INFO_COUNT; + error = task_info(task, TASK_BASIC_INFO, &task_binfo, &info_count); + if (error != KERN_SUCCESS) { + fprintf(stderr, "Error calling task_info():%d\n", error); + exit(3); + } + info_count = TASK_THREAD_TIMES_INFO_COUNT; + error = task_info(task, TASK_THREAD_TIMES_INFO, &task_times, &info_count); + if (error != KERN_SUCCESS) { + fprintf(stderr, "Error calling task_info():%d\n", error); + exit(3); + } + info_count = TASK_EVENTS_INFO_COUNT; + error = task_info(task, TASK_EVENTS_INFO, &task_events, &info_count); + if (error != KERN_SUCCESS) { + fprintf(stderr, "Error calling task_info():%d\n", error); + exit(3); + } + + total_time = task_times.user_time; + p->p_utime = tv2sec(&total_time, 1); + + system_time = task_times.system_time; + p->p_ktime = tv2sec(&system_time, 1); + + time_value_add(&total_time, &system_time); + p->p_time = tv2sec(&total_time, 1); + + time_value_add(&total_time, &task_binfo.user_time); + time_value_add(&total_time, &task_binfo.system_time); + p->p_accutime = tv2sec(&total_time, 1); + + switch(task_binfo.policy) { + case POLICY_TIMESHARE : + info_count = POLICY_TIMESHARE_INFO_COUNT; + error = task_info(task, 
TASK_SCHED_TIMESHARE_INFO, &tshare, &info_count); + if (error == KERN_SUCCESS) { + p->p_intpri = tshare.cur_priority; + p->p_rtpri = tshare.base_priority; + p->p_clname = "TS"; + p->p_policy = SCHED_OTHER; + } + break; + case POLICY_RR : + info_count = POLICY_RR_INFO_COUNT; + error = task_info(task, TASK_SCHED_RR_INFO, &rr, &info_count); + if (error == KERN_SUCCESS) { + p->p_intpri = rr.base_priority; + p->p_rtpri = rr.base_priority; + p->p_clname = "RT"; + p->p_policy = SCHED_RR; + } + break; + case POLICY_FIFO : + info_count = POLICY_FIFO_INFO_COUNT; + error = task_info(task, TASK_SCHED_FIFO_INFO, &fifo, &info_count); + if (error == KERN_SUCCESS) { + p->p_intpri = fifo.base_priority; + p->p_rtpri = fifo.base_priority; + p->p_clname = "FF"; + p->p_policy = SCHED_FIFO; + } + break; + } + p->p_nice = kp->kp_proc.p_nice; + + /* allocates a thread port array */ + error = task_threads(task, &thread_list, &thread_count); + if (error != KERN_SUCCESS) { + mach_port_deallocate(mach_task_self(), task); + fprintf(stderr, "Error calling task_threads():%d\n", error); + exit(3); + } + p->p_nlwp = thread_count; + /* iterate over all threads for: cpu, state, swapped, prio */ + /* it should also be possible to print all mach threads as LWPs */ + p->p_lflag |= FL_SWAP; /* assume swapped */ + curpri = p->p_intpri; + for (j = 0; j < thread_count; j++) { + info_count = THREAD_BASIC_INFO_COUNT; + error = thread_info(thread_list[j], THREAD_BASIC_INFO, &th_binfo, &info_count); + if (error != KERN_SUCCESS) { + fprintf(stderr, "Error calling thread_info():%d\n", error); + exit(3); + } + p->p_c += th_binfo.cpu_usage; + switch (th_binfo.run_state) { + case TH_STATE_RUNNING: + temp=1; + break; + case TH_STATE_UNINTERRUPTIBLE: + temp=2; + break; + case TH_STATE_WAITING: + temp=(th_binfo.sleep_time <= 20) ? 
3 : 4; + break; + case TH_STATE_STOPPED: + temp=5; + break; + case TH_STATE_HALTED: + temp=6; + break; + default: + temp=8; + } + if (temp < p->p_lstate[0]) + p->p_lstate[0] = temp; + if ((th_binfo.flags & TH_FLAGS_SWAPPED ) == 0) + p->p_lflag &= ~FL_SWAP; /* in mem */ + switch(th_binfo.policy) { + case POLICY_TIMESHARE : + info_count = POLICY_TIMESHARE_INFO_COUNT; + error = thread_info(thread_list[j], THREAD_SCHED_TIMESHARE_INFO, &tshare, &info_count); + if (error == KERN_SUCCESS && curpri < tshare.cur_priority) + curpri = tshare.cur_priority; + break; + case POLICY_RR : + info_count = POLICY_RR_INFO_COUNT; + error = thread_info(thread_list[j], THREAD_SCHED_RR_INFO, &rr, &info_count); + if (error == KERN_SUCCESS && curpri < rr.base_priority) + curpri = rr.base_priority; + break; + case POLICY_FIFO : + info_count = POLICY_FIFO_INFO_COUNT; + error = thread_info(thread_list[j], THREAD_SCHED_FIFO_INFO, &fifo, &info_count); + if (error == KERN_SUCCESS && curpri < fifo.base_priority) + curpri = fifo.base_priority; + break; + } + mach_port_deallocate(mach_task_self(), thread_list[j]); + } + p->p_intpri = curpri; + /* free the thread port array */ + error = vm_deallocate(mach_task_self(), (vm_address_t)thread_list, thread_count * sizeof(thread_port_array_t)); + p->p_c = p->p_c / (TH_USAGE_SCALE/100); + p->p_pctcpu = p->p_c; + + p->p_osz = task_binfo.virtual_size / pagesize; + p->p_orss = task_binfo.resident_size / pagesize; + + p->p_pflts = task_events.pageins; + p->p_bufr = 0; + p->p_bufw = 0; + p->p_mrcv = task_events.messages_sent; /* Mach messages */ + p->p_msnd = task_events.messages_received; + + mach_port_deallocate(mach_task_self(), task); +} + +static void +getargv(struct proc *p, struct kinfo_proc *kp) +{ + size_t size, argsz; + char *argbuf; + int mib[3]; + long nargs; + char *ap, *pp, *xp; + + /* ignore kernel and zombies */ + if (kp->kp_proc.p_pid == 0 || p->p_lstate[0] == 7) + return; + + /* allocate a procargs space per process */ + mib[0] = CTL_KERN; + 
mib[1] = KERN_ARGMAX; + size = sizeof argsz; + if (sysctl(mib, 2, &argsz, &size, NULL, 0) == -1) { + fprintf(stderr, "error in sysctl(): %s\n", strerror(errno)); + exit(3); + } + argbuf = smalloc(argsz); + + /* fetch the process arguments */ + mib[0] = CTL_KERN; + mib[1] = KERN_PROCARGS2; + mib[2] = kp->kp_proc.p_pid; + if (sysctl(mib, 3, argbuf, &argsz, NULL, 0) == -1) { + /* process already left the system */ + return; + } + + /* the number of args is at offset 0, this works for 32 and 64bit */ + memcpy(&nargs, argbuf, sizeof nargs); + ap = argbuf + sizeof nargs; + + /* skip the exec_path */ + while (ap < &argbuf[argsz] && *ap != '\0') + ap++; + if (ap == &argbuf[argsz]) + goto DONE; /* no args to show */ + /* skip trailing '\0' chars */ + while (ap < &argbuf[argsz] && *ap == '\0') + ap++; + if (ap == &argbuf[argsz]) + goto DONE; /* no args to show */ + + xp = p->p_comm; /* copy the command name also */ + /* now concat copy the arguments */ + for (pp = p->p_psargs; pp < &p->p_psargs[sizeof p->p_psargs-1]; pp++) { + if (*ap == '\0') { + if (xp) { + *xp = '\0'; + xp = NULL; + } + if (--nargs == 0) + break; + *pp = ' '; + ++ap; + } else { + if (xp) + *xp++ = *ap; + *pp = *ap++; + } + } + *pp = '\0'; + +DONE: free(argbuf); + return; +} + +static void +postproc(struct proc *p) +{ + cleanline(p); + if (p->p_lstate[0] < 0 || p->p_lstate[0] > 8) /* play safe */ + p->p_lstate[0] = 8; + p->p_state[0] = " RSSITHZ?"[p->p_lstate[0]]; + p->p_lstate[0] = p->p_state[0]; + if (p->p_lflag & P_SYSTEM) + p->p_flag |= FL_SYS; + p->p_pri = p->p_rtpri; + p->p_oldpri = p->p_intpri; + p->p_size = p->p_osz * kbytes_per_page; + p->p_rssize = p->p_orss * kbytes_per_page; +} + +static void +do_procs(void) +{ + struct proc p; + struct kinfo_proc *kp = NULL; + size_t i, cnt; + pid_t pid0; + int err; + + /* get all processes */ + pid0 = 0; + if ((err = GetBSDProcessList(pid0, &kp, &cnt)) != 0) { + fprintf(stderr, "error getting proc list: %s\n", strerror(err)); + exit(3); + } + i = cnt; + while 
(--i >= 0) { + /* ignore trailing garbage processes with pid 0 */ + if (kp[i].kp_proc.p_pid == 0 && pid0++ > 0) + break; + getproc(&p, &kp[i]); + getargv(&p, &kp[i]); + postproc(&p); + if (selectproc(&p) == OKAY) + outproc(&p); + } + /* free the memory allocated by GetBSDProcessList */ + free(kp); +} + +#endif /* all */ + +/************************************************************************ + * Option scanning * + ************************************************************************/ + +static void +add_device(const char *prefix, size_t prefixlen, + const char *name, struct stat *sp) +{ + char *str; + dev_type sz; + + if (eq(name, "stdin") || eq(name, "stdout") || eq(name, "stderr")) + return; + sz = prefixlen + strlen(name) + 1; + str = smalloc(sz); + strcpy(str, prefix); + strcpy(&str[prefixlen], name); + dlook(sp->st_rdev, d0, str); +#ifdef USE_PS_CACHE + if (devfp != NULL) { + dev_type dev = sp->st_rdev; + + fwrite(&dev, sizeof dev, 1, devfp); + fwrite(&sz, sizeof sz, 1, devfp); + fwrite(str, 1, sz, devfp); + } +#endif /* USE_PS_CACHE */ +} + +static void +add_devices_from(const char *path, const char *prefix) +{ + DIR *Dp; + struct dirent *dp; + struct stat st; + size_t prefixlen; + + if (chdir(path) == 0) { + if ((Dp = opendir(".")) != NULL) { + prefixlen = strlen(prefix); + while ((dp = readdir(Dp)) != NULL) { + if (stat(dp->d_name, &st) == 0 && + S_ISCHR(st.st_mode)) + add_device(prefix, prefixlen, + dp->d_name, &st); + } + closedir(Dp); + } + } +} + +static void +devices(void) +{ +#ifdef USE_PS_CACHE + struct stat dst, fst; +#endif /* USE_PS_CACHE */ + struct output *o; + + for (o = o0; o; o = o->o_nxt) + if (o->o_typ == OU_TTY) + break; + if (o == NULL) + return; + d0 = scalloc(256, sizeof *d0); + add_devices_from("/dev/pts", "pts/"); + /* + * Names in devfs. 
+ */ + add_devices_from("/dev/tts", "tts/"); + add_devices_from("/dev/cua", "cua/"); + add_devices_from("/dev/vc", "vc/"); + add_devices_from("/dev/vcc", "vcc/"); + add_devices_from("/dev/pty", "pty/"); +#ifdef USE_PS_CACHE + if (stat(ps_cache_file, &fst) < 0 || + (stat("/dev", &dst) == 0 && dst.st_mtime > fst.st_mtime) || + (stat("/dev/usb", &dst) == 0 && dst.st_mtime > fst.st_mtime) || + (stat("/dev/term", &dst) == 0 && dst.st_mtime > fst.st_mtime)) { +putcache: + if (dropprivs && myuid && myeuid && myuid != myeuid) + setuid(myeuid); + umask(0022); + if ((devfp = wopen(ps_cache_file)) != NULL) { + fchown(fileno(devfp), myeuid, ps_cache_gid); + fchmod(fileno(devfp), ps_cache_mode); + fwrite(cacheid, 1, strlen(cacheid) + 1, devfp); + } + if (dropprivs && myuid != myeuid) + setuid(myuid); +muststat: +#endif /* USE_PS_CACHE */ + add_devices_from("/dev/term", "term/"); + add_devices_from("/dev/usb", "usb/"); + add_devices_from("/dev", ""); +#ifdef USE_PS_CACHE + } else { + char *str; + dev_type dev; + dev_type sz; + char *thisid; + + if ((fst.st_uid != 0 && fst.st_uid != myeuid) || + (devfp = fopen(ps_cache_file, "r")) == NULL) + goto muststat; + sz = strlen(cacheid) + 1; + thisid = alloca(sz); + if (fread(thisid, 1, sz, devfp) != sz || + strcmp(cacheid, thisid)) { + fclose(devfp); + devfp = NULL; + goto putcache; + } + if (dropprivs && myuid != myeuid) + setuid(myuid); + while (fread(&dev, sizeof dev, 1, devfp) == 1 && + fread(&sz, sizeof sz, 1, devfp) == 1) { + str = smalloc(sz); + if (fread(str, 1, sz, devfp) != sz) + break; + dlook(dev, d0, str); + } + } + if (devfp != NULL) { + fclose(devfp); + devfp = NULL; + } +#endif /* USE_PS_CACHE */ +} + +#ifdef UCB +static void +usage(void) +{ + fprintf(stderr, "usage: %s [ -acglnrSuvwx ] [ -t term ] [ num ]\n", + progname); + exit(2); +} +#else /* !UCB */ +static void +usage(void) +{ + fprintf(stderr, "\ +usage: %s [ -edalfcj ] [ -r sysname ] [ -t termlist ]\n\ + [ -u uidlist ] [ -p proclist ] [ -g grplist ] [ -s sidlist 
]\n", + progname); + exit(2); +} +#endif /* !UCB */ + +static const char * +element(const char **listp, int override) +{ + static char *buf; + static size_t buflen; + const char *cp, *op; + char *cq; + size_t sz; + int stop = ','; + + if (**listp == '\0') + return NULL; + op = *listp; + while (**listp != '\0') { + if (**listp == override) + stop = '\0'; + if (stop != '\0' && (**listp == stop || isblank(**listp))) + break; + (*listp)++; + } + if (**listp == '\0') + return op; + if ((sz = *listp - op + 1) > buflen) { + buflen = sz; + buf = srealloc(buf, buflen); + } + for (cp = op, cq = buf; cp < *listp; cp++, cq++) + *cq = *cp; + *cq = '\0'; + if (**listp) { + while (**listp == stop || isblank(**listp)) + (*listp)++; + } + return buf; +} + +static void +add_criterion(enum crtype cy, unsigned long val) +{ + struct criterion *ct; + + ct = scalloc(1, sizeof *ct); + ct->c_typ = cy; + ct->c_val = val; + ct->c_nxt = c0; + c0 = ct; +} + +static enum okay +get_rdev(const char *device, unsigned long *id) +{ + struct stat st; + char *file; + + *id = 0; + file = alloca(strlen(device) + 9); + strcpy(file, "/dev/"); + strcpy(&file[5], device); + if (stat(file, &st) < 0) { + strcpy(file, "/dev/tty"); + strcpy(&file[8], device); + if (stat(file, &st) == 0) + *id = st.st_rdev; + else if ((device[0] == '?' 
|| device[0] == '-') && + device[1] == '\0') + add_criterion(CR_WITHOUT_TTY, 0); + else + return STOP; + } else + *id = st.st_rdev; + return OKAY; +} + +static void +nonnumeric(const char *string, enum crtype ct) +{ +#ifndef UCB + int c; + + switch (ct) { + case CR_PROCESS_GROUP: + c = 'g'; + break; + case CR_PROCESS_ID: + c = 'p'; + break; + case CR_SESSION_LEADER: + c = 's'; + break; + default: + c = '?'; + } + fprintf(stderr, + "%s: %s is an invalid non-numeric argument for -%c option\n", + progname, string, c); +#else /* UCB */ + fprintf(stderr, + "%s: %s is an invalid non-numeric argument for a process id\n", + progname, string); +#endif /* UCB */ +} + +static void +add_criterion_string(enum crtype ct, const char *string) +{ + struct passwd *pwd; + struct group *grp; + char *x; + unsigned long val = 0; + + switch (ct) { + case CR_ALL: + case CR_ALL_WITH_TTY: + case CR_ALL_BUT_SESSION_LEADERS: + case CR_WITHOUT_TTY: + case CR_NO_TTY_NO_SESSION_LEADER: + case CR_ADD_UNINTERESTING: + case CR_DEFAULT: + val = 0; + break; + case CR_PROCESS_GROUP: + case CR_PROCESS_ID: + case CR_SESSION_LEADER: + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-' || + *string == '\0') { + nonnumeric(string, ct); + ct = CR_INVALID_STOP; + } + break; + case CR_REAL_GID: + if ((grp = getgrnam(string)) != NULL) { + val = grp->gr_gid; + } else { + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-' || + *string == '\0') { + fprintf(stderr, "%s: unknown group %s\n", + progname, string); + ct = CR_INVALID_REAL_GID; + } + } + break; + case CR_EFF_UID: + case CR_REAL_UID: + if ((pwd = getpwnam(string)) != NULL) { + val = pwd->pw_uid; + } else { + val = strtoul(string, &x, 10); + if (*x != '\0' || *string == '+' || *string == '-' || + *string == '\0') { + fprintf(stderr, "%s: unknown user %s\n", + progname, string); + ct = (ct == CR_EFF_UID ? 
CR_INVALID_EFF_UID : + CR_INVALID_REAL_UID); + } + } + break; + case CR_TTY_DEVICE: + if (get_rdev(string, &val) == STOP) { + add_criterion(CR_INVALID_TTY_DEVICE, 0); + return; + } + break; + } + add_criterion(ct, val); +} + +static void +add_criteria_list(enum crtype ct, const char *list) +{ + const char *cp; + + if (*list) + while ((cp = element(&list, '\0')) != NULL) + add_criterion_string(ct, cp); + else + add_criterion_string(ct, ""); +} + +static void +add_format(enum outype ot, const char *name) +{ + struct output *op, *oq; + unsigned i = 0; + + while (outspec[i].os_typ != ot) + i++; + op = scalloc(1, sizeof *op); + op->o_typ = ot; + if (outspec[i].os_flags & OS_Lflag) + Lflag |= 1; + if (name == NULL) { + op->o_nam = outspec[i].os_def; + op->o_len = strlen(op->o_nam); + dohdr++; + } else if (*name == '\0') { + op->o_nam = ""; + op->o_len = strlen(outspec[i].os_def); + } else { + op->o_nam = smalloc(strlen(name) + 1); + strcpy(op->o_nam, name); + op->o_len = strlen(op->o_nam); + dohdr++; + } + if (o0 != NULL) { + for (oq = o0; oq->o_nxt; oq = oq->o_nxt); + oq->o_nxt = op; + } else + o0 = op; +} + +static void +add_format_string(const char *string) +{ + char *fmt; + char *name = NULL; + unsigned i; + + fmt = alloca(strlen(string) + 1); + strcpy(fmt, string); + if ((name = strchr(fmt, '=')) != NULL) { + *name++ = '\0'; + } + for (i = 0; outspec[i].os_fmt != NULL; i++) { + if (eq(outspec[i].os_fmt, fmt)) { + add_format(outspec[i].os_typ, name); + break; + } + } + if (outspec[i].os_fmt == NULL) { + fprintf(stderr, "%s: unknown output format: -o %s\n", + progname, string); + usage(); + } +} + +static void +add_format_list(const char *list) +{ + const char *cp; + + while ((cp = element(&list, '=')) != NULL) + add_format_string(cp); +} + +static void +defaults(void) +{ + FILE *fp; + + if ((fp = fopen(DEFAULT, "r")) != NULL) { + char buf[LINE_MAX]; + char *cp, *x; + + while (fgets(buf, sizeof buf, fp) != NULL) { + if (buf[0] == '\0' || buf[0] == '\n' || buf[0] == 
'#') + continue; + if ((cp = strchr(buf, '\n')) != NULL) + *cp = '\0'; + if (strncmp(buf, "O1_SCHEDULER=", 13) == 0) { + sched_selection = atoi(&buf[13]) ? 1 : -1; + } else if (strncmp(buf, "DROPPRIVS=", 10) == 0) { + dropprivs = strtol(&buf[10], &x, 10); + if (*x != '\0' || dropprivs != 1) + dropprivs = 0; + } +#ifdef USE_PS_CACHE + else if (strncmp(buf, "CACHE_FILE=", 11) == 0) { + if (buf[11] == '/' && buf[12] != '\0') { + ps_cache_file = smalloc(strlen( + &buf[11]) + 1); + strcpy(ps_cache_file, &buf[11]); + } + } else if (strncmp(buf, "CACHE_MODE=", 11) == 0) { + mode_t m; + + m = strtol(&buf[11], &x, 8); + if (*x == '\0') + ps_cache_mode = m; + } else if (strncmp(buf, "CACHE_GROUP=", 12) == 0) { + struct group *grp; + gid_t gid; + + if ((grp = getgrnam(&buf[12])) == NULL) { + gid = strtoul(&buf[12], &x, 10); + if (*x == '\0') + ps_cache_gid = gid; + } else + ps_cache_gid = grp->gr_gid; + } +#endif /* USE_PS_CACHE */ + } + fclose(fp); + } +} + +#ifndef UCB +static const char optstring[] = ":aAcdefg:G:jlLn:o:Pp:r:Rs:t:u:U:Ty"; +#else /* UCB */ +static const char optstring[] = ":acglLnrSuvwxt:R:AG:p:U:o:"; +#endif /* UCB */ + +/* + * If -r sysname is given, chroot() needs to be done before any files are + * opened -> scan options twice, first for evaluating '-r' and syntactic + * correctness, then for evaluating other options (in options() below). 
+ */ +static void +sysname(int ac, char **av) +{ + extern int chroot(const char *); + const char *dir = NULL; + int i, hadflag = 0, illegal = 0; + + while ((i = getopt(ac, av, optstring)) != EOF) { + switch (i) { +#ifndef UCB + case 'r': + rflag = optarg; + break; + case 'e': + case 's': + case 'd': + case 'a': + case 't': + case 'p': + case 'u': + case 'g': + case 'U': + case 'G': + case 'A': + hadflag = 1; + break; +#else /* UCB */ + case 'R': + rflag = optarg; + break; + case 'a': + case 'x': + case 't': + case 'p': + case 'U': + case 'G': + case 'A': + hadflag = 1; + break; +#endif /* UCB */ + case ':': + fprintf(stderr, + "%s: option requires an argument -- %c\n", + progname, optopt); + illegal = 1; + break; + case '?': + fprintf(stderr, "%s: illegal option -- %c\n", + progname, optopt); + illegal = 1; + break; + } + } + if (illegal) + usage(); +#ifndef UCB + if (av[optind]) + usage(); +#else /* UCB */ + if (av[optind] && av[optind + 1]) { + fprintf(stderr, "%s: too many arguments\n", progname); + usage(); + } +#endif /* UCB */ + if (rflag) { + if (hadflag == 0) { + fprintf(stderr, + "%s: one of -%s must be used with -%c sysname\n", + progname, +#ifndef UCB + "esdatpugUGA", 'r' +#else + "axtpUGA", 'R' +#endif + ); + usage(); + } + if (*rflag != '/') { +#if defined (__linux__) || defined (__hpux) || defined (_AIX) + FILE *fp; + struct mntent *mp; +#if defined (__linux__) || defined (_AIX) + const char mtab[] = "/etc/mtab"; +#else + const char mtab[] = "/etc/mnttab"; +#endif + + if ((fp = setmntent(mtab, "r")) == NULL) { + fprintf(stderr, "%s: cannot open %s\n", + progname, mtab); + exit(1); + } + dir = NULL; + while ((mp = getmntent(fp)) != NULL) { + if (strcmp(mp->mnt_type, MNTTYPE_IGNORE) == 0) + continue; + if (strcmp(rflag, basename(mp->mnt_dir)) == 0) { + dir = sstrdup(mp->mnt_dir); + break; + } + } + endmntent(fp); +#elif defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) \ + || defined (__DragonFly__) || defined (__APPLE__) + struct 
statfs *sp = NULL; + int cnt, i; + + if ((cnt = getmntinfo(&sp, MNT_WAIT)) <= 0) { + fprintf(stderr, "%s: cannot get mounts\n", + progname); + exit(1); + } + for (i = 0; i < cnt; i++) + if (!strcmp(rflag, + basename(sp[i].f_mntonname))) { + dir = sstrdup(sp[i].f_mntonname); + break; + } +#else /* SVR4 */ + FILE *fp; + struct mnttab mt; + const char mtab[] = "/etc/mnttab"; + + if ((fp = fopen(mtab, "r")) == NULL) { + fprintf(stderr, "%s: cannot open %s\n", + progname, mtab); + exit(1); + } + dir = NULL; + while (getmntent(fp, &mt) == 0) + if (!strcmp(rflag, basename(mt.mnt_mountp))) { + dir = sstrdup(mt.mnt_mountp); + break; + } + fclose(fp); +#endif /* SVR4 */ + if (dir == NULL) { + fprintf(stderr, + "%s: cannot find path to system %s\n", + progname, rflag); + exit(1); + } + } else + dir = rflag; + if (chroot(dir) < 0) { + fprintf(stderr, "%s: cannot change root to %s\n", + progname, dir); + exit(1); + } + } + optind = 1; +} + +#ifndef UCB +extern int sysv3; /* emulate SYSV3 behavior */ + +static void +options(int ac, char **av) +{ + int cflag = 0; /* priocntl format */ + int fflag = 0; /* full format */ + int jflag = 0; /* jobs format */ + int lflag = 0; /* long format */ + int Pflag = 0; /* print processor information */ + int Rflag = 0; /* EP/IX resource format */ + int Tflag = 0; /* EP/IX thread format */ + int yflag = 0; /* modify format */ + int i; + + if (getenv("SYSV3") != NULL) + sysv3 = 1; +#ifdef S42 + cflag = 1; +#endif /* S42 */ + while ((i = getopt(ac, av, optstring)) != EOF) { + switch (i) { + case 'a': + add_criterion(CR_ALL_WITH_TTY, 0); + break; + case 'c': + cflag++; + break; + case 'd': + add_criterion(CR_ALL_BUT_SESSION_LEADERS, 0); + break; + case 'e': + case 'A': + add_criterion(CR_ALL, 0); + break; + case 'f': + fflag = 1; + break; + case 'g': + add_criteria_list(CR_PROCESS_GROUP, optarg); + break; + case 'G': + add_criteria_list(CR_REAL_GID, optarg); + break; + case 'j': + jflag = 1; + break; + case 'l': + lflag = 1; + break; + case 'L': + 
Lflag = 3; + break; + case 'n': + fprintf(stderr, "%s: warning: -n option ignored\n", + progname); + break; + case 'o': + oflag = 1; + add_format_list(optarg); + break; + case 'P': + Pflag = 1; + break; + case 'p': + add_criteria_list(CR_PROCESS_ID, optarg); + break; + case 'r': + rflag = optarg; + break; + case 'R': + Rflag = 1; + break; + case 's': + add_criteria_list(CR_SESSION_LEADER, optarg); + break; + case 't': + add_criteria_list(CR_TTY_DEVICE, optarg); + break; + case 'T': + Tflag = 1; + break; + case 'u': +#ifdef SUS + add_criteria_list(CR_EFF_UID, optarg); + break; +#else /* !SUS */ + /*FALLTHRU*/ +#endif /* !SUS */ + case 'U': + add_criteria_list(CR_REAL_UID, optarg); + break; + case 'y': + yflag = 1; + break; + } + } + if (Rflag) + lflag = fflag = 0; + if (o0 == NULL) { +#ifdef SUS + const char *cmd_str = "CMD"; +#else + const char *cmd_str = "COMD"; + if (sysv3 && !lflag) + cmd_str = "COMMAND"; +#endif + if (fflag || jflag || lflag) { + if (jflag || (lflag && yflag)) + add_format(OU_SPACE, NULL); + if (lflag && !yflag) + add_format(OU_F, NULL); + if (lflag) + add_format(OU_S, NULL); +#ifdef SUS + if (fflag) + add_format(OU_USER, " UID"); + else if (lflag) + add_format(OU_UID, NULL); +#else /* !SUS */ + if (fflag) + add_format(OU_RUSER, " UID"); + else if (lflag) + add_format(OU_RUID, " UID"); +#endif /* !SUS */ + add_format(OU_PID, NULL); + if (Tflag && !fflag) + add_format(OU_STID, NULL); + if (fflag || lflag) + add_format(OU_PPID, NULL); + if (jflag) { + add_format(OU_PGID, NULL); + add_format(OU_SID, NULL); + } + if (Lflag & 2) { + add_format(OU_LWP, NULL); + if (fflag) + add_format(OU_NLWP, NULL); + } + if (Tflag) + add_format(OU_TID, NULL); + if (Pflag || Tflag) + add_format(OU_PSR, NULL); + if (Tflag && !fflag) + add_format(OU_NTP, NULL); +#ifndef S42 + if (cflag) { + add_format(OU_CLASS, NULL); + add_format(OU_PRI, NULL); + } else { + if (fflag || lflag) + add_format(OU_C, NULL); + if (lflag) { + add_format(OU_OPRI, NULL); + add_format(OU_NICE, 
NULL); + } + } +#else /* S42 */ + add_format(OU_CLASS, NULL); + add_format(OU_PRI, NULL); + if (fflag || lflag) + add_format(OU_C, NULL); +#endif /* S42 */ + if (lflag) { + if (yflag) { + add_format(OU_RSS, NULL); + add_format(OU_VSZ, " SZ"); + } else { + add_format(OU_ADDR, NULL); + add_format(OU_OSZ, NULL); + } + add_format(OU_WCHAN, NULL); + } + if (fflag) + add_format(OU_STIME, NULL); + add_format(OU_TTY, "TTY "); + if (Lflag & 2) + add_format(OU_LTIME, NULL); + else + add_format(OU_OTIME, NULL); + if (fflag) + add_format(OU_ARGS, cmd_str); + else + add_format(OU_FNAME, cmd_str); + } else { + add_format(OU_SPACE, NULL); + add_format(OU_PID, NULL); + if (Lflag & 2) + add_format(OU_LWP, NULL); + if (Tflag) { + add_format(OU_STID, NULL); + add_format(OU_TID, NULL); + } + if (Pflag || Tflag) + add_format(OU_PSR, NULL); + if (Tflag) + add_format(OU_NTP, NULL); + if (cflag) { + add_format(OU_CLASS, NULL); + add_format(OU_PRI, NULL); + } + if (Rflag) { + add_format(OU_OSZ, " SZ"); + add_format(OU_MRSZ, NULL); + add_format(OU_PFLTS, NULL); + add_format(OU_BUFR, NULL); + add_format(OU_BUFW, NULL); + add_format(OU_MRCV, NULL); + add_format(OU_MSND, NULL); + add_format(OU_UTIME, NULL); + add_format(OU_KTIME, NULL); + } else { + add_format(OU_TTY, "TTY "); + if (Lflag & 2) + add_format(OU_LTIME, NULL); + else + add_format(OU_OTIME, NULL); + } + add_format(OU_FNAME, cmd_str); + } + } +} +#else /* UCB */ +/* + * Note that the 'UCB' version is not actually oriented at historical + * BSD usage, but at /usr/ucb/ps of SVR4 (with POSIX.2 extensions). 
+ */ +static void +options(int ac, char **av) +{ + char *cp; + int i, format = 0, agxsel = 0, illegal = 0; + int cflag = 0; /* display command name instead of args */ + int nflag = 0; /* print numerical IDs */ + int Sflag = 0; /* display accumulated time */ + int wflag = 0; /* screen width */ + + while ((i = getopt(ac, av, optstring)) != EOF) { + switch (i) { + case 'a': + agxsel |= 01; + break; + case 'A': + add_criterion(CR_ALL, 0); + break; + case 'c': + cflag = 1; + break; + case 'g': + agxsel |= 02; + break; + case 'G': + add_criteria_list(CR_REAL_GID, optarg); + break; + case 'l': + format = 'l'; + break; + case 'L': + Lflag = 1; + break; + case 'n': + nflag = 1; + break; + case 'o': + oflag = 1; + wflag = 2; /* do not limit width */ + add_format_list(optarg); + break; + case 'p': + add_criteria_list(CR_PROCESS_ID, optarg); + break; + case 'r': + ucb_rflag = 1; + break; + case 'S': + Sflag = 1; + break; + case 't': + add_criteria_list(CR_TTY_DEVICE, optarg); + agxsel &= ~04; + break; + case 'u': + format = 'u'; + break; + case 'U': + /* + * 'U' without argument is 'update ps database' in + * historical /usr/ucb/ps. We implement the POSIX.2 + * option instead. 
+ */ + add_criteria_list(CR_REAL_UID, optarg); + break; + case 'v': + format = 'v'; + break; + case 'w': + wflag++; + break; + case 'x': + agxsel |= 04; + break; + } + } + if (illegal) + usage(); + if (av[optind]) { + add_criteria_list(CR_PROCESS_ID, av[optind]); + agxsel = 0; + ucb_rflag = 0; + } + switch (agxsel) { + case 01|04: + case 01|02|04: + add_criterion(CR_ALL, 0); + break; + case 02|04: + add_criterion(CR_WITHOUT_TTY, 0); + add_criterion(CR_ADD_UNINTERESTING, 0); + break; + case 01: + case 01|02: + add_criterion(CR_ALL_WITH_TTY, 0); + break; + case 02: + add_criterion(CR_ADD_UNINTERESTING, 0); + break; + case 04: + add_criterion(CR_NO_TTY_NO_SESSION_LEADER, 0); + break; + } + if (o0 == NULL) { + if (format == 'l') { + add_format(OU_F, NULL); + add_format(OU_RUID, " UID"); + } else if (format == 'u') { + if (nflag) + add_format(OU_RUID, " UID"); + else + add_format(OU_RUSER, "USER "); + } + if (format == 'l') { + add_format(OU_PID, NULL); + add_format(OU_PPID, NULL); + } else if (format == 'u') + add_format(OU_PID, " PID"); + else + add_format(OU_PID, " PID"); + if (format == 'l' || format == 'u') + add_format(OU_C, "CP"); + if (format == 'l') { + add_format(OU_OPRI, NULL); + add_format(OU_NICE, NULL); + } + if (format == 'l' || format == 'u') { + add_format(OU_OSZ, " SZ"); + add_format(OU_ORSS, " RSS"); + } + if (format == 'l') { + add_format(OU_WCHAN, NULL); + add_format(OU_S, NULL); + add_format(OU_TTY, "TT "); + } else + add_format(OU_TTY, "TT "); + if (format == 'u' || format == 'v' || format == 0) + add_format(OU_S, " S"); + if (format == 'u') + add_format(OU_STIME, " START"); + else if (format == 'v') { + add_format(OU_OSZ, " SIZE"); + add_format(OU_ORSS, " RSS"); + } + add_format(Sflag ? OU_ACCUTIME : OU_OTIME, NULL); + add_format(cflag ? 
OU_FNAME : OU_ARGS, "COMMAND"); + } + if ((cp = getenv("COLUMNS")) != NULL) + maxcolumn = strtol(cp, NULL, 10); + if (maxcolumn <= 0) { +#ifdef TIOCGWINSZ + struct winsize winsz; + + if (ioctl(1, TIOCGWINSZ, &winsz) == 0 && winsz.ws_col > 0) + maxcolumn = winsz.ws_col; + else +#endif /* TIOCGWINSZ */ + maxcolumn = 80; + } + if (wflag == 1) + maxcolumn += 52; + else if (wflag > 1) + maxcolumn = 0; +} +#endif /* UCB */ + +int +main(int argc, char **argv) +{ +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif + progname = basename(argv[0]); + sysname(argc, argv); + defaults(); + myuid = getuid(); + myeuid = geteuid(); + if (dropprivs && myuid && myeuid && myuid != myeuid) + setuid(myuid); + setlocale(LC_CTYPE, ""); + setlocale(LC_TIME, ""); + mb_cur_max = MB_CUR_MAX; + ontty = isatty(1); + options(argc, argv); + devices(); +#if !defined (__hpux) && !defined (_AIX) && !defined (__NetBSD__) && \ + !defined (__OpenBSD__) && !defined (__APPLE__) + chdir_to_proc(); +#endif +#ifdef __linux__ + get_linux_version(); + if (linux_version_lt(2, 5, 0) && has_o1_sched() == 0) + compute_priority = compute_priority_old; + else + compute_priority = compute_priority_new; +#endif /* __linux__ */ + hz = sysconf(_SC_CLK_TCK); + time(&now); +#ifdef __linux__ + uptime = sysup(); +#endif /* __linux__ */ +#ifdef __APPLE__ + { + static int mib[] = {CTL_HW, HW_PAGESIZE, 0}; + size_t size; + size = sizeof pagesize; + if (sysctl(mib, 2, &pagesize, &size, NULL, 0) == -1) { + fprintf(stderr, "error in sysctl(): %s\n", strerror(errno)); + exit(3); + } + } +#else + pagesize = sysconf(_SC_PAGESIZE); +#endif + kbytes_per_page = (pagesize >> 10); +#ifndef __sun + totalmem = getmem(); +#endif /* !__sun */ +#if defined (__linux__) || defined (__sun) + getproc("self", &myproc, getpid(), -1); +#elif defined (__FreeBSD__) || defined (__DragonFly__) + getproc("curproc", &myproc, getpid(), -1); +#elif defined (__hpux) + { + struct pst_status pst; + pid_t mypid = getpid(); + pstat_getproc(&pst, sizeof 
pst, (size_t)0, mypid); + getproc(&myproc, &pst); + } +#elif defined (_AIX) + { + struct stat st; + int fd; + + if ((fd = open("/dev/tty", O_RDONLY)) >= 0) { + if (stat(ttyname(fd), &st) == 0) + myproc.p_ttydev = st.st_rdev; + close(fd); + } + } +#elif defined (__OpenBSD__) + { + kvm_t *kt; + struct kinfo_proc *kp; + int mypid = getpid(); + int cnt; + + if ((kt = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, + "kvm_open")) == NULL) + exit(1); + kp = kvm_getprocs(kt, KERN_PROC_PID, mypid, &cnt); + if (kp != NULL) + getproc(&myproc, &kp[0]); + kvm_close(kt); + } +#elif defined (__NetBSD__) + { + kvm_t *kt; + struct kinfo_proc2 *kp; + int mypid = getpid(); + int cnt; + + if ((kt = kvm_open(NULL, NULL, NULL, KVM_NO_FILES, + "kvm_open")) == NULL) + exit(1); + kp = kvm_getproc2(kt, KERN_PROC_PID, mypid, + sizeof *kp, &cnt); + if (kp != NULL) + getproc(&myproc, &kp[0]); + kvm_close(kt); + } +#elif defined (__APPLE__) + { + struct kinfo_proc *kp; + pid_t mypid = getpid(); + size_t cnt; + int err; + + kp = NULL; + if ((err = GetBSDProcessList(mypid, &kp, &cnt)) != 0) { + fprintf(stderr, "error getting proc list: %s\n", strerror(err)); + exit(3); + } + if (kp != NULL) { + getproc(&myproc, kp); + free(kp); + } + } +#else /* SVR4 */ + { + /* + * /proc/self has no useful pr_ttydev value on Open UNIX 8. 
+ */ + char num[20]; + pid_t mypid = getpid(); + snprintf(num, sizeof num, "%d", mypid); + getproc(num, &myproc, mypid, -1); + } +#endif /* SVR4 */ + if (c0 == NULL) { +#ifndef UCB + if (myproc.p_ttydev == (dev_type)PRNODEV) { + fprintf(stderr, + "%s: can't find controlling terminal\n", + progname); + exit(1); + } +#endif /* !UCB */ + add_criterion(CR_DEFAULT, 0); + } else { + struct criterion *ct; + int valid = 0, invalid = 0; + + for (ct = c0; ct; ct = ct->c_nxt) { + if (ct->c_typ == CR_INVALID_STOP) + usage(); + else if (ct->c_typ == CR_EFF_UID) + valid |= 01; + else if (ct->c_typ == CR_INVALID_EFF_UID) + invalid |= 01; + else if (ct->c_typ == CR_REAL_UID) + valid |= 02; + else if (ct->c_typ == CR_INVALID_REAL_UID) + invalid |= 02; + else if (ct->c_typ == CR_REAL_GID) + valid |= 04; + else if (ct->c_typ == CR_INVALID_REAL_GID) + invalid |= 04; + } + if ((invalid & valid) != invalid) + return 1; + } + if (dohdr) + putheader(); + do_procs(); + return errcnt; +} diff --git a/ps/ps.dfl b/ps/ps.dfl @@ -0,0 +1,39 @@ +# Sccsid @(#)ps.dfl 1.4 (gritter) 10/13/03 + +# +# ps needs to know whether a new-style O(1) scheduler (as in 2.6 +# kernels) or an old-style scheduler (as in stock 2.4 kernels) +# is used. This is normally auto-detected, but it is impossible +# to do this perfectly given the large number of variations and +# backports. +# +#O1_SCHEDULER=1 + +# +# Unless this feature was deactivated at compilation time, ps keeps +# a cache file of /dev entries to accelerate the search of names of +# terminal devices. The file is automatically kept up to date by +# ps and owned by the effective user ID ps is invoked with. The +# following settings have no effect if no cache file is kept. +# + +# +# The location of the ps cache file. +# +CACHE_FILE=/tmp/ps_cache + +# +# The octal permission mode for the ps cache file. +# +CACHE_MODE=664 + +# +# The group owner of the ps cache file. 
+# +CACHE_GROUP=sys + +# +# If DROPPRIVS=1 ps will drop setuid privileges after it has created +# the cache file. +# +DROPPRIVS=0 diff --git a/sed/mkfile b/sed/mkfile @@ -0,0 +1,7 @@ +BIN = sed +OBJ = sed0.o sed1.o version.o +LOCAL_CFLAGS = -DSU3 +INSTALL_BIN = sed +INSTALL_MAN1 = sed.1 + +<$mkbuild/mk.default diff --git a/sed/sed.1 b/sed/sed.1 @@ -0,0 +1,369 @@ +.\" +.\" Sccsid @(#)sed.1 1.19 (gritter) 1/24/05 +.\" Derived from sed(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH SED 1 "1/24/05" "Heirloom Toolchest" "User Commands" +.SH NAME +sed \- stream editor +.SH SYNOPSIS +\fBsed\fR [\fB\-n\fR] [\fB\-e\fI\ script\fR] [\fB\-f\fI\ sfile\fR] +[\fIfile\fR\ .\ .\ .] +.SH DESCRIPTION +.I Sed +copies the named +.I files +(standard input default) to the standard output, +edited according to a script of commands. +The +.B \-f +option causes the script to be taken from file +.IR sfile ; +these options accumulate. +If there is just one +.B \-e +option and no +.BR \-f 's, +the flag +.B \-e +may be omitted. +The +.B \-n +option suppresses the default output. +.PP +A script consists of editing commands, one per line, +of the following form: +.IP +[address [, address] ] function [arguments] +.PP +In normal operation +.I sed +cyclically copies a line of input into a +.I pattern space +(unless there is something left after +a `D' command), +applies in sequence +all commands whose +.I addresses +select that pattern space, +and at the end of the script copies the pattern space +to the standard output (except under +.BR \-n ) +and deletes the pattern space. +.PP +An +.I address +is either a decimal number that counts +input lines cumulatively across files, a `$' that +addresses the last line of input, or a context address, +`/regular expression/', in the style of +.IR ed (1) +modified thus: +.IP +The escape sequence `\en' matches a +newline embedded in the pattern space. +.IP +A command line with no addresses selects every pattern space. 
+.IP +A command line with +one address selects each pattern space that matches the address. +.IP +A command line with +two addresses selects the inclusive range from the first +pattern space that matches the first address through +the next pattern space that matches +the second. +(If the second address is a number less than or equal +to the line number first selected, only one +line is selected.) +Thereafter the process is repeated, looking again for the +first address. +.PP +Editing commands can be applied only to non-selected pattern +spaces by use of the negation function `!' (below). +.PP +Regular expressions are simple regular expressions with +.BR /usr/5bin/sed , +and basic regular expressions with +.BR /usr/5bin/posix/sed , +.BR /usr/5bin/posix2001/sed , +and +.BR /usr/5bin/s42/sed . +.PP +In the following list of functions the +maximum number of permissible addresses +for each function is indicated in parentheses. +.PP +An argument denoted +.I text +consists of one or more lines, +all but the last of which end with `\e' to hide the +newline. +Backslashes in text are treated like backslashes +in the replacement string of an `s' command. +.PP +An argument denoted +.I rfile +or +.I wfile +must terminate the command +line. +.B /usr/5bin/sed +and +.B /usr/5bin/s42/sed +require that it is +preceded by exactly one blank. +Each +.I wfile +is created before processing begins. +.TP 10 +(1)\|\fBa\e\fR +.br +.ns +.TP 10 +.I text +Append. +Place +.I text +on the output before +reading the next input line. +.TP 10 +(2)\|\fBb \fIlabel\fR +Branch to the `:' command bearing the +.IR label . +If +.I label +is empty, branch to the end of the script. +.TP 10 +(2)\|\fBc\e\fR +.br +.ns +.TP 10 +.I text +Change. +Delete the pattern space. +With 0 or 1 address or at the end of a 2-address range, place +.I text +on the output. +Start the next cycle. +.TP 10 +(2)\|\fBd\fR +Delete the pattern space. +Start the next cycle. 
+.TP 10 +(2)\|\fBD\fR +Delete the initial segment of the +pattern space through the first newline. +Start the next cycle. +.TP 10 +(2)\|\fBg\fR +Replace the contents of the pattern space +by the contents of the hold space. +.TP 10 +(2)\|\fBG\fR +Append the contents of the hold space to the pattern space. +.TP 10 +(2)\|\fBh\fR +Replace the contents of the hold space by the contents of the pattern space. +.TP 10 +(2)\|\fBH\fR +Append the contents of the pattern space to the hold space. +.TP 10 +(1)\|\fBi\e\fR +.br +.ns +.TP 10 +.I text +Insert. +Place +.I text +on the standard output. +.TP 10 +(2)\|\fBl\fR +List the pattern space on the standard output in an +unambiguous form. +Non-printing ASCII characters are spelled +in two- or three-digit ASCII, +and long lines are folded. +.TP 10 +(2)\|\fBn\fR +Copy the pattern space to the standard output. +Replace the pattern space with the next line of input. +.TP 10 +(2)\|\fBN\fR +Append the next line of input to the pattern space +with an embedded newline. +(The current line number changes.) +.TP 10 +(2)\|\fBp\fR +Print. +Copy the pattern space to the standard output. +.TP 10 +(2)\|\fBP\fR +Copy the initial segment of the pattern space through +the first newline to the standard output. +.TP 10 +(1)\|\fBq\fR +Quit. +Branch to the end of the script. +Do not start a new cycle. +.TP 10 +(2)\|\fBr \fIrfile\fR +Read the contents of +.IR rfile . +Place them on the output before reading +the next input line. +.TP 10 +(2)\|\fBs/\fIregular\ expression\fB/\fIreplacement\fB/\fIflags\fR +Substitute the +.I replacement +string for instances of the +.I regular expression +in the pattern space. +Any character may be used instead of `/'. +For a fuller description see +.IR ed (1). +.I Flags +is zero or more of +.RS +.TP +.B g +Global. +Substitute for all nonoverlapping instances of the +.I regular expression +rather than just the +first one. +.TP +.I n +\fIn\fR=1\^\(en\^512. 
+Substitute the \fIn\fRth occurrence of the +.I regular expression +only. +.TP +.B p +Print the pattern space if a replacement was made. +.TP +\fBw \fIwfile\fB +Write. +Append the pattern space to +.I wfile +if a replacement +was made. +.RE +.TP 10 +(2)\|\fBt \fIlabel\fR +Test. +Branch to the `:' command bearing the +.I label +if any +substitutions have been made since the most recent +reading of an input line or execution of a `t'. +If +.I label +is empty, branch to the end of the script. +.TP 10 +(2)\|\fBw \fIwfile\fR +Write. +Append the pattern space to +.IR wfile . +.TP 10 +(2)\|\fBx\fR +Exchange the contents of the pattern and hold spaces. +.TP 10 +(2)\|\fBy/\fIstring1\fB/\fIstring2\fB/\fR +Transform. +Replace all occurrences of characters in +.I string1 +with the corresponding character in +.I string2. +The lengths of +.I +string1 +and +.I string2 +must be equal. +.TP 10 +(2)\fB! \fIfunction\fB +Don't. +Apply the +.I function +(or group, if +.I function +is `{') only to lines +.I not +selected by the address(es). +.TP 10 +(0)\|\fB: \fIlabel\fR +This command does nothing; it bears a +.I label +for `b' and `t' commands to branch to. +.TP 10 +(1)\|\fB=\fR +Place the current line number on the standard output as a line. +.TP 10 +(2)\|\fB{\fR +Execute the following commands through a matching `}' +only when the pattern space is selected. +.TP 10 +(0)\| +An empty command is ignored. +.TP 10 +(0)\|\fB#\fP +Ignore the entire line, +except when the first two characters in the script are \fB#n\fP, +which has the same effect as the \f2\-n\fP command line option. +.SH "ENVIRONMENT VARIABLES" +.TP +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP +.B LC_COLLATE +Affects the collation order for range expressions, +equivalence classes, and collation symbols +in basic regular expressions. 
+.TP +.B LC_CTYPE +Determines the mapping of bytes to characters +in both simple and basic regular expressions, +for the +.B l +and +.B y +commands, +and the availability and composition of character classes +in basic regular expressions. +.SH SEE ALSO +ed(1), +grep(1), +awk(1), +locale(7) diff --git a/sed/sed.h b/sed/sed.h @@ -0,0 +1,191 @@ +/* + * sed -- stream editor + * + * Copyright 1975 Bell Telephone Laboratories, Incorporated + * + * Owner: lem + */ + +/* from Unix 7th Edition and Unix 32V sed */ +/* Sccsid @(#)sed.h 1.32 (gritter) 2/6/05 */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. 
BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/types.h> +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +#if defined (SUS) || defined (SU3) || defined (S42) +#include <regex.h> +#endif /* SUS || SU3 || S42 */ + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif /* _IO_getc_unlocked */ +#ifdef _IO_putc_unlocked +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif /* _IO_putc_unlocked */ +#endif /* __GLIBC__ */ + +#define CEND 16 +#define CLNUM 14 + +#if defined (SUS) || defined (SU3) || defined (S42) +struct re_emu { + char *r_dummy; + regex_t r_preg; +}; +#endif /* SUS || SU3 || S42 */ + +extern int circf, ceof, nbra, sed; + +struct yitem { + struct yitem *y_nxt; + wint_t y_oc; + wint_t y_yc; + char y_mc[MB_LEN_MAX]; +}; + +extern int ABUFSIZE; +extern int LBSIZE; +extern struct reptr **abuf; +extern int aptr; +extern char *genbuf; +extern int gbend; +extern int lbend; +extern int hend; +extern char *linebuf; +extern char *holdsp; +extern int nflag; +extern long long *tlno; + +enum cmd { + ACOM = 01, + BCOM = 020, + CCOM = 02, + CDCOM = 025, + CNCOM = 022, + COCOM = 017, + CPCOM = 023, + DCOM = 03, + ECOM = 015, + EQCOM = 013, + FCOM = 016, + GCOM = 027, + CGCOM = 030, + HCOM = 031, + CHCOM = 032, + ICOM = 04, + LCOM = 05, + NCOM = 012, + PCOM = 010, + QCOM = 011, + RCOM = 06, + SCOM = 07, + TCOM = 021, + WCOM = 014, + CWCOM = 024, + YCOM = 026, + XCOM = 033 +}; + +extern char *cp; + +#define P(n) ((n) > 0 ? 
&ptrspace[n - 1] : (struct reptr *)0) +#define L(n) ((n) > 0 ? &ltab[n - 1] : (struct label *)0) +#define A(n) ((n) > 0 ? &abuf[n - 1] : (struct reptr **)0) + +#define slno(ep, n) ( \ + *(ep)++ = ((n) & 0xff000000) >> 24, \ + *(ep)++ = ((n) & 0x00ff0000) >> 16, \ + *(ep)++ = ((n) & 0x0000ff00) >> 8, \ + *(ep)++ = ((n) & 0x000000ff) \ + ) + +#define glno(p) ( \ + ((p)[0]&0377) << 24 | \ + ((p)[1]&0377) << 16 | \ + ((p)[2]&0377) << 8 | \ + ((p)[3]&0377) \ + ) + +struct reptr { + char *ad1; + char *ad2; + union { + char *re1; + int lb1; + } bptr; + char *rhs; + FILE *fcode; + enum cmd command; + short gfl; + char pfl; + char inar; + char negfl; + char nsub; +}; + +extern struct reptr *ptrspace; + +struct label { + char asc[8*MB_LEN_MAX + 1]; + int chain; + int address; +}; + +extern int status; +extern int multibyte; +extern int invchar; +extern int needdol; + +extern int eargc; + +extern struct reptr *pending; +extern char *badp; + +extern void execute(const char *); +extern void fatal(const char *, ...); +extern void nonfatal(const char *, ...); +extern void aptr_inc(void); +extern wint_t wc_get(char **, int); +#define fetch(s) (multibyte ? wc_get(s, 1) : (*(*(s))++ & 0377)) +#define peek(s) (multibyte ? wc_get(s, 0) : (**(s) & 0377)) +extern struct yitem *ylook(wint_t , struct yitem **, int); +extern void *smalloc(size_t); +extern void growsp(const char *); diff --git a/sed/sed0.c b/sed/sed0.c @@ -0,0 +1,1266 @@ +/* from Unix 7th Edition sed */ +/* Sccsid @(#)sed0.c 1.64 (gritter) 3/12/05> */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include <unistd.h> +#include <stdlib.h> +#include <locale.h> +#include <libgen.h> +#include <stdarg.h> +#include <wchar.h> +#include "sed.h" + +int ABUFSIZE; +struct reptr **abuf; +int aptr; +char *genbuf; +int gbend; +int lbend; +int hend; +char *linebuf; +char *holdsp; +int nflag; +long long *tlno; +char *cp; + +int status; +int multibyte; +int invchar; +int needdol; + +int eargc; + +struct reptr *ptrspace; +struct reptr *pending; +char *badp; + +static const char CGMES[] = "\1command garbled: %s"; +static const char TMMES[] = "Too much text: %s"; +static const char LTL[] = "Label too long: %s"; +static const char LINTL[] = "line too long"; +static const char AD0MES[] = "No addresses allowed: %s"; +static const char AD1MES[] = "Only one address allowed: %s"; +static FILE **fcode; +static FILE *fin; +static char *lastre; +static wchar_t sed_seof; +static int PTRSIZE; +static int eflag; +static int gflag; +static int nlno; +static char **fname; +static int nfiles; +static int rep; +static struct label *ltab; +static int lab; +static size_t LABSIZE; +static int labtab = 1; +static int depth; +static char **eargv; +static int *cmpend; +static size_t DEPTH; +static char bad; +static char compfl; +static char *progname; +static char *(*ycomp)(char **); +static int executing; + +static void fcomp(void); +static char *compsub(char **, char *); +static int rline(void); +static char *address(char **); +static int cmp(const char *, const char *); +static void text(char **); +static int search(struct label *); +static void dechain(void); +static char *ycomp_sb(char **); +static char *ycomp_mb(char **); +static void lab_inc(void); +static void rep_inc(void); +static void depth_check(void); +static void *srealloc(void *, size_t); +static void *scalloc(size_t, size_t); +static char *sed_compile(char **); +static void wfile(void); +static void morefiles(void); + +static char *null; +#define check(p, buf, sz, incr, op) \ + if (&p[1] >= &(buf)[sz]) { \ + size_t ppos = p - 
buf; \ + size_t opos = op - buf; \ + buf = srealloc(buf, (sz += incr) * sizeof *(buf)); \ + p = &(buf)[ppos]; \ + if (op != NULL) \ + op = &(buf)[opos]; \ + } + +int +main(int argc, char **argv) +{ + int c; + const char optstr[] = "nf:e:g"; + + sed = 1; + progname = basename(argv[0]); + eargc = argc; + eargv = argv; + +#ifdef __GLIBC__ + putenv("POSIXLY_CORRECT=1"); +#endif /* __GLIBC__ */ +#if defined (SUS) || defined (SU3) || defined (S42) + setlocale(LC_COLLATE, ""); +#endif /* SUS || SU3 || S42 */ + setlocale(LC_CTYPE, ""); + multibyte = MB_CUR_MAX > 1; + ycomp = multibyte ? ycomp_mb : ycomp_sb; + badp = &bad; + aptr_inc(); + lab_inc(); + lab_inc(); /* 0 reserved for end-pointer -> labtab = 1 */ + growsp(NULL); + rep_inc(); + pending = 0; + depth = 0; + morefiles(); + fcode[0] = stdout; + nfiles = 1; + morefiles(); + + if(eargc == 1) + exit(0); + while ((c = getopt(eargc, eargv, optstr)) != EOF) { + switch (c) { + case 'n': + nflag++; + continue; + + case 'f': + if((fin = fopen(optarg, "r")) == NULL) + fatal("Cannot open pattern-file: %s", optarg); + + fcomp(); + fclose(fin); + continue; + + case 'e': + eflag++; + fcomp(); + eflag = 0; + continue; + + case 'g': + gflag++; + continue; + + default: + exit(2); + } + } + + eargv += optind, eargc -= optind; + + + if(compfl == 0 && *eargv) { + optarg = *eargv++; + eargc--; + eflag++; + fcomp(); + eflag = 0; + } + + if(depth) + fatal("Too many {'s"); + + L(labtab)->address = rep; + + dechain(); + +/* abort(); */ /*DEBUG*/ + + executing++; + if(eargc <= 0) + execute((char *)NULL); + else while(--eargc >= 0) { + execute(*eargv++); + } + fclose(stdout); + return status; +} + +static void +fcomp(void) +{ + + register char *op, *tp, *q; + int pt, pt1; + int lpt; + + compfl = 1; + op = lastre; + + if(rline() < 0) return; + if(*linebuf == '#') { + if(linebuf[1] == 'n') + nflag = 1; + } + else { + cp = linebuf; + goto comploop; + } + + for(;;) { + if(rline() < 0) break; + + cp = linebuf; + +comploop: +/* fprintf(stdout, "cp: 
%s\n", cp); */ /*DEBUG*/ + while(*cp == ' ' || *cp == '\t') cp++; + if(*cp == '\0' || *cp == '#') continue; + if(*cp == ';') { + cp++; + goto comploop; + } + + q = address(&P(rep)->ad1); + if(q == badp) + fatal(CGMES, linebuf); + + if(q != 0 && q == P(rep)->ad1) { + if(op) + P(rep)->ad1 = op; + else + fatal("First RE may not be null"); + } else if(q == 0) { + P(rep)->ad1 = 0; + } else { + op = P(rep)->ad1; + if(*cp == ',' || *cp == ';') { + cp++; + q = address(&P(rep)->ad2); + if(q == badp || q == 0) + fatal(CGMES, linebuf); + if(q == P(rep)->ad2) + P(rep)->ad2 = op; + else + op = P(rep)->ad2; + + } else + P(rep)->ad2 = 0; + } + + while(*cp == ' ' || *cp == '\t') cp++; + +swit: + switch(*cp++) { + + default: + fatal("Unrecognized command: %s", linebuf); + /*NOTREACHED*/ + + case '!': + P(rep)->negfl = 1; + goto swit; + + case '{': + P(rep)->command = BCOM; + P(rep)->negfl = !(P(rep)->negfl); + depth_check(); + cmpend[depth++] = rep; + rep_inc(); + if(*cp == '\0') continue; + + goto comploop; + + case '}': + if(P(rep)->ad1) + fatal(AD0MES, linebuf); + + if(--depth < 0) + fatal("Too many }'s"); + P(cmpend[depth])->bptr.lb1 = rep; + + continue; + + case '=': + P(rep)->command = EQCOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + break; + + case ':': + if(P(rep)->ad1) + fatal(AD0MES, linebuf); + + while(*cp++ == ' '); + cp--; + + + tp = L(lab)->asc; + while((*tp++ = *cp++)) + if(tp >= &(L(lab)->asc[sizeof + L(lab)->asc])) + fatal(LTL, linebuf); + *--tp = '\0'; + + if(lpt = search(L(lab))) { + if(L(lpt)->address) + fatal("Duplicate labels: %s", + linebuf); + } else { + L(lab)->chain = 0; + lpt = lab; + lab_inc(); + } + L(lpt)->address = rep; + + continue; + + case 'a': + P(rep)->command = ACOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + if(*cp == '\\') cp++; + if(*cp++ != '\n') + fatal(CGMES, linebuf); + text(&P(rep)->bptr.re1); + break; + case 'c': + P(rep)->command = CCOM; + if(*cp == '\\') cp++; + if(*cp++ != ('\n')) + fatal(CGMES, linebuf); + 
text(&P(rep)->bptr.re1); + needdol = 1; + break; + case 'i': + P(rep)->command = ICOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + if(*cp == '\\') cp++; + if(*cp++ != ('\n')) + fatal(CGMES, linebuf); + text(&P(rep)->bptr.re1); + break; + + case 'g': + P(rep)->command = GCOM; + break; + + case 'G': + P(rep)->command = CGCOM; + break; + + case 'h': + P(rep)->command = HCOM; + break; + + case 'H': + P(rep)->command = CHCOM; + break; + + case 't': + P(rep)->command = TCOM; + goto jtcommon; + + case 'b': + P(rep)->command = BCOM; +jtcommon: + while(*cp++ == ' '); + cp--; + + if(*cp == '\0') { + if((pt = L(labtab)->chain) != 0) { + while((pt1 = P(pt)->bptr.lb1) != 0) + pt = pt1; + P(pt)->bptr.lb1 = rep; + } else + L(labtab)->chain = rep; + break; + } + tp = L(lab)->asc; + while((*tp++ = *cp++)) + if(tp >= &(L(lab)->asc[sizeof + L(lab)->asc])) + fatal(LTL, linebuf); + cp--; + *--tp = '\0'; + + if(lpt = search(L(lab))) { + if(L(lpt)->address) { + P(rep)->bptr.lb1 = L(lpt)->address; + } else { + pt = L(lpt)->chain; + while((pt1 = P(pt)->bptr.lb1) != 0) + pt = pt1; + P(pt)->bptr.lb1 = rep; + } + } else { + L(lab)->chain = rep; + L(lab)->address = 0; + lab_inc(); + } + break; + + case 'n': + P(rep)->command = NCOM; + break; + + case 'N': + P(rep)->command = CNCOM; + break; + + case 'p': + P(rep)->command = PCOM; + break; + + case 'P': + P(rep)->command = CPCOM; + break; + + case 'r': + P(rep)->command = RCOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); +#if !defined (SUS) && !defined (SU3) + if(*cp++ != ' ') + fatal(CGMES, linebuf); +#else /* SUS, SU3 */ + while (*cp == ' ' || *cp == '\t') + cp++; +#endif /* SUS, SU3 */ + text(&P(rep)->bptr.re1); + break; + + case 'd': + P(rep)->command = DCOM; + break; + + case 'D': + P(rep)->command = CDCOM; + P(rep)->bptr.lb1 = 1; + break; + + case 'q': + P(rep)->command = QCOM; + if(P(rep)->ad2) + fatal(AD1MES, linebuf); + break; + + case 'l': + P(rep)->command = LCOM; + break; + + case 's': + P(rep)->command = SCOM; + sed_seof = 
fetch(&cp); + q = sed_compile(&P(rep)->bptr.re1); + if(q == badp) + fatal(CGMES, linebuf); + if(q == P(rep)->bptr.re1) { + if (op == NULL) + fatal("First RE may not be null"); + P(rep)->bptr.re1 = op; + } else { + op = P(rep)->bptr.re1; + } + + if(compsub(&P(rep)->rhs, &P(rep)->nsub) == badp) + fatal(CGMES, linebuf); + sloop: if(*cp == 'g') { + cp++; + P(rep)->gfl = -1; + goto sloop; + } else if(gflag) + P(rep)->gfl = -1; + if (*cp >= '0' && *cp <= '9') { + while (*cp >= '0' && *cp <= '9') { + if (P(rep)->gfl == -1) + P(rep)->gfl = 0; + P(rep)->gfl = P(rep)->gfl * 10 + + *cp++ - '0'; + } + goto sloop; + } +#if !defined (SUS) && !defined (SU3) + if (P(rep)->gfl > 0 && P(rep)->gfl > 512) + fatal("Suffix too large - 512 max: %s", linebuf); +#endif + + if(*cp == 'p') { + cp++; + P(rep)->pfl = 1; + goto sloop; + } + + if(*cp == 'P') { + cp++; + P(rep)->pfl = 2; + goto sloop; + } + + if(*cp == 'w') { + cp++; + wfile(); + } + break; + + case 'w': + P(rep)->command = WCOM; + wfile(); + break; + + case 'x': + P(rep)->command = XCOM; + break; + + case 'y': + P(rep)->command = YCOM; + sed_seof = fetch(&cp); + if (ycomp(&P(rep)->bptr.re1) == badp) + fatal(CGMES, linebuf); + break; + + } + rep_inc(); + + if(*cp++ != '\0') { + if(cp[-1] == ';') + goto comploop; + fatal(CGMES, linebuf); + } + + } + P(rep)->command = 0; + lastre = op; +} + +static char * +compsub(char **rhsbuf, char *nsubp) +{ + register char *p, *op, *oq; + char *q; + wint_t c; + size_t sz = 32; + + *rhsbuf = smalloc(sz); + p = *rhsbuf; + q = cp; + *nsubp = 0; + for(;;) { + op = p; + oq = q; + if((c = fetch(&q)) == '\\') { + check(p, *rhsbuf, sz, 32, op) + *p = '\\'; + oq = q; + c = fetch(&q); + do { + check(p, *rhsbuf, sz, 32, op) + *++p = *oq++; + } while (oq < q); + if(c > nbra + '0' && c <= '9') + return(badp); + if (c > *nsubp + '0' && c <= '9') + *nsubp = c - '0'; + check(p, *rhsbuf, sz, 32, op) + p++; + continue; + } else { + do { + check(p, *rhsbuf, sz, 32, op) + *p++ = *oq++; + } while (oq < q); + p--; + 
} + if(c == sed_seof) { + check(p, *rhsbuf, sz, 32, op) + *op++ = '\0'; + cp = q; + return(op); + } + check(p, *rhsbuf, sz, 32, op) + if(*p++ == '\0') { + return(badp); + } + + } +} + +/* make room (through growsp) before index c gets within two bytes of the end of linebuf */ +#define rlinechk() if (c >= lbend-2) \ + growsp(LINTL) + +/* + * Read the next script line into linebuf, NUL-terminated and without + * its trailing newline. With eflag set the text comes from optarg + * (first call, eflag > 0) or from the remainder saved in saveq by the + * previous call; otherwise it is read from fin. A byte following a + * backslash is copied as is, so an escaped newline does not end the + * line. Returns 1 when a line was stored, -1 when the script text is + * exhausted (a backslash directly before the end of an -e string, no + * text left in saveq, or EOF on fin). + */ +static int +rline(void) +{ + register char *q; + register int c; + register int t; + static char *saveq; + + c = -1; + + if(eflag) { + if(eflag > 0) { + eflag = -1; + q = optarg; + rlinechk(); + while(linebuf[++c] = *q++) { + rlinechk(); + if(linebuf[c] == '\\') { + if((linebuf[++c] = *q++) == '\0') { + rlinechk(); + saveq = 0; + return(-1); + } else + continue; + } + if(linebuf[c] == '\n') { + linebuf[c] = '\0'; + saveq = q; + return(1); + } + } + saveq = 0; + return(1); + } + if((q = saveq) == 0) return(-1); + + while(linebuf[++c] = *q++) { + rlinechk(); + if(linebuf[c] == '\\') { + /* NUL, not the digit '0': a trailing backslash must stop the scan here, as in the first-call loop above, before it runs past the end of the -e string */ + if((linebuf[++c] = *q++) == '\0') { + rlinechk(); + saveq = 0; + return(-1); + } else + continue; + } + if(linebuf[c] == '\n') { + linebuf[c] = '\0'; + saveq = q; + return(1); + } + } + saveq = 0; + return(1); + } + + while((t = getc(fin)) != EOF) { + rlinechk(); + linebuf[++c] = (char)t; + if(linebuf[c] == '\\') { + t = getc(fin); + rlinechk(); + linebuf[++c] = (char)t; + } + else if(linebuf[c] == '\n') { + linebuf[c] = '\0'; + return(1); + } + } + linebuf[++c] = '\0'; + return(-1); +} + +static char * +address(char **expbuf) +{ + register char *rcp, *ep; + long long lno; + + *expbuf = NULL; + if(*cp == '$') { + cp++; + ep = *expbuf = smalloc(2 * sizeof *expbuf); + *ep++ = CEND; + *ep++ = ceof; + needdol = 1; + return(ep); + } + + if(*cp == '/' || *cp == '\\') { + if (*cp == '\\') + cp++; + sed_seof = fetch(&cp); + return(sed_compile(expbuf)); + } + + rcp = cp; + lno = 0; + + while(*rcp >= '0' && *rcp <= '9') + lno = lno*10 + *rcp++ - '0'; + + if(rcp > cp) { + if (nlno > 020000000000 || + (tlno = realloc(tlno, (nlno+1)*sizeof *tlno)) == NULL) + fatal("Too many line numbers"); + ep = *expbuf = smalloc(6 * sizeof *expbuf); + *ep++ = CLNUM; +
slno(ep, nlno); + tlno[nlno++] = lno; + *ep++ = ceof; + cp = rcp; + return(ep); + } + return(0); +} + +static int +cmp(const char *a, const char *b) +{ + register const char *ra, *rb; + + ra = a - 1; + rb = b - 1; + + while(*++ra == *++rb) + if(*ra == '\0') return(0); + return(1); +} + +static void +text(char **textbuf) +{ + register char *p, *oq; + char *q; + size_t sz = 128; + + *textbuf = smalloc(sz); + p = *textbuf; + q = cp; + for(;;) { + + oq = q; + if(fetch(&q) == '\\') { + oq = q; + fetch(&q); + } + while(oq < q) + *p++ = *oq++; + if(p[-1] == '\0') { + cp = --q; + return; + } + check(p, *textbuf, sz, 128, null) + } +} + +static int +search(struct label *ptr) +{ + struct label *rp; + + rp = L(labtab); + while(rp < ptr) { + if(cmp(rp->asc, ptr->asc) == 0) + return(rp - L(labtab) + 1); + rp++; + } + + return(0); +} + + +static void +dechain(void) +{ + struct label *lptr; + int rptr, trptr; + + for(lptr = L(labtab); lptr < L(lab); lptr++) { + + if(lptr->address == 0) + fatal("Undefined label: %s", lptr->asc); + + if(lptr->chain) { + rptr = lptr->chain; + while((trptr = P(rptr)->bptr.lb1) != 0) { + P(rptr)->bptr.lb1 = lptr->address; + rptr = trptr; + } + P(rptr)->bptr.lb1 = lptr->address; + } + } +} + +static char * +ycomp_sb(char **expbuf) +{ + register int c, d; + register char *ep, *tsp; + char *sp; + + *expbuf = smalloc(0400); + ep = *expbuf; + for(c = 0; !(c & 0400); c++) + ep[c] = '\0'; + sp = cp; + for(tsp = cp; *tsp != sed_seof; tsp++) { + if(*tsp == '\\') + tsp++; + if(*tsp == '\n' || *tsp == '\0') + return(badp); + } + tsp++; + + while((c = *sp++ & 0377) != sed_seof) { + if(c == '\\') { + c = *sp == 'n' ? '\n' : *sp; + sp++; + } + if((ep[c] = d = *tsp++ & 0377) == '\\') { + ep[c] = *tsp == 'n' ? 
'\n' : *tsp; + tsp++; + } + if(d != '\\' && ep[c] == sed_seof || ep[c] == '\0') + return(badp); + } + if(*tsp != sed_seof) + return(badp); + cp = ++tsp; + + for(c = 0; !(c & 0400); c++) + if(ep[c] == 0) + ep[c] = (char)c; + + return(ep + 0400); +} + +static char * +ycomp_mb(char **expbuf) +{ + struct yitem **yt, *yp; + register wint_t c, d; + char *otsp, *tsp, *sp, *mp; + + tsp = sp = cp; + while ((c = fetch(&tsp)) != sed_seof) { + if (c == '\\') + c = fetch(&tsp); + if (c == '\n' || c == '\0') + return badp; + } + yt = scalloc(200, sizeof *yt); + while ((c = fetch(&sp)) != sed_seof) { + if (c == '\\') { + if ((d = fetch(&sp)) == 'n') + c = '\n'; + else + c = d; + } + otsp = tsp; + d = fetch(&tsp); + yp = ylook(c, yt, 1); + yp->y_oc = c; + if ((yp->y_yc = d) == '\\') { + otsp = tsp; + if ((c = fetch(&tsp)) == 'n') + yp->y_yc = '\n'; + else + yp->y_yc = c; + } + if (d != '\\' && yp->y_yc == sed_seof || yp->y_yc == '\0') + return badp; + mp = yp->y_mc; + if (yp->y_yc != '\n') + while (otsp < tsp) + *mp++ = *otsp++; + else + *mp++ = '\n'; + *mp = '\0'; + } + if (fetch(&tsp) != sed_seof) + return badp; + cp = tsp; + *expbuf = (char *)yt; + return &(*expbuf)[1]; +} + +static void +rep_inc(void) +{ + register char *p; + const int chunk = 16; + + if (++rep >= PTRSIZE) { + ptrspace = srealloc(ptrspace, + (PTRSIZE += chunk) * sizeof *ptrspace); + for (p = (char *)&ptrspace[PTRSIZE - chunk]; + p < (char *)&ptrspace[PTRSIZE]; p++) + *p = '\0'; + } +} + +static void +lab_inc(void) +{ + register char *p; + const int chunk = 8; + + if (++lab >= LABSIZE) { + ltab = srealloc(ltab, (LABSIZE += chunk) * sizeof *ltab); + for (p = (char *)&ltab[LABSIZE - chunk]; + p < (char *)&ltab[LABSIZE]; p++) + *p = '\0'; + } +} + +void +aptr_inc(void) +{ + register char *p; + const int chunk = 8; + + if (++aptr > ABUFSIZE) { + abuf = srealloc(abuf, (ABUFSIZE += chunk) * sizeof *abuf); + for (p = (char *)&abuf[ABUFSIZE - chunk]; + p < (char *)&abuf[ABUFSIZE]; p++) + *p = '\0'; + } +} + +static 
void +depth_check(void) +{ + if (depth + 1 > DEPTH) + cmpend = srealloc(cmpend, (DEPTH += 8) * sizeof *cmpend); +} + +void +nonfatal(const char *afmt, ...) +{ + va_list ap; + const char *fmt; + + if (*afmt == '\1') { + fprintf(stderr, "%s: ", progname); + fmt = &afmt[1]; + } else + fmt = afmt; + va_start(ap, afmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + status |= 1; +} + +void +fatal(const char *afmt, ...) +{ + va_list ap; + const char *fmt; + + if (*afmt == '\1') { + fprintf(stderr, "%s: ", progname); + fmt = &afmt[1]; + } else + fmt = afmt; + va_start(ap, afmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fputc('\n', stderr); + exit(2); +} + +static void * +srealloc(void *vp, size_t nbytes) +{ + void *p; + + if ((p = realloc(vp, nbytes)) == NULL) + fatal(TMMES, linebuf); + return p; +} + +void * +smalloc(size_t nbytes) +{ + return srealloc(NULL, nbytes); +} + +static void * +scalloc(size_t nmemb, size_t size) +{ + void *p; + + if ((p = calloc(nmemb, size)) == NULL) + fatal(TMMES, linebuf); + return p; +} + +#if defined (SUS) || defined (SU3) || defined (S42) +static char * +sed_compile(char **ep) +{ + struct re_emu *re; + static char *pat; + static size_t patsz; + register char *p, *oc; + wint_t c, d; + + if (*cp != sed_seof) + nbra = 0; + if (patsz == 0) + pat = smalloc(patsz = 32); + p = pat; + do { + oc = cp; + if ((c = fetch(&cp)) == sed_seof) + *p = '\0'; + else if (c == '\\') { + oc = cp; + if ((c = fetch(&cp)) == 'n') + *p = '\n'; + else { + check(p, pat, patsz, 32, null); + *p++ = '\\'; + if (c == '(') + nbra++; + goto normchar; + } + } else if (c == '[') { + check(p, pat, patsz, 32, null); + *p++ = c; + d = WEOF; + do { + oc = cp; + c = fetch(&cp); + if (c == '\0') + goto normchar; + do { + check(p, pat, patsz, 32, null); + *p++ = *oc++; + } while (oc < cp); + if (d == '[' && (c == ':' || c == '.' 
|| + c == '=')) { + d = c; + do { + oc = cp; + c = fetch(&cp); + if (c == '\0') + goto normchar; + do { + check(p, pat, patsz,32, + null); + *p++ = *oc++; + } while (oc < cp); + } while (c != d || peek(&cp) != ']'); + oc = cp; + c = fetch(&cp); + do { + check(p, pat, patsz, 32, null); + *p++ = *oc++; + } while (oc < cp); + c = WEOF; /* == reset d and continue */ + } + d = c; + } while (c != ']'); + p--; + } else { + normchar: do { + check(p, pat, patsz, 32, null) + *p++ = *oc++; + } while (oc < cp); + p--; + } + check(p, pat, patsz, 32, null); + } while (*p++ != '\0'); + re = scalloc(1, sizeof *re); + *ep = (char *)re; + if (*pat == '^') + **ep = 1; + if (*pat != '\0') { + int reflags = 0; + +#ifdef REG_ANGLES + reflags |= REG_ANGLES; +#endif /* REG_ANGLES */ +#if defined (SU3) && defined (REG_AVOIDNULL) + reflags |= REG_AVOIDNULL; +#endif /* SU3 && AVOIDNULL */ + if (regcomp(&re->r_preg, pat, reflags) != 0) + re = (struct re_emu *)badp; + } else + **ep = 2; + p = (char *)re; + if (p != badp && *pat) + p++; + return p; +} +#else /* !SUS, !SU3, !S42 */ +static char * +sed_compile(char **ep) +{ + extern char *compile(char *, char *, char *, int); + register char *p; + size_t sz; + + for (sz = 0, p = cp; *p; p++) + if (*p == '[') + sz += 32; + sz += 2 * (p - cp) + 5; + *ep = smalloc(sz); + (*ep)[1] = '\0'; + p = compile(NULL, &(*ep)[1], &(*ep)[sz], sed_seof); + if (p == &(*ep)[1]) + return *ep; + **ep = circf; + return p; +} +#endif /* !SUS, !SU3, !S42 */ + +wint_t +wc_get(char **sc, int move) +{ + wint_t c; + char *p = *sc; + wchar_t wcbuf; + int len; + + if ((*p & 0200) == 0) { + c = *p; + p += (len = 1); + invchar = 0; + } else if ((len = mbtowc(&wcbuf, p, MB_LEN_MAX)) < 0) { + if (!executing) + fatal("invalid multibyte character: %s", p); + c = (*p++ & 0377); + mbtowc(NULL, NULL, 0); + invchar = 1; + } else if (len == 0) { + c = '\0'; + p++; + invchar = 0; + } else { + c = wcbuf; + p += len; + invchar = 0; + } + if (move) + *sc = p; + return c; +} + +/* + * Note 
that this hash is not optimized to distribute the items + * equally to all buckets. y commands typically handle only a + * small part of the alphabet, thus most characters will have + * no entry in the hash table. If no list exists in the bucket + * for the hash of these characters, the function can return + * quickly. + */ +#define yhash(c) (c & 0177) + +struct yitem * +ylook(wint_t c, struct yitem **yt, int make) +{ + struct yitem *yp; + int h; + + yp = yt[h = yhash(c)]; + while (yp != NULL) { + if (yp->y_oc == c) + break; + yp = yp->y_nxt; + } + if (make && yp == NULL) { + yp = scalloc(1, sizeof *yp); + yp->y_oc = c; + yp->y_nxt = yt[h]; + yt[h] = yp; + } + return yp; +} + +void +growsp(const char *msg) +{ + const int incr = 128; + int olbend, ogbend, ohend; + + olbend = lbend; + ogbend = gbend; + ohend = hend; + if ((linebuf = realloc(linebuf, lbend += incr)) == NULL || + (genbuf = realloc(genbuf, gbend += incr)) == NULL || + (holdsp = realloc(holdsp, hend += incr)) == NULL) + fatal(msg ? 
msg : "Cannot malloc space"); + while (olbend < lbend) + linebuf[olbend++] = '\0'; + while (ogbend < gbend) + genbuf[ogbend++] = '\0'; + while (ohend < hend) + holdsp[ohend++] = '\0'; +} + +static void +wfile(void) +{ + int i; + +#if !defined (SUS) && !defined (SU3) + if(*cp++ != ' ') + fatal(CGMES, linebuf); +#else /* SUS, SU3 */ + while (*cp == ' ' || *cp == '\t') + cp++; +#endif /* SUS, SU3 */ + + text(&fname[nfiles]); + for(i = nfiles - 1; i >= 0; i--) + if(fname[i] != NULL && cmp(fname[nfiles], fname[i]) == 0) { + P(rep)->fcode = fcode[i]; + free(fname[nfiles]); + return; + } + + if((P(rep)->fcode = fopen(fname[nfiles], "w")) == NULL) + fatal("Cannot create %s", fname[nfiles]); + fcode[nfiles++] = P(rep)->fcode; + morefiles(); +} + +static void +morefiles(void) +{ + if ((fname = realloc(fname, (nfiles+1) * sizeof *fname)) == 0 || + (fcode = realloc(fcode, (nfiles+1) * sizeof *fcode)) == 0) + fatal("Too many files in w commands"); + fname[nfiles] = 0; + fcode[nfiles] = 0; +} diff --git a/sed/sed1.c b/sed/sed1.c @@ -0,0 +1,917 @@ +/* from Unix 7th Edition sed */ +/* Sccsid @(#)sed1.c 1.42 (gritter) 2/6/05> */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. 
+ * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <ctype.h> +#include <wchar.h> +#include <wctype.h> +#include "sed.h" + +#if !defined (SUS) && !defined (SU3) && !defined(S42) +#define INIT extern char *cp, *badp; \ + register char *sp = cp; +#define GETC() (*sp++) +#define PEEKC() (*sp) +#define UNGETC(c) (--sp) +#define RETURN(c) { cp = sp; return ep; } +#define ERROR(c) { cp = sp; return badp; } + +#define regexp_h_malloc(n) smalloc(n) +#include <regexp.h> +#endif /* !SUS && !SU3 && !S42 */ + +#ifndef CCEOF +#ifdef CEOF +#define CCEOF CEOF +#else /* !CEOF */ +#define CCEOF 22 +#endif /* !CEOF */ +#endif /* !CCEOF */ + +int ceof = CCEOF; + +#if !defined (SUS) && !defined (SU3) +static const char *trans[] = { + "\\00", + "\\01", + "\\02", + "\\03", + "\\04", + "\\05", + "\\06", + "\\07", + "-<", + "->", + "\n", + "\\13", + "\\14", + "\\15", + "\\16", + "\\17", + "\\20", + "\\21", + "\\22", + "\\23", + "\\24", + "\\25", + "\\26", + "\\27", + "\\30", + "\\31", + "\\32", + "\\33", + "\\34", + "\\35", + "\\36", + "\\37" +}; +#endif /* !SUS, !SU3 */ + +static char *cbp; +static char *ebp; +static int dolflag; +static int sflag; +static int jflag; +static int delflag; +static long long lnum; +static char ibuf[512]; +static int ibrd; +static int mflag; +static int f = -1; +static int spend; +static int genend; +static int hspend; + +static void command(struct reptr *); +static int match(char *, int, int); +static int substitute(struct reptr *); +static void dosub(char *); +static int place(int, int, int); +static int gline(int); +static void arout(void); +static void lcom(wint_t, int); +static void oout(int); +static void mout(const char *); +static void nout(wint_t); +static void wout(wint_t); +static void lout(int); + +#if defined (SUS) || defined (SU3) || defined (S42) +#define NBRA 9 +int sed; +int nbra; +int circf; +static char *braslist[NBRA]; +static char *braelist[NBRA]; +static char 
*loc1, *loc2, *locs; + +static int +step(char *line, char *pattern) +{ + struct re_emu *re = (struct re_emu *)&pattern[-1]; + regmatch_t bralist[NBRA+1]; + int eflag = 0; + int res; + int i, nsub; + + if (circf == 2) /* empty pattern */ + return 0; + if (locs) + eflag |= REG_NOTBOL; + /* + * Don't fetch more match locations than necessary since this + * might prevent use of DFA. + */ + nsub = mflag; + if ((res = regexec(&re->r_preg, line, nsub, bralist, eflag)) == 0) { + if (nsub > 0) { + loc1 = line + bralist[0].rm_so; + loc2 = line + bralist[0].rm_eo; + for (i = 1; i < nsub; i++) { + if (bralist[i].rm_so != -1) { + braslist[i-1] = line + bralist[i].rm_so; + braelist[i-1] = line + bralist[i].rm_eo; + } else + braslist[i-1] = braelist[i-1] = NULL; + } + } + } + return res == 0; +} +#endif /* SUS || SU3 || S42 */ + +static int lcomlen; +static int Braslist[NBRA]; +static int Braelist[NBRA]; +static int Loc1, Loc2; + +void +execute(const char *file) +{ + register char *p1, *p2; + register struct reptr *ipc; + int c; + int execc; + + if (f >= 0) + close(f); + if (file) { + if ((f = open(file, O_RDONLY)) < 0) { + nonfatal("Can't open %s", file); + return; + } + } else + f = 0; + + ebp = ibuf; + cbp = ibuf; + + if(pending) { + ipc = pending; + pending = 0; + goto yes; + } + + for(;;) { + if((execc = gline(0)) < 0) { + if (f >= 0) { + close(f); + f = -1; + } + return; + } + spend = execc; + + for(ipc = ptrspace; ipc->command; ) { + + p1 = ipc->ad1; + p2 = ipc->ad2; + + if(p1) { + + if(ipc->inar) { + if(*p2 == CEND) { + p1 = 0; + } else if(*p2 == CLNUM) { + c = glno(&p2[1]); + if(lnum > tlno[c]) { + ipc->inar = 0; + if(ipc->negfl) + goto yes; + ipc++; + continue; + } + if(lnum == tlno[c]) { + ipc->inar = 0; + } + } else if(match(p2, 0, 0)) { + ipc->inar = 0; + } + } else if(*p1 == CEND) { + if(!dolflag) { + if(ipc->negfl) + goto yes; + ipc++; + continue; + } + + } else if(*p1 == CLNUM) { + c = glno(&p1[1]); + if(lnum != tlno[c]) { + if(ipc->negfl) + goto yes; + ipc++; + 
continue; + } + if(p2) { + ipc->inar = 1; +#if defined (SUS) || defined (SU3) + goto ichk; +#endif /* SUS, SU3 */ + } + } else if(match(p1, 0, 0)) { + if(p2) { + ipc->inar = 1; +#if defined (SUS) || defined (SU3) + ichk: if (*p2 == CLNUM) { + c = glno(&p2[1]); + if (lnum >= tlno[c]) + ipc->inar = 0; + } +#endif /* SUS, SU3 */ + } + } else { + if(ipc->negfl) + goto yes; + ipc++; + continue; + } + } + + if(ipc->negfl) { + ipc++; + continue; + } + yes: + command(ipc); + + if(delflag) + break; + + if(jflag) { + jflag = 0; + if((ipc = P(ipc->bptr.lb1)) == 0) { + ipc = ptrspace; + break; + } + } else + ipc++; + + } + if(!nflag && !delflag) { + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + + if(A(aptr) > abuf) { + arout(); + } + + delflag = 0; + + } +} + +static int +match(char *expbuf, int gf, int needloc) +{ + register char *p1; + int i, val; + + if(gf) { + if(*expbuf) return(0); +#if defined (SUS) || defined (SU3) || defined (S42) + if (loc1 == loc2) { + int n; + wchar_t wc; + if (multibyte && (n = mbtowc(&wc, &linebuf[Loc2], + MB_LEN_MAX)) > 0) + Loc2 += n; + else + Loc2++; + } +#endif + locs = p1 = loc2 = &linebuf[Loc2]; + } else { + p1 = linebuf; + locs = 0; + } + + mflag = needloc; + circf = *expbuf++; + val = step(p1, expbuf); + for (i = 0; i < NBRA; i++) { + Braslist[i] = braslist[i] - linebuf; + Braelist[i] = braelist[i] - linebuf; + } + Loc1 = loc1 - linebuf; + Loc2 = loc2 - linebuf; + return val; +} + +static int +substitute(struct reptr *ipc) +{ + int matchcnt = 1; + + if (match(ipc->bptr.re1, 0, ipc->nsub + 1) == 0) + return(0); + + sflag = 0; + if (ipc->gfl >= -1 && ipc->gfl <= 1) + dosub(ipc->rhs); + + if(ipc->gfl != 0) { + while(linebuf[Loc2]) { + if(match(ipc->bptr.re1, 1, ipc->nsub + 1) == 0) + break; + matchcnt++; + if (ipc->gfl == -1 || ipc->gfl == matchcnt) + dosub(ipc->rhs); + } + } + return(1); +} + +static void +dosub(char *rhsbuf) +{ + register int lc, sc; + register char *rp; + int c; + + 
sflag = 1; + lc = 0; /*linebuf*/ + sc = 0; /*genbuf*/ + rp = rhsbuf; + while (lc < Loc1) + genbuf[sc++] = linebuf[lc++]; + while((c = *rp++) != 0) { + if (c == '&') { + sc = place(sc, Loc1, Loc2); + continue; + } else if (c == '\\') { + c = *rp++; + if (c >= '1' && c < NBRA+'1') { + sc = place(sc, Braslist[c-'1'], + Braelist[c-'1']); + continue; + } + } + if (sc >= gbend) + growsp("output line too long."); + genbuf[sc++] = (char)c; + } + lc = Loc2; + Loc2 = sc; + do { + if (sc >= gbend) + growsp("Output line too long."); + } while (genbuf[sc++] = linebuf[lc++], lc <= spend); + genend = sc-1; + lc = 0; /*linebuf*/ + sc = 0; /*genbuf*/ + while (linebuf[lc++] = genbuf[sc++], sc <= genend); + spend = lc-1; +} + +static int +place(int asc, int al1, int al2) +{ + register int sc; + register int l1, l2; + + sc = asc; + l1 = al1; + l2 = al2; + while (l1 < l2) { + if (sc >= gbend) + growsp("Output line too long."); + genbuf[sc++] = linebuf[l1++]; + } + return(sc); +} + +static void +command(struct reptr *ipc) +{ + register int i; + wint_t c; + register char *p1, *p2; + int k1, k2, k3; + char *lp; + int execc; + + + switch(ipc->command) { + + case ACOM: + *A(aptr) = ipc; + aptr_inc(); + *A(aptr) = 0; + break; + + case CCOM: + delflag = 1; + if(!ipc->inar || dolflag) { + for(p1 = ipc->bptr.re1; *p1; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + } + break; + case DCOM: + delflag++; + break; + case CDCOM: + p1 = p2 = linebuf; + + while(*p1 != '\n') { + if(p1++ == &linebuf[spend]) { + delflag++; + return; + } + } + + p1++; + while(*p2++ = *p1++, p1 <= &linebuf[spend]); + spend = p2-1 - linebuf; + jflag++; + break; + + case EQCOM: + fprintf(stdout, "%lld\n", lnum); + break; + + case GCOM: + p1 = linebuf; + p2 = holdsp; + while(*p1++ = *p2++, p2 <= &holdsp[hspend]); + spend = p1-1 - linebuf; + break; + + case CGCOM: + linebuf[spend++] = '\n'; + k1 = spend; + k2 = 0; /*holdsp*/ + do { + if(k1 >= lbend) + growsp(NULL); + } while(linebuf[k1++] = holdsp[k2++], k2 
<= hspend); + spend = k1-1; + break; + + case HCOM: + p1 = holdsp; + p2 = linebuf; + while(*p1++ = *p2++, p2 <= &linebuf[spend]); + hspend = p1-1 - holdsp; + break; + + case CHCOM: + holdsp[hspend++] = '\n'; + k1 = hspend; + k2 = 0; /*linebuf*/ + do { + if(k1 >= hend) + growsp("\1hold space overflow !"); + } while(holdsp[k1++] = linebuf[k2++], k2 <= spend); + hspend = k1-1; + break; + + case ICOM: + for(p1 = ipc->bptr.re1; *p1; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + break; + + case BCOM: + jflag = 1; + break; + + case LCOM: + lp = linebuf; + lcomlen = 0; + while (lp < &linebuf[spend]) { + c = fetch(&lp); + lcom(c, invchar == 0); + } +#if defined (SUS) || defined (SU3) + putc('$', stdout); +#endif /* SUS, SU3 */ + putc('\n', stdout); + break; + + case NCOM: + if(!nflag) { + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + + if(A(aptr) > abuf) + arout(); + if((execc = gline(0)) < 0) { + pending = ipc; + delflag = 1; + break; + } + spend = execc; + + break; + case CNCOM: + if(A(aptr) > abuf) + arout(); + linebuf[spend++] = '\n'; + if((execc = gline(spend)) < 0) { + pending = ipc; + delflag = 1; + break; + } + spend = execc; + break; + + case PCOM: + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + break; + case CPCOM: + cpcom: + for(p1 = linebuf; *p1 != '\n' && p1<&linebuf[spend]; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + break; + + case QCOM: + if(!nflag) { + for(p1 = linebuf; p1 < &linebuf[spend]; p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + if(A(aptr) > abuf) arout(); + fclose(stdout); + if (ibrd > 0) + lseek(f, -ibrd, SEEK_CUR); + exit(0); + case RCOM: + + *A(aptr) = ipc; + aptr_inc(); + *A(aptr) = 0; + + break; + + case SCOM: + i = substitute(ipc); + if(ipc->pfl && i) + if(ipc->pfl == 1) { + for(p1 = linebuf; p1 < &linebuf[spend]; + p1++) + putc(*p1&0377, stdout); + putc('\n', stdout); + } + else + 
goto cpcom; + if(i && ipc->fcode) + goto wcom; + break; + + case TCOM: + if(sflag == 0) break; + sflag = 0; + jflag = 1; + break; + + wcom: + case WCOM: + fprintf(ipc->fcode, "%s\n", linebuf); + break; + case XCOM: + p1 = linebuf; + p2 = genbuf; + while(*p2++ = *p1++, p1 <= &linebuf[spend]); + genend = p2-1 - genbuf; + p1 = holdsp; + p2 = linebuf; + while(*p2++ = *p1++, p1 <= &holdsp[hspend]); + spend = p2-1 - linebuf; + p1 = genbuf; + p2 = holdsp; + while(*p2++ = *p1++, p1 <= &genbuf[genend]); + hspend = p2-1 - holdsp; + break; + + case YCOM: + if (multibyte) { + struct yitem **yt, *yp; + + yt = (struct yitem **)ipc->bptr.re1; + k1 = 0; /*linebuf*/ + k2 = 0; /*genbuf*/ + do { + k3 = k1; + lp = &linebuf[k1]; + c = fetch(&lp); + k1 = lp - linebuf; + if (invchar == 0 && + (yp = ylook(c, yt, 0)) != NULL) { + k3 = 0; /*yp->y_mc*/ + do { + if (k2 >= gbend) + growsp("output " + "line too " + "long."); + genbuf[k2] = + yp->y_mc[k3++]; + } while (genbuf[k2++] != '\0'); + k2--; + } else { + while (k3 < k1) { + if (k2 >= gbend) + growsp("output " + "line too " + "long."); + genbuf[k2++] = + linebuf[k3++]; + } + } + } while (k1 <= spend); + genend = k2-1; + p1 = linebuf; + p2 = genbuf; + while (*p1++ = *p2++, p2 <= &genbuf[genend]); + spend = p1-1 - linebuf; + } else { + p1 = linebuf; + p2 = ipc->bptr.re1; + while((*p1 = p2[*p1 & 0377]) != 0) p1++; + } + break; + case COCOM: + case ECOM: + case FCOM: + case CWCOM: + ; + } + +} + +static int +gline(int addr) +{ + register char *p2; + register int c; + register int c1; + c1 = addr; + p2 = cbp; + for (;;) { + if (p2 >= ebp) { + if (f < 0 || (c = read(f, ibuf, sizeof ibuf)) <= 0) { + if (c1 > addr && dolflag == 0) { + c = 1; + ibuf[0] = '\n'; + close(f); + f = -1; + } else + return(-1); + } else + ibrd += c; + p2 = ibuf; + ebp = ibuf+c; + } + if ((c = *p2++ & 0377) == '\n') { + ibrd--; + if(needdol && p2 >= ebp) { + if(f<0||(c = read(f, ibuf, sizeof ibuf)) <= 0) { + close(f); + f = -1; + if(eargc == 0) + dolflag = 1; + } else + 
ibrd += c; + + p2 = ibuf; + ebp = ibuf + c; + } + break; + } + if(c1 >= lbend) + growsp(NULL); + linebuf[c1++] = (char)c; + ibrd--; + } + lnum++; + if(c1 >= lbend) + growsp(NULL); + linebuf[c1] = 0; + cbp = p2; + + sflag = 0; + return(c1); +} + +static void +arout(void) +{ + register char *p1; + struct reptr **a; + FILE *fi; + char c; + int t; + + for (a = abuf; *a; a++) { + if((*a)->command == ACOM) { + for(p1 = (*a)->bptr.re1; *p1; ) { + putc(*p1&0377, stdout); + p1++; + } + putc('\n', stdout); + } else { + if((fi = fopen((*a)->bptr.re1, "r")) == NULL) + continue; + while((t = getc(fi)) != EOF) { + c = t; + putc(c&0377, stdout); + } + fclose(fi); + } + } + aptr = 1; + *A(aptr) = 0; +} + +static void +lcom(wint_t c, int valid) +{ + if (!valid) { + oout(c); + return; + } +#if defined (SUS) || defined (SU3) + switch (c) { + case '\\': + mout("\\\\"); + return; + case '\a': + mout("\\a"); + return; + case '\b': + mout("\\b"); + return; + case '\f': + mout("\\f"); + return; + case '\r': + mout("\\r"); + return; + case '\t': + mout("\\t"); + return; + case '\v': + mout("\\v"); + return; + } +#else /* !SUS, !SU3 */ + if (c < 040) { + mout(trans[c]); + return; + } +#endif /* !SUS, !SU3 */ + if (multibyte) { + if (iswprint(c)) + wout(c); + else + nout(c); + } else { + if (isprint(c)) + lout(c); + else + oout(c); + } +} + +static void +oout(int c) +{ + char lbuf[5], *p; + int d; + const char *nums = "01234567"; + + p = lbuf; + *p++ = '\\'; + *p++ = nums[(c & ~077) >> 6]; + c &= 077; + d = c & 07; + *p++ = c > d ? 
nums[(c-d)>>3] : nums[0]; + *p++ = nums[d]; + *p = '\0'; + mout(lbuf); +} + +static void +mout(const char *p) +{ + while (*p != '\0') { + lout(*p & 0377); + p++; + } +} + +static void +nout(wint_t c) +{ + char mb[MB_LEN_MAX+1]; + char *p; + int i; + + if ((i = wctomb(mb, c)) > 0) { + mb[i] = '\0'; + for (p = mb; *p; p++) + oout(*p & 0377); + } +} + +static void +lout(int c) +{ + if (lcomlen++ > 70) { + putc('\\', stdout); + putc('\n', stdout); + lcomlen = 1; + } + putc(c, stdout); +} + +static void +wout(wint_t c) +{ + char mb[MB_LEN_MAX+1], *p; + int i, w; + + if ((i = wctomb(mb, c)) > 0) { + w = wcwidth(c); + if (lcomlen + w > 70) { + putc('\\', stdout); + putc('\n', stdout); + lcomlen = 0; + } + mb[i] = '\0'; + for (p = mb; *p; p++) + putc(*p & 0377, stdout); + lcomlen += w; + } +} diff --git a/sed/version.c b/sed/version.c @@ -0,0 +1,22 @@ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#if defined (SU3) +static const char sccsid[] USED = "@(#)sed_su3.sl 2.34 (gritter) 6/26/05"; +#elif defined (SUS) +static const char sccsid[] USED = "@(#)sed_sus.sl 2.34 (gritter) 6/26/05"; +#elif defined (S42) +static const char sccsid[] USED = "@(#)sed_s42.sl 2.34 (gritter) 6/26/05"; +#else /* !SUS, !SU3, !S42 */ +static const char sccsid[] USED = "@(#)sed.sl 2.34 (gritter) 6/26/05"; +#endif /* !SUS, !SU3, !S42 */ +/* SLIST */ +/* +sed.h: Sccsid @(#)sed.h 1.32 (gritter) 2/6/05 +sed0.c: Sccsid @(#)sed0.c 1.64 (gritter) 3/12/05> +sed1.c: Sccsid @(#)sed1.c 1.42 (gritter) 2/6/05> +*/ diff --git a/stty/mkfile b/stty/mkfile @@ -0,0 +1,8 @@ +BIN = stty +OBJ = stty.o +LOCAL_CFLAGS = -DUCB +INSTALL_BIN = stty +INSTALL_MAN1 = stty.1b +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/stty/stty.1 b/stty/stty.1 @@ -0,0 +1,293 @@ +'\" t +.\" Parts taken from stty(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. 
All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+.TH STTY 1 "7/15/04" "Heirloom Toolchest" "User Commands" +.SH NAME +stty \- set the options for a terminal +.SH SYNOPSIS +\fBstty\fR [\fB\-a\fR] [\fB\-g\fR] [\fIoptions\fR] +.SH DESCRIPTION +.I Stty +sets certain I/O options on the terminal +currently associated with standard input. +With no argument, it reports the current settings of selected options; +the +.B \-a +flag +causes it to report all options. +With the +.B \-g +flag, +.I stty +prints all settings in a form +that can be passed to it again +to restore the current settings. +.PP +The option strings are +selected from the following set; +where an option is shown prefixed by `[\-]', +it can be negated as described in the text. +.PP +Control modes: +.PP +.TS +l1w(16n) l. +[\fB\-\fR]\fBparenb\fR enable (disable) parity bits +[\fB\-\fR]\fBparodd\fR select odd (even) parity +\fBcs5 cs6 cs7 cs8\fR set character size +[\fB\-\fR]\fBcstopb\fR use two (one) stop bits per character\ \ \ \ \ \ +[\fB\-\fR]\fBclocal\fR line without (with) modem control +[\fB\-\fR]\fBhupcl\fR T{ +hang up (do not hang up) dataphone on last close +T} +[\fB\-\fR]\fBhup\fR same as \fIhupcl\fR +\fB0\fR hang up phone line immediately +T{ +.in 2n +.ti 0 +.ad l +\fB50 75 110 134 150 200 300 600 1200 1800 2400 4800 9600 +19200 38400 exta extb\fR +.in 0 +.ad b +T} T{ +Set terminal baud rate to the number given, if possible +(not all terminals support all rates). +T} +.TE +.PP +Input modes: +.PP +.TS +l1w(16n) l. 
+[\fB\-\fR]\fBignbrk\fR ignore (respect) break character\ \ \ \ \ \ \ \ \ \ \ +[\fB\-\fR]\fBbrkint\fR (do not) send interrupt on break +[\fB\-\fR]\fBignpar\fR (do not) ignore parity errors +[\fB\-\fR]\fBparmrk\fR (do not) mark parity errors +[\fB\-\fR]\fBinpck\fR enable (disable) parity checking +[\fB\-\fR]\fBistrip\fR T{ +(do not) strip characters to 7 bits +T} +[\fB\-\fR]\fBinlcr\fR (do not) map NL to CR +[\fB\-\fR]\fBigncr\fR (do not) ignore CR +[\fB\-\fR]\fBicrnl\fR (do not) map CR to NL +[\fB\-\fR]\fBiuclc\fR (do not) map upper to lower case +[\fB\-\fR]\fBixon\fR enable start/stop control +[\fB\-\fR]\fBixany\fR any character (DC1 only) restarts output +[\fB\-\fR]\fBixoff\fR T{ +(do not) send start/stop characters +when the input queue is nearly full +T} +[\fB\-\fR]\fBimaxbel\fR T{ +(do not) ring the bell when running out of input buffers +T} +[\fB\-\fR]\fBiutf8\fR (do not) handle input as UTF-8 +.TE +.PP +Output modes: +.PP +.TS +l1w(16n) l. +[\fB\-\fR]\fBopost\fR enable (disable) output processing\ \ \ \ \ \ \ \ \ +[\fB\-\fR]\fBolcuc\fR (do not) map lower to upper case +[\fB\-\fR]\fBonlcr\fR (do not) map NL to CR-NL +[\fB\-\fR]\fBonocr\fR (do not) suppress CR output at column 0 +[\fB\-\fR]\fBonlret\fR NL is (not) return +[\fB\-\fR]\fBofill\fR (do not) use fill characters +[\fB\-\fR]\fBofdel\fR fill character is DEL (NUL) +\fBcr0 cr1 cr2 cr3\fR T{ +select style of delay for carriage return (see \fItermio\fR(3)) +T} +\fBnl0 nl1 nl2 nl3\fR select style of delay for linefeed +T{ +.in 2n +.ti 0 +.ad l +\fBtab0 tab1 tab2 tab3\fR +.in 0 +.ad b +T} select style of delay for tab +\fBff0 ff1\fR select style of delay for form feed +\fBbs0 bs1\fR select style of delay for backspace +\fBvt0 vt1\fR select style of delay for vertical tab +.TE +.PP +Local modes: +.PP +.TS +l1w(16n) l. 
+[\fB\-\fR]\fBisig\fR T{ +enable (disable) intr, quit, and susp processing +T} +[\fB\-\fR]\fBicanon\fR enable (disable) erase and kill processing\ +[\fB\-\fR]\fBxcase\fR (no) canonical case presentation +[\fB\-\fR]\fBecho\fR (do not) echo every character typed +[\fB\-\fR]\fBechoe\fR (do not) echo erase character as `^H ^H' +[\fB\-\fR]\fBechok\fR (do not) echo NL after kill character +[\fB\-\fR]\fBechonl\fR (do not) echo NL regardless of \fIecho\fR +[\fB\-\fR]\fBnoflsh\fR (do not) flush after intr, quit, and susp +[\fB\-\fR]\fBtostop\fR send SIGTTOU for background output +[\fB\-\fR]\fBechoctl\fR T{ +(no) visual representation of control characters +T} +[\fB\-\fR]\fBechoprt\fR (do not) echo erased characters +[\fB\-\fR]\fBechoke\fR T{ +(do not) print `^H ^H' sequences at line erase +T} +[\fB\-\fR]\fBflusho\fR output is (not) flushed +[\fB\-\fR]\fBpendin\fR (do not) retype pending input +[\fB\-\fR]\fBiexten\fR T{ +enable (disable) extended control characters +T} +.TE +.PP +Control characters: +.PP +.TS +l1w(16n) l. +\fBintr \fIc\fP\fR set intr character to \fIc\fR. +\fBquit \fIc\fP\fR set quit character to \fIc\fR. +\fBerase \fIc\fP\fR set erase character to \fIc\fR. +\fBkill \fIc\fP\fR set kill character to \fIc\fR. +\fBeof \fIc\fP\fR set eof character to \fIc\fR. +\fBeol \fIc\fP\fR set eol character to \fIc\fR. +\fBeol2 \fIc\fP\fR set eol2 character to \fIc\fR. +\fBswtch \fIc\fP\fR set swtch character to \fIc\fR. +\fBstart \fIc\fP\fR set start character to \fIc\fR. +\fBstop \fIc\fP\fR set stop character to \fIc\fR. +\fBsusp \fIc\fP\fR set susp character to \fIc\fR. +\fBdsusp \fIc\fP\fR set dsusp character to \fIc\fR. +\fBrprnt \fIc\fP\fR set rprnt character to \fIc\fR. +\fBflush \fIc\fP\fR set flush character to \fIc\fR. +\fBwerase \fIc\fP\fR set werase character to \fIc\fR. +\fBlnext \fIc\fP\fR set lnext character to \fIc\fR. 
+.TE +.PP +A control character may be either given as a literal character, +or as `\fB^\fIc\fR' for ASCII control characters CTRL-\fIc\fR; +the special values `\fB^\-\fR' and `\fBundef\fR' +disable the control function. +Multibyte characters cannot be used as control characters; +trying to do so will result in the control character +set to the first byte of the multibyte character sequence, +which has the undesired effect of applying the control function +to more characters than the one supplied. +.PP +Combination modes et cetera: +.PP +.TS +l1w(16n) l. +[\fB\-\fR]\fBraw\fR T{ +(no) raw mode input +(i.\|e. no erase, kill, interrupt, quit, EOT; parity bit passed back) +T} +\fBcooked\fR same as \fI\-raw\fR\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ +\fBsane\fR reset all values to defaults +[\fB\-\fR]\fBcbreak\fR T{ +make each character available to \fIread\fR(2) +as received; no erase and kill +(make characters available to \fIread\fR +only when newline is received) +T} +[\fB\-\fR]\fBoddp\fR allow (disallow) odd parity +[\fB\-\fR]\fBevenp\fR allow (disallow) even parity +[\fB\-\fR]\fBnl\fR T{ +accept only new-line to end lines +(allow carriage return for new-line, +and output CR-LF for carriage return or new-line) +T} +[\fB\-\fR]\fBlcase\fR T{ +(do not) map upper case to lower case +T} +[\fB\-\fR]\fBLCASE\fR T{ +same as \fIlcase\fR +T} +[\fB\-\fR]\fBtabs\fR T{ +(do not) replace tabs by spaces when printing +T} +\fBnul-fill\fR use NUL to fill output +\fBdel-fill\fR use DEL to fill output +[\fB\-\fR]\fBfill\fR T{ +select NUL-filled output (disable output filling) +T} +\fBek\fR T{ +reset erase and kill characters back to normal ^H and ^U +T} +\fBtty33\fR T{ +set all modes suitable for the +Teletype Corporation Model 33 terminal. +T} +\fBtty37\fR T{ +set all modes suitable for the +Teletype Corporation Model 37 terminal. +T} +\fBvt05\fR T{ +set all modes suitable for Digital Equipment Corp. 
VT05 terminal +T} +\fBtn300\fR T{ +set all modes suitable for a General Electric TermiNet 300 +T} +\fBti700\fR T{ +set all modes suitable for Texas Instruments 700 series terminal +T} +\fBtek\fR T{ +set all modes suitable for Tektronix 4014 terminal +T} +\fBmin\fR T{ +minimal number of bytes for non-canonical input +T} +\fBtime\fR timeout for non-canonical input +\fBrows\fR T{ +set the height of the terminal in character cell units +T} +\fBcolumns\fR T{ +set the width of the terminal in character cell units +T} +\fBypixels\fR T{ +set the height of the terminal in pixels +T} +\fBxpixels\fR T{ +set the width of the terminal in pixels +T} +.TE +.SH "ENVIRONMENT VARIABLES" +.TP +.B SYSV3 +Affects the default and +.B \-a +output formats. +.SH "SEE ALSO" +termios(3), +tabs(1) +.SH NOTES +Not all settings are available on all platforms. diff --git a/stty/stty.1b b/stty/stty.1b @@ -0,0 +1,345 @@ +'\" t +.\" Sccsid @(#)stty.1b 1.3 (gritter) 7/15/04 +.\" Parts taken from stty(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. 
nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.TH STTY 1B "7/15/04" "Heirloom Toolchest" "BSD System Compatibility" +.SH NAME +stty \- (BSD) set the options for a terminal +.SH SYNOPSIS +\fB/usr/ucb/stty\fR [\fB\-a\fR] [\fB\-g\fR] [\fB\-h\fR] [\fIoptions\fR] +.SH DESCRIPTION +.I Stty +sets certain I/O options on the current terminal. +With no argument, it reports the current settings of selected options; +the +.B \-a +flag +causes it to report all options. +With the +.B \-g +flag, +.I stty +prints all settings in a form +that can be passed to it again +to restore the current settings. +The +.B \-h +flag +prints a table of all control characters +in addition to the output given with +.IR \-a . +.PP +Special option values recognized only if given as single argument are: +.TP 12 +.B all +Print the normal output +followed by a control character table. +.TP 12 +.B everything +Same as +.IR \-h . +.TP 12 +.B speed +Print the speed rate. +.TP 12 +.B size +Print the rows and columns of the terminal. 
+.PP +If any of +.BR \-g , +.B speed +or +.B size +is given, +.I stty +uses /dev/tty to get the terminal settings; +otherwise, the standard output is used. +.PP +The option strings are +selected from the following set; +where an option is shown prefixed by `[\-]', +it can be negated as described in the text. +.PP +Control modes: +.PP +.TS +l1w(16n) l. +[\fB\-\fR]\fBparenb\fR enable (disable) parity bits +[\fB\-\fR]\fBparodd\fR select odd (even) parity +\fBcs5 cs6 cs7 cs8\fR set character size +[\fB\-\fR]\fBcstopb\fR use two (one) stop bits per character\ \ \ \ \ \ +[\fB\-\fR]\fBclocal\fR line without (with) modem control +[\fB\-\fR]\fBhupcl\fR T{ +hang up (do not hang up) dataphone on last close +T} +[\fB\-\fR]\fBhup\fR same as \fIhupcl\fR +\fB0\fR hang up phone line immediately +T{ +.in 2n +.ti 0 +.ad l +\fB50 75 110 134 150 200 300 600 1200 1800 2400 4800 9600 +19200 38400 exta extb\fR +.in 0 +.ad b +T} T{ +Set terminal baud rate to the number given, if possible +(not all terminals support all rates). +T} +.TE +.PP +Input modes: +.PP +.TS +l1w(16n) l. 
+[\fB\-\fR]\fBignbrk\fR ignore (respect) break character\ \ \ \ \ \ \ \ \ \ \ +[\fB\-\fR]\fBbrkint\fR (do not) send interrupt on break +[\fB\-\fR]\fBignpar\fR (do not) ignore parity errors +[\fB\-\fR]\fBparmrk\fR (do not) mark parity errors +[\fB\-\fR]\fBinpck\fR enable (disable) parity checking +[\fB\-\fR]\fBistrip\fR T{ +(do not) strip characters to 7 bits +T} +[\fB\-\fR]\fBinlcr\fR (do not) map NL to CR +[\fB\-\fR]\fBigncr\fR (do not) ignore CR +[\fB\-\fR]\fBicrnl\fR (do not) map CR to NL +[\fB\-\fR]\fBiuclc\fR (do not) map upper to lower case +[\fB\-\fR]\fBixon\fR enable start/stop control +[\fB\-\fR]\fBixany\fR any character (DC1 only) restarts output +[\fB\-\fR]\fBdecctlq\fR same as \-ixany +[\fB\-\fR]\fBixoff\fR T{ +(do not) send start/stop characters +when the input queue is nearly full +T} +[\fB\-\fR]\fBtandem\fR same as ixoff +[\fB\-\fR]\fBimaxbel\fR T{ +(do not) ring the bell when running out of input buffers +T} +[\fB\-\fR]\fBiutf8\fR (do not) handle input as UTF-8 +.TE +.PP +Output modes: +.PP +.TS +l1w(16n) l. +[\fB\-\fR]\fBopost\fR enable (disable) output processing\ \ \ \ \ \ \ \ \ +[\fB\-\fR]\fBolcuc\fR (do not) map lower to upper case +[\fB\-\fR]\fBonlcr\fR (do not) map NL to CR-NL +[\fB\-\fR]\fBonocr\fR (do not) suppress CR output at column 0 +[\fB\-\fR]\fBonlret\fR NL is (not) return +[\fB\-\fR]\fBofill\fR (do not) use fill characters +[\fB\-\fR]\fBofdel\fR fill character is DEL (NUL) +\fBcr0 cr1 cr2 cr3\fR T{ +select style of delay for carriage return (see \fItermio\fR(3)) +T} +\fBnl0 nl1 nl2 nl3\fR select style of delay for linefeed +T{ +.in 2n +.ti 0 +.ad l +\fBtab0 tab1 tab2 tab3\fR +.in 0 +.ad b +T} select style of delay for tab +\fBff0 ff1\fR select style of delay for form feed +\fBbs0 bs1\fR select style of delay for backspace +\fBvt0 vt1\fR select style of delay for vertical tab +.TE +.PP +Local modes: +.PP +.TS +l1w(16n) l. 
+[\fB\-\fR]\fBisig\fR T{ +enable (disable) intr, quit, and susp processing +T} +[\fB\-\fR]\fBicanon\fR enable (disable) erase and kill processing\ +[\fB\-\fR]\fBcbreak\fR same as \-icanon +[\fB\-\fR]\fBxcase\fR (no) canonical case presentation +[\fB\-\fR]\fBecho\fR (do not) echo every character typed +[\fB\-\fR]\fBechoe\fR (do not) echo erase character as `^H ^H' +[\fB\-\fR]\fBcrterase\fR same as echoe +[\fB\-\fR]\fBechok\fR (do not) echo NL after kill character +[\fB\-\fR]\fBlfkc\fR same as echok +[\fB\-\fR]\fBechonl\fR (do not) echo NL regardless of \fIecho\fR +[\fB\-\fR]\fBnoflsh\fR (do not) flush after intr, quit, and susp +[\fB\-\fR]\fBtostop\fR send SIGTTOU for background output +[\fB\-\fR]\fBechoctl\fR T{ +(no) visual representation of control characters +T} +[\fB\-\fR]\fBctlecho\fR same as echoctl +[\fB\-\fR]\fBechoprt\fR (do not) echo erased characters +[\fB\-\fR]\fBprterase\fR same as echoprt +[\fB\-\fR]\fBechoke\fR T{ +(do not) print `^H ^H' sequences at line erase +T} +[\fB\-\fR]\fBcrtkill\fR same as echoke +[\fB\-\fR]\fBflusho\fR output is (not) flushed +[\fB\-\fR]\fBpendin\fR (do not) retype pending input +[\fB\-\fR]\fBiexten\fR T{ +enable (disable) extended control characters +T} +.TE +.PP +Control characters: +.PP +.TS +l1w(16n) l. +\fBintr \fIc\fP\fR set intr character to \fIc\fR. +\fBquit \fIc\fP\fR set quit character to \fIc\fR. +\fBerase \fIc\fP\fR set erase character to \fIc\fR. +\fBkill \fIc\fP\fR set kill character to \fIc\fR. +\fBeof \fIc\fP\fR set eof character to \fIc\fR. +\fBeol \fIc\fP\fR set eol character to \fIc\fR. +\fBeol2 \fIc\fP\fR set eol2 character to \fIc\fR. +\fBswtch \fIc\fP\fR set swtch character to \fIc\fR. +\fBstart \fIc\fP\fR set start character to \fIc\fR. +\fBstop \fIc\fP\fR set stop character to \fIc\fR. +\fBsusp \fIc\fP\fR set susp character to \fIc\fR. +\fBdsusp \fIc\fP\fR set dsusp character to \fIc\fR. +\fBrprnt \fIc\fP\fR set rprnt character to \fIc\fR. +\fBflush \fIc\fP\fR set flush character to \fIc\fR. 
+\fBwerase \fIc\fP\fR set werase character to \fIc\fR. +\fBlnext \fIc\fP\fR set lnext character to \fIc\fR. +.TE +.PP +A control character may be either given as a literal character, +or as `\fB^\fIc\fR' for ASCII control characters CTRL-\fIc\fR; +the special values `\fB^\-\fR' and `\fBundef\fR' +disable the control function. +Multibyte characters cannot be used as control characters; +trying to do so will result in the control character +set to the first byte of the multibyte character sequence, +which has the undesired effect of applying the control function +to more characters than the one supplied. +.PP +Combination modes et cetera: +.PP +.TS +l1w(16n) l. +[\fB\-\fR]\fBraw\fR T{ +(no) raw mode input +(i.\|e. no erase, kill, interrupt, quit, EOT; parity bit passed back) +T} +\fBcooked\fR same as \fI\-raw\fR\ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ +\fBsane\fR reset all values to defaults +[\fB\-\fR]\fBcbreak\fR T{ +make each character available to \fIread\fR(2) +as received; no erase and kill +(make characters available to \fIread\fR +only when newline is received) +T} +[\fB\-\fR]\fBoddp\fR allow (disallow) odd parity +[\fB\-\fR]\fBevenp\fR allow (disallow) even parity +[\fB\-\fR]\fBnl\fR T{ +accept only new-line to end lines +(allow carriage return for new-line, +and output CR-LF for carriage return or new-line) +T} +[\fB\-\fR]\fBlcase\fR T{ +(do not) map upper case to lower case +T} +[\fB\-\fR]\fBLCASE\fR T{ +same as \fIlcase\fR +T} +[\fB\-\fR]\fBtabs\fR T{ +(do not) replace tabs by spaces when printing +T} +\fBnul-fill\fR use NUL to fill output +\fBdel-fill\fR use DEL to fill output +[\fB\-\fR]\fBfill\fR T{ +select NUL-filled output (disable output filling) +T} +\fBek\fR T{ +reset erase and kill characters back to normal ^H and ^U +T} +[\fB\-\fR]\fBlitout\fR T{ +Unset (set) parenb, istrip, opost, +and cs8 (cs7). +T} +[\fB\-\fR]\fBpass8\fR T{ +Unset (set) parenb, istrip, +and cs8 (cs7). +T} +\fBcrt\fR T{ +Set echoe and echoctl. 
+If the speed is equal to or higher than 1200 baud, +also set echoke. +T} +\fBdec\fR T{ +set all modes suitable for +Digital Equipment Corporation terminals. +T} +\fBtty33\fR T{ +set all modes suitable for the +Teletype Corporation Model 33 terminal. +T} +\fBtty37\fR T{ +set all modes suitable for the +Teletype Corporation Model 37 terminal. +T} +\fBvt05\fR T{ +set all modes suitable for Digital Equipment Corp. VT05 terminal +T} +\fBtn300\fR T{ +set all modes suitable for a General Electric TermiNet 300 +T} +\fBti700\fR T{ +set all modes suitable for Texas Instruments 700 series terminal +T} +\fBtek\fR T{ +set all modes suitable for Tektronix 4014 terminal +T} +\fBmin\fR T{ +minimal number of bytes for non-canonical input +T} +\fBtime\fR timeout for non-canonical input +\fBrows\fR T{ +set the height of the terminal in character cell units +T} +\fBcolumns\fR T{ +set the width of the terminal in character cell units +T} +\fBcols\fR same as columns +\fBypixels\fR T{ +set the height of the terminal in pixels +T} +\fBxpixels\fR T{ +set the width of the terminal in pixels +T} +.TE +.SH "SEE ALSO" +termios(3), +tabs(1) +.SH NOTES +Not all settings are available on all platforms. diff --git a/stty/stty.c b/stty/stty.c @@ -0,0 +1,1490 @@ +/* + * stty - set the options for a terminal + * + * Gunnar Ritter, Freiburg i. Br., Germany, May 2003. + */ +/* + * Copyright (c) 2003 Gunnar Ritter + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute + * it freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source distribution. + */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +#ifndef UCB +static const char sccsid[] USED = "@(#)stty.sl 1.23 (gritter) 1/22/06"; +#else /* UCB */ +static const char sccsid[] USED = "@(#)/usr/ucb/stty.sl 1.23 (gritter) 1/22/06"; +#endif /* UCB */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> +#include <termios.h> +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <libgen.h> +#include <ctype.h> +#include <locale.h> +#include <pathconf.h> +#ifndef TIOCGWINSZ +#include <sys/ioctl.h> +#endif + +#ifndef VSWTCH +#ifdef VSWTC +#define VSWTCH VSWTC +#endif +#endif + +#ifdef TABDLY +static void tabs(int); +#endif +static void evenp(int); +static void oddp(int); +static void spacep(int); +static void markp(int); +static void raw(int); +static void cooked(int); +static void nl(int); +static void sane(int); +#ifdef OFDEL +static void fill(int); +#endif +#ifdef XCASE +static void lcase(int); +#endif +static void ek(int); +#ifdef TABDLY +static void tty33(int); +static void tty37(int); +static void vt05(int); +static void tn300(int); +static void ti700(int); +static void tek(int); +#endif +static void rows(int); +static void columns(int); +static void ypixels(int); +static void xpixels(int); +static void vmin(int); +static void vtime(int); +static void line(int); +#ifdef UCB +static void litout(int); +static void pass8(int); +static void crt(int); +static void dec(int); +#endif /* UCB */ + +static 
const struct mode { + const char *m_name; /* name of mode */ + void (*m_func)(int); /* handler function */ + long m_val; /* flag value */ + long m_dfl; /* default (not sane!) value */ + int m_flg; /* print flags: + 01 print regardless of difference + 02 print only if -a is not set + 04 print only if -a is set + 010 print under all circumstances + 020 print only if equal + 040 ignore for printing + 0100 ignore for setting + 0200 use m_dfl as mask + 0400 print only if equal even if -a + 01000 negate for setting + */ + enum { + M_SEPAR, /* separator */ + M_NSEPAR, /* new separator */ + M_IFLAG, /* in c_iflag */ + M_OFLAG, /* in c_oflag */ + M_CFLAG, /* in c_cflag */ + M_PCFLAG, /* in c_cflag, but printed w/o -a */ + M_LFLAG, /* in c_lflag */ + M_CC, /* in c_cc */ + M_FUNCT, /* handled via function */ + M_INVAL /* invalid */ + } m_type; +} modes[] = { + { "oddp", 0, PARENB|PARODD,PARENB|PARODD,0122, M_PCFLAG }, + { "evenp", 0, PARENB|PARODD,PARENB, 0122, M_PCFLAG }, + { "parity", 0, PARENB, 0, 0122, M_PCFLAG }, + { "cstopb", 0, CSTOPB, 0, 02, M_PCFLAG }, + { "hupcl", 0, HUPCL, 0, 02, M_PCFLAG }, + { "cread", 0, CREAD, CREAD, 02, M_PCFLAG }, + { "clocal", 0, CLOCAL, 0, 02, M_PCFLAG }, + { "intr", 0, VINTR, '\177', 0, M_CC }, + { "quit", 0, VQUIT, '\34', 0, M_CC }, + { "erase", 0, VERASE, '#', 0, M_CC }, + { "kill", 0, VKILL, '@', 0, M_CC }, + { "\n", 0, 0, 0, 04, M_NSEPAR }, + { "eof", 0, VEOF, '\4', 0, M_CC }, + { "eol", 0, VEOL, '\0', 0, M_CC }, + { "eol2", 0, VEOL2, '\0', 0, M_CC }, +#ifdef VSWTCH + { "swtch", 0, VSWTCH, '\32', 0, M_CC }, +#endif + { "\n", 0, 0, 0, 04, M_NSEPAR }, + { "start", 0, VSTART, '\21', 0, M_CC }, + { "stop", 0, VSTOP, '\23', 0, M_CC }, + { "susp", 0, VSUSP, '\32', 0, M_CC }, +#ifdef VDSUSP + { "dsusp", 0, VDSUSP, '\31', 0, M_CC }, +#else + { "dsusp", 0, -1, '\0', 0, M_CC }, +#endif + { "\n", 0, 0, 0, 04, M_NSEPAR }, +#ifdef VREPRINT + { "rprnt", 0, VREPRINT,'\22', 0, M_CC }, +#else + { "rprnt", 0, -1, '\0', 0, M_CC }, +#endif +#ifdef 
VDISCARD + { "flush", 0, VDISCARD,'\17', 0, M_CC }, +#else + { "flush", 0, -1, '\0', 0, M_CC }, +#endif +#ifdef VWERASE + { "werase", 0, VWERASE,'\27', 0, M_CC }, +#else + { "werase", 0, -1, '\0', 0, M_CC }, +#endif + { "lnext", 0, VLNEXT, '\26', 0, M_CC }, + { "\n", 0, 0, 0, 010, M_SEPAR }, + { "parenb", 0, PARENB, 0, 04, M_CFLAG }, + { "parodd", 0, PARODD, 0, 04, M_CFLAG }, + { "cs5", 0, CS5, CSIZE, 0604, M_CFLAG }, + { "cs6", 0, CS6, CSIZE, 0604, M_CFLAG }, + { "cs7", 0, CS7, CSIZE, 0604, M_CFLAG }, + { "cs8", 0, CS8, CSIZE, 0604, M_CFLAG }, + { "cstopb", 0, CSTOPB, 0, 04, M_CFLAG }, + { "hupcl", 0, HUPCL, 0, 04, M_CFLAG }, + { "hup", 0, HUPCL, 0, 040, M_CFLAG }, + { "cread", 0, CREAD, CREAD, 04, M_CFLAG }, + { "clocal", 0, CLOCAL, 0, 04, M_CFLAG }, +#ifdef LOBLK + { "loblk", 0, LOBLK, 0, 04, M_CFLAG }, +#else + { "loblk", 0, 0, 0, 04, M_INVAL }, +#endif +#ifdef PAREXT + { "parext", 0, PAREXT, 0, 04, M_CFLAG }, +#else + { "parext", 0, 0, 0, 04, M_INVAL }, +#endif + { "\n", 0, 0, 0, 04, M_SEPAR }, + { "ignbrk", 0, IGNBRK, 0, 0, M_IFLAG }, + { "brkint", 0, BRKINT, 0, 04, M_IFLAG }, +#ifndef UCB + { "brkint", 0,IGNBRK|BRKINT,BRKINT, 0122, M_IFLAG }, +#else /* UCB */ + { "brkint", 0, 0, BRKINT, 0122, M_IFLAG }, +#endif /* UCB */ + { "ignpar", 0, IGNPAR, IGNPAR, 04, M_IFLAG }, + { "inpck", 0, INPCK, INPCK, 0102, M_IFLAG }, + { "ignpar", 0,INPCK|IGNPAR, IGNPAR, 0102, M_IFLAG }, + { "parmrk", 0, PARMRK, 0, 0, M_IFLAG }, + { "inpck", 0, INPCK, INPCK, 04, M_IFLAG }, + { "istrip", 0, ISTRIP, ISTRIP, 0, M_IFLAG }, + { "inlcr", 0, INLCR, 0, 0, M_IFLAG }, + { "igncr", 0, IGNCR, 0, 0, M_IFLAG }, +#ifndef UCB + { "icrnl", 0, ICRNL, 0, 0, M_IFLAG }, +#else /* UCB */ + { "icrnl", 0, ICRNL, ICRNL, 0, M_IFLAG }, +#endif /* UCB */ +#ifdef IUCLC + { "iuclc", 0, IUCLC, 0, 0, M_IFLAG }, +#endif + { "\n", 0, 0, 0, 04, M_SEPAR }, + { "ixon", 0, IXON, IXON, 04, M_IFLAG }, +#ifndef UCB + { "ixany", 0, IXANY, IXANY, 04, M_IFLAG }, +#else /* UCB */ + { "ixany", 0, IXANY, 0, 0, M_IFLAG }, + 
{ "decctlq", 0, IXANY, 0, 01040, M_IFLAG }, +#endif /* UCB */ + { "ixon", 0, 0, IXON, 0302, M_IFLAG }, + { "ixoff", 0, IXOFF, 0, 0, M_IFLAG }, +#ifdef UCB + { "tandem", 0, IXOFF, 0, 040, M_IFLAG }, +#endif /* UCB */ + { "imaxbel", 0, IMAXBEL,0, 0, M_IFLAG }, +#ifdef IUTF8 + { "iutf8", 0, IUTF8, 0, 0, M_IFLAG }, +#endif + { "\n", 0, 0, 0, 04, M_SEPAR }, + { "isig", 0, ISIG, ISIG, 04, M_LFLAG }, + { "icanon", 0, ICANON, ICANON, 04, M_LFLAG }, + { "cbreak", 0, ICANON, 0, 01040, M_LFLAG }, +#ifdef XCASE + { "xcase", 0, XCASE, 0, 04, M_LFLAG }, +#endif + { "opost", 0, OPOST, OPOST, 0102, M_OFLAG }, +#ifdef OLCUC + { "olcuc", 0, OLCUC, 0, 0102, M_OFLAG }, +#endif +#ifndef UCB + { "onlcr", 0, ONLCR, 0, 0102, M_OFLAG }, +#else /* UCB */ + { "onlcr", 0, ONLCR, ONLCR, 0102, M_OFLAG }, +#endif /* UCB */ + { "ocrnl", 0, OCRNL, 0, 0102, M_OFLAG }, + { "onocr", 0, ONOCR, 0, 0102, M_OFLAG }, + { "onlret", 0, ONLRET, 0, 0102, M_OFLAG }, +#if defined (OFILL) && defined (OFDEL) + { "nul-fill", 0, OFILL,OFILL|OFDEL,0202, M_OFLAG }, + { "del-fill", 0,OFILL|OFDEL,OFILL|OFDEL,0202, M_OFLAG }, +#endif +#ifdef TAB1 + { "tab1", 0, TAB1, TABDLY, 0302, M_OFLAG }, +#endif +#ifdef TAB2 + { "tab2", 0, TAB2, TABDLY, 0302, M_OFLAG }, +#endif +#ifdef TAB3 + { "tab3", 0, TAB3, TABDLY, 0302, M_OFLAG }, +#endif + { "\n", 0, 0, 0, 02, M_SEPAR }, + { "isig", 0, ISIG, ISIG, 0102, M_LFLAG }, + { "icanon", 0, ICANON, ICANON, 0102, M_LFLAG }, +#ifdef XCASE + { "xcase", 0, XCASE, 0, 0102, M_LFLAG }, +#endif +#ifndef UCB + { "echo", 0, ECHO, ECHO, 01, M_LFLAG }, + { "echoe", 0, ECHOE, ECHOE, 01, M_LFLAG }, + { "echok", 0, ECHOK, ECHOK, 01, M_LFLAG }, +#else /* UCB */ + { "echo", 0, ECHO, ECHO, 0, M_LFLAG }, + { "echoe", 0, ECHOE, ECHOE, 04, M_LFLAG }, + { "crterase", 0, ECHOE, 0, 040, M_LFLAG }, + { "echok", 0, ECHOK, ECHOK, 0, M_LFLAG }, + { "lfkc", 0, ECHOK, 0, 040, M_LFLAG }, + { "echoe", 0,ECHOE|ECHOKE, ECHOE, 0122, M_LFLAG }, + { "-echoke", 0,ECHOE|ECHOKE, ECHOE, 0122, M_LFLAG }, + { "echoprt", 
0,ECHOE|ECHOPRT,0, 0122, M_LFLAG }, + { "crt", 0,ECHOE|ECHOKE,ECHOE|ECHOKE,0302,M_LFLAG }, +#endif /* UCB */ + { "lfkc", 0, ECHOK, 0, 040, M_LFLAG }, + { "echonl", 0, ECHONL, 0, 0, M_LFLAG }, + { "noflsh", 0, NOFLSH, 0, 0, M_LFLAG }, + { "\n", 0, 0, 0, 04, M_NSEPAR }, + { "tostop", 0, TOSTOP, 0, 0, M_LFLAG }, +#ifndef UCB + { "echoctl", 0, ECHOCTL,0, 0, M_LFLAG }, +#else /* UCB */ + { "echoctl", 0, ECHOCTL,ECHOCTL,0, M_LFLAG }, + { "ctlecho", 0, ECHOCTL,0, 040, M_LFLAG }, + { "prterase", 0, ECHOPRT,0, 040, M_LFLAG }, +#endif /* UCB */ + { "echoprt", 0, ECHOPRT,0, 04, M_LFLAG }, +#ifndef UCB + { "echoke", 0, ECHOKE, 0, 0, M_LFLAG }, +#else /* UCB */ + { "echoke", 0, ECHOKE, 0, 04, M_LFLAG }, + { "crtkill", 0, ECHOKE, 0, 040, M_LFLAG }, +#endif /* UCB */ + { "defecho", 0, 0, 0, 0, M_INVAL }, + { "flusho", 0, FLUSHO, 0, 0, M_LFLAG }, + { "pendin", 0, PENDIN, 0, 0, M_LFLAG }, + { "iexten", 0, IEXTEN, 0, 0, M_LFLAG }, + { "\n", 0, 0, 0, 04, M_SEPAR }, + { "opost", 0, OPOST, OPOST, 04, M_OFLAG }, +#ifdef OLCUC + { "olcuc", 0, OLCUC, 0, 04, M_OFLAG }, +#endif + { "onlcr", 0, ONLCR, 0, 04, M_OFLAG }, + { "ocrnl", 0, OCRNL, 0, 04, M_OFLAG }, + { "onocr", 0, ONOCR, 0, 04, M_OFLAG }, + { "onlret", 0, ONLRET, 0, 04, M_OFLAG }, +#ifdef OFILL + { "ofill", 0, OFILL, 0, 04, M_OFLAG }, +#endif +#ifdef OFDEL + { "ofdel", 0, OFDEL, 0, 04, M_OFLAG }, +#endif +#ifdef TAB1 + { "tab1", 0, TAB1, TABDLY, 0704, M_OFLAG }, +#endif +#ifdef TAB2 + { "tab2", 0, TAB2, TABDLY, 0704, M_OFLAG }, +#endif +#ifdef TAB3 + { "tab3", 0, TAB3, TABDLY, 0704, M_OFLAG }, +#endif + { "\n", 0, 0, 0, 04, M_SEPAR }, +#ifdef NL0 + { "nl0", 0, NL0, NLDLY, 0240, M_OFLAG }, +#endif +#ifdef NL1 + { "nl1", 0, NL1, NLDLY, 0240, M_OFLAG }, +#endif +#ifdef CR0 + { "cr0", 0, CR0, CRDLY, 0240, M_OFLAG }, +#endif +#ifdef CR1 + { "cr1", 0, CR1, CRDLY, 0240, M_OFLAG }, +#endif +#ifdef CR2 + { "cr2", 0, CR2, CRDLY, 0240, M_OFLAG }, +#endif +#ifdef CR3 + { "cr3", 0, CR3, CRDLY, 0240, M_OFLAG }, +#endif +#ifdef TAB0 + { "tab0", 
0, TAB0, TABDLY, 0240, M_OFLAG }, +#endif +#ifdef TAB1 + { "tab1", 0, TAB1, TABDLY, 0240, M_OFLAG }, +#endif +#ifdef TAB2 + { "tab2", 0, TAB2, TABDLY, 0240, M_OFLAG }, +#endif +#ifdef TAB3 + { "tab3", 0, TAB3, TABDLY, 0240, M_OFLAG }, +#endif +#ifdef TABDLY + { "tabs", tabs, 0, 0, 0240, M_FUNCT }, +#endif +#ifdef BS0 + { "bs0", 0, BS0, BSDLY, 0240, M_OFLAG }, +#endif +#ifdef BS1 + { "bs1", 0, BS1, BSDLY, 0240, M_OFLAG }, +#endif +#ifdef FF0 + { "ff0", 0, FF0, FFDLY, 0240, M_OFLAG }, +#endif +#ifdef FF1 + { "ff1", 0, FF1, FFDLY, 0240, M_OFLAG }, +#endif +#ifdef VT0 + { "vt0", 0, VT0, VTDLY, 0240, M_OFLAG }, +#endif +#ifdef VT1 + { "vt1", 0, VT1, VTDLY, 0240, M_OFLAG }, +#endif +#ifdef CRTSCTS + { "ctsxon", 0, CRTSCTS,0, 040, M_OFLAG }, +#endif /* CRTSCTS */ + { "evenp", evenp, 0, 0, 040, M_FUNCT }, + { "parity", evenp, 0, 0, 040, M_FUNCT }, + { "oddp", oddp, 0, 0, 040, M_FUNCT }, + { "spacep", spacep, 0, 0, 040, M_FUNCT }, + { "markp", markp, 0, 0, 040, M_FUNCT }, + { "raw", raw, 0, 0, 040, M_FUNCT }, + { "cooked", cooked, 0, 0, 040, M_FUNCT }, + { "nl", nl, 0, 0, 040, M_FUNCT }, +#ifdef XCASE + { "lcase", lcase, 0, 0, 040, M_FUNCT }, + { "LCASE", lcase, 0, 0, 040, M_FUNCT }, +#endif + { "ek", ek, 0, 0, 040, M_FUNCT }, + { "sane", sane, 0, 0, 040, M_FUNCT }, +#ifdef OFDEL + { "fill", fill, 0, 0, 040, M_FUNCT }, +#endif +#ifdef TABDLY + { "tty33", tty33, 0, 0, 040, M_FUNCT }, + { "tty37", tty37, 0, 0, 040, M_FUNCT }, + { "vt05", vt05, 0, 0, 040, M_FUNCT }, + { "tn300", tn300, 0, 0, 040, M_FUNCT }, + { "ti700", ti700, 0, 0, 040, M_FUNCT }, + { "tek", tek, 0, 0, 040, M_FUNCT }, +#endif + { "rows", rows, 0, 0, 040, M_FUNCT }, + { "columns", columns,0, 0, 040, M_FUNCT }, +#ifdef UCB + { "cols", columns,0, 0, 040, M_FUNCT }, +#endif /* UCB */ + { "ypixels", ypixels,0, 0, 040, M_FUNCT }, + { "xpixels", xpixels,0, 0, 040, M_FUNCT }, + { "min", vmin, 0, 0, 040, M_FUNCT }, + { "time", vtime, 0, 0, 040, M_FUNCT }, + { "line", line, 0, 0, 040, M_FUNCT }, +#ifdef UCB + { 
"litout", litout, 0, 0, 040, M_FUNCT }, + { "pass8", pass8, 0, 0, 040, M_FUNCT }, + { "crt", crt, 0, 0, 040, M_FUNCT }, + { "dec", dec, 0, 0, 040, M_FUNCT }, +#endif /* UCB */ + { 0, 0, 0, 0, 0, M_INVAL } +}; + +static const struct { + const char *s_str; + speed_t s_val; +} speeds[] = { + { "0", B0 }, + { "50", B50 }, + { "75", B75 }, + { "110", B110 }, + { "134", B134 }, + { "134.5", B134 }, + { "150", B150 }, + { "200", B200 }, + { "300", B300 }, + { "600", B600 }, + { "1200", B1200 }, + { "1800", B1800 }, + { "2400", B2400 }, + { "4800", B4800 }, + { "9600", B9600 }, + { "19200", B19200 }, + { "19.2", B19200 }, + { "38400", B38400 }, + { "38.4", B38400 }, +#ifdef B57600 + { "57600", B57600 }, +#endif /* B57600 */ +#ifdef B115200 + { "115200", B115200 }, +#endif /* B115200 */ +#ifdef B230400 + { "230400", B230400 }, +#endif /* B230400 */ +#ifdef B460800 + { "460800", B460800 }, +#endif /* B460800 */ +#ifdef B500000 + { "500000", B500000 }, +#endif /* B500000 */ +#ifdef B576000 + { "576000", B576000 }, +#endif /* B576000 */ +#ifdef B921600 + { "921600", B921600 }, +#endif /* B921600 */ +#ifdef B1000000 + { "1000000", B1000000 }, +#endif /* B1000000 */ +#ifdef B1152000 + { "1152000", B1152000 }, +#endif /* B1152000 */ +#ifdef B1500000 + { "1500000", B1500000 }, +#endif /* B1500000 */ +#ifdef B2000000 + { "2000000", B2000000 }, +#endif /* B2000000 */ +#ifdef B2500000 + { "2500000", B2500000 }, +#endif /* B2500000 */ +#ifdef B3000000 + { "3000000", B3000000 }, +#endif /* B3000000 */ +#ifdef B3500000 + { "3500000", B3500000 }, +#endif /* B3500000 */ +#ifdef B4000000 + { "4000000", B4000000 }, +#endif /* B4000000 */ +#ifdef EXTA + { "exta", EXTA }, +#endif /* EXTA */ +#ifdef EXTB + { "extb", EXTB }, +#endif /* EXTB */ + { 0, 0 } +}; + +static const char *progname; +static const char **args; +static struct termios ts; +static struct winsize ws; +static int wschange; /* ws was changed */ +static long vdis; /* VDISABLE character */ +extern int sysv3; + +static void 
usage(void); +static void getattr(int); +static void list(int, int); +static int listmode(tcflag_t, struct mode, int, int); +static int listchar(cc_t *, struct mode, int, int); +static const char *baudrate(speed_t c); +static void set(void); +static void setmod(tcflag_t *, struct mode, int); +static void setchr(cc_t *, struct mode); +static void inval(void); +static void glist(void); +static void gset(void); +#ifdef UCB +static void hlist(int); +static void speed(void); +static void size(void); +#endif /* UCB */ + +#ifndef UCB +#define STTYFD 0 +#else /* UCB */ +#define STTYFD 1 +#endif /* UCB */ + +int +main(int argc, char **argv) +{ + int dds; + + progname = basename(argv[0]); + setlocale(LC_CTYPE, ""); +#ifndef UCB + if (getenv("SYSV3") != NULL) + sysv3 = 1; + getattr(STTYFD); +#endif /* !UCB */ + if (argc >= 2 && strcmp(argv[1], "--") == 0) { + argc--, argv++; + dds = 1; + } else + dds = 0; + args = argc ? (const char **)&argv[1] : (const char **)&argv[0]; + if(!dds && argc == 2 && argv[1][0] == '-' && argv[1][1] && + argv[1][2] == '\0') { + switch (argv[1][1]) { + case 'a': +#ifdef UCB + getattr(STTYFD); +#endif /* UCB */ + list(1, 0); + break; + case 'g': +#ifdef UCB + getattr(-1); +#endif /* UCB */ + glist(); + break; +#ifdef UCB + case 'h': + getattr(STTYFD); + hlist(1); + break; +#endif /* UCB */ + default: + usage(); + } + } else if (argc == 1) { +#ifdef UCB + getattr(STTYFD); +#endif /* UCB */ + list(0, 0); +#ifdef UCB + } else if (argc == 2 && strcmp(argv[1], "all") == 0) { + getattr(STTYFD); + hlist(0); + } else if (argc == 2 && strcmp(argv[1], "everything") == 0) { + getattr(STTYFD); + hlist(1); + } else if (argc == 2 && strcmp(argv[1], "speed") == 0) { + getattr(-1); + speed(); + } else if (argc == 2 && strcmp(argv[1], "size") == 0) { + getattr(-1); + size(); +#endif /* UCB */ + } else { +#ifdef UCB + getattr(STTYFD); +#endif /* UCB */ + set(); + if (tcsetattr(STTYFD, TCSADRAIN, &ts) < 0 || + wschange && ioctl(STTYFD, TIOCSWINSZ, &ws) < 0) + 
perror(progname); + } + return 0; +} + +static void +usage(void) +{ + fprintf(stderr, "usage: %s [-ag] [modes]\n", progname); + exit(2); +} + +static void +getattr(int fd) +{ +#ifdef UCB + const char devtty[] = "/dev/tty"; + + if (fd < 0 && (fd = open(devtty, O_RDONLY)) < 0) { + fprintf(stderr, "%s: Cannot open %s: %s\n", + progname, devtty, strerror(errno)); + exit(2); + } +#endif /* UCB */ + if (tcgetattr(fd, &ts) < 0) { + perror(progname); + exit(2); + } + if (ioctl(fd, TIOCGWINSZ, &ws) < 0) { + ws.ws_row = 0; + ws.ws_col = 0; + ws.ws_xpixel = 0; + ws.ws_ypixel = 0; + } +#if !defined (__FreeBSD__) && !defined (__DragonFly__) && !defined (__APPLE__) + vdis = fpathconf(fd, _PC_VDISABLE) & 0377; +#else + vdis = '\377' & 0377; +#endif +} + +static void +list(int aflag, int hflag) +{ + int i, d = 0; + speed_t is, os; + + is = cfgetispeed(&ts); + os = cfgetospeed(&ts); + if (is == os) + printf("speed %s baud;", baudrate(is)); + else + printf("ispeed %s baud; ospeed %s baud;", + baudrate(is), baudrate(os)); + if (aflag == 0) { + for (i = 0; modes[i].m_name; i++) { + if (modes[i].m_type == M_PCFLAG) + d += listmode(ts.c_cflag, modes[i], aflag, 1); + } + d = 0; + } + if (sysv3 && aflag == 0) { + putchar('\n'); + } else { + putchar(sysv3 ? ' ' : '\n'); + printf("rows = %d%s columns = %d; " + "ypixels = %d%s xpixels = %d%s\n", + (int)ws.ws_row, + aflag&&hflag ? "" : ";", + (int)ws.ws_col, + (int)ws.ws_ypixel, + aflag&&hflag ? "" : ";", + (int)ws.ws_xpixel, + aflag&&hflag ? 
"" : ";"); + } + if ((ts.c_lflag&ICANON) == 0) + printf("min = %d; time = %d;\n", + (int)ts.c_cc[VMIN], (int)ts.c_cc[VTIME]); + for (i = 0; modes[i].m_name; i++) { + if (modes[i].m_flg&040) + continue; + switch (modes[i].m_type) { + case M_NSEPAR: + if (sysv3) + break; + case M_SEPAR: + if (d && (modes[i].m_flg&8 || + (modes[i].m_flg&(aflag?2:4)) == 0)) { + fputs(modes[i].m_name, stdout); + d = 0; + } + break; + case M_IFLAG: + d += listmode(ts.c_iflag, modes[i], aflag, d); + break; + case M_OFLAG: + d += listmode(ts.c_oflag, modes[i], aflag, d); + break; + case M_CFLAG: + d += listmode(ts.c_cflag, modes[i], aflag, d); + break; + case M_LFLAG: + d += listmode(ts.c_lflag, modes[i], aflag, d); + break; + case M_CC: + if (hflag == 0) + d += listchar(ts.c_cc, modes[i], aflag, d); + break; + } + if (d >= 72 && aflag == 0) { + putchar('\n'); + d = 0; + } + } + if (d && aflag == 0) + putchar('\n'); +} + +static int +listmode(tcflag_t flag, struct mode m, int aflag, int space) +{ + int n = 0; + + if (m.m_flg&010 || (m.m_flg & (aflag?2:4)) == 0 && + (m.m_flg&0200 ? (flag&m.m_dfl) == m.m_val : + m.m_flg&020 ? (flag&m.m_val) == m.m_dfl : + (flag&m.m_val) != m.m_dfl) + | m.m_flg&1 | + (aflag != 0) ^ ((m.m_flg&(aflag?0400:0)) != 0)) { + if (space) { + putchar(' '); + n++; + } + if ((flag&m.m_val) == 0) { + putchar('-'); + n++; + } + n += printf("%s", m.m_name); + } + return n; +} + +static int +listchar(cc_t *cc, struct mode m, int aflag, int space) +{ + int n = 0; + char c = m.m_val >= 0 ? cc[m.m_val] : vdis; + + if (m.m_flg&8 || (m.m_flg & (aflag?2:4)) == 0 && + c != (m.m_dfl?m.m_dfl:vdis) | m.m_flg&1 | + (aflag != 0) ^ ((m.m_flg&(aflag?0400:0)) != 0)) { + if (space) { + putchar(' '); + n++; + } + n += printf("%s ", m.m_name); + if ((c&0377) == vdis) + n += printf(sysv3 ? "<undef>" : "= <undef>"); + else { + printf("= "); + if (c & 0200) { + c &= 0177; + putchar('-'); + n++; + } + if ((c&037) == c) + n += printf("^%c", c | (sysv3 ? 
0100 : 0140)); + else if (c == '\177') + n += printf("DEL"); + else { + putchar(c & 0377); + n++; + } + } + putchar(';'); + n++; + } + return n; +} + +static const char * +baudrate(speed_t c) +{ + int i; + + for (i = 0; speeds[i].s_str; i++) + if (speeds[i].s_val == c) + return speeds[i].s_str; + return "-1"; +} + +static void +set(void) +{ + int i, gotcha, not, sspeed = 0; + speed_t ispeed0, ospeed0, ispeed1, ospeed1; + const char *ap; + struct termios tc; + + ispeed0 = ispeed1 = cfgetispeed(&ts); + ospeed0 = ospeed1 = cfgetospeed(&ts); + while (*args) { + for (i = 0; speeds[i].s_str; i++) + if (strcmp(speeds[i].s_str, *args) == 0) { + ispeed1 = ospeed1 = speeds[i].s_val; + sspeed |= 3; + goto next; + } + gotcha = 0; + if (**args == '-') { + not = 1; + ap = &args[0][1]; + } else { + not = 0; + ap = *args; + } + for (i = 0; modes[i].m_name; i++) { + if (modes[i].m_type == M_SEPAR || modes[i].m_flg&0100) + continue; + if (strcmp(modes[i].m_name, ap) == 0) { + gotcha++; + switch (modes[i].m_type) { + case M_IFLAG: + setmod(&ts.c_iflag, modes[i], not); + break; + case M_OFLAG: + setmod(&ts.c_oflag, modes[i], not); + break; + case M_CFLAG: + case M_PCFLAG: + setmod(&ts.c_cflag, modes[i], not); + break; + case M_LFLAG: + setmod(&ts.c_lflag, modes[i], not); + break; + case M_CC: + if (not) + inval(); + setchr(ts.c_cc, modes[i]); + break; + case M_FUNCT: + modes[i].m_func(not); + break; + } + } + } + if (gotcha) + goto next; + if (strcmp(*args, "ispeed") == 0) { + if (*++args == NULL) + break; + if (atol(*args) == 0) { + ispeed1 = ospeed1; + sspeed |= 1; + goto next; + } else for (i = 0; speeds[i].s_str; i++) + if (strcmp(speeds[i].s_str, *args) == 0) { + ispeed1 = speeds[i].s_val; + sspeed |= 1; + goto next; + } + inval(); + } + if (strcmp(*args, "ospeed") == 0) { + if (*++args == NULL) + break; + for (i = 0; speeds[i].s_str; i++) + if (strcmp(speeds[i].s_str, *args) == 0) { + ospeed1 = speeds[i].s_val; + sspeed |= 2; + goto next; + } + inval(); + } + gset(); + next: 
args++; + } + if (sspeed) { + if (sspeed == 3 && ispeed1 != ospeed1 && ospeed1 != B0) { + tc = ts; + cfsetispeed(&tc, ispeed1); + if (cfgetospeed(&tc) == cfgetospeed(&ts)) { + tc = ts; + cfsetospeed(&tc, ospeed1); + if (cfgetispeed(&tc) == cfgetispeed(&ts)) { + cfsetispeed(&ts, ispeed1); + cfsetospeed(&ts, ospeed1); + } + } + } else { + if (ispeed0 != ispeed1) + cfsetispeed(&ts, ispeed1); + if (ospeed0 != ospeed1) + cfsetospeed(&ts, ospeed1); + } + } +} + +static void +setmod(tcflag_t *t, struct mode m, int not) +{ + if (m.m_flg&0200) { + if (not) + inval(); + *t = *t&~(tcflag_t)m.m_dfl | m.m_val; + } else { + if (not ^ (m.m_flg&01000) != 0) + *t &= ~(tcflag_t)m.m_val; + else + *t |= m.m_val; + } +} + +static void +setchr(cc_t *cc, struct mode m) +{ + if (args[1] == NULL) + return; + args++; + if (m.m_val < 0) + return; + if (**args == '^') { + if (args[0][1] == '-') + cc[m.m_val] = vdis; + else if (args[0][1] == '?') + cc[m.m_val] = '\177'; + else + cc[m.m_val] = args[0][1] & 037; + } else if (strcmp(*args, "undef") == 0) + cc[m.m_val] = vdis; + else + cc[m.m_val] = **args; +} + +static void +inval(void) +{ + fprintf(stderr, "unknown mode: %s\n", *args); + exit(2); +} + +#ifdef TABDLY +static void +tabs(int not) +{ + ts.c_oflag &= ~(tcflag_t)TABDLY; + ts.c_oflag |= not ? 
TAB3 : TAB0; +} +#endif + +static void +evenp(int not) +{ + if (not) { + ts.c_cflag &= ~(tcflag_t)PARENB; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS8; + } else { + ts.c_cflag |= PARENB; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS7; + } +} + +static void +oddp(int not) +{ + if (not) { + ts.c_cflag &= ~(tcflag_t)(PARENB|PARODD); + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS8; + } else { + ts.c_cflag |= PARENB|PARODD; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS7; + } +} + +static void +spacep(int not) +{ + evenp(not); +} + +static void +markp(int not) +{ + oddp(not); +} + +static void +raw(int not) +{ + if (not) { + ts.c_cc[VEOF] = ts.c_cc[VMIN] = '\4'; + ts.c_cc[VEOL] = ts.c_cc[VTIME] = vdis; + ts.c_oflag |= OPOST; + ts.c_lflag |= ISIG|ICANON; + } else { + ts.c_cc[VEOF] = ts.c_cc[VMIN] = 1; + ts.c_cc[VEOL] = ts.c_cc[VTIME] = 1; + ts.c_oflag &= ~(tcflag_t)OPOST; + ts.c_lflag &= ~(tcflag_t)(ISIG|ICANON); + } +} + +static void +cooked(int not) +{ + if (not) + inval(); + raw(1); +} + +static void +nl(int not) +{ + if (not) { + ts.c_iflag |= ICRNL; + ts.c_oflag |= ONLCR; + ts.c_iflag &= ~(tcflag_t)(INLCR|IGNCR); + ts.c_oflag &= ~(tcflag_t)(OCRNL|ONLRET); + } else { + ts.c_iflag &= ~(tcflag_t)ICRNL; + ts.c_oflag &= ~(tcflag_t)ONLCR; + } +} + +static void +sane(int not) +{ + speed_t ispeed, ospeed; + + if (not) + inval(); + ispeed = cfgetispeed(&ts); + ospeed = cfgetospeed(&ts); + ts.c_cc[VINTR] = '\3'; + ts.c_cc[VQUIT] = '\34'; + ts.c_cc[VERASE] = '\10'; + ts.c_cc[VKILL] = '\25'; + ts.c_cc[VEOF] = '\4'; + ts.c_cc[VEOL] = vdis; + ts.c_cc[VEOL2] = vdis; +#ifdef VSWTCH + ts.c_cc[VSWTCH] = vdis; +#endif + ts.c_cc[VSTART] = '\21'; + ts.c_cc[VSTOP] = '\23'; + ts.c_cc[VSUSP] = '\32'; +#ifdef VREPRINT + ts.c_cc[VREPRINT] = '\22'; +#endif +#ifdef VDISCARD + ts.c_cc[VDISCARD] = '\17'; +#endif +#ifdef VWERASE + ts.c_cc[VWERASE] = '\27'; +#endif + ts.c_cc[VLNEXT] = '\26'; + ts.c_cflag = CS8|CREAD; + ts.c_lflag = ISIG|ICANON|ECHO|ECHOE|ECHOK|ECHOKE|IEXTEN; + ts.c_iflag = 
BRKINT|IGNPAR|ICRNL|IXON|IMAXBEL; +#ifdef IUTF8 + if (MB_CUR_MAX > 1) { + wchar_t wc; + if (mbtowc(&wc, "\303\266", 2) == 2 && wc == 0xF6 && + mbtowc(&wc, "\342\202\254", 3) == 3 && + wc == 0x20AC) + ts.c_iflag |= IUTF8; + } +#endif /* IUTF8 */ + ts.c_oflag = OPOST|ONLCR; + cfsetispeed(&ts, ispeed); + cfsetospeed(&ts, ospeed); +} + +#ifdef OFDEL +static void +fill(int not) +{ + if (not) + ts.c_oflag &= ~(tcflag_t)(OFILL|OFDEL); + else { + ts.c_oflag |= OFILL; + ts.c_oflag &= ~(tcflag_t)OFDEL; + } +} +#endif + +#ifdef XCASE +static void +lcase(int not) +{ + if (not) { + ts.c_lflag &= ~(tcflag_t)XCASE; + ts.c_iflag &= ~(tcflag_t)IUCLC; + ts.c_oflag &= ~(tcflag_t)OLCUC; + } else { + ts.c_lflag |= XCASE; + ts.c_iflag |= IUCLC; + ts.c_oflag |= OLCUC; + } +} +#endif + +static void +ek(int not) +{ + if (not) + inval(); + ts.c_cc[VERASE] = '\10'; + ts.c_cc[VKILL] = '\25'; +} + +#ifdef TABDLY +static void +tty33(int not) +{ + if (not) + inval(); + ts.c_oflag &= ~(tcflag_t)(NLDLY|CRDLY|TABDLY|BSDLY|FFDLY|VTDLY); + ts.c_oflag |= CR1; +} + +static void +tty37(int not) +{ + if (not) + inval(); + ts.c_oflag &= ~(tcflag_t)(NLDLY|CRDLY|TABDLY|BSDLY|FFDLY|VTDLY); + ts.c_oflag |= NL1|CR2|TAB1|FF1|VT1; +} + +static void +vt05(int not) +{ + if (not) + inval(); + ts.c_oflag &= ~(tcflag_t)(NLDLY|CRDLY|TABDLY|BSDLY|FFDLY|VTDLY); + ts.c_oflag |= NL1; +} + +static void +tn300(int not) +{ + if (not) + inval(); + ts.c_oflag &= ~(tcflag_t)(NLDLY|CRDLY|TABDLY|BSDLY|FFDLY|VTDLY); + ts.c_oflag |= CR1; +} + +static void +ti700(int not) +{ + if (not) + inval(); + ts.c_oflag &= ~(tcflag_t)(NLDLY|CRDLY|TABDLY|BSDLY|FFDLY|VTDLY); + ts.c_oflag |= CR2; +} + +static void +tek(int not) +{ + if (not) + inval(); + ts.c_oflag &= ~(tcflag_t)(NLDLY|CRDLY|TABDLY|BSDLY|FFDLY|VTDLY); + ts.c_oflag |= FF1; +} +#endif + +static void +rows(int not) +{ + if (not) + inval(); + if (args[1] == NULL) + return; + wschange = 1; + ws.ws_row = atoi(*++args); +} + +static void +columns(int not) +{ + if (not) + inval(); + if 
(args[1] == NULL) + return; + wschange = 1; + ws.ws_col = atoi(*++args); +} + +static void +ypixels(int not) +{ + if (not) + inval(); + if (args[1] == NULL) + return; + wschange = 1; + ws.ws_ypixel = atoi(*++args); +} + +static void +xpixels(int not) +{ + if (not) + inval(); + if (args[1] == NULL) + return; + wschange = 1; + ws.ws_xpixel = atoi(*++args); +} + +static void +vmin(int not) +{ + if (not) + inval(); + if (args[1] == NULL) + return; + ts.c_cc[VMIN] = atoi(*++args); +} + +static void +vtime(int not) +{ + if (not) + inval(); + if (args[1] == NULL) + return; + ts.c_cc[VTIME] = atoi(*++args); +} + + +static void +line(int not) +{ + if (not) + inval(); + if (args[1] == NULL) + return; +#ifdef __linux__ + ts.c_line = atoi(*++args); +#endif +} + +static const char gfmt[] ="%lx:%lx:%lx:%lx:" + "%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x:%x"; + +static void +glist(void) +{ + printf(gfmt, (long)ts.c_iflag, + (long)ts.c_oflag, + (long)ts.c_cflag, + (long)ts.c_lflag, + (int)ts.c_cc[VINTR], + (int)ts.c_cc[VQUIT], + (int)ts.c_cc[VERASE], + (int)ts.c_cc[VKILL], + (int)ts.c_cc[VEOF], + (int)ts.c_cc[VEOL], + (int)ts.c_cc[VEOL2], +#ifdef VSWTCH + (int)ts.c_cc[VSWTCH], +#else + (int)vdis, +#endif + (int)ts.c_cc[VSTART], + (int)ts.c_cc[VSTOP], + (int)ts.c_cc[VSUSP], +#ifdef VDSUSP + (int)ts.c_cc[VDSUSP], +#else + (int)vdis, +#endif +#ifdef VREPRINT + (int)ts.c_cc[VREPRINT], +#else + (int)vdis, +#endif +#ifdef VDISCARD + (int)ts.c_cc[VDISCARD], +#else + (int)vdis, +#endif +#ifdef VWERASE + (int)ts.c_cc[VWERASE], +#else + (int)vdis, +#endif + (int)ts.c_cc[VLNEXT], + (int)ts.c_cc[VMIN], + (int)ts.c_cc[VTIME]); + putchar('\n'); +} + +static void +gset(void) +{ + long iflag, oflag, cflag, lflag; + int vintr, vquit, verase, vkill, + veof, veol, veol2, vswtch, + vstart, vstop, vsusp, vdsusp, + vrprnt, vflush, vwerase, vlnext, + vmin, vtime; + + if (sscanf(*args, gfmt, + &iflag, &oflag, &cflag, &lflag, + &vintr, &vquit, &verase, &vkill, + &veof, &veol, &veol2, &vswtch, + 
&vstart, &vstop, &vsusp, &vdsusp, + &vrprnt, &vflush, &vwerase, &vlnext, + &vmin, &vtime) != 22) + inval(); + ts.c_iflag = iflag; + ts.c_oflag = oflag; + ts.c_cflag = cflag; + ts.c_lflag = lflag; + ts.c_cc[VINTR] = vintr; + ts.c_cc[VQUIT] = vquit; + ts.c_cc[VKILL] = vkill; + ts.c_cc[VEOF] = veof; + ts.c_cc[VEOL] = veol; + ts.c_cc[VEOL2] = veol2; +#ifdef VSWTCH + ts.c_cc[VSWTCH] = vswtch; +#endif + ts.c_cc[VSTART] = vstart; + ts.c_cc[VSTOP] = vstop; + ts.c_cc[VSUSP] = vsusp; +#ifdef VDSUSP + ts.c_cc[VDSUSP] = vdsusp; +#endif +#ifdef VREPRINT + ts.c_cc[VREPRINT] = vrprnt; +#endif +#ifdef VDISCARD + ts.c_cc[VDISCARD] = vflush; +#endif +#ifdef VWERASE + ts.c_cc[VWERASE] = vwerase; +#endif + ts.c_cc[VLNEXT] = vlnext; + ts.c_cc[VMIN] = vmin; + ts.c_cc[VTIME] = vtime; +} + +#ifdef UCB +static void +hchar(int c, int c2, int spc) +{ + int n = 0; + +chr: if (c != vdis) { + if (c & 0200) { + c &= 0177; + n += printf("M-"); + } + if ((c&037) == c) + n += printf("^%c", c | 0100); + else if (c == '\177') + n += printf("^?"); + else { + putchar(c); + n++; + } + } + if (c2 != EOF) { + putchar('/'); + n++; + c = c2; + c2 = EOF; + goto chr; + } + if (spc) + while (n++ < 7) + putchar(' '); +} + +static void +hlist(int aflag) +{ + list(aflag, 1); + printf("erase kill werase rprnt flush lnext " + "susp intr quit stop eof\n"); + hchar(ts.c_cc[VERASE]&0377, EOF, 1); + hchar(ts.c_cc[VKILL]&0377, EOF, 1); +#ifdef VWERASE + hchar(ts.c_cc[VWERASE]&0377, EOF, 1); +#else + hchar(vdis, EOF, 1); +#endif +#ifdef VREPRINT + hchar(ts.c_cc[VREPRINT]&0377, EOF, 1); +#else + hchar(vdis, EOF, 1); +#endif +#ifdef VDISCARD + hchar(ts.c_cc[VDISCARD]&0377, EOF, 1); +#else + hchar(vdis, EOF, 1); +#endif + hchar(ts.c_cc[VLNEXT]&0377, EOF, 1); + hchar(ts.c_cc[VSUSP]&0377, EOF, 1); + hchar(ts.c_cc[VINTR]&0377, EOF, 1); + hchar(ts.c_cc[VQUIT]&0377, EOF, 1); + hchar(ts.c_cc[VSTOP]&0377, ts.c_cc[VSTART]&0377, 1); + hchar(ts.c_cc[VEOF]&0377, EOF, 1); + putchar('\n'); +} + +static void +speed(void) +{ + 
printf("%s\n", baudrate(cfgetospeed(&ts))); +} + +static void +size(void) +{ + printf("%d %d\n", (int)ws.ws_row, (int)ws.ws_col); +} + +static void +litout(int not) +{ + if (not) { + ts.c_cflag |= PARENB; + ts.c_iflag |= ISTRIP; + ts.c_oflag |= OPOST; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS7; + } else { + ts.c_cflag &= ~(tcflag_t)PARENB; + ts.c_iflag &= ~(tcflag_t)ISTRIP; + ts.c_oflag &= ~(tcflag_t)OPOST; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS8; + } +} + +static void +pass8(int not) +{ + if (not) { + ts.c_cflag |= PARENB; + ts.c_iflag |= ISTRIP; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS7; + } else { + ts.c_cflag &= ~(tcflag_t)PARENB; + ts.c_iflag &= ~(tcflag_t)ISTRIP; + ts.c_cflag = ts.c_cflag&~(tcflag_t)CSIZE | CS8; + } +} + +static void +crt(int not) +{ + if (not) + inval(); + ts.c_lflag |= ECHOE|ECHOCTL; + if (cfgetospeed(&ts) >= B1200) + ts.c_lflag |= ECHOKE; +} + +static void +dec(int not) +{ + if (not) + inval(); + ts.c_cc[VERASE] = '\177'; + ts.c_cc[VKILL] = '\25'; + ts.c_cc[VINTR] = '\3'; + ts.c_iflag &= ~(tcflag_t)IXANY; + crt(not); +} +#endif /* UCB */ diff --git a/tar/NOTES b/tar/NOTES @@ -0,0 +1,61 @@ +Notes for the tar utility +========================= + +- tar is able to extract many GNU tar format archives; notably, large file + sizes, user and group ids, and long file and link names are supported. + Multivolume archives, sparse files, and incremental backups are not + supported. Thus for extraction for GNU tar archives, this tar behaves + identically to our cpio (and can extract GNU tar format archives written + by cpio). + + tar is also able to update GNU tar format archives so that GNU tar can + read them, but it does not use any of the GNU extensions then; thus file + names are limited to 99 characters. It currently does not allow the + creation of new GNU tar format archives; use cpio if you want to do + that. 
+ +- The multivolume format is identical to that in cpio (both our and SVr4 + derived implementations) and incompatible with that in System V tar. Besides + the obvious advantage of interoperability, the format can be easily read + with standard Unix tools if no special support in tar or cpio is available + on foreign platforms (see the manual page). A disadvantage is that no + volume numbers are stored on the media, so the user has to ensure the + correct order of parts himself. + +- Although the archives written by tar are intended to be compatible with + the tar format given in IEEE Std. 1003.1, 1996 (POSIX.1), extraction of + archives is not compliant with this standard since we use the numerical + uid and gid fields instead of the symbolic uname and gname fields when + restoring ownership of files. This is done because use of the symbolic + fields can easily lead to conflicts if the numerical user id of a user + that has no local name stored in the archive is identical to the local + numeric user id of a user whose symbolic name is mapped to a different + uid than that stored in the archive, e. g. + + name on tape uid on tape local name local uid result + foo 100 foo 101 101 + bar 101 (none) (none) 101 + + Use of the numeric fields avoids such clashes, at least if extraction + takes place in an empty directory, and allows mapping of user ids using + the 'find' and 'chown' utilities after extraction. + + In the wording of the standard, this is not 'a privileged, protection- + preserving version of the utility' (10.1.1, p. 235, l. 202-203). + +- Extraction does not conform to the tar(XCU) specification (LEGACY) in + The Single UNIX Specification, Version 2, The Open Group 1997, since + the 'x' key described there demands extraction with the same modes as + on the tape if the files are newly created, which is not done by this + implementation (nor by any traditional Unix implementation + known to me) unless the 'p' key is also given. 
The modes of existing + files are also not preserved since such files are always removed before + extraction of the corresponding archived file by this implementation. + + This conflict between standard and implementations actually existed + already with the System V Interface Definition, Third and Fourth + Editions, tar(AU_CMD), which demand the same behavior as SUSv2; it + seems that no System V implementation has ever fulfilled USL's own + specification for tar. + + Gunnar Ritter 9/22/03 diff --git a/tar/mkfile b/tar/mkfile @@ -0,0 +1,9 @@ +BIN = tar +OBJ = tar.o +LOCAL_CFLAGS = -DTARDFL=\"$DFLDIR/tar\" -DSHELL=\"$SHELL\" -DSV3BIN=\"$BINDIR\" -DDEFBIN=\"$BINDIR\" +INSTALL_BIN = tar +INSTALL_MAN1 = tar.1 +INSTALL_DFL = tar.dfl +DEPS = libcommon + +<$mkbuild/mk.default diff --git a/tar/tar.1 b/tar/tar.1 @@ -0,0 +1,473 @@ +.\" +.\" Derived from tar(1), Unix 32V: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. 
+.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.\" Sccsid @(#)tar.1 1.46 (gritter) 3/1/05 +.TH TAR 1 "3/1/05" "Heirloom Toolchest" "User Commands" +.SH NAME +tar \- tape archiver +.SH SYNOPSIS +\fBtar\fR [\ \fIkey\fR\ ] [\ \fIfiles\fR\ ] +.SH DESCRIPTION +.I Tar +saves and restores files +on magtape or other media. +Its actions are controlled by the +.I key +argument. +The +.I key +is a string of characters containing +at most one function letter and possibly +one or more function modifiers. +Other arguments to the command are file or directory +names specifying which files are to be dumped or restored. +In all cases, appearance of a directory name refers to +the files and (recursively) subdirectories of that directory. +.PP +The function portion of +the key is specified by one of the following letters; +use of the leading hyphen is optional: +.TP 8 +.B \-r +The named files +are written +on the end of the tape. +The +.B \-c +function implies this. +.TP 8 +.B \-x +The named files are extracted from the tape. +If the named file matches a directory whose contents +had been written onto the tape, this directory is (recursively) extracted. 
+The owner and mode are restored (if possible). +If no file argument is given, the entire content of the +tape is extracted. +Note that if multiple entries specifying the same file +are on the tape, the last one overwrites +all earlier. +Existing target files are removed +before the file in the archive is extracted. +.TP 8 +.B \-t +The names of the specified files are listed each time they occur +on the tape. +If no file argument is given, +all of the names on the tape are listed. +.TP 8 +.B \-u +The named files are added to the tape if either they +are not already there or have +been modified since last put on the tape. +.TP 8 +.B \-c +Create a new tape; writing begins on the beginning +of the tape instead of after the last file. +This command implies +.B \-r. +.PP +The following characters may be used in addition to the letter +which selects the function desired. +.TP 10 +.BI 0 \&...\& 9 +This +modifier selects the drive on which the tape (or other media) is mounted. +Each number selects an `archive\fIN\fR=' entry in +.IR /etc/default/tar , +which is followed by up to four fields: +device name, +blocking factor, +size in kilobytes, +and tape flag (`n' or `N' for no tape). +The entry `archive=' is the default +if neither this modifier nor the +.B f +key is given +and the +.I TAPE +environment variable is not present. +The following example sets up a tape, +a 1.44 MB floppy drive +for use with `0', using a blocking factor of 18, +and a 1.2 MB floppy drive for use with `1', +using a blocking factor of 15: +.sp + archive=/dev/rmt/c0s0 20 0 +.br + archive0=/dev/dsk/f03ht 18 1440 N +.br + archive1=/dev/dsk/f15ht 15 1200 N +.TP 10 +.B v +Normally +.I tar +does its work silently. +The +.B v +(verbose) +option causes it to type the name of each file it treats +preceded by the function letter. +With the +.B t +function, +.B v +gives more information about the +tape entries than just the name. 
+.TP 10 +.B w +causes +.I tar +to print the action to be taken followed by file name, then +wait for user confirmation. If a word beginning with `y' +is given, the action is performed. Any other input means +don't do it. +.TP 10 +.B f +causes +.I tar +to use the next argument as the name of the archive instead +of the default (see the description for the `0\&...\&9' key above). +If the name of the file is `\-', tar writes to +standard output or reads from standard input, whichever is +appropriate. Thus, +.I tar +can be used as the head or tail of a filter chain. +.I Tar +can also be used to move hierarchies with the command +.sp + cd fromdir; tar cf \- . | (cd todir; tar xfp \-) +.TP 10 +.B b +causes +.I tar +to use the next argument as the blocking factor for tape +records. The default is device dependent. +This option should only be used with raw magnetic tape archives (See +.B f +above). +.TP 10 +.B l +tells +.I tar +to complain if it cannot resolve all of the links +to the files dumped. If this is not specified, no +error messages are printed. +.TP 10 +.B o +causes +.I tar +to set the owner and group of restored files +to those of the invoking user +instead of those specified in the archive. +This is the default +unless the caller is the super-user. +.TP 10 +.B p +causes +.I tar +to set the permission bits of extracted files +exactly to those given in the archive; +normally, the current +.I umask +is applied. +.TP 10 +.B i +tells +.I tar +not to exit when a directory checksum error is encountered. +.TP 10 +.B e +tells +.I tar +to exit at several minor error conditions +that normally cause only a warning. +.TP 10 +.B n +tells +.I tar +that the input file is not a tape. +This causes block sizes to be +reported in kilobytes instead of tape blocks with the `v' modifier. +.TP 10 +.B m +causes +.I tar +not to restore modification times on extracted files; +the modification time is the time when the file was restored. 
+.TP 10 +.B h +causes +.I tar +to store the target of a symbolic link in the archive +and to follow symbolic links to directories +instead of storing the link itself. +.TP 10 +.B L +Same as +.BR h . +.TP 10 +.B A +causes +.I tar +to strip a leading `/' when storing and restoring pathnames, +i.\|e. to store and extract relative names instead of absolute ones. +.TP 10 +.B k +can be used to create and extract multi-volume archives. +The next unevaluated argument is used as the maximum size in kilobytes +for each part. +If the archive does not fit on one medium, +the user is prompted for the next. +The same sizes must be given for creation and extraction +of multi-volume archives. +.TP 10 +.B B +causes +.I tar +to repeat a +.IR read (2) +operation if it gets less data than expected. +This is useful when reading from pipes, network transports etc., +and is enabled by default if reading from standard input, +pipes, or sockets. +.TP 10 +.B F +specifies the name of a file with additional pathnames +in the next unevaluated argument. +Each line of this file must contain a pathname +(without any leading or trailing white-space) +that is put into the archive +before further files specified on the command line. +.TP 10 +.B X +specifies the name of a file +that contains pathnames to be excluded, +in the same format as with +.BR F . +This option may be specified multiple times, +causing pathnames contained in any of the files +to be excluded. +.PP +.I Tar +also accepts the following options +intermixed with the file +.IR names : +.TP 10 +\fB\-C\fI dir\fR +Change the working directory to +.I dir +before archiving the file +.I name +immediately following +.IR dir ; +.I name +is interpreted relative to +.IR dir . +.TP 10 +\fB\-I\fI file\fR +Specifies a file with additional pathnames +as described for the +.B F +key above, +inserted at the current position in the archive. 
+.PP +The archive format written by +.I tar +is expected to be compatible +with the `Extended tar Format' as defined in +IEEE Std. 1003.1, 1996, +although the numerical +.I uid +and +.I gid +fields are always used for extraction, +ignoring the symbolic +.I uname +and +.I gname +fields. +.PP +IEEE Std. 1003.1, 2001 +.I pax +format extended headers are created by +.I tar +under the following circumstances: +A path name exceeds 255 bytes +or does not contain a slash character at an appropriate position, +and does not contain non-UTF-8 characters; +the name of a linked file exceeds 100 bytes; +a file is larger than 8\ GB; +a user or group ID exceeds the number 2097151. +When updating GNU or old format archives, +no extended headers are created +and any path names are limited to 100 bytes. +Most GNU extensions are supported when reading archives, +and Sun extensions also for appending. +.PP +Multi-volume archives are split single-volume archives. +For an archive created with +.sp + tar cfk /dev/dsk/f03ht 1440 \fIinput\fR +.sp +it is thus possible to generate a single-volume archive by executing +.sp + dd if=/dev/dsk/f03ht bs=1k count=1440 >>singlevolume +.sp +once for each floppy disk. +.PP +On archives compressed with +.IR bzip2 (1), +.IR compress (1), +or +.IR gzip (1), +the appropriate decompression program is run automatically +with the +.I t +and +.I x +keys. +.PP +The following keys are supported as extensions: +.TP 10 +.B z +causes +.I tar +to pipe its output through +.IR gzip (1) +when creating archives. +When reading archives, +this option is ignored. +.TP 10 +.B j +Similar to +.IR z , +but using +.IR bzip2 (1) +as compression program. +.TP 10 +.B Z +Similar to +.IR z , +but using +.IR compress (1) +as compression program. 
+.SH EXAMPLES +List the contents of a software distribution archive: +.in +2 +.sp +tar tvf distribution.tar.gz +.in -2 +.PP +Extract a software distribution archive, +preserving file permissions, +but setting the owner of all extracted files +to the invoking user +(even if invoked by +.IR root ): +.in +2 +.sp +tar xfop distribution.tar.gz +.in -2 +.PP +Write the current directory to the default archive location: +.in +2 +.sp +tar c . +.sp +.in -2 +If the default location is a floppy drive, +the archive is split across multiple volumes if necessary. +.PP +Create a +.IR gzip (1) +compressed archive of the directory +.I project +and its contents: +.in +2 +.sp +tar cf \- project | gzip \-c > project.tar.gz +.in -2 +.SH "ENVIRONMENT VARIABLES" +.TP 10 +.BR LANG ", " LC_ALL +See +.IR locale (7). +.TP 10 +.B LC_TIME +Used for the time string printed when the keys +.B tv +are given. +.TP 10 +.B SYSV3 +If set, +old tar format archives are created with `c', +and the meaning of the `e' key is changed +such that archiving is aborted +if a file cannot fit on a single volume +of a multi-volume archive. +.TP 10 +.B TAPE +Used as default archive +if neither the `f' nor the `0\&...\&9' keys are present. +.SH FILES +/etc/default/tar +.br +/tmp/tar* +.SH "SEE ALSO" +cpio(1), +ls(1), +pax(1), +umask(2) +.SH DIAGNOSTICS +Complaints about bad key characters and tape read/write errors. +.PP +Complaints if enough memory is not available to hold +the link tables. +.SH NOTES +There is no way to ask for the +.IR n -th +occurrence of a file. +.PP +Tape errors are handled ungracefully. +.PP +The +.B u +option can be slow. +.PP +Use of the +.B r +and +.B u +options with tape devices requires +drives capable of writing between filemarks. diff --git a/tar/tar.c b/tar/tar.c @@ -0,0 +1,3204 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, March 2003. + */ +/* from Unix 32V /usr/src/cmd/tar.c */ +/* + * Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * Redistributions of source code and documentation must retain the + * above copyright notice, this list of conditions and the following + * disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed or owned by Caldera + * International, Inc. + * Neither the name of Caldera International, Inc. nor the names of + * other contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA + * INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE + * LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)tar.sl 1.180 (gritter) 10/9/10"; + +#include <sys/types.h> +#include <sys/stat.h> +#ifdef __linux__ +#include <linux/fd.h> +#if !defined (__UCLIBC__) && !defined (__dietlibc__) +#include <linux/fs.h> +#endif /* !__UCLIBC__, !__dietlibc__ */ +#undef WNOHANG +#undef WUNTRACED +#undef P_ALL +#undef P_PID +#undef P_PGID +#ifdef __dietlibc__ +#undef NR_OPEN +#undef PATH_MAX +#endif /* __dietlibc__ */ +#endif /* __linux__ */ +#include <sys/wait.h> +#include <fcntl.h> +#include <unistd.h> +#include <time.h> +#include <utime.h> +#include <stdio.h> +#include <dirent.h> +#include <signal.h> +#include "sigset.h" +#include <string.h> +#include <stdlib.h> +#include <malloc.h> +#include <libgen.h> +#include <errno.h> +#include <pwd.h> +#include <grp.h> +#include <inttypes.h> +#include <iblok.h> +#include <locale.h> +#include <alloca.h> + +#include <sys/ioctl.h> + +#if defined (__linux__) || defined (__sun) || defined (__FreeBSD__) || \ + defined (__hpux) || defined (_AIX) || defined (__NetBSD__) || \ + defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__) +#ifndef __G__ +#include <sys/mtio.h> +#endif +#else /* SVR4.2MP */ +#include <sys/scsi.h> +#include <sys/st01.h> +#endif /* SVR4.2MP */ + +#ifdef _AIX +#include <sys/sysmacros.h> +#endif + +#if !defined (major) && !defined (__G__) +#include <sys/mkdev.h> +#endif /* !major */ + +#include <getdir.h> +#include <asciitype.h> +#include <atoll.h> +#include <memalign.h> + +#ifdef __GLIBC__ +#ifdef _IO_getc_unlocked +#undef getc +#define getc(f) _IO_getc_unlocked(f) +#endif +#ifdef _IO_putc_unlocked +#undef putc +#define putc(c, f) _IO_putc_unlocked(c, f) +#endif +#endif + +#if defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || 
defined (__APPLE__) +/* + * For whatever reason, FreeBSD casts the return values of major() and + * minor() to signed values so that normal limit comparisons will fail. + */ +static unsigned long +mymajor(long dev) +{ + return major(dev) & 0xFFFFFFFFUL; +} +#undef major +#define major(a) mymajor(a) +static unsigned long +myminor(long dev) +{ + return minor(dev) & 0xFFFFFFFFUL; +} +#undef minor +#define minor(a) myminor(a) +#endif /* __FreeBSD__, __NetBSD__, __OpenBSD__, __DragonFly__, __APPLE__ */ + +#define TBLOCK 512 +#define MAXBLF (SSIZE_MAX/TBLOCK) +static int NBLOCK = 20; +#define NAMSIZ 100 +#define PFXSIZ 155 +#define MAGSIZ 6 +#define TIMSIZ 12 +static union hblock { + char dummy[TBLOCK]; + struct header { + char name[NAMSIZ]; + char mode[8]; + char uid[8]; + char gid[8]; + char size[12]; + char mtime[TIMSIZ]; + char chksum[8]; + char linkflag; + char linkname[NAMSIZ]; + char magic[MAGSIZ]; + char version[2]; + char uname[32]; + char gname[32]; + char devmajor[8]; + char devminor[8]; + char prefix[PFXSIZ]; + } dbuf; +} dblock, *tbuf; + +static struct internal_header { + char *name; + long namesize; + char *rname; + char *linkname; + char *rlinkname; + long linksize; +} hbuf; + +static struct islot { + struct islot *left; /* left link */ + struct islot *right; /* right link */ + struct islot *nextp; /* ordered list link */ + ino_t inum; /* inode number */ + int count; /* count of links found */ + char *pathname;/* name of first link encountered */ +} *ihead; /* ordered list for report with 'l' */ +static struct islot *inull; /* splay tree null element */ + +struct dslot { + struct dslot *nextp; /* next device */ + struct islot *itree; /* inode slots */ + dev_t devnum; /* real device id */ +}; +static struct dslot *devices;/* devices list */ + +static enum paxrec { + PR_NONE = 0000, + PR_ATIME = 0001, + PR_GID = 0002, + PR_LINKPATH = 0004, + PR_MTIME = 0010, + PR_PATH = 0020, + PR_SIZE = 0040, + PR_UID = 0100, + PR_SUN_DEVMAJOR = 0200, + PR_SUN_DEVMINOR = 
0400 +} paxrec, globrec; + +static struct stat globst; + +static int rflag, xflag, vflag, tflag, mt, cflag, mflag, nflag, kflag; +static int oflag, hflag, pflag, iflag, eflag, Aflag, Bflag, Eflag; +enum { + B_UNSPEC = 0, + B_AUTO = 1, + B_DEFN = 2, + B_USER = 3 +} bflag; +static int gnuflag = -1; +static int oldflag = -1; +static long long volsize; +static char *Fflag, *Xflag; +static int term, chksum, wflag, recno, first, linkerrok; +static int freemem = 1; +static int nblock = 1; +static struct stat mtstat; +static int tapeblock = -1; +static int writerror; + +static int workdir; + +static off_t low; +static off_t high; + +static FILE *tfile; + +static const char *usefile; + +struct magtape { + const char *device; + long block; + long long size; + int nflag; +}; + +static struct magtape magtapes[] = { + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 1, 0, 0 }, + { NULL, 0, 0, 0 } +}; + +static int hadtape; + +static int maybezip; + +static enum { + POSTORDER = 0, + PREORDER = 1 +} order; + +static char *progname; +extern int sysv3; + +static dev_t *vis_dev; +static ino_t *vis_ino; +static int vis_max; + +static long files; +static long gotcha; + +static int midfile; + +static uid_t myuid; +static gid_t mygid; + +static long long rdtotal; +static long long wrtotal; + +static int N = 300; + +static void usage(void); +static void ckusefile(void); +static void cktxru(int); +static void dorep(char *[]); +static void doarg(char *); +static int endtape(int); +static void tgetdir(struct stat *); +static void tgetnam(void); +static void tgetgnu(char **, long *); +static void tgetpax(struct stat *, long long *, long long *); +static enum paxrec tgetrec(char **, char **, char **); +static long long tgetval(const char *, int); +static void passtape(struct stat *); +static void putfile(const char *, const 
char *, int, int); +static void putreg(const char *, const char *, int, struct stat *); +static int putsym(const char *, const char *, char **, size_t); +static void wrhdr(const char *, const char *, struct stat *); +static void wrpax(const char *, const char *, struct stat *); +static void addrec(char **, long *, long *, + const char *, const char *, long long); +static void paxnam(struct header *, const char *); +static void doxtract(char *[]); +static int xtrreg(const char *, struct stat *); +static int xtrlink(const char *, struct stat *, int); +static int xtrdev(const char *, struct stat *, mode_t); +static int xtrdir(const char *, struct stat *); +static void dotable(char *[]); +static void putempty(void); +static void longt(register struct stat *, int); +static void pmode(register struct stat *, int); +static void tselect(int *, struct stat *); +static int checkdir(register char *, struct stat *); +static void onsig(int); +static void tomodes(const char *, register struct stat *); +static int checksum(int); +static int checkw(int, const char *, struct stat *, int); +static int response(void); +static int checkupdate(const char *, struct stat *); +static void done(int); +static int prefix(register const char *, register const char *); +static off_t lookup(const char *); +static off_t bsrch(const char *, int, off_t, off_t); +static int cmp(const char *, const char *, size_t); +static int readtape(char *); +static int writetape(const char *); +static void backtape(int); +static void flushtape(void); +static void *srealloc(void *, size_t); +static void *smalloc(size_t); +static void *scalloc(size_t, size_t); +static void *bfalloc(size_t); +static char *nameof(struct header *, char *); +static int mkname(struct header *, const char *); +static char *linkof(struct header *, char *); +static int mklink(struct header *, const char *, const char *); +static void blocksof(const char *, struct stat *, long long *, long long *); +static void tchown(int (*)(const char *, 
uid_t, gid_t), + const char *, uid_t, gid_t); +static void edone(int); +static ssize_t mtread(void *, size_t); +static ssize_t mtwrite(const void *, size_t); +static void newvolume(void); +static void goback(int); +static void getpath(const char *, char **, char **, size_t *, size_t *); +static void setpath(const char *, char **, char **, + size_t, size_t *, size_t *); +static void defaults(void); +static void settape(int); +static void suprmsg(void); +static void odirect(void); +static void domtstat(void); +static int checkzip(const char *, int); +static int redirect(const char *, const char *, int); +static const char *getuser(uid_t); +static const char *getgroup(gid_t); +static char *sstrdup(const char *); +static void fromfile(const char *); +static void readexcl(const char *); +static void creatfile(void); +static mode_t cmask(struct stat *, int); +static struct islot *isplay(ino_t, struct islot *); +static struct islot *ifind(ino_t, struct islot **); +static void iput(struct islot *, struct islot **); +static struct dslot *dfind(struct dslot **, dev_t); +static char *sequence(void); +static void docomp(const char *); +static int jflag, zflag, Zflag; +static int utf8(const char *); +static void settmp(char *, size_t, const char *); + +int +main(int argc, char *argv[]) +{ + char *cp; + + progname = basename(argv[0]); + setlocale(LC_TIME, ""); + if (getenv("SYSV3") != NULL) + sysv3 = 2; + if (argc < 2) + usage(); + + if ((myuid = getuid()) != 0) + oflag = 1; + mygid = getgid(); + tfile = NULL; + inull = scalloc(1, sizeof *inull); + inull->left = inull->right = inull; + defaults(); + argv[argc] = 0; + argv++; + for (cp = *argv++; *cp; cp++) + switch(*cp) { + case 'f': + usefile = *argv++; + hadtape++; + break; + case 'c': + if (sysv3 > 1 && Eflag == 0) + oldflag = 1; + cflag++; + rflag++; + cktxru('r'); + break; + case 'X': + Xflag = *argv++; + if (Xflag == NULL) { + fprintf(stderr, "%s: exclude file must be " + "specified with 'X' option\n", + progname); + 
done(1); + } + creatfile(); + readexcl(Xflag); + break; + case 'u': + creatfile(); + /*FALLTHRU*/ + case 'r': + cktxru(*cp); + rflag++; + break; + case 'v': + vflag++; + break; + case 'w': + wflag++; + break; + case 'x': + cktxru(*cp); + xflag++; + break; + case 't': + cktxru(*cp); + tflag++; + break; + case 'm': + mflag++; + break; + case '-': + break; + case '0': + case '1': case '2': case '3': + case '4': case '5': case '6': + case '7': case '8': case '9': + settape(*cp); + hadtape++; + break; + case 'b': + bflag = B_USER; + if (*argv == NULL) + goto invblk; + nblock = atoi(*argv++); + if (nblock <= 0 || (long)nblock > MAXBLF) { + invblk: fprintf(stderr, + "%s: invalid blocksize. (Max %ld)\n", + progname, (long)MAXBLF); + done(1); + } + break; + case 'l': + linkerrok++; + break; + case 'o': + oflag++; + break; + case 'h': case 'L': + hflag++; + break; + case 'p': + pflag++; + break; + case 'i': + iflag++; + break; + case 'e': + eflag++; + break; + case 'A': + Aflag++; + break; + case 'E': + Eflag++; + oldflag = -1; + break; + case 'F': + Fflag = *argv++; + if (Fflag == NULL) { + fprintf(stderr, + "%s: F requires a file name.\n", + progname); + done(1); + } + break; + case 'k': + if (*argv == NULL || (volsize = atoll(*argv)) < 250) { + fprintf(stderr, "%s: sizes below %dk " + "not supported\n", progname, + 250); + done(1); + } + volsize *= 1024; + kflag = 1; + argv++; + /*FALLTHRU*/ + case 'n': + nflag = 2; + break; + case 'B': + Bflag = 1; + break; + case 'z': + zflag = 1; + jflag = 0; + Zflag = 0; + break; + case 'j': + jflag = 1; + zflag = 0; + Zflag = 0; + break; + case 'Z': + Zflag = 1; + jflag = 0; + zflag = 0; + break; + default: + fprintf(stderr, "%s: %c: unknown option\n", + progname, *cp & 0377); + usage(); + } + + if (hadtape == 0) { + if ((cp = getenv("TAPE")) != NULL) + usefile = sstrdup(cp); + else + settape(0); + } + /*if (Fflag && Xflag) { + fprintf(stderr, "%s: specify only one of X or F.\n", progname); + usage(); + } our implementation doesn't 
need to enforce this */ + if ((rflag || volsize) && (workdir = open(".", O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open working directory\n", + progname); + done(1); + } + fcntl(workdir, F_SETFD, FD_CLOEXEC); + for (files = 0; argv[files]; files++); + if (rflag) { + if (cflag && tfile != NULL && Xflag == 0) { + usage(); + done(1); + } + if (argv[0] == NULL && Fflag == NULL) { + fprintf(stderr, "%s: Missing filenames\n", + progname); + done(1); + } + if (cflag == 0 && (jflag || zflag || Zflag)) { + fprintf(stderr, "%s: can only create " + "compressed archives\n", + progname); + done(1); + } + ckusefile(); + if (strcmp(usefile, "-") == 0) { + if (cflag == 0) { + fprintf(stderr, "%s: can only create " + "standard output archives\n", + progname); + done(1); + } + mt = dup(1); + } + else if ((mt = open(usefile, O_RDWR)) < 0) { + if (cflag == 0 || (mt = creat(usefile, 0666)) < 0) { + fprintf(stderr, "%s: cannot open %s.\n", + progname, usefile); + done(1); + } + } + domtstat(); + if (jflag || zflag || Zflag) + docomp(jflag ? "bzip2" : Zflag ? 
"compress" : "gzip"); + dorep(argv); + } + else if (xflag) { + ckusefile(); + if (strcmp(usefile, "-") == 0) { + mt = dup(0); + Bflag = 1; + } else if ((mt = open(usefile, O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s.\n", + progname, usefile); + done(1); + } + maybezip = 1; + domtstat(); + doxtract(argv); + } + else if (tflag) { + ckusefile(); + if (strcmp(usefile, "-") == 0) { + mt = dup(0); + Bflag = 1; + } else if ((mt = open(usefile, O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s.\n", + progname, usefile); + done(1); + } + maybezip = 1; + domtstat(); + dotable(argv); + } + else + usage(); + done(0); + /*NOTREACHED*/ + return 0; +} + +static void +usage(void) +{ + fprintf(stderr, "Usage: %s -{txruc}[0-9vfbk[FX]hLiBelmnopwA] " + "[tapefile] [blocksize] [tapesize] [argfile] " + "[exclude-file] [-I include-file] files ...\n", + progname); + done(1); +} + +static void +ckusefile(void) +{ + if (usefile == NULL) { + fprintf(stderr, "%s: device argument required\n", progname); + done(1); + } +} + +static void +cktxru(int c) +{ + static int txruflag; + + if (c == 't' || c == 'x' || c == 'r' || c == 'u') { + if (txruflag) { + fprintf(stderr, "%s: specify only one of [txru].\n", + progname); + usage(); + } + txruflag = c; + } +} + +static void +dorep(char *argv[]) +{ + struct stat stbuf; + + if (!cflag || Xflag) { + if (!cflag) { + tgetdir(&stbuf); + do { + passtape(&stbuf); + tgetdir(&stbuf); + } while (!endtape(1)); + } + if (tfile != NULL) { + char tname[PATH_MAX+1]; + int tfd; + pid_t pid; + fflush(tfile); + rewind(tfile); + settmp(tname, sizeof tname, "%s/tarXXXXXX"); + if ((tfd = mkstemp(tname)) < 0) { + fprintf(stderr, "%s: cannot create temporary " + "file (%s)\n", progname, tname); + done(1); + } + unlink(tname); + fcntl(tfd, F_SETFD, FD_CLOEXEC); + switch (pid = fork()) { + case -1: + fprintf(stderr, "%s: cannot fork\n", progname); + done(1); + /*NOTREACHED*/ + case 0: + dup2(fileno(tfile), 0); + dup2(tfd, 1); + execl(SHELL, "sh", "-c", + 
"PATH=" SV3BIN ":" DEFBIN ":$PATH; " + "LC_ALL=C export LC_ALL; " + /* + * +2 sorts by file name first, for + * binary search. + * +0 sorts by key (X overrides u). + * +1nr sorts by modtime (newer files + * first). + */ + "sort +2 +0 -1 +1nr -2 | uniq -2", + NULL); + fprintf(stderr, "%s: cannot execute %s\n", + progname, SHELL); + _exit(0177); + } + while (waitpid(pid, NULL, 0) != pid); + fclose(tfile); + lseek(tfd, 0, SEEK_SET); + tfile = fdopen(tfd, "r"); + fstat(fileno(tfile), &stbuf); + high = stbuf.st_size; + } + } + + suprmsg(); + if (sigset(SIGHUP, SIG_IGN) != SIG_IGN) + sigset(SIGINT, onsig); + if (sigset(SIGHUP, SIG_IGN) != SIG_IGN) + sigset(SIGHUP, onsig); + if (sigset(SIGQUIT, SIG_IGN) != SIG_IGN) + sigset(SIGQUIT, onsig); +/* + if (sigset(SIGTERM, SIG_IGN) != SIG_IGN) + sigset(SIGTERM, onsig); +*/ + odirect(); + if (Fflag) + fromfile(Fflag); + while (*argv && !term) { + if (argv[0][0] == '-' && argv[0][1] == 'C' && + argv[0][2] == '\0' && argv[1]) { + if (chdir(argv[1]) < 0) + fprintf(stderr, "%s: can't change directories " + "to %s: %s\n", + progname, argv[0], + strerror(errno)); + argv += 2; + if (argv[0] == NULL) + break; + } + if (argv[0][0] == '-' && argv[0][1] == 'I' && + argv[0][2] == '\0') { + if (argv[1]) { + fromfile(argv[1]); + argv += 2; + } else { + fprintf(stderr, "%s: missing file name " + "for -I flag.\n", + progname); + done(1); + } + } else { + doarg(*argv++); + goback(workdir); + } + } + putempty(); + putempty(); + flushtape(); + if (linkerrok == 1) + for (; ihead != NULL; ihead = ihead->nextp) + if (ihead->count != 0) + fprintf(stderr, "Missing links to %s\n", + ihead->pathname); +} + +static void +doarg(char *arg) +{ + register char *cp, *cp2; + + cp2 = arg; + for (cp = arg; *cp; cp++) + if (*cp == '/') + cp2 = cp; + if (cp2 != arg) { + *cp2 = '\0'; + chdir(arg); + *cp2 = '/'; + cp2++; + } + putfile(arg, cp2, workdir, 0); +} + +static int +endtape(int rew) +{ + if (hbuf.name[0] == '\0') { + if (rew) + backtape(rew); + return(1); + } 
+ else + return(0); +} + +static void +tgetdir(register struct stat *sp) +{ + long long lval1, lval2; + + readtape( (char *) &dblock); + if (dblock.dbuf.name[0] && gnuflag < 0) + if ((gnuflag=memcmp(dblock.dbuf.magic, "ustar \0", 8)==0)!=0) + Eflag = 0; + if (dblock.dbuf.name[0] && oldflag < 0) + if ((oldflag=memcmp(dblock.dbuf.magic, "ustar", 5)!=0)!=0) + Eflag = 0; + if (hbuf.name) + hbuf.name[0] = '\0'; + if (hbuf.linkname) + hbuf.linkname[0] = '\0'; + paxrec = globrec; + *sp = globst; + while (gnuflag==0 && oldflag==0 && (dblock.dbuf.linkflag == 'x' || + dblock.dbuf.linkflag == 'g' || + dblock.dbuf.linkflag == 'X' /* sun */)) + tgetpax(sp, &lval1, &lval2); + tgetnam(); + if (hbuf.name[0] == '\0') + return; + sp->st_mode = tgetval(dblock.dbuf.mode, sizeof dblock.dbuf.mode)&07777; + if ((paxrec & PR_UID) == 0) + sp->st_uid = tgetval(dblock.dbuf.uid, sizeof dblock.dbuf.uid); + if ((paxrec & PR_GID) == 0) + sp->st_gid = tgetval(dblock.dbuf.gid, sizeof dblock.dbuf.gid); + if ((paxrec & PR_SIZE) == 0) + sp->st_size = + tgetval(dblock.dbuf.size, sizeof dblock.dbuf.size); + if ((paxrec & PR_MTIME) == 0) + sp->st_mtime = + tgetval(dblock.dbuf.mtime, sizeof dblock.dbuf.mtime); + sscanf(dblock.dbuf.chksum, "%o", &chksum); + if (chksum != checksum(0) && chksum != checksum(1)) { + fprintf(stderr, "%s: directory checksum error\n", progname); + if (iflag == 0) + done(2); + } + if ((paxrec & PR_SUN_DEVMAJOR) == 0) + sscanf(dblock.dbuf.devmajor, "%llo", &lval1); + if ((paxrec & PR_SUN_DEVMINOR) == 0) + sscanf(dblock.dbuf.devminor, "%llo", &lval2); + sp->st_rdev = makedev(lval1, lval2); + if (tfile != NULL) { + if (strchr(hbuf.name, '\n') == NULL) { + int s; + s = 3 * fprintf(tfile, "u %0*lo %s\n", TIMSIZ, + (long)sp->st_mtime, hbuf.name); + if (s > N) + N = s; + } else + fprintf(stderr, "%s: warning: file name '%s' in " + "archive contains a newline character " + "and will always be added to archive\n", + progname, hbuf.name); + } +} + +static void +tgetnam(void) +{ +again: if 
(dblock.dbuf.linkflag == 'L' && (gnuflag>0 || + strcmp(dblock.dbuf.name, "././@LongLink") == 0)) { + tgetgnu(&hbuf.name, &hbuf.namesize); + goto again; + } + if (dblock.dbuf.linkflag == 'K' && (gnuflag>0 || + strcmp(dblock.dbuf.name, "././@LongLink") == 0)) { + tgetgnu(&hbuf.linkname, &hbuf.linksize); + goto again; + } + if ((hbuf.name == NULL || hbuf.namesize < NAMSIZ+PFXSIZ+2) && + (paxrec & PR_PATH) == 0) { + hbuf.namesize = NAMSIZ+PFXSIZ+2; + hbuf.name = srealloc(hbuf.name, hbuf.namesize); + hbuf.name[0] = '\0'; + } + if ((hbuf.linkname == NULL || hbuf.linksize < NAMSIZ+1) && + (paxrec & PR_LINKPATH) == 0) { + hbuf.linksize = NAMSIZ+1; + hbuf.linkname = srealloc(hbuf.linkname, hbuf.linksize); + hbuf.linkname[0] = '\0'; + } + if (hbuf.name[0] == '\0' && (paxrec & PR_PATH) == 0) + nameof(&dblock.dbuf, hbuf.name); + if (hbuf.linkname[0] == '\0' && (paxrec & PR_LINKPATH) == 0) + linkof(&dblock.dbuf, hbuf.linkname); + hbuf.rname = hbuf.name; + if (Aflag) { + while (hbuf.rname[0] == '/') + hbuf.rname++; + if (hbuf.name[0] && hbuf.rname[0] == '\0') + hbuf.rname = "."; + } + hbuf.rlinkname = hbuf.linkname; + if (Aflag) { + while (hbuf.rlinkname[0] == '/') + hbuf.rlinkname++; + if (hbuf.linkname[0] && hbuf.rlinkname[0] == '\0') + hbuf.rlinkname = "."; + } +} + +static void +tgetgnu(char **np, long *sp) +{ + char buf[TBLOCK]; + long long blocks; + long n, bytes; + + n = tgetval(dblock.dbuf.size, sizeof dblock.dbuf.size); + if (*sp <= n) + *np = srealloc(*np, *sp = n+1); + blocks = n; + blocks += TBLOCK-1; + blocks /= TBLOCK; + bytes = n; + while (blocks-- > 0) { + readtape(buf); + memcpy(&(*np)[n-bytes], buf, bytes>TBLOCK?TBLOCK:bytes); + bytes -= TBLOCK; + } + (*np)[n] = '\0'; + readtape((char *)&dblock); +} + +static void +tgetpax(struct stat *sp, long long *devmajor, long long *devminor) +{ + char *keyword, *value; + char buf[TBLOCK]; + char *block, *bp; + long long n, blocks, bytes; + enum paxrec pr; + + n = tgetval(dblock.dbuf.size, sizeof dblock.dbuf.size); + bp = 
block = smalloc(n+1); + blocks = n; + blocks += TBLOCK-1; + blocks /= TBLOCK; + bytes = n; + while (blocks-- > 0) { + readtape(buf); + memcpy(&block[n-bytes], buf, bytes>TBLOCK?TBLOCK:bytes); + bytes -= TBLOCK; + } + block[n] = '\0'; + while (bp < &block[n]) { + int c; + pr = tgetrec(&bp, &keyword, &value); + switch (pr) { + case PR_ATIME: + sp->st_atime = strtoll(value, NULL, 10); + break; + case PR_GID: + sp->st_gid = strtoll(value, NULL, 10); + break; + case PR_LINKPATH: + c = strlen(value); + if (hbuf.linkname == NULL || hbuf.linksize < c+1) { + hbuf.linksize = c+1; + hbuf.linkname = srealloc(hbuf.linkname, c+1); + } + strcpy(hbuf.linkname, value); + break; + case PR_MTIME: + sp->st_mtime = strtoll(value, NULL, 10); + break; + case PR_PATH: + c = strlen(value); + if (hbuf.name == NULL || hbuf.namesize < c+1) { + hbuf.namesize = c+1; + hbuf.name = srealloc(hbuf.name, c+1); + } + strcpy(hbuf.name, value); + break; + case PR_SIZE: + sp->st_size = strtoll(value, NULL, 10); + break; + case PR_UID: + sp->st_uid = strtoll(value, NULL, 10); + break; + case PR_SUN_DEVMAJOR: + *devmajor = strtoll(value, NULL, 10); + break; + case PR_SUN_DEVMINOR: + *devminor = strtoll(value, NULL, 10); + break; + } + paxrec |= pr; + } + if (dblock.dbuf.linkflag == 'g') { + globrec = paxrec & ~(PR_LINKPATH|PR_PATH|PR_SIZE); + globst = *sp; + } else if (dblock.dbuf.linkflag == 'X') + Eflag = 1; + readtape((char *)&dblock); + free(block); +} + +static enum paxrec +tgetrec(char **bp, char **keyword, char **value) +{ + char *x; + long n = 0; + enum paxrec pr; + + *keyword = ""; + *value = ""; + while (**bp && (n = strtol(*bp, &x, 10)) <= 0 && (*x!=' ' || *x!='\t')) + do + (*bp)++; + while (**bp && **bp != '\n'); + if (*x == '\0' || **bp == '\0') { + (*bp)++; + return PR_NONE; + } + while (x < &(*bp)[n] && (*x == ' ' || *x == '\t')) + x++; + if (x == &(*bp)[n] || *x == '=') + goto out; + *keyword = x; + while (x < &(*bp)[n] && *x != '=') + x++; + if (x == &(*bp)[n]) + goto out; + *x = '\0'; + 
if (&x[1] < &(*bp)[n]) + *value = &x[1]; + (*bp)[n-1] = '\0'; +out: *bp = &(*bp)[n]; + if (strcmp(*keyword, "atime") == 0) + pr = PR_ATIME; + else if (strcmp(*keyword, "gid") == 0) + pr = PR_GID; + else if (strcmp(*keyword, "linkpath") == 0) + pr = PR_LINKPATH; + else if (strcmp(*keyword, "mtime") == 0) + pr = PR_MTIME; + else if (strcmp(*keyword, "path") == 0) + pr = PR_PATH; + else if (strcmp(*keyword, "size") == 0) + pr = PR_SIZE; + else if (strcmp(*keyword, "uid") == 0) + pr = PR_UID; + else if (strcmp(*keyword, "SUN.devmajor") == 0) + pr = PR_SUN_DEVMAJOR; + else if (strcmp(*keyword, "SUN.devminor") == 0) + pr = PR_SUN_DEVMINOR; + else + pr = PR_NONE; + return pr; +} + +static long long +tgetval(const char *s, int k) +{ + long long n = 0; + int i, h = 0; + + if (gnuflag>0 && s[0] & 0200) { + for (i = k-1; i > 0; i--) { + n += (s[i]&0377) << h; + h += 8; + } + n += (s[0]&0177) << h; + } else { + i = 0; + while (spacechar(s[i]&0377)) + i++; + while (i < k && s[i] && !spacechar(s[i]&0377)) { + n *= 8; + n += s[i++]-'0'; + } + } + return n; +} + +static void +passtape(register struct stat *sp) +{ + long long blocks; + char buf[TBLOCK]; + + switch (dblock.dbuf.linkflag) { + case '2': + case '3': + case '4': + case '5': + case '6': + break; + case '1': + if (oldflag > 0 || gnuflag > 0) + break; + /*FALLTHRU*/ + default: + blocks = sp->st_size; + blocks += TBLOCK-1; + blocks /= TBLOCK; + + while (blocks-- > 0) { + if (((mtstat.st_mode&S_IFMT) == S_IFBLK || + (mtstat.st_mode&S_IFMT) == S_IFREG) && + recno >= nblock && blocks >= nblock) { + long long lpos; + lpos = (blocks/nblock)*nblock*TBLOCK; + if ((volsize == 0 || rdtotal+lpos < volsize) && + lseek(mt, lpos, SEEK_CUR) + != (off_t)-1) { + blocks %= nblock; + rdtotal += lpos; + } + } + readtape(buf); + } + } +} + +static void +putfile(const char *longname, const char *shortname, int olddir, int vis_cnt) +{ + struct stat stbuf; + int infile = -1; + char *copy = NULL, *cend; + size_t sz, slen, ss; + struct direc *dp; + 
struct getdb *db; + int i, j; + int skip = 0; + char *symblink = 0; + + paxrec = globrec; + if ((hflag ? stat : lstat)(shortname, &stbuf) < 0) { + fprintf(stderr, "%s: could not stat %s\n", progname, longname); + edone(1); + return; + } + if ((stbuf.st_mode&S_IFMT) == S_IFREG && + stbuf.st_dev == mtstat.st_dev && + stbuf.st_ino == mtstat.st_ino) { + fprintf(stderr, "%s: %s same as archive file\n", + progname, longname); + return; + } + if ((stbuf.st_mode&S_IFMT) == S_IFREG && + stbuf.st_size > 077777777777LL) { + if (gnuflag > 0 || oldflag > 0) { + fprintf(stderr, "%s: %s too large (limit 8 GB)\n", + progname, longname); + return; + } + paxrec |= PR_SIZE; + } + if (sysv3 && eflag && (stbuf.st_mode&S_IFMT) == S_IFREG && + stbuf.st_size > volsize - wrtotal - 512) { + fprintf(stderr, "%s: Single file cannot fit on volume\n", + progname); + done(3); + } + if ((stbuf.st_mode&S_IFMT) == S_IFREG || + (stbuf.st_mode&S_IFMT) == S_IFDIR) { + infile = open(shortname, O_RDONLY); + if (infile < 0) { + fprintf(stderr, "%s: %s: cannot open file\n", + progname, longname); + edone(1); + return; + } + } + + if (tfile != NULL && (i = checkupdate(longname, &stbuf)) != '\0') { + if (infile >= 0 && (stbuf.st_mode & S_IFMT) == S_IFDIR && + i != 'X') + skip = 1; + else { + if (i == 'X' && vflag) + fprintf(stderr, "a %s%s excluded\n", longname, + (stbuf.st_mode&S_IFMT) == S_IFDIR ? 
+ "/" : ""); + goto ret; + } + } + if (!skip && checkw('r', shortname, &stbuf, -1) == 0) + goto ret; + + if ((stbuf.st_mode & S_IFMT) == S_IFDIR && infile >= 0) { + if (order == PREORDER) + goto out; + cont: if (hflag) { + for (i = 0; i < vis_cnt; i++) + if (stbuf.st_dev == vis_dev[i] && + stbuf.st_ino == vis_ino[i]) + goto fini; + if (vis_cnt >= vis_max) { + vis_max += 20; + vis_dev = srealloc(vis_dev, sizeof *vis_dev * + vis_max); + vis_ino = srealloc(vis_ino, sizeof *vis_ino * + vis_max); + } + vis_dev[vis_cnt] = stbuf.st_dev; + vis_ino[vis_cnt] = stbuf.st_ino; + vis_cnt++; + } + getpath(longname, &copy, &cend, &sz, &slen); + if (fchdir(infile) < 0) { + fprintf(stderr, "can't change directories to %s\n", + shortname); + goto fini; + } + db = getdb_alloc(shortname, infile); + while ((dp = getdir(db, &j)) != NULL && !term) { + if (dp->d_ino == 0) + continue; + if (strcmp(".", dp->d_name) == 0 || + strcmp("..", dp->d_name) == 0) + continue; + setpath(dp->d_name, &copy, &cend, slen, &sz, &ss); + putfile(copy, dp->d_name, infile, vis_cnt); + i++; + } + getdb_free(db); + if (fchdir(olddir) < 0) { + fprintf(stderr, "cannot change back?: %s\n", + strerror(errno)); + done(1); + } + fini: free(copy); + if (order == PREORDER) + goto ret; + } +out: + tomodes(longname, &stbuf); + + if (skip || mkname(&dblock.dbuf, longname) < 0) + goto ret; + + if (stbuf.st_nlink > 1 && (stbuf.st_mode&S_IFMT) != S_IFDIR) { + struct dslot *dp; + struct islot *ip; + + dp = dfind(&devices, stbuf.st_dev); + if ((ip = ifind(stbuf.st_ino, &dp->itree)) != NULL) { + ip->count--; + sprintf(dblock.dbuf.size, "%11.11o", 0); + if (mklink(&dblock.dbuf, ip->pathname, longname) < 0) + goto ret; + dblock.dbuf.linkflag = '1'; + sprintf(dblock.dbuf.chksum, "%7.7o", checksum(0)); + if (paxrec != PR_NONE && oldflag <= 0 && gnuflag <= 0) + wrpax(longname, ip->pathname, &stbuf); + writetape( (char *) &dblock); + if (vflag) + fprintf(stderr, "a %s link to %s\n", + longname, ip->pathname); + goto ret; + } + else { 
+ int namelen = strlen(longname); + ip = calloc(1, sizeof *ip); + if (ip == 0 || (ip->pathname=malloc(namelen+1)) == 0) { + if (freemem) { + write(2, "Out of memory. " + "Link information lost\n", 37); + freemem = 0; + edone(1); + } + } else { + ip->nextp = ihead; + ihead = ip; + ip->inum = stbuf.st_ino; + ip->count = stbuf.st_nlink - 1; + strcpy(ip->pathname, longname); + iput(ip, &dp->itree); + } + } + } + + switch (stbuf.st_mode & S_IFMT) { + default: + def: fprintf(stderr, "%s: %s is not a file. Not dumped\n", + progname, longname); + edone(1); + goto ret; + case S_IFREG: + dblock.dbuf.linkflag = '0'; + wrhdr(longname, NULL, &stbuf); + putreg(longname, shortname, infile, &stbuf); + goto ret; + case S_IFLNK: + if (putsym(longname, shortname, &symblink, stbuf.st_size) < 0) + goto ret; + dblock.dbuf.linkflag = '2'; + break; + case S_IFCHR: + if (oldflag > 0) + goto def; + dblock.dbuf.linkflag = '3'; + break; + case S_IFBLK: + if (oldflag > 0) + goto def; + dblock.dbuf.linkflag = '4'; + break; + case S_IFDIR: + if (oldflag > 0) + goto nop; + dblock.dbuf.linkflag = '5'; + break; + case S_IFIFO: + if (oldflag > 0) + goto def; + dblock.dbuf.linkflag = '6'; + break; + } + wrhdr(longname, symblink, &stbuf); + free(symblink); +nop: if (order == PREORDER && infile >= 0 && (stbuf.st_mode&S_IFMT)==S_IFDIR) + goto cont; +ret: if (infile >= 0) + close(infile); +} + +static void +putreg(const char *longname, const char *shortname, int infile, struct stat *sp) +{ + long long blocks; + char buf[TBLOCK]; + int i; + + blocks = (sp->st_size + (TBLOCK-1)) / TBLOCK; + midfile = 1; + while ((i = read(infile, buf, TBLOCK)) > 0 && blocks > 0) { + if (i < TBLOCK) + memset(&buf[i], 0, TBLOCK - i); + writetape(buf); + blocks--; + } + close(infile); + midfile = 0; + if (blocks != 0 || i != 0) { + fprintf(stderr, "%s: file changed size\n", longname); + edone(1); + } + while (blocks-- > 0) + putempty(); +} + +static int +putsym(const char *longname, const char *shortname, + char **symblink, 
size_t size) +{ + size_t n; + ssize_t len; + + n = size ? size : PATH_MAX; + *symblink = smalloc(n+1); + if ((len = readlink(shortname, *symblink, n)) < 0) { + fprintf(stderr, "can't read symbolic link %s\n", longname); + edone(1); + return -1; + } + (*symblink)[len] = '\0'; + if (len >= 100) { + if (oldflag <= 0 && gnuflag <= 0 && utf8(*symblink)) { + paxrec |= PR_LINKPATH; + strcpy(dblock.dbuf.linkname, sequence()); + return 0; + } + fprintf(stderr, "%s: %s: symbolic link too long\n", + progname, longname); + edone(1); + return -1; + } + memcpy(dblock.dbuf.linkname, *symblink, len); + if (len < NAMSIZ) + dblock.dbuf.linkname[len] = '\0'; + return 0; +} + +static void +wrhdr(const char *longname, const char *symblink, struct stat *sp) +{ + long long blocks; + + blocks = (sp->st_mode&S_IFMT) == S_IFREG ? + (sp->st_size + (TBLOCK-1)) / TBLOCK : 0; + if (vflag) { + if (nflag) + fprintf(stderr, "seek = %lldK\t", + (wrtotal+recno*2+1023)/1024); + fprintf(stderr, "a %s%s ", longname, + (sp->st_mode&S_IFMT) == S_IFDIR ? 
"/" : ""); + if (symblink) + fprintf(stderr, "symbolic link to %s\n", symblink); + else if (nflag) + fprintf(stderr, "%lldK\n", + blocks&01?blocks|02:blocks>>1); + else + fprintf(stderr, "%lld tape blocks\n", blocks); + } + sprintf(dblock.dbuf.chksum, "%7.7o", checksum(0)); + if (paxrec != PR_NONE && oldflag <= 0 && gnuflag <= 0) + wrpax(longname, symblink, sp); + writetape( (char *) &dblock); +} + +static void +wrpax(const char *longname, const char *linkname, struct stat *sp) +{ + char oblock[TBLOCK]; + char *pdata = NULL; + long psize = 0, pcur = 0; + long long blocks, i; + + memcpy(oblock, (char *)&dblock, TBLOCK); + memset((char *)&dblock, 0, TBLOCK); + if (paxrec & PR_ATIME) + addrec(&pdata, &psize, &pcur, "atime", NULL, sp->st_atime); + if (paxrec & PR_GID) + addrec(&pdata, &psize, &pcur, "gid", NULL, sp->st_gid); + if (paxrec & PR_LINKPATH) + addrec(&pdata, &psize, &pcur, "linkpath", linkname, 0); + if (paxrec & PR_MTIME) + addrec(&pdata, &psize, &pcur, "mtime", NULL, sp->st_mtime); + if (paxrec & PR_PATH) + addrec(&pdata, &psize, &pcur, "path", longname, 0); + if (paxrec & PR_SIZE) + addrec(&pdata, &psize, &pcur, "size", NULL, sp->st_size); + if (paxrec & PR_UID) + addrec(&pdata, &psize, &pcur, "uid", NULL, sp->st_uid); + if (paxrec & PR_SUN_DEVMAJOR) + addrec(&pdata, &psize, &pcur, "SUN.devmajor", NULL, + major(sp->st_rdev)); + if (paxrec & PR_SUN_DEVMINOR) + addrec(&pdata, &psize, &pcur, "SUN.devminor", NULL, + minor(sp->st_rdev)); + paxnam(&dblock.dbuf, longname); + sprintf(dblock.dbuf.mode, "%7.7o", 0444); + sprintf(dblock.dbuf.uid, "%7.7o", 0); + sprintf(dblock.dbuf.gid, "%7.7o", 0); + sprintf(dblock.dbuf.size, "%11.11lo", pcur); + sprintf(dblock.dbuf.mtime, "%11.11o", 0); + strcpy(dblock.dbuf.magic, "ustar"); + dblock.dbuf.version[0] = dblock.dbuf.version[1] = '0'; + strcpy(dblock.dbuf.uname, "root"); + strcpy(dblock.dbuf.gname, "root"); + dblock.dbuf.linkflag = Eflag ? 
'X' : 'x'; + sprintf(dblock.dbuf.chksum, "%7.7o", checksum(0)); + writetape( (char *) &dblock); + memset(&pdata[pcur], 0, psize - pcur); + blocks = (pcur + (TBLOCK-1)) / TBLOCK; + for (i = 0; i < blocks; i++) + writetape(&pdata[i*TBLOCK]); + memcpy((char *)&dblock, oblock, TBLOCK); + free(pdata); +} + +static void +addrec(char **pdata, long *psize, long *pcur, + const char *keyword, const char *sval, long long lval) +{ + char dval[25], xval[25]; + long od, d, r; + + if (sval == 0) { + sprintf(xval, "%lld", lval); + sval = xval; + } + r = strlen(keyword) + strlen(sval) + 3; + d = 0; + do { + od = d; + sprintf(dval, "%ld", od + r); + d = strlen(dval); + } while (d != od); + *psize += d + r + 1 + 512; + *pdata = srealloc(*pdata, *psize); + sprintf(&(*pdata)[*pcur], "%s %s=%s\n", dval, keyword, sval); + *pcur += d + r; +} + +static void +paxnam(struct header *hp, const char *name) +{ + char buf[257], *bp; + const char *cp, *np; + int bl = 0; + static int pid; + + if (pid == 0) + pid = getpid(); + for (np = name; *np; np++); + while (np > name && *np != '/') { + np--; + bl++; + } + if ((np > name || *name == '/') && np-name <= 120) + for (bp = buf, cp = name; cp < np; bp++, cp++) + *bp = *cp; + else { + *buf = '.'; + bp = &buf[1]; + } + snprintf(bp, sizeof buf - (bp - buf), "/PaxHeaders.%d/%s", + pid, bl < 100 ? 
np>name?&np[1]:name : sequence()); + mkname(hp, buf); +} + +static void +doxtract(char *argv[]) +{ + struct stat stbuf; + char *name; + char **cp; + int try; + + suprmsg(); + for (;;) { + try = 0; + tgetdir(&stbuf); + if (endtape(0)) + break; + + name = hbuf.rname; + if (*argv == 0) + goto gotit; + + for (cp = argv; *cp; cp++) + if (prefix(*cp, name)) { + try = 1; + goto gotit; + } + passtape(&stbuf); + continue; + +gotit: + if (checkw('x', name, &stbuf, dblock.dbuf.linkflag) == 0) { + passtape(&stbuf); + continue; + } + + if (checkdir(name, &stbuf) == '/') + goto dir; + + switch (dblock.dbuf.linkflag) { + default: + case '0': + case '\0': + if (xtrreg(name, &stbuf) < 0) + continue; + break; + case '1': + if (xtrlink(name, &stbuf, 0) == 0) + passtape(&stbuf); + else if (stbuf.st_size > 0) + xtrreg(name, &stbuf); + continue; + case '2': + xtrlink(name, &stbuf, 1); + continue; + case '3': + if (xtrdev(name, &stbuf, S_IFCHR) < 0) + continue; + break; + case '4': + if (xtrdev(name, &stbuf, S_IFBLK) < 0) + continue; + break; + case '5': + dir: if (xtrdir(name, &stbuf) < 0) + continue; + break; + case '6': + if (xtrdev(name, &stbuf, S_IFIFO) < 0) + continue; + break; + } + if (pflag) + chmod(name, stbuf.st_mode & cmask(&stbuf, 0)); + if (mflag == 0) { + struct utimbuf timep; + + if (paxrec & PR_ATIME) + timep.actime = stbuf.st_atime; + else + timep.actime = time(NULL); + timep.modtime = stbuf.st_mtime; + if (utime(name, &timep) < 0) + fprintf(stderr, "can't set time on %s\n", name); + } + gotcha += try; + } + if (gotcha < files) + fprintf(stderr, "%s: %ld file(s) not extracted\n", + progname, files - gotcha); +} + +static int +xtrreg(const char *name, struct stat *sp) +{ + long long blocks, bytes; + char buf[TBLOCK]; + int ofile; + + remove(name); + if ((ofile = creat(name, sp->st_mode & cmask(sp, 1))) < 0) { + fprintf(stderr, "%s: %s - cannot create\n", progname, name); + edone(1); + passtape(sp); + return -1; + } + tchown(chown, name, sp->st_uid, sp->st_gid); + 
blocksof(name, sp, &blocks, &bytes); + while (blocks-- > 0) { + readtape(buf); + if (bytes > TBLOCK) { + if (write(ofile, buf, TBLOCK) < 0) { + fprintf(stderr, + "%s: %s: HELP - extract write error\n", + progname, name); + done(2); + } + } else + if (write(ofile, buf, (int) bytes) < 0) { + fprintf(stderr, + "%s: %s: HELP - extract write error\n", + progname, name); + done(2); + } + bytes -= TBLOCK; + } + close(ofile); + return 0; +} + +static int +xtrlink(const char *name, struct stat *sp, int symbolic) +{ + struct stat nst, ost; + + if (lstat(name, &nst) == 0) { + if ((nst.st_mode & S_IFMT) == S_IFDIR) + rmdir(name); + else if (!symbolic && lstat(hbuf.rlinkname, &ost) == 0 && + nst.st_dev == ost.st_dev && + nst.st_ino == ost.st_ino) + /* An attempt to hardlink "name" to itself. This + * happens if a file with more than link has been + * stored in the archive more than once under the + * same name. This is odd but the best we can do + * is nothing at all in such a case. */ + goto good; + else + unlink(name); + } + if ((symbolic?symlink:link)(symbolic?hbuf.linkname:hbuf.rlinkname, + name) < 0) { + if (symbolic) + fprintf(stderr, "%s: symbolic link failed\n", name); + else + fprintf(stderr, "%s: %s: cannot link\n", + progname, name); + edone(1); + return -1; + } +good: if (vflag) + fprintf(stderr, "%s %s %s\n", name, + symbolic ? 
"symbolic link to" : "linked to", + hbuf.linkname); + if (symbolic) + tchown(lchown, name, sp->st_uid, sp->st_gid); + return 0; +} + +static int +xtrdev(const char *name, struct stat *sp, mode_t type) +{ + remove(name); + if (mknod(name, sp->st_mode&cmask(sp, 1) | type, sp->st_rdev) < 0) { + fprintf(stderr, "Can't create special %s\n", name); + edone(1); + return -1; + } + tchown(chown, name, sp->st_uid, sp->st_gid); + return 0; +} + +static int +xtrdir(const char *name, struct stat *sp) +{ + remove(name); + if (mkdir(name, sp->st_mode&cmask(sp, 1)|0700) < 0 && errno != EEXIST) { + fprintf(stderr, "%s: %s: %s\n", progname, name, + strerror(errno)); + edone(1); + return -1; + } + tchown(chown, name, sp->st_uid, sp->st_gid); + return 0; +} + +static void +blocksof(const char *name, struct stat *sp, long long *blocks, long long *bytes) +{ + *blocks = ((*bytes = sp->st_size) + TBLOCK-1)/TBLOCK; + if (vflag) + fprintf(stderr, "x %s, %lld bytes, ""%lld%s\n", + name, *bytes, + nflag ? (*blocks&01?*blocks|02:*blocks)>>1 : *blocks, + nflag ? 
"K" : " tape blocks"); +} + +static void +tchown(int (*chfn)(const char *, uid_t, gid_t), + const char *name, uid_t uid, gid_t gid) +{ + if (oflag == 0) { + if (chfn(name, uid, -1) < 0) + fprintf(stderr, "%s: %s: owner not changed\n", + progname, name); + if (chfn(name, -1, gid) < 0) + fprintf(stderr, "%s: %s: group not changed\n", + progname, name); + } +} + +static void +dotable(char *argv[]) +{ + struct stat stbuf; + char **cp; + char *name; + + for (;;) { + tgetdir(&stbuf); + if (endtape(0)) + break; + name = hbuf.name; + if (*argv == 0) + goto yes; + for (cp = argv; *cp; cp++) + if (prefix(*cp, name)) { + gotcha++; + goto yes; + } + goto no; + yes: if (vflag) + longt(&stbuf, dblock.dbuf.linkflag); + printf("%s", name); + if (dblock.dbuf.linkflag == '1') + printf(" linked to %s", hbuf.linkname); + else if (dblock.dbuf.linkflag == '2') + printf(" symbolic link to %s", hbuf.linkname); + printf("\n"); + no: passtape(&stbuf); + } + if (gotcha < files) + fprintf(stderr, "%s: %ld file(s) not found\n", + progname, files - gotcha); +} + +static void +putempty(void) +{ + char buf[TBLOCK]; + + memset(buf, 0, sizeof buf); + writetape(buf); +} + +static void +longt(register struct stat *st, int linkflag) +{ + struct tm *tp; + char buf[20]; + + pmode(st, linkflag); + printf("%3ld/%-3ld", (long)st->st_uid, (long)st->st_gid); + printf(" %6lld", (long long)st->st_size); + tp = localtime(&st->st_mtime); + strftime(buf, sizeof buf, "%b %e %H:%M %Y", tp); + printf(" %17.17s ", buf); +} + +#define SUID 04000 +#define SGID 02010 +#define NFMT 02000 +#define ROWN 0400 +#define WOWN 0200 +#define XOWN 0100 +#define RGRP 040 +#define WGRP 020 +#define XGRP 010 +#define ROTH 04 +#define WOTH 02 +#define XOTH 01 +#define STXT 01000 +static int m1[] = { 1, ROWN, 'r', '-' }; +static int m2[] = { 1, WOWN, 'w', '-' }; +static int m3[] = { 2, SUID, 's', XOWN, 'x', '-' }; +static int m4[] = { 1, RGRP, 'r', '-' }; +static int m5[] = { 1, WGRP, 'w', '-' }; +static int m6[] = { 3, SGID, 's', 
NFMT, 'l', XGRP, 'x', '-' }; +static int m7[] = { 1, ROTH, 'r', '-' }; +static int m8[] = { 1, WOTH, 'w', '-' }; +static int m9[] = { 2, STXT, 't', XOTH, 'x', '-' }; + +static int *m[] = { m1, m2, m3, m4, m5, m6, m7, m8, m9}; + +static void +pmode(register struct stat *st, int linkflag) +{ + register int **mp; + int c; + + switch (linkflag) { + case -1: + switch (st->st_mode & S_IFMT) { + case S_IFLNK: c = 'l'; break; + case S_IFCHR: c = 'c'; break; + case S_IFBLK: c = 'b'; break; + case S_IFDIR: c = 'd'; break; + case S_IFIFO: c = 'p'; break; + default: c = '-'; + } break; + case '2': c = 'l'; break; + case '3': c = 'c'; break; + case '4': c = 'b'; break; + case '5': c = 'd'; break; + case '6': c = 'p'; break; + default: c = '-'; + } + printf("%c", c); + for (mp = &m[0]; mp < &m[9];) + tselect(*mp++, st); +} + +static void +tselect(int *pairp, struct stat *st) +{ + register int n, *ap; + + ap = pairp; + n = *ap++; + while (--n>=0 && (ap++, (st->st_mode&ap[-1])!=ap[-1])) + ap++; + printf("%c", *ap); +} + +static int +checkdir(register char *name, struct stat *sp) +{ + register char *cp; + + for (cp = name; *cp; cp++) { + if (*cp == '/' && cp > name) { + *cp = '\0'; + if (access(name, X_OK) < 0) { + if (mkdir(name, 0777) < 0 && errno != EEXIST) { + fprintf(stderr, "%s: %s: %s\n", + progname, name, + strerror(errno)); + edone(1); + } + tchown(chown, name, sp->st_uid, sp->st_gid); + } + *cp = '/'; + } + } + return cp > name ? 
cp[-1] : cp[0]; +} + +/*ARGUSED*/ +static void +onsig(int signo) +{ + sigset(signo, SIG_IGN); + if (midfile) { + fprintf(stderr, "%s: Interrupted in the middle of a file\n", + progname); + done(signo | 0200); + } + term = 1; +} + +static void +tomodes(const char *name, register struct stat *sp) +{ + const char *cp; + int mode; + + memset(&dblock, 0, sizeof dblock); + mode = gnuflag<=0&&!Eflag?sp->st_mode&07777:sp->st_mode&(07777|S_IFMT); + sprintf(dblock.dbuf.mode, "%7.7o", mode); + sprintf(dblock.dbuf.uid, "%7.7lo", (long)(sp->st_uid <= 07777777 ? + sp->st_uid : (paxrec |= PR_UID, 60001))); + sprintf(dblock.dbuf.gid, "%7.7lo", (long)(sp->st_gid <= 07777777 ? + sp->st_gid : (paxrec |= PR_GID, 60001))); + sprintf(dblock.dbuf.size, "%11.11llo", (sp->st_mode&S_IFMT)==S_IFREG ? + (long long)sp->st_size&077777777777LL : 0LL); + sprintf(dblock.dbuf.mtime, "%11.11lo", (long)sp->st_mtime); + if (oldflag <= 0) { + strcpy(dblock.dbuf.magic, gnuflag>0 ? "ustar " : "ustar"); + if (gnuflag <= 0) + dblock.dbuf.version[0] = dblock.dbuf.version[1] = '0'; + if ((cp = getuser(sp->st_uid)) != NULL) + sprintf(dblock.dbuf.uname, "%.31s", cp); + else + fprintf(stderr, + "%s: could not get passwd information for %s\n", + progname, name); + if ((cp = getgroup(sp->st_gid)) != NULL) + sprintf(dblock.dbuf.gname, "%.31s", cp); + else + fprintf(stderr, + "%s: could not get group information for %s\n", + progname, name); + if (Eflag && major(sp->st_rdev) > 07777777 && + ((sp->st_mode&S_IFMT) == S_IFBLK || + (sp->st_mode&S_IFMT) == S_IFCHR)) + paxrec |= PR_SUN_DEVMAJOR; + sprintf(dblock.dbuf.devmajor, "%7.7o", + (int)major(sp->st_rdev)&07777777); + if (Eflag && minor(sp->st_rdev) > 07777777 && + ((sp->st_mode&S_IFMT) == S_IFBLK || + (sp->st_mode&S_IFMT) == S_IFCHR)) + paxrec |= PR_SUN_DEVMINOR; + sprintf(dblock.dbuf.devminor, "%7.7o", + (int)minor(sp->st_rdev)&07777777); + } +} + +static int +checksum(int invert) +{ + register uint32_t i; + register char *cp; + + for (cp = dblock.dbuf.chksum; + 
cp < &dblock.dbuf.chksum[sizeof(dblock.dbuf.chksum)]; + cp++) + *cp = ' '; + i = 0; + for (cp = dblock.dummy; cp < &dblock.dummy[TBLOCK]; cp++) + i += oldflag>0^invert ? *(signed char *)cp : *cp & 0377; + return(i); +} + +static int +checkw(int c, const char *name, struct stat *sp, int linkflag) +{ + if (wflag) { + printf("%c ", c); + if (vflag) + longt(sp, linkflag); + printf("%s: ", name); + fflush(stdout); + if (response() == 'y'){ + return(1); + } + return(0); + } + return(1); +} + +static int +response(void) +{ + char c, c2; + + if (read(0, &c, 1) == 1 && c != '\n') + while (read(0, &c2, 1) == 1 && c2 != '\n'); + else c = 'n'; + return(c); +} + +static int +checkupdate(const char *arg, struct stat *sp) +{ + long mtime; /* cf. LONG_MAX in readexcl() */ + off_t seekp; + char c; + + rewind(tfile); + for (;;) { + if ((seekp = lookup(arg)) < 0) + return(0); + fseeko(tfile, seekp+1, SEEK_SET); + fscanf(tfile, "%c %lo", &c, &mtime); + if (c == 'u' && sp->st_mtime > mtime) + return(0); + else + return(c); + } +} + +static void +done(int n) +{ + if (rflag && mt >= 0 && close(mt) < 0 && writerror == 0) { + fprintf(stderr, "%s: tape write error\n", progname); + if (n == 0) + n = 2; + } + exit(n); +} + +static int +prefix(register const char *s1, register const char *s2) +{ + while (*s1) + if (*s1++ != *s2++) + return(0); + if (*s2) + return(*s2 == '/'); + return(1); +} + +static int njab; +static off_t +lookup(const char *s) +{ + return bsrch(s, strlen(s), low, high); +} + +static off_t +bsrch(const char *s, int n, off_t l, off_t h) +{ + register int i, j; + char *b; + off_t m, m1; + + b = alloca(N); + njab = 0; + +loop: + if(l >= h) + return(-1L); + m = l + (h-l)/2 - N/2; + if(m < l) + m = l; + fseeko(tfile, m, SEEK_SET); + fread(b, 1, N, tfile); + njab++; + for(i=0; i<N; i++) { + if(b[i] == '\n') + break; + m++; + } + if(m >= h) + return(-1L); + m1 = m; + j = i; + for(i++; i<N; i++) { + m1++; + if(b[i] == '\n') + break; + } + i = cmp(b+j, s, n); + if(i < 0) { + h = m; 
+ goto loop; + } + if(i > 0) { + l = m1; + goto loop; + } + return(m); +} + +static int +cmp(const char *b, const char *s, size_t n) +{ + register int i; + + if(b[0] != '\n') + abort(); + for(i=0; i<n; i++) { + if((b[i+1+TIMSIZ+1+2]&0377) > (s[i]&0377)) + return(-1); + if((b[i+1+TIMSIZ+1+2]&0377) < (s[i]&0377)) + return(1); + } + return(b[i+1+TIMSIZ+1+2] == '\n'? 0 : -1); +} + +static int +readtape(char *buffer) +{ + static int rd; + int i = -1, j; + +again: if (recno >= nblock || first == 0) { + if (first == 0 && nblock == 1 && bflag == 0) + j = NBLOCK; + else + j = nblock; + if (volsize % TBLOCK*j) { + fprintf(stderr, "%s: Volume size not a multiple " + "of block size.\n", progname); + done(1); + } + if ((rd = i = mtread(tbuf, TBLOCK*j)) < 0) { + fprintf(stderr, "%s: tape read error\n", progname); + done(3); + } + if (maybezip && checkzip(tbuf[0].dummy, i) == 1) + goto again; + if (first == 0 || i == 0) { + if ((i % TBLOCK) != 0 || i == 0) { + tbe: fprintf(stderr, "%s: tape blocksize error\n", + progname); + done(3); + } + i /= TBLOCK; + if (i != nblock) { + if ((mtstat.st_mode&S_IFMT) == S_IFCHR && + (i != 1 || bflag >= B_DEFN)) + fprintf(stderr, "%s: blocksize = %d\n", + progname, i); + nblock = i; + } + if (tapeblock == 0) + tapeblock = i; + } + recno = 0; + } + first = 1; + if ((rd -= TBLOCK) < 0) + goto tbe; + memcpy(buffer, &tbuf[recno++], TBLOCK); + return(TBLOCK); +} + +static int +writetape(const char *buffer) +{ + first = 1; + if (recno >= nblock) { + if (mtwrite(tbuf, TBLOCK*nblock) < 0) { + fprintf(stderr, "%s: tape write error\n", progname); + writerror++; + done(2); + } + recno = 0; + } + memcpy(&tbuf[recno++], buffer, TBLOCK); + if (recno >= nblock) { + if (mtwrite(tbuf, TBLOCK*nblock) < 0) { + fprintf(stderr, "%s: tape write error\n", progname); + writerror++; + done(2); + } + recno = 0; + } + return(TBLOCK); +} + +static void +tseek(int n, int rew) +{ + int fault; +#ifndef __G__ + if (tapeblock > 0 && rew) { +#if defined (__linux__) || defined 
(__sun) || defined (__FreeBSD__) || \ + defined (__hpux) || defined (_AIX) || defined (__NetBSD__) || \ + defined (__OpenBSD__) || defined (__DragonFly__) || defined (__APPLE__) + struct mtop mo; + mo.mt_op = n > 0 ? MTFSR : MTBSR; + mo.mt_count = (n > 0 ? n : -n) / tapeblock; + fault = ioctl(mt, MTIOCTOP, &mo) < 0; +#else /* SVR4.2MP */ + int t, a; + t = n > 0 ? T_SBF : T_SBB; + a = (n > 0 ? n : -n) / tapeblock; + fault = ioctl(mt, t, a) < 0; +#endif /* SVR4.2MP */ + } else +#endif + fault = lseek(mt, TBLOCK*n, SEEK_CUR) == (off_t)-1; + if (fault && rew) { + fprintf(stderr, "%s: device seek error\n", progname); + done(4); + } +} + +static void +backtape(int rew) +{ + tseek(-nblock, rew); + if (recno >= nblock) { + recno = nblock - 1; + if (mtread(tbuf, TBLOCK*nblock) < 0) { + fprintf(stderr, "%s: tape read error after seek\n", + progname); + done(4); + } + tseek(-nblock, rew); + } else if (rew) + recno--; +} + +static void +flushtape(void) +{ + if (mtwrite(tbuf, TBLOCK*nblock) < 0) { + fprintf(stderr, "%s: tape write error\n", progname); + writerror++; + done(2); + } +} + +static void * +srealloc(void *op, size_t size) +{ + void *np; + + if ((np = realloc(op, size)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} + +static void * +smalloc(size_t size) +{ + return srealloc(NULL, size); +} + +static void * +scalloc(size_t count, size_t nelem) +{ + void *np; + + if ((np = calloc(count, nelem)) == NULL) { + write(2, "no memory\n", 10); + _exit(077); + } + return np; +} + +static void * +bfalloc(size_t n) +{ + static long pagesize; + void *vp; + + if (pagesize == 0) + if ((pagesize = sysconf(_SC_PAGESIZE)) < 0) + pagesize = 4096; + if ((vp = memalign(pagesize, n)) == NULL) { + fprintf(stderr, "%s: cannot allocate physio buffer\n", + progname); + done(1); + } + return vp; +} + +static char * +nameof(struct header *hp, char *buf) +{ + const char *cp; + register char *bp = buf; + + if (gnuflag <= 0 && hp->prefix[0] != '\0') { + cp = hp->prefix; 
+ while (cp < &hp->prefix[PFXSIZ] && *cp) + *bp++ = *cp++; + if (bp > buf) + *bp++ = '/'; + } + cp = hp->name; + while (cp < &hp->name[NAMSIZ] && *cp) + *bp++ = *cp++; + *bp = '\0'; + return buf; +} + +static int +mkname(struct header *hp, const char *fn) +{ + const char *cp, *cs = NULL; + + if (Aflag) + while (*fn == '/') + fn++; + for (cp = fn; *cp; cp++) { + if (*cp == '/' && cp[1] != '\0' && cp > fn && + cp - fn <= PFXSIZ && + gnuflag <= 0 && oldflag <= 0) + cs = cp; + } + if (cp - (cs ? &cs[1] : fn) > NAMSIZ) { + if (oldflag <= 0 && gnuflag <= 0 && utf8(fn)) { + paxrec |= PR_PATH; + strcpy(hp->name, sequence()); + return 0; + } + fprintf(stderr, "%s: file name too long\n", fn); + edone(1); + return -1; + } + if (cs && cp - fn > NAMSIZ) { + memcpy(hp->prefix, fn, cs - fn); + if (cs - fn < PFXSIZ) + hp->prefix[cs - fn] = '\0'; + memcpy(hp->name, &cs[1], cp - &cs[1]); + if (cp - &cs[1] < NAMSIZ) + hp->name[cp - &cs[1]] = '\0'; + } else { + memcpy(hp->name, fn, cp - fn); + if (cp - fn < NAMSIZ) + hp->name[cp - fn] = '\0'; + } + return 0; +} + +static char * +linkof(struct header *hp, char *buf) +{ + const char *cp; + register char *bp = buf; + + + cp = hp->linkname; + while (cp < &hp->linkname[NAMSIZ] && *cp) + *bp++ = *cp++; + *bp = '\0'; + return buf; +} + +static int +mklink(struct header *hp, const char *fn, const char *refname) +{ + const char *cp; + + if (Aflag) + while (*fn == '/') + fn++; + for (cp = fn; *cp; cp++); + if (cp - fn > NAMSIZ) { + if (oldflag <= 0 && gnuflag <= 0 && utf8(fn)) { + paxrec |= PR_LINKPATH; + strcpy(hp->linkname, sequence()); + return 0; + } + fprintf(stderr, "%s: %s: linked to %s\n", + progname, refname, fn); + fprintf(stderr, "%s: %s: linked name too long\n", + progname, fn); + edone(1); + return -1; + } + memcpy(hp->linkname, fn, cp - fn); + if (cp - fn < NAMSIZ) + hp->linkname[cp - fn] = '\0'; + return 0; +} + +static void +edone(int i) +{ + if (eflag && sysv3 == 0) + done(i); +} + +static ssize_t +mtwrite(const void *vdata, 
size_t sz) +{ + register ssize_t wo, wt = 0; + const char *data = vdata; + + if (volsize && wrtotal >= volsize) { + newvolume(); + wrtotal = 0; + } + do { + if ((wo = write(mt, data + wt, sz - wt)) < 0) { + if (errno == EINTR) + continue; + else if (wt > 0) { + wt += wo; + break; + } else + return wo; + } + wt += wo; + } while (wt < sz); + wrtotal += sz; + return wt; +} + +static ssize_t +mtread(void *vdata, size_t sz) +{ + register ssize_t ro, rt = 0; + char *data = vdata; + + if (volsize && rdtotal >= volsize) { + newvolume(); + rdtotal = 0; + } + do { + if ((ro = read(mt, data + rt, sz - rt)) <= 0) { + if (ro < 0) { + if (errno == EINTR) + continue; + } + if (rt > 0) { + rt += ro; + break; + } + return ro; + } + rt += ro; + } while (Bflag != 0 && rt < sz); + rdtotal += sz; + return rt; +} + +static void +newvolume(void) +{ + static int ttyfd = -1; + int curfd; + char c; + + if (close(mt) < 0) { + fprintf(stderr, "%s: close error on archive: %s\n", progname, + strerror(errno)); + done(1); + } + if ((curfd = open(".", O_RDONLY)) < 0) { + fprintf(stderr, "cannot open current directory: %s\n", + strerror(errno)); + done(1); + } + goback(workdir); + fprintf(stderr, "%s: please insert new volume, then press RETURN.\a", + progname); + if (ttyfd < 0 && isatty(0) || ttyfd == 0) + ttyfd = 0; + else + ttyfd = open("/dev/tty", O_RDONLY); + do + if (read(ttyfd, &c, 1) != 1) + done(0); + while (c != '\n'); + if (ttyfd > 0) + close(ttyfd); + if ((mt = open(usefile, rflag ? 
O_RDWR : O_RDONLY)) < 0) { + fprintf(stderr, "%s: cannot open %s\n", progname, usefile); + done(1); + } + domtstat(); + if (rflag) + odirect(); + goback(curfd); + close(curfd); +} + +static void +goback(int fd) +{ + if (fchdir(fd) < 0) { + fprintf(stderr, "cannot change back?: %s\n", strerror(errno)); + done(1); + } +} + +static void +getpath(const char *path, char **file, char **filend, size_t *sz, size_t *slen) +{ + *sz = 14 + strlen(path) + 2; + *file = smalloc(*sz); + *filend = *file; + if (path[0] == '/' && path[1] == '\0') + *(*filend)++ = '/'; + else { + const char *cp = path; + while ((*(*filend)++ = *cp++) != '\0'); + (*filend)[-1] = '/'; + } + *slen = *filend - *file; +} + +static void +setpath(const char *base, char **file, char **filend, + size_t slen, size_t *sz, size_t *ss) +{ + if (slen + (*ss = strlen(base)) >= *sz) { + *sz += slen + *ss + 15; + *file = srealloc(*file, *sz); + *filend = &(*file)[slen]; + } + strcpy(*filend, base); +} + +static void +defaults(void) +{ + struct iblok *ip; + char *line = NULL, *x, *y, *cp; + size_t size = 0; + struct magtape *mp; + + if ((ip = ib_open(TARDFL, 0)) == NULL) + return; + while (ib_getlin(ip, &line, &size, srealloc) != 0) { + if (strncmp(line, "archive", 7) == 0) { + if (line[8] == '=' && line[7] >= '0' && line[7] <= '9'){ + mp = &magtapes[line[7] - '0']; + x = &line[9]; + } else if (line[7] == '=') { + x = &line[8]; + mp = &magtapes[10]; + } else + continue; + for (y = x; *y && *y != ' ' && *y != '\t'; y++); + mp->device = cp = smalloc(y - x + 1); + while (x < y) + *cp++ = *x++; + *cp = '\0'; + mp->block = 1; + mp->size = 0; + mp->nflag = 0; + if (*x) { + mp->block = strtol(x, &y, 10); + if (y > x && *(x=y)) { + mp->size = strtoll(x, &y, 10) * 1024; + if (y > x && *(x=y)) { + while (*x && (*x == ' ' || + *x == '\t' || + *x == '\n')) + x++; + if (*x == 'n' || *x == 'N') + mp->nflag = 1; + else + mp->nflag = 0; + } + } + } + } else if (strncmp(line, "order=", 6) == 0) { + if (strcmp(&line[6], "post\n") == 0) 
+ order = POSTORDER; + else if (strcmp(&line[6], "pre\n") == 0) + order = PREORDER; + } + } + ib_close(ip); + if (line) + free(line); +} + +static void +settape(int c) +{ + struct magtape *mp; + + if (c >= '0' && c <= '9') + mp = &magtapes[c - '0']; + else { + if (magtapes[10].device) + mp = &magtapes[10]; + else + mp = &magtapes[0]; + c = '0'; + } + if (mp->device == NULL) { + fprintf(stderr, "%s: missing or invalid 'archive%c=' entry " + "in %s.\n", progname, c, TARDFL); + return; + } + usefile = mp->device; + if (bflag == 0 && mp->block > 0) { + nblock = mp->block; + bflag = B_DEFN; + } + if (kflag == 0) + volsize = mp->size; + if (nflag < 2) + nflag = mp->nflag; +} + +static void +suprmsg(void) +{ + if (Aflag && vflag) + fprintf(stderr, "Suppressing absolute pathnames\n"); +} + +static void +odirect(void) +{ +#if defined (__linux__) && defined (O_DIRECT) + /* + * If we are operating on a floppy disk block device and know + * its track size, use direct i/o. This has the advantage that + * signals can be delivered after each write(); otherwise, the + * kernel will buffer the entire data, close() will put us in + * a non-interruptible blocking state and the user has to wait + * ~40 seconds for return after he presses the interrupt key. + * + * This has no negative speed impact as long as the blocking + * factor is set to a multiple of the track size of the floppy. + * The only values still useful today (2003) seem to be 18 for + * 3.5 inch high densitiy disks at 1440 kB and 15 for 5.25 inch + * high density disks at 1200 kB, so we specify these in the + * default file; consult fd(4) for other values, or give no + * values at all and autodetect them in the code above. + * + * Addendum: Use direct i/o for all block devices if a block + * size was specified or detected since the symptoms are + * generally the same as for floppy disks (e. g. with USB + * memory sticks). But don't use it when reading since it + * just slows down operation then. 
+ */ + if ((mtstat.st_mode&S_IFMT) == S_IFBLK && bflag) { + int flags; + if ((flags = fcntl(mt, F_GETFL)) != -1) + fcntl(mt, F_SETFL, flags | O_DIRECT); + } +#endif /* __linux__ && O_DIRECT */ +} + +static void +domtstat(void) +{ + static int twice; + + if (fstat(mt, &mtstat) < 0) { + fprintf(stderr, "%s: cannot stat archive\n", progname); + done(1); + } + if ((mtstat.st_mode&S_IFMT) == S_IFIFO || + (mtstat.st_mode&S_IFMT) == S_IFSOCK) + Bflag = 1; +#if defined (__linux__) + if ((mtstat.st_mode&S_IFMT) == S_IFBLK) { + struct floppy_struct fs; + int blkbsz; + if (ioctl(mt, FDGETPRM, &fs) == 0) { + if (kflag == 0 && volsize == 0) + volsize = fs.size * FD_SECTSIZE(&fs); + if (bflag == 0 && nblock == 1 && twice == 0) { + if ((nblock=fs.sect*FD_SECTSIZE(&fs)/512)==0 || + nblock > NBLOCK) + nblock = 1; + else + bflag = B_AUTO; + } +#ifdef O_DIRECT + if (bflag && (tflag || xflag)) { + int flags; + if ((flags = fcntl(mt, F_GETFL)) != -1) + fcntl(mt, F_SETFL, flags | O_DIRECT); + } +#endif /* O_DIRECT */ +#ifdef BLKBSZGET + } else if (ioctl(mt, BLKBSZGET, &blkbsz) == 0) { + if (bflag == 0 && nblock == 1 && twice == 0 && + (blkbsz&0777) == 0) { + nblock = blkbsz >> 9; + bflag = B_AUTO; + } +#endif /* BLKBSZGET */ + } +#ifndef __G__ + } else if ((mtstat.st_mode&S_IFMT) == S_IFCHR) { + struct mtget mg; + if (ioctl(mt, MTIOCGET, &mg) == 0) + tapeblock = ((mg.mt_dsreg&MT_ST_BLKSIZE_MASK) + >> MT_ST_BLKSIZE_SHIFT); +#endif + } +#elif defined (__sun) + if ((mtstat.st_mode&S_IFMT) == S_IFCHR) { + struct mtdrivetype_request mr; + static struct mtdrivetype md; + mr.size = sizeof md; + mr.mtdtp = &md; + if (ioctl(mt, MTIOCGETDRIVETYPE, &mr) == 0) + tapeblock = md.bsize; + } +#elif defined (__FreeBSD__) || defined (__NetBSD__) || defined (__OpenBSD__) \ + || defined (__DragonFly__) || defined (__APPLE__) + if ((mtstat.st_mode&S_IFMT) == S_IFCHR) { + struct mtget mg; + if (ioctl(mt, MTIOCGET, &mg) == 0) + tapeblock = mg.mt_blksiz; + } +#elif defined (__hpux) || defined (_AIX) +#else /* 
SVR4.2MP */ + if ((mtstat.st_mode&S_IFMT) == S_IFCHR) { + struct blklen bl; + if (ioctl(mt, T_RDBLKLEN, &bl) == 0) + /* + * This ioctl is (apparently) not useful. It just + * returns 1 as minimum and 16M-1 as maximum size + * on DAT/DDS tape drives. + */ + tapeblock = 0; + } +#endif /* SVR4.2MP */ + if (tapeblock > 0) { + if (tapeblock % TBLOCK != 0) { + fprintf(stderr, "%s: tape blocksize error\n", + progname); + done(3); + } + tapeblock /= TBLOCK; + if (tapeblock > NBLOCK) + NBLOCK = tapeblock; +#if defined (__linux__) || defined (__sun) || defined (__FreeBSD__) || \ + defined (__NetBSD__) || defined (__OpenBSD__) || \ + defined (__DragonFly__) || defined (__APPLE__) + if (bflag == 0 && cflag && twice == 0) { + if (nblock == 1) { + if ((nblock = tapeblock) > NBLOCK) + nblock = 1; + else + bflag = B_AUTO; + } + } +#endif /* __linux__ || __sun || __FreeBSD__ || __NetBSD__ || __OpenBSD__ || + __DragonFly__ || __APPLE__ */ + } + if (twice == 0 && bflag == 0 && tapeblock < 0) { + if ((nblock = mtstat.st_blksize >> 9) > NBLOCK) + nblock = NBLOCK; + else if (nblock <= 0) + nblock = 1; + else if ((mtstat.st_mode&S_IFMT) != S_IFCHR) + bflag = B_AUTO; + } + twice = 1; + if (nblock > NBLOCK) + NBLOCK = nblock; + tbuf = bfalloc(sizeof *tbuf * NBLOCK); +} + +static int +checkzip(const char *bp, int rd) +{ + if (rd <= TBLOCK || (memcmp(&bp[257], "ustar", 5) && + memcmp(&bp[257], "\0\0\0\0\0", 5))) { + if (bp[0] == 'B' && bp[1] == 'Z' && bp[2] == 'h') + return redirect("bzip2", "-cd", rd); + else if (bp[0] == '\37' && bp[1] == '\235') + return redirect("zcat", NULL, rd); + else if (bp[0] == '\37' && bp[1] == '\213') + return redirect("gzip", "-cd", rd); + } + maybezip = 0; + return -1; +} + +static int +redirect(const char *arg0, const char *arg1, int rd) +{ + int pd[2]; + + if (pipe(pd) < 0) + return -1; + switch (fork()) { + case 0: + if (tapeblock >= 0 || lseek(mt, -rd, SEEK_CUR) == (off_t)-1) { + int xpd[2]; + if (pipe(xpd) == 0 && fork() == 0) { + int wo, wt; + 
close(xpd[0]); + do { + wo = wt = 0; + do { + if ((wo = write(xpd[1], + tbuf + wt, + rd - wt)) + <= 0) { + if (errno == EINTR) + continue; + _exit(0); + } + wt += wo; + } while (wt < rd); + } while ((rd=mtread(tbuf, TBLOCK*nblock)) >= 0); + if (rd < 0) + fprintf(stderr, "%s: tape read error\n", + progname); + _exit(0); + } else { + close(xpd[1]); + dup2(xpd[0], 0); + close(xpd[0]); + } + } else + dup2(mt, 0); + close(mt); + dup2(pd[1], 1); + close(pd[0]); + close(pd[1]); + execlp(arg0, arg0, arg1, NULL); + fprintf(stderr, "%s: could not exec %s: %s\n", + progname, arg0, strerror(errno)); + _exit(0177); + /*NOTREACHED*/ + default: + Bflag = 1; + tapeblock = -1; + dup2(pd[0], mt); + close(pd[0]); + close(pd[1]); + domtstat(); + break; + case -1: + return -1; + } + return 1; +} + +#define CACHESIZE 16 + +static const char * +getuser(uid_t uid) +{ + static struct { + char *name; + uid_t uid; + } cache[CACHESIZE]; + static int last; + int i; + struct passwd *pwd; + const char *name; + + for (i = 0; i < CACHESIZE && cache[i].name; i++) + if (cache[i].uid == uid) + goto found; + if ((pwd = getpwuid(uid)) != NULL) + name = pwd->pw_name; + else + name = ""; + if (i >= CACHESIZE) { + if (last >= CACHESIZE) + last = 0; + i = last++; + } + if (cache[i].name) + free(cache[i].name); + cache[i].name = sstrdup(name); + cache[i].uid = uid; +found: return cache[i].name[0] ? cache[i].name : NULL; +} + +static const char * +getgroup(gid_t gid) +{ + static struct { + char *name; + gid_t gid; + } cache[CACHESIZE]; + static int last; + int i; + struct group *grp; + const char *name; + + for (i = 0; i < CACHESIZE && cache[i].name; i++) + if (cache[i].gid == gid) + goto found; + if ((grp = getgrgid(gid)) != NULL) + name = grp->gr_name; + else + name = ""; + if (i >= CACHESIZE) { + if (last >= CACHESIZE) + last = 0; + i = last++; + } + if (cache[i].name) + free(cache[i].name); + cache[i].name = sstrdup(name); + cache[i].gid = gid; +found: return cache[i].name[0] ? 
cache[i].name : NULL; +} + +static char * +sstrdup(const char *op) +{ + char *np; + + np = smalloc(strlen(op) + 1); + strcpy(np, op); + return np; +} + +static void +fromfile(const char *fn) +{ + struct iblok *ip; + char *line = NULL; + size_t size = 0, len; + + if ((ip = ib_open(fn, 0)) == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, fn, strerror(errno)); + goback(workdir); + } else { + while ((len = ib_getlin(ip, &line, &size, srealloc)) != 0) { + if (line[len-1] == '\n') + line[--len] = '\0'; + doarg(line); + goback(workdir); + } + ib_close(ip); + if (line) + free(line); + } +} + +static void +readexcl(const char *fn) +{ + FILE *fp; + int c, slash, s; + + if ((fp = fopen(fn, "r")) == NULL) { + fprintf(stderr, "%s: could not open %s: %s\n", progname, fn, + strerror(errno)); + done(1); + } + do { + if ((c = getc(fp)) != EOF && c != '\n') { + slash = 0; + s = fprintf(tfile, "X %0*lo %c", TIMSIZ, LONG_MAX, c); + while ((c = getc(fp)) != EOF && c != '\n') { + if (c == '/') { + slash = 1; + continue; + } else if (slash == 1) { + putc('/', tfile); + s++; + slash = 0; + } + putc(c, tfile); + s++; + } + putc('\n', tfile); + s++; + s *= 3; + if (s > N) + N = s; + } + } while (c != EOF); + fclose(fp); +} + +static void +creatfile(void) +{ + char tname[PATH_MAX+1]; + + if (tfile != NULL) + return; + settmp(tname, sizeof tname, "%s/tarXXXXXX"); + if ((tfile = fdopen(mkstemp(tname), "w")) == NULL) { + fprintf(stderr, "%s: cannot create temporary file (%s)\n", + progname, tname); + done(1); + } + unlink(tname); + fcntl(fileno(tfile), F_SETFD, FD_CLOEXEC); + fprintf(tfile, "\177 %0*lo !!!!!/!/!/!/!/!/!/!\n", TIMSIZ, 0L); +} + +static mode_t +cmask(struct stat *sp, int creation) +{ + mode_t mask = 07777; + + if (myuid != 0 || oflag || creation) { + if (sp->st_uid != myuid || sp->st_gid != mygid) { + mask &= ~(mode_t)S_ISUID; + if ((sp->st_mode&S_IFMT)!=S_IFDIR && sp->st_mode&0010) + mask &= ~(mode_t)S_ISGID; + if ((sp->st_mode&S_IFMT)==S_IFDIR && sp->st_gid!=mygid) + mask 
&= ~(mode_t)S_ISGID; + } + } + return mask; +} + +/* + * Top-down splay function for inode tree. + */ +static struct islot * +isplay(ino_t ino, struct islot *x) +{ + struct islot hdr; + struct islot *leftmax, *rightmin; + struct islot *y; + + hdr.left = hdr.right = inull; + leftmax = rightmin = &hdr; + inull->inum = ino; + while (ino != x->inum) { + if (ino < x->inum) { + if (ino < x->left->inum) { + y = x->left; + x->left = y->right; + y->right = x; + x = y; + } + if (x->left == inull) + break; + rightmin->left = x; + rightmin = x; + x = x->left; + } else { + if (ino > x->right->inum) { + y = x->right; + x->right = y->left; + y->left = x; + x = y; + } + if (x->right == inull) + break; + leftmax->right = x; + leftmax = x; + x = x->right; + } + } + leftmax->right = x->left; + rightmin->left = x->right; + x->left = hdr.right; + x->right = hdr.left; + inull->inum = !ino; + return x; +} + +/* + * Find the inode number ino. + */ +static struct islot * +ifind(ino_t ino, struct islot **it) +{ + if (*it == NULL) + return NULL; + *it = isplay(ino, *it); + return (*it)->inum == ino ? *it : NULL; +} + +/* + * Put ik into the tree. + */ +static void +iput(struct islot *ik, struct islot **it) +{ + if ((*it) == NULL) { + ik->left = ik->right = inull; + (*it) = ik; + } else { + /* ifind() is always called before */ + /*(*it) = isplay(ik->inum, (*it));*/ + if (ik->inum < (*it)->inum) { + ik->left = (*it)->left; + ik->right = (*it); + (*it)->left = inull; + (*it) = ik; + } else if ((*it)->inum < ik->inum) { + ik->right = (*it)->right; + ik->left = (*it); + (*it)->right = inull; + (*it) = ik; + } + } +} + +/* + * Find the device dev or add it to the device/inode forest if not + * already present. 
+ */ +static struct dslot * +dfind(struct dslot **root, dev_t dev) +{ + struct dslot *ds, *dp; + + for (ds = *root, dp = NULL; ds; dp = ds, ds = ds->nextp) + if (ds->devnum == dev) + break; + if (ds == NULL) { + ds = scalloc(1, sizeof *ds); + ds->devnum = dev; + if (*root == NULL) + *root = ds; + else + dp->nextp = ds; + } + return ds; +} + +static char * +sequence(void) +{ + static char buf[25]; + static long long d; + + sprintf(buf, "%10.10lld", ++d); + return buf; +} + +static void +docomp(const char *name) +{ + int pd[2]; + struct stat ost; + + if (tapeblock >= 0) { + fprintf(stderr, "%s: Refusing to write compressed data " + "to tapes.\n", progname); + done(1); + } + if (pipe(pd) < 0) { + fprintf(stderr, "%s: pipe() failed\n", progname); + done(1); + } + switch (fork()) { + case 0: + dup2(mt, 1); + close(mt); + ftruncate(1, 0); + dup2(pd[0], 0); + close(pd[0]); + close(pd[1]); + execlp(name, name, "-c", NULL); + fprintf(stderr, "%s: could not exec %s\n", progname, name); + _exit(0177); + /*NOTREACHED*/ + case -1: + fprintf(stderr, "%s: could not fork(), try again later\n", + progname); + done(1); + /*NOTREACHED*/ + default: + dup2(pd[1], mt); + close(pd[0]); + close(pd[1]); + } + ost = mtstat; + domtstat(); + mtstat.st_dev = ost.st_dev; + mtstat.st_ino = ost.st_ino; +} + +static int +utf8(const char *cp) +{ + int c, n; + + while (*cp) if ((c = *cp++ & 0377) & 0200) { + if (c == (c & 037 | 0300)) + n = 1; + else if (c == (c & 017 | 0340)) + n = 2; + else if (c == (c & 07 | 0360)) + n = 3; + else if (c == (c & 03 | 0370)) + n = 4; + else if (c == (c & 01 | 0374)) + n = 5; + else + return 0; + while (n--) { + c = *cp++ & 0377; + if (c != (c & 077 | 0200)) + return 0; + } + } + return 1; +} + +static void +settmp(char *tbuf, size_t len, const char *template) +{ + char *tmpdir; + + if ((tmpdir = getenv("TMPDIR")) == NULL) + tmpdir = "/tmp"; + if (snprintf(tbuf, len, template, tmpdir) >= len) + snprintf(tbuf, len, template, "/tmp"); +} diff --git a/tar/tar.dfl 
b/tar/tar.dfl @@ -0,0 +1,9 @@ +# +# Sccsid @(#)tar.dfl 1.4 (gritter) 2/4/04 + +archive=/dev/rmt/c0s0 20 0 + +archive0=/dev/dsk/f0t 18 1440 +archive1=/dev/dsk/f1t 15 1200 +archive2=/dev/cdrom/c1t0d0 20 0 +archive3=/dev/cdrom/c1t0d1 20 0 diff --git a/yacc/depsinc.mk b/yacc/depsinc.mk @@ -0,0 +1 @@ +YACC = $yacc_DEPDIR/yacc -P$yacc_DEPDIR/yaccpar diff --git a/yacc/dextern b/yacc/dextern @@ -0,0 +1,319 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1993 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "dextern 6.17 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany + * + * Sccsid @(#)dextern 1.6 (gritter) 11/10/05 + */ + +#include <stdio.h> +#include <ctype.h> +#include <string.h> +#include <stdlib.h> +#include <wctype.h> + + /* MANIFEST CONSTANT DEFINITIONS */ +#define WORD32 + + /* base of nonterminal internal numbers */ + +#define NTBASE (10000000) + + /* internal codes for error and accept actions */ + +#define ERRCODE 8190 +#define ACCEPTCODE 8191 + + /* sizes and limits */ + +#define ACTSIZE 4000 +#define MEMSIZE 2000 +#define PSTSIZE 1024 +#define NSTATES 1000 +#define NTERMS 127 +#define NPROD 300 +#define NNONTERM 600 +#define TEMPSIZE 800 +#define CNAMSZ 1000 +#define LSETSIZE 950 +#define WSETSIZE 850 + +#define NAMESIZE 50 +#define NTYPES 1000 + +#define NMBCHARSZ 100 +#define LKFACTOR 5 + +#ifdef WORD32 + /* bit packing macros (may be machine dependent) */ +#define BIT(a, i) ((a)[(i)>>5] & (1<<((i)&037))) +#define SETBIT(a, i) ((a)[(i)>>5] |= (1<<((i)&037))) + + /* number of words needed to hold n+1 bits */ +#define NWORDS(n) (((n)+32)/32) + +#else + + /* bit packing macros (may be machine dependent) */ +#define BIT(a, i) ((a)[(i)>>4] & (1<<((i)&017))) +#define SETBIT(a, i) ((a)[(i)>>4] |= (1<<((i)&017))) + + /* number of words needed to hold n+1 bits */ +#define NWORDS(n) (((n)+16)/16) +#endif + + /* + * relationships which must hold: + * TBITSET ints must hold NTERMS+1 bits... + * WSETSIZE >= NNONTERM + * LSETSIZE >= NNONTERM + * TEMPSIZE >= NTERMS + NNONTERMs + 1 + * TEMPSIZE >= NSTATES + */ + + /* associativities */ + +#define NOASC 0 /* no assoc. */ +#define LASC 1 /* left assoc. */ +#define RASC 2 /* right assoc. */ +#define BASC 3 /* binary assoc. 
*/ + + /* flags for state generation */ + +#define DONE 0 +#define MUSTDO 1 +#define MUSTLOOKAHEAD 2 + + /* flags for a rule having an action, and being reduced */ + +#define ACTFLAG 04 +#define REDFLAG 010 + + /* output parser flags */ +#define YYFLAG1 (-10000000) + + /* + * macros for getting associativity and precedence levels; + * every argument is parenthesized so that an expression such + * as TYPE(a | b) is shifted and masked as a whole + */ + +#define ASSOC(i) ((i)&07) +#define PLEVEL(i) (((i)>>4)&077) +#define TYPE(i) (((i)>>10)&077) + + /* + * macros for setting associativity and precedence levels; + * each one ORs the (shifted) value j into the lvalue i and + * expands to a single parenthesized expression + */ + +#define SETASC(i, j) ((i) |= (j)) +#define SETPLEV(i, j) ((i) |= ((j)<<4)) +#define SETTYPE(i, j) ((i) |= ((j)<<10)) + + /* + * looping macros + * + * ITMLOOP expands to two statements (an assignment followed by + * a for header), so it must not be used as the unbraced body of + * an if, else or loop. + */ + +#define TLOOP(i) for (i = 1; i <= ntokens; ++i) +#define NTLOOP(i) for (i = 0; i <= nnonter; ++i) +#define PLOOP(s, i) for (i = s; i < nprod; ++i) +#define SLOOP(i) for (i = 0; i < nstate; ++i) +#define WSBUMP(x) ++x +#define WSLOOP(s, j) for (j = s; j < &wsets[cwp]; ++j) +#define ITMLOOP(i, p, q) q = pstate[i+1]; for (p = pstate[i]; p < q; ++p) +#define SETLOOP(i) for (i = 0; i < tbitset; ++i) + + /* I/O descriptors */ + +extern FILE * finput; /* input file */ +extern FILE * faction; /* file for saving actions */ +extern FILE * fdefine; /* file for #defines */ +extern FILE * ftable; /* y.tab.c file */ +extern FILE * ftemp; /* tempfile to pass 2 */ +extern FILE * fdebug; /* tempfile for two debugging info arrays */ +extern FILE * foutput; /* y.output file */ + + /* structure declarations */ + +typedef struct looksets { + int *lset; +} LOOKSETS; + +typedef struct item { + int *pitem; + LOOKSETS *look; +} ITEM; + +typedef struct toksymb { + wchar_t *name; + int value; +} TOKSYMB; + +typedef struct mbclit { + wchar_t character; + int tvalue; /* token issued for the character */ +} MBCLIT; + +typedef struct ntsymb { + wchar_t *name; + int tvalue; +} NTSYMB; + +typedef struct wset { + int *pitem; + int flag; + LOOKSETS ws; +} WSET; + + /* token information */ + +extern int ntokens; /* number of tokens */ +extern TOKSYMB *tokset; +extern int ntoksz; + + /* + * 
multibyte (c > 255) character literals are + * handled as though they were tokens except + * that it generates a separate mapping table. + */ +extern int nmbchars; /* number of mb literals */ +extern MBCLIT *mbchars; +extern int nmbcharsz; + + /* nonterminal information */ + +extern int nnonter; /* the number of nonterminals */ +extern NTSYMB *nontrst; +extern int nnontersz; + + /* grammar rule information */ + +extern int nprod; /* number of productions */ +extern int **prdptr; /* pointers to descriptions of productions */ +extern int *levprd; /* contains production levels to break conflicts */ +extern wchar_t *had_act; /* set if reduction has associated action code */ + + /* state information */ + +extern int nstate; /* number of states */ +extern ITEM **pstate; /* pointers to the descriptions of the states */ +extern int *tystate; /* contains type information about the states */ +extern int *defact; /* the default action of the state */ + +extern int size; + + /* lookahead set information */ + +extern int TBITSET; +extern LOOKSETS *lkst; +extern int nolook; /* flag to turn off lookahead computations */ + + /* working set information */ + +extern WSET *wsets; + + /* storage for productions */ + +extern int *mem0; +extern int *mem; +extern int *tracemem; +extern int new_memsize; + + /* storage for action table */ + +extern int *amem; +extern int *memp; /* next free action table position */ +extern int *indgo; /* index to the stored goto table */ +extern int new_actsize; + + /* temporary vector, indexable by states, terms, or ntokens */ + +extern int *temp1; +extern int lineno; /* current line number */ + + /* statistics collection variables */ + +extern int zzgoent; +extern int zzgobest; +extern int zzacent; +extern int zzexcp; +extern int zzrrconf; +extern int zzsrconf; + + /* define external functions */ + +extern void setup(int, char * []); +extern void closure(int); +extern void output(void); +extern void aryfil(int *, int, int); +extern void error(char *, 
...); +extern void warning(int, char *, ...); +extern void putitem(int *, LOOKSETS *); +extern void go2out(void); +extern void hideprod(void); +extern void callopt(void); +extern void warray(wchar_t *, int *, int); +extern wchar_t *symnam(int); +extern wchar_t *writem(int *); +extern void exp_mem(int); +extern void exp_act(int **); +extern int apack(int *, int); +extern int state(int); +extern void fprintf3(FILE *, const char *, const wchar_t *, const char *, ...); +extern void error3(const char *, const wchar_t *, const char *, ...); + + /* multibyte i/o */ + +#undef getwc +#define getwc(f) yacc_getwc(f) +extern wint_t yacc_getwc(FILE *); +#undef putwc +#define putwc(c, f) yacc_putwc(c, f) +extern wint_t yacc_putwc(wchar_t, FILE *); + + /* yaccpar location */ + +extern char *parser; + + /* default settings for a number of macros */ + + /* name of yacc tempfiles */ + +#ifndef TEMPNAME +#define TEMPNAME "yacc.tmp" +#endif + +#ifndef ACTNAME +#define ACTNAME "yacc.acts" +#endif + +#ifndef DEBUGNAME +#define DEBUGNAME "yacc.debug" +#endif + + /* command to clobber tempfiles after use */ + +#ifndef ZAPFILE +#define ZAPFILE(x) unlink(x) +#endif diff --git a/yacc/getopt.c b/yacc/getopt.c @@ -0,0 +1,222 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)getopt.c 1.10 (gritter) 12/16/07 + */ +/* from OpenSolaris "getopt.c 1.23 05/06/08 SMI" */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + + +/* + * See getopt(3C) and SUS/XPG getopt() for function definition and + * requirements. + * + * This actual implementation is a bit looser than the specification + * as it allows any character other than ':' to be used as an option + * character - The specification only guarantees the alnum characters + * ([a-z][A-Z][0-9]). + */ + +#include <sys/types.h> +#include <string.h> +#include <stdio.h> + +extern ssize_t write(int, const void *, size_t); + +char *optarg = NULL; +int optind = 1; +int opterr = 1; +int optopt = 0; + +#define ERR(s, c) err(s, c, optstring, argv[0]) +static void +err(const char *s, int c, const char *optstring, const char *argv0) +{ + char errbuf[256], *ep = errbuf; + const char *cp; + + if (opterr && optstring[0] != ':') { + for (cp = argv0; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = ": "; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = s; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + for (cp = " -- "; *cp && ep<&errbuf[sizeof errbuf]; cp++, ep++) + *ep = *cp; + if (ep<&errbuf[sizeof errbuf]) + *ep++ = c; + if (ep<&errbuf[sizeof errbuf]) + *ep++ = '\n'; + write(2, errbuf, ep - errbuf); + } +} + +/* + * getopt_sp is required to keep state between successive calls to getopt() + * while extracting aggregated options (ie: -abcd). 
Hence, getopt() is not + * thread safe or reentrant, but it really doesn't matter. + * + * So, why isn't this "static" you ask? Because the historical Bourne + * shell has actually latched on to this little piece of private data. + */ +int getopt_sp = 1; + +/* + * Determine if the specified character (c) is present in the string + * (optstring) as a regular, single character option. If the option is found, + * return a pointer into optstring pointing at the option character, + * otherwise return null. The character ':' is not allowed. + */ +static char * +parse(const char *optstring, const char c) +{ + char *cp = (char *)optstring; + + if (c == ':') + return (NULL); + do { + if (*cp == c) + return (cp); + } while (*cp++ != '\0'); + return (NULL); +} + +/* + * External function entry point. + * + * Examines argv[optind] for the next option character listed in + * optstring. Returns the option character on success; EOF once + * argv[optind] is missing, does not start with '-', is exactly "-" + * or is "--"; '?' for a character that is not in optstring; and ':' + * (if optstring starts with ':') or '?' when a required + * option-argument is missing. optarg, optind, optopt and getopt_sp + * are updated as a side effect. + */ +int +getopt(int argc, char *const *argv, const char *optstring) +{ + char c; + char *cp; + + /* + * Has the end of the options been encountered? The following + * implements the SUS requirements: + * + * If, when getopt() is called: + * argv[optind] is a null pointer + * *argv[optind] is not the character '-' + * argv[optind] points to the string "-" + * getopt() returns -1 without changing optind. If + * argv[optind] points to the string "--" + * getopt() returns -1 after incrementing optind. + */ + if (getopt_sp == 1) { + /* test for a null pointer before argv[optind] is dereferenced */ + if (optind >= argc || argv[optind] == NULL || + argv[optind][0] != '-' || argv[optind][1] == '\0') + return (EOF); + else if (strcmp(argv[optind], "--") == 0) { + optind++; + return (EOF); + } + } + + /* + * Getting this far indicates that an option has been encountered. + * Note that the syntax of optstring applies special meanings to + * the characters ':' and '(', so they are not permissible as + * option letters. A special meaning is also applied to the ')' + * character, but its meaning can be determined from context. + * Note that the specification only requires that the alnum + * characters be accepted. 
+ */ + optopt = c = (unsigned char)argv[optind][getopt_sp]; + optarg = NULL; + if ((cp = parse(optstring, c)) == NULL) { + /* LINTED: variable format specifier */ + ERR("illegal option", c); + if (argv[optind][++getopt_sp] == '\0') { + optind++; + getopt_sp = 1; + } + return ('?'); + } + optopt = c = *cp; + + /* + * A valid option has been identified. If it should have an + * option-argument, process that now. SUS defines the setting + * of optarg as follows: + * + * 1. If the option was the last character in the string pointed to + * by an element of argv, then optarg contains the next element + * of argv, and optind is incremented by 2. If the resulting + * value of optind is not less than argc, this indicates a + * missing option-argument, and getopt() returns an error + * indication. + * + * 2. Otherwise, optarg points to the string following the option + * character in that element of argv, and optind is incremented + * by 1. + * + * The second clause allows -abcd (where b requires an option-argument) + * to be interpreted as "-a -b cd". + */ + if (*(cp + 1) == ':') { + /* The option takes an argument */ + if (argv[optind][getopt_sp+1] != '\0') { + optarg = &argv[optind++][getopt_sp+1]; + } else if (++optind >= argc) { + /* LINTED: variable format specifier */ + ERR("option requires an argument", c); + getopt_sp = 1; + optarg = NULL; + return (optstring[0] == ':' ? ':' : '?'); + } else + optarg = argv[optind++]; + getopt_sp = 1; + } else { + /* The option does NOT take an argument */ + if (argv[optind][++getopt_sp] == '\0') { + getopt_sp = 1; + optind++; + } + optarg = NULL; + } + return (c); +} /* getopt() */ + +#ifdef __APPLE__ +/* + * Starting with Mac OS 10.5 Leopard, <unistd.h> turns getopt() + * into getopt$UNIX2003() by default. Consequently, this function + * is called instead of the one defined above. However, optind is + * still taken from this file, so in effect, options are not + * properly handled. 
Defining an own getopt$UNIX2003() function + * works around this issue. + */ +int +getopt$UNIX2003(int argc, char *const argv[], const char *optstring) +{ + return getopt(argc, argv, optstring); +} +#endif /* __APPLE__ */ diff --git a/yacc/libmai.c b/yacc/libmai.c @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "libmai.c 6.9 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)libmai.c 1.3 (gritter) 6/18/05 + */ + +#include <locale.h> + +extern int yyparse(void); + +int +main(void) +{ + setlocale(LC_ALL, ""); + yyparse(); + return (0); +} diff --git a/yacc/libzer.c b/yacc/libzer.c @@ -0,0 +1,45 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* Copyright (c) 1989 AT&T */ +/* All Rights Reserved */ + + +/* + * Copyright 2004 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* from OpenSolaris "libzer.c 6.6 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)libzer.c 1.3 (gritter) 6/18/05 + */ + +#include <stdio.h> + +void +yyerror(const char *s) +{ + fprintf(stderr, "%s\n", s); +} diff --git a/yacc/mkfile b/yacc/mkfile @@ -0,0 +1,15 @@ +BIN = yacc +LIB = liby.a +OBJ = y1.o y2.o y3.o y4.o y5.o getopt.o +LOBJ = libmai.o libzer.o +LOCAL_CFLAGS = -DPARSER=\"$LIBDIR/yaccpar\" +INSTALL_BIN = yacc +INSTALL_LIB = yaccpar +INSTALL_MAN1 = yacc.1 + +<$mkbuild/mk.default + +y1.o: dextern +y2.o: dextern sgs.h +y3.o: dextern +y4.o: dextern diff --git a/yacc/sgs.h b/yacc/sgs.h @@ -0,0 +1,49 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)main.c 1.2 (gritter) 6/14/05 + */ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4 +#define USED __attribute__ ((used)) +#elif defined __GNUC__ +#define USED __attribute__ ((unused)) +#else +#define USED +#endif +static const char sccsid[] USED = "@(#)yacc.sl 2.6 (gritter) 11/26/05"; +static const char pkg[] = "Heirloom Development Tools"; +static const char rel[] = "2.6 (gritter) 11/26/05"; +/* SLIST */ +/* +dextern: * Sccsid @(#)dextern 1.6 (gritter) 11/10/05 +getopt.c: * Sccsid @(#)getopt.c 1.8 (gritter) 8/2/05 +libmai.c: * Sccsid @(#)libmai.c 1.3 (gritter) 6/18/05 +libzer.c: * Sccsid @(#)libzer.c 1.3 (gritter) 6/18/05 +y1.c: * Sccsid @(#)y1.c 1.7 (gritter) 11/26/05 +y2.c: * Sccsid @(#)y2.c 1.11 (gritter) 11/26/05 +y3.c: * Sccsid @(#)y3.c 1.5 (gritter) 11/26/05 +y4.c: * Sccsid @(#)y4.c 1.5 (gritter) 11/26/05 +y5.c: * Sccsid @(#)y5.c 1.1 (gritter) 6/25/05 +yaccpar: * Sccsid @(#)yaccpar 1.5 (gritter) 11/26/05 +*/ diff --git a/yacc/y1.c b/yacc/y1.c @@ -0,0 +1,1098 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. 
+ * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1990 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "y1.c 6.27 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)y1.c 1.7 (gritter) 11/26/05 + */ + +#include "dextern" +#include <sys/param.h> +#include <errno.h> +#include <unistd.h> +#include <locale.h> +#include <stdarg.h> /* For error() */ +#include <wchar.h> + +static void mktbls(void); +static void others(void); +static void summary(void); +static wchar_t *chcopy(wchar_t *, wchar_t *); +static int setunion(int *, int *); +static void prlook(LOOKSETS *); +static void cpres(void); +static void cpfir(void); +static void cempty(void); +static void stagen(void); +static LOOKSETS *flset(LOOKSETS *); +static void exp_lkst(void); +static void exp_wsets(void); +static void exp_states(void); +static void exp_psmem(void); + + /* lookahead computations */ + +int TBITSET; +static int tbitset; /* size of lookahead sets */ +LOOKSETS *lkst; +static int lsetsize; + +static int nlset = 0; /* next lookahead set index */ +int nolook = 0; /* flag to suppress lookahead computations */ +static LOOKSETS clset; /* temporary storage for lookahead computations */ + +static ITEM *psmem, *zzmemsz; +static int new_pstsize = PSTSIZE; + + /* working set computations */ + +WSET *wsets; +int cwp; +static int wsetsz = 0; /* number of WSET items in wsets block */ + + /* state information */ + +int nstate = 0; /* number of states */ +static int nstatesz = NSTATES; /* 
number of state space allocated */ +ITEM **pstate; /* ptr to descriptions of the states */ +int *tystate; /* contains type info about the states */ +int *indgo; /* index to the stored goto table */ +static int *tmp_lset; +static int *tstates; /* states generated by terminal gotos */ +static int *ntstates; /* states generated by non-term gotos */ +static int *mstates; /* chain of overflows of term/nonterm */ + /* generation lists */ + + /* storage for the actions in the parser */ + +int *amem, *memp; /* next free action table position */ +int new_actsize = ACTSIZE; + + /* other storage areas */ + +int *temp1; /* temp storate, indexed by terms+ntokens or states */ +int lineno = 0; /* current input line number */ +int size; +static int fatfl = 1; /* if on, error is fatal */ +static int nerrors = 0; /* number of errors */ + + /* storage for information about the nonterminals */ + +static int ***pres; /* vector of pointers to productions */ + /* yielding each nonterminal */ +static LOOKSETS **pfirst; /* vector of pointers to first sets for */ + /* each nonterminal */ +static int *pempty; /* vector of nonterminals nontrivially */ + /* deriving e */ +extern int nprodsz; + +static char *sav_argv0; +char run_directory[MAXPATHLEN]; +char current_work_directory[MAXPATHLEN]; + +int +main(int argc, char *argv[]) +{ + setlocale(LC_CTYPE, ""); + + sav_argv0 = argv[0]; + setup(argc, argv); /* initialize and read productions */ + TBITSET = NWORDS(ntoksz*LKFACTOR); + tbitset = NWORDS(ntokens*LKFACTOR); + mktbls(); + cpres(); /* make table of which productions yield a */ + /* given nonterminal */ + cempty(); /* make a table of which nonterminals can match */ + /* the empty string */ + cpfir(); /* make a table of firsts of nonterminals */ + stagen(); /* generate the states */ + output(); /* write the states and the tables */ + go2out(); + hideprod(); + summary(); + callopt(); + others(); + return 0; +} + + +static void +mktbls(void) +{ + int i; + + size = ntoksz + nnontersz +1; + if 
(size < nstatesz) + size = nstatesz; + if (size < new_memsize) + size = new_memsize; + + amem = malloc(sizeof (int) * new_actsize); + psmem = malloc(sizeof (ITEM) * new_pstsize); + if ((psmem == NULL) || (amem == NULL)) + error("couldn't allocate initial table"); + zzmemsz = psmem; + memp = amem; + + /* + * For lkst + */ +#define INIT_LSIZE nnontersz*LKFACTOR + tmp_lset = calloc((size_t)(TBITSET * (INIT_LSIZE+1)), sizeof (int)); + if (tmp_lset == NULL) + error("could not allocate lookset array"); + lkst = malloc(sizeof (LOOKSETS) * (INIT_LSIZE + 1)); + for (i = 0; i <= INIT_LSIZE; ++i) + lkst[i].lset = tmp_lset + TBITSET * i; + tmp_lset = NULL; + + /* + * For wsets + */ + tmp_lset = calloc((size_t)(TBITSET * (nnontersz+1)), sizeof (int)); + if (tmp_lset == NULL) + error("could not allocate lookset array"); + wsets = (WSET *) malloc(sizeof (WSET) * (nnontersz + 1)); + for (i = 0; i <= nnontersz; ++i) + wsets[i].ws.lset = tmp_lset + TBITSET * i; + tmp_lset = NULL; + + clset.lset = malloc(sizeof (int)*TBITSET); + tstates = malloc(sizeof (int)*(ntoksz + 1)); + ntstates = malloc(sizeof (int)*(nnontersz + 1)); + temp1 = malloc(sizeof (int)*size); + pres = malloc(sizeof (int **)*(nnontersz + 2)); + pfirst = malloc(sizeof (LOOKSETS *)*(nnontersz + 2)); + pempty = malloc(sizeof (int)*(nnontersz + 1)); + + pstate = malloc(sizeof (ITEM *)*(nstatesz+2)); + tystate = malloc(sizeof (int)*nstatesz); + indgo = malloc(sizeof (int)*nstatesz); + mstates = malloc(sizeof (int)*nstatesz); + defact = malloc(sizeof (int)*nstatesz); + + if ((lkst == NULL) || (wsets == NULL) || (tstates == NULL) || + (ntstates == NULL) || (temp1 == NULL) || (pres == NULL) || + (pfirst == NULL) || (pempty == NULL) || (pstate == NULL) || + (tystate == NULL) || (indgo == NULL) || (mstates == NULL) || + (defact == NULL) || (clset.lset == NULL)) + error("cannot allocate tables in mktbls()"); + + aryfil(ntstates, nnontersz+1, 0); + aryfil(tstates, ntoksz+1, 0); + wsetsz = nnontersz + 1; + lsetsize = INIT_LSIZE + 
1; +} + +/* put out other arrays, copy the parsers */ +static void +others(void) +{ + extern int gen_lines; + register int c, i, j; + int tmpline; + + /* This routine has been "stolen" from the driver */ + if (parser == NULL) + parser = PARSER; + + finput = fopen(parser, "r"); + if (finput == NULL) + error("cannot find parser %s", parser); + + warray(L"yyr1", levprd, nprod); + + aryfil(temp1, nprod, 0); + /* had_act[i] is either 1 or 0 */ + PLOOP(1, i) + temp1[i] = ((prdptr[i+1] - prdptr[i]-2) << 1) | had_act[i]; + warray(L"yyr2", temp1, nprod); + + aryfil(temp1, nstate, -10000000); + TLOOP(i) + for (j = tstates[i]; j != 0; j = mstates[j]) + temp1[j] = tokset[i].value; + NTLOOP(i) + for (j = ntstates[i]; j != 0; j = mstates[j]) + temp1[j] = -i; + warray(L"yychk", temp1, nstate); + + warray(L"yydef", defact, nstate); + + if ((fdebug = fopen(DEBUGNAME, "r")) == NULL) + error("cannot open yacc.debug"); + while ((c = getwc(fdebug)) != EOF) + putwc(c, ftable); + fclose(fdebug); + ZAPFILE(DEBUGNAME); + + if (gen_lines) + fprintf(ftable, "# line\t1 \"%s\"\n", parser); + tmpline = 1; + /* copy parser text */ + while ((c = getwc(finput)) != EOF) { + if (c == '\n') + tmpline++; + if (c == L'$') { + if ((c = getwc(finput)) != L'A') + putwc(L'$', ftable); + else { /* copy actions */ + tmpline++; + faction = fopen(ACTNAME, "r"); + if (faction == NULL) + error("cannot open action tempfile"); + while ((c = getwc(faction)) != EOF) + putwc(c, ftable); + fclose(faction); + if (gen_lines) + fprintf(ftable, + "\n# line\t%d \"%s\"", + tmpline, + parser); + ZAPFILE(ACTNAME); + c = getwc(finput); + } + } + putwc(c, ftable); + } + fclose(ftable); +} + + +/* copies string q into p, returning next free char ptr */ +static wchar_t * +chcopy(wchar_t *p, wchar_t *q) +{ + while (*p = *q++) + ++p; + return (p); +} + +#define ISIZE 400 +/* creates output string for item pointed to by pp */ +wchar_t * +writem(int *pp) +{ + int i, *p; + static int isize = ISIZE; + static wchar_t *sarr = NULL; + 
wchar_t *q; + + if (sarr == NULL) { + sarr = malloc(sizeof (wchar_t) * isize); + if (sarr == NULL) + error("could not allocate output string array"); + for (i = 0; i < isize; ++i) + sarr[i] = L' '; + } + for (p = pp; *p > 0; ++p) /* EMPTY */; + p = prdptr[-*p]; + q = chcopy(sarr, nontrst[*p-NTBASE].name); + q = chcopy(q, L" : "); + + for (;;) { + *q++ = ++p == pp ? L'_' : L' '; + *q = 0; + if ((i = *p) <= 0) + break; + q = chcopy(q, symnam(i)); + while (q > &sarr[isize-30]) { + static wchar_t *sarrbase; + + sarrbase = sarr; + isize += ISIZE; + sarr = realloc(sarr, sizeof (*sarr) * isize); + if (sarr == NULL) + error("cannot expand sarr arrays"); + q = q - sarrbase + sarr; + } + } + + /* an item calling for a reduction */ + if ((i = *pp) < 0) { + q = chcopy(q, L" ("); + swprintf(q, q + isize - sarr, L"%d)", -i); + } + return (sarr); +} + +/* return a pointer to the name of symbol i */ +wchar_t * +symnam(int i) +{ + wchar_t *cp; + + cp = (i >= NTBASE) ? nontrst[i-NTBASE].name : tokset[i].name; + if (*cp == L' ') + ++cp; + return (cp); +} + +static int zzcwp = 0; +static int zzclose = 0; +int zzgoent = 0; +int zzgobest = 0; +int zzacent = 0; +int zzexcp = 0; +int zzsrconf = 0; +int zzrrconf = 0; + +/* output the summary on the tty */ +static void +summary(void) +{ + if (foutput != NULL) { + fprintf(foutput, + "\n%d/%d terminals, %d/%d nonterminals\n", + ntokens, ntoksz, nnonter, nnontersz); + fprintf(foutput, + "%d/%d grammar rules, %d/%d states\n", + nprod, nprodsz, nstate, nstatesz); + fprintf(foutput, + "%d shift/reduce, %d reduce/reduce conflicts reported\n", + zzsrconf, zzrrconf); + fprintf(foutput, + "%d/%d working sets used\n", zzcwp, wsetsz); + fprintf(foutput, + "memory: states,etc. 
%d/%d, parser %d/%d\n", + mem-tracemem, new_memsize, memp-amem, new_actsize); + fprintf(foutput, + "%d/%d distinct lookahead sets\n", nlset, lsetsize); + fprintf(foutput, + "%d extra closures\n", zzclose - 2*nstate); + fprintf(foutput, + "%d shift entries, %d exceptions\n", zzacent, zzexcp); + fprintf(foutput, + "%d goto entries\n", zzgoent); + fprintf(foutput, + "%d entries saved by goto default\n", zzgobest); + } + if (zzsrconf != 0 || zzrrconf != 0) { + fprintf(stderr, "\nconflicts: "); + if (zzsrconf) + fprintf(stderr, "%d shift/reduce", zzsrconf); + if (zzsrconf && zzrrconf) + fprintf(stderr, ", "); + if (zzrrconf) + fprintf(stderr, "%d reduce/reduce", zzrrconf); + fprintf(stderr, "\n"); + } + + if (ftemp != NULL) + fclose(ftemp); + if (fdefine != NULL) + fclose(fdefine); +} + +/* write out error comment */ +void +error(char *s, ...) +{ + extern char *infile; + va_list ap; + + va_start(ap, s); + + ++nerrors; + if (!lineno) + fprintf(stderr, "command line: fatal: "); + else { + fprintf(stderr, "\"%s\", ", infile); + fprintf(stderr, "line %d: fatal: ", lineno); + } + vfprintf(stderr, s, ap); + fprintf(stderr, "\n"); + va_end(ap); + if (!fatfl) + return; + summary(); + exit(1); +} + +/* + * Print out a warning message. + */ +void +warning(int flag, char *s, ...) +{ + extern char *infile; + va_list ap; + va_start(ap, s); + + fprintf(stderr, "\"%s\", ", infile); + /* + * If flag, print lineno as well. 
+ */ + if (flag == 0) + fprintf(stderr, "warning: "); + else + fprintf(stderr, "line %d: warning: ", lineno); + vfprintf(stderr, s, ap); + fprintf(stderr, "\n"); + va_end(ap); +} + +/* set elements 0 through n-1 to c */ +void +aryfil(int *v, int n, int c) +{ + int i; + for (i = 0; i < n; ++i) + v[i] = c; +} + +/* set a to the union of a and b */ +/* return 1 if b is not a subset of a, 0 otherwise */ +static int +setunion(register int *a, register int *b) +{ + register int i, x, sub; + + sub = 0; + SETLOOP(i) { + *a = (x = *a) | *b++; + if (*a++ != x) + sub = 1; + } + return (sub); +} + +static void +prlook(LOOKSETS *p) +{ + register int j, *pp; + pp = p->lset; + if (pp == 0) + fprintf(foutput, "\tNULL"); + else { + fprintf(foutput, " { "); + TLOOP(j) { + if (BIT(pp, j)) + fprintf(foutput, "%ls ", symnam(j)); + } + fprintf(foutput, "}"); + } +} + +/* + * compute an array with the beginnings of productions yielding + * given nonterminals + * The array pres points to these lists + * the array pyield has the lists: the total size is only NPROD+1 + */ +static void +cpres(void) +{ + register int **ptrpy; + int **pyield; + register int c, j, i; + + /* + * 2/29/88 - + * nprodsz is the size of the tables describing the productions. + * Normally this will be NPROD unless the production tables have + * been expanded, in which case the tables will be NPROD * N(where + * N is the number of times the tables had to be expanded.) 
+ */ + if ((pyield = malloc(sizeof (int *) * nprodsz)) == NULL) + error("cannot allocate space for pyield array"); + + ptrpy = pyield; + + NTLOOP(i) { + c = i+NTBASE; + pres[i] = ptrpy; + fatfl = 0; /* make undefined symbols nonfatal */ + PLOOP(0, j) { + if (*prdptr[j] == c) /* linear search for all c's */ + *ptrpy++ = prdptr[j] + 1; + } + if (pres[i] == ptrpy) { /* c not found */ + error("undefined nonterminal: %ls", nontrst[i].name); + } + } + pres[i] = ptrpy; + fatfl = 1; + if (nerrors) { + summary(); + exit(1); + } + if (ptrpy != &pyield[nprod]) + error("internal Yacc error: pyield %d", ptrpy-&pyield[nprod]); +} + +static int indebug = 0; +/* compute an array with the first of nonterminals */ +static void +cpfir(void) +{ + register int *p, **s, i, **t, ch, changes; + + zzcwp = nnonter; + NTLOOP(i) { + aryfil(wsets[i].ws.lset, tbitset, 0); + t = pres[i+1]; + /* initially fill the sets */ + for (s = pres[i]; s < t; ++s) { + /* check if ch is non-terminal */ + for (p = *s; (ch = *p) > 0; ++p) { + if (ch < NTBASE) { /* should be token */ + SETBIT(wsets[i].ws.lset, ch); + break; + } else if (!pempty[ch-NTBASE]) + break; + } + } + } + + /* now, reflect transitivity */ + + changes = 1; + while (changes) { + changes = 0; + NTLOOP(i) { + t = pres[i+1]; + for (s = pres[i]; s < t; ++s) { + for (p = *s; (ch = (*p-NTBASE)) >= 0; ++p) { + changes |= setunion(wsets[i].ws.lset, + wsets[ch].ws.lset); + if (!pempty[ch]) + break; + } + } + } + } + + NTLOOP(i) + pfirst[i] = flset(&wsets[i].ws); + if (!indebug) + return; + if ((foutput != NULL)) { + NTLOOP(i) { + fprintf(foutput, "\n%ls: ", nontrst[i].name); + prlook(pfirst[i]); + fprintf(foutput, " %d\n", pempty[i]); + } + } +} + +/* sorts last state,and sees if it equals earlier ones. 
returns state number */ +int +state(int c) +{ + int size1, size2; + register int i; + ITEM *p1, *p2, *k, *l, *q1, *q2; + p1 = pstate[nstate]; + p2 = pstate[nstate+1]; + if (p1 == p2) + return (0); /* null state */ + /* sort the items */ + for (k = p2 - 1; k > p1; k--) { /* make k the biggest */ + for (l = k-1; l >= p1; --l) + if (l->pitem > k->pitem) { + int *s; + LOOKSETS *ss; + s = k->pitem; + k->pitem = l->pitem; + l->pitem = s; + ss = k->look; + k->look = l->look; + l->look = ss; + } + } + size1 = p2 - p1; /* size of state */ + + for (i = (c >= NTBASE) ? ntstates[c-NTBASE] : tstates[c]; + i != 0; + i = mstates[i]) { + /* get ith state */ + q1 = pstate[i]; + q2 = pstate[i+1]; + size2 = q2 - q1; + if (size1 != size2) + continue; + k = p1; + for (l = q1; l < q2; l++) { + if (l->pitem != k->pitem) + break; + ++k; + } + if (l != q2) + continue; + /* found it */ + pstate[nstate+1] = pstate[nstate]; /* delete last state */ + /* fix up lookaheads */ + if (nolook) + return (i); + for (l = q1, k = p1; l < q2; ++l, ++k) { + int s; + SETLOOP(s) + clset.lset[s] = l->look->lset[s]; + if (setunion(clset.lset, k->look->lset)) { + tystate[i] = MUSTDO; + /* register the new set */ + l->look = flset(&clset); + } + } + return (i); + } + /* state is new */ + if (nolook) + error("yacc state/nolook error"); + pstate[nstate+2] = p2; + if (nstate+1 >= nstatesz) + exp_states(); + if (c >= NTBASE) { + mstates[nstate] = ntstates[c - NTBASE]; + ntstates[c - NTBASE] = nstate; + } else { + mstates[nstate] = tstates[c]; + tstates[c] = nstate; + } + tystate[nstate] = MUSTDO; + return (nstate++); +} + +static int pidebug = 0; + +void +putitem(int *ptr, LOOKSETS *lptr) +{ + register ITEM *j; + + if (pidebug && (foutput != NULL)) + fprintf(foutput, + "putitem(%ls), state %d\n", writem(ptr), nstate); + j = pstate[nstate+1]; + j->pitem = ptr; + if (!nolook) + j->look = flset(lptr); + pstate[nstate+1] = ++j; + if (j > zzmemsz) { + zzmemsz = j; + if (zzmemsz >= &psmem[new_pstsize]) + exp_psmem(); + 
/* error("out of state space"); */ + } +} + +/* + * mark nonterminals which derive the empty string + * also, look for nonterminals which don't derive any token strings + */ +static void +cempty(void) +{ +#define EMPTY 1 +#define WHOKNOWS 0 +#define OK 1 + register int i, *p; + + /* + * first, use the array pempty to detect productions + * that can never be reduced + */ + + /* set pempty to WHONOWS */ + aryfil(pempty, nnonter+1, WHOKNOWS); + + /* + * now, look at productions, marking nonterminals which + * derive something + */ + more: + PLOOP(0, i) { + if (pempty[*prdptr[i] - NTBASE]) + continue; + for (p = prdptr[i] + 1; *p >= 0; ++p) + if (*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS) + break; + if (*p < 0) { /* production can be derived */ + pempty[*prdptr[i]-NTBASE] = OK; + goto more; + } + } + + /* now, look at the nonterminals, to see if they are all OK */ + + NTLOOP(i) { + /* + * the added production rises or falls as the + * start symbol ... + */ + if (i == 0) + continue; + if (pempty[i] != OK) { + fatfl = 0; + error("nonterminal %ls never derives any token string", + nontrst[i].name); + } + } + + if (nerrors) { + summary(); + exit(1); + } + + /* + * now, compute the pempty array, to see which nonterminals + * derive the empty string + */ + + /* set pempty to WHOKNOWS */ + + aryfil(pempty, nnonter+1, WHOKNOWS); + + /* loop as long as we keep finding empty nonterminals */ + +again: + PLOOP(1, i) { + /* not known to be empty */ + if (pempty[*prdptr[i]-NTBASE] == WHOKNOWS) { + for (p = prdptr[i]+1; + *p >= NTBASE && pempty[*p-NTBASE] == EMPTY; + ++p); + /* we have a nontrivially empty nonterminal */ + if (*p < 0) { + pempty[*prdptr[i]-NTBASE] = EMPTY; + goto again; /* got one ... 
try for another */ + } + } + } +} + +/* generate the states */ +static int gsdebug = 0; +static void +stagen(void) +{ + int i, j; + register int c; + register WSET *p, *q; + + /* initialize */ + + nstate = 0; + + pstate[0] = pstate[1] = psmem; + aryfil(clset.lset, tbitset, 0); + putitem(prdptr[0] + 1, &clset); + tystate[0] = MUSTDO; + nstate = 1; + pstate[2] = pstate[1]; + + aryfil(amem, new_actsize, 0); + + /* now, the main state generation loop */ + + more: + SLOOP(i) { + if (tystate[i] != MUSTDO) + continue; + tystate[i] = DONE; + aryfil(temp1, nnonter + 1, 0); + /* take state i, close it, and do gotos */ + closure(i); + WSLOOP(wsets, p) { /* generate goto's */ + if (p->flag) + continue; + p->flag = 1; + c = *(p->pitem); + if (c <= 1) { + if (pstate[i+1]-pstate[i] <= p-wsets) + tystate[i] = MUSTLOOKAHEAD; + continue; + } + /* do a goto on c */ + WSLOOP(p, q) { + /* this item contributes to the goto */ + if (c == *(q->pitem)) { + putitem(q->pitem + 1, &q->ws); + q->flag = 1; + } + } + if (c < NTBASE) + state(c); /* register new state */ + else temp1[c-NTBASE] = state(c); + } + if (gsdebug && (foutput != NULL)) { + fprintf(foutput, "%d: ", i); + NTLOOP(j) { + if (temp1[j]) + fprintf(foutput, + "%ls %d, ", nontrst[j].name, + temp1[j]); + } + fprintf(foutput, "\n"); + } + indgo[i] = apack(&temp1[1], nnonter - 1) - 1; + goto more; /* we have done one goto; do some more */ + } + /* no more to do... 
stop */ +} + +/* generate the closure of state i */ +static int cldebug = 0; /* debugging flag for closure */ +void +closure(int i) +{ + int c, ch, work, k; + register WSET *u, *v; + int *pi; + int **s, **t; + ITEM *q; + register ITEM *p; + int idx1 = 0; + + ++zzclose; + + /* first, copy kernel of state i to wsets */ + cwp = 0; + ITMLOOP(i, p, q) { + wsets[cwp].pitem = p->pitem; + wsets[cwp].flag = 1; /* this item must get closed */ + SETLOOP(k) + wsets[cwp].ws.lset[k] = p->look->lset[k]; + WSBUMP(cwp); + } + + /* now, go through the loop, closing each item */ + + work = 1; + while (work) { + work = 0; + /* + * WSLOOP(wsets, u) { + */ + for (idx1 = 0; idx1 < cwp; idx1++) { + u = &wsets[idx1]; + if (u->flag == 0) + continue; + c = *(u->pitem); /* dot is before c */ + if (c < NTBASE) { + u->flag = 0; + /* + * only interesting case is where . is + * before nonterminal + */ + continue; + } + + /* compute the lookahead */ + aryfil(clset.lset, tbitset, 0); + + /* find items involving c */ + + WSLOOP(u, v) { + if (v->flag == 1 && *(pi = v->pitem) == c) { + v->flag = 0; + if (nolook) + continue; + while ((ch = *++pi) > 0) { + /* terminal symbol */ + if (ch < NTBASE) { + SETBIT(clset.lset, ch); + break; + } + /* nonterminal symbol */ + setunion(clset.lset, + pfirst[ch-NTBASE]->lset); + if (!pempty[ch-NTBASE]) + break; + } + if (ch <= 0) + setunion(clset.lset, + v->ws.lset); + } + } + + /* now loop over productions derived from c */ + + c -= NTBASE; /* c is now nonterminal number */ + + t = pres[c+1]; + for (s = pres[c]; s < t; ++s) { + /* put these items into the closure */ + WSLOOP(wsets, v) { /* is the item there */ + /* yes, it is there */ + if (v->pitem == *s) { + if (nolook) + goto nexts; + if (setunion(v->ws.lset, + clset.lset)) + v->flag = work = 1; + goto nexts; + } + } + + /* not there; make a new entry */ + if (cwp + 1 >= wsetsz) + exp_wsets(); + + wsets[cwp].pitem = *s; + wsets[cwp].flag = 1; + if (!nolook) { + work = 1; + SETLOOP(k) + wsets[cwp].ws.lset[k] = + 
clset.lset[k]; + } + WSBUMP(cwp); + nexts:; + } + } + } + + /* have computed closure; flags are reset; return */ + + if (&wsets[cwp] > &wsets[zzcwp]) + zzcwp = cwp; + if (cldebug && (foutput != NULL)) { + fprintf(foutput, "\nState %d, nolook = %d\n", i, nolook); + WSLOOP(wsets, u) { + if (u->flag) + fprintf(foutput, "flag set!\n"); + u->flag = 0; + fprintf(foutput, "\t%ls", writem(u->pitem)); + prlook(&u->ws); + fprintf(foutput, "\n"); + } + } +} + +static LOOKSETS * +flset(LOOKSETS *p) +{ + /* decide if the lookahead set pointed to by p is known */ + /* return pointer to a perminent location for the set */ + + int j, *w; + register int *u, *v; + register LOOKSETS *q; + + for (q = &lkst[nlset]; q-- > lkst; ) { + u = p->lset; + v = q->lset; + w = & v[tbitset]; + while (v < w) + if (*u++ != *v++) + goto more; + /* we have matched */ + return (q); + more:; + } + /* add a new one */ + q = &lkst[nlset++]; + if (nlset >= lsetsize) { + exp_lkst(); + q = &lkst[nlset++]; + } + SETLOOP(j) q->lset[j] = p->lset[j]; + return (q); +} + +static void +exp_lkst(void) +{ + int i, j; + static LOOKSETS *lookbase; + + lookbase = lkst; + lsetsize += LSETSIZE; + tmp_lset = calloc(TBITSET * (lsetsize-LSETSIZE), sizeof (int)); + if (tmp_lset == NULL) + error("could not expand lookset array"); + lkst = realloc(lkst, sizeof (LOOKSETS) * lsetsize); + for (i = lsetsize-LSETSIZE, j = 0; i < lsetsize; ++i, ++j) + lkst[i].lset = tmp_lset + TBITSET * j; + tmp_lset = NULL; + if (lkst == NULL) + error("could not expand lookahead sets"); + for (i = 0; i <= nnonter; ++i) + pfirst[i] = pfirst[i] - lookbase + lkst; + for (i = 0; i <= nstate+1; ++i) { + if (psmem[i].look) + psmem[i].look = psmem[i].look - lookbase + lkst; + if (pstate[i]->look) + pstate[i]->look = pstate[i]->look - lookbase + lkst; + } +} + +static void +exp_wsets(void) +{ + int i, j; + + wsetsz += WSETSIZE; + tmp_lset = calloc(TBITSET * (wsetsz-WSETSIZE), sizeof (int)); + if (tmp_lset == NULL) + error("could not expand lookset array"); 
+ wsets = realloc(wsets, sizeof (WSET) * wsetsz); + for (i = wsetsz-WSETSIZE, j = 0; i < wsetsz; ++i, ++j) + wsets[i].ws.lset = tmp_lset + TBITSET * j; + tmp_lset = NULL; + if (wsets == NULL) + error("could not expand working sets"); +} + +static void +exp_states(void) +{ + nstatesz += NSTATES; + + pstate = realloc(pstate, sizeof (ITEM *)*(nstatesz+2)); + mstates = realloc(mstates, sizeof (int)*nstatesz); + defact = realloc(defact, sizeof (int)*nstatesz); + tystate = realloc(tystate, sizeof (int)*nstatesz); + indgo = realloc(indgo, sizeof (int)*nstatesz); + + if ((*pstate == NULL) || (tystate == NULL) || (defact == NULL) || + (indgo == NULL) || (mstates == NULL)) + error("cannot expand table of states"); +} + +static void +exp_psmem(void) +{ + int i; + + new_pstsize += PSTSIZE; + psmem = realloc(psmem, sizeof (ITEM) * new_pstsize); + if (psmem == NULL) + error("cannot expand pstate memory"); + + zzmemsz = zzmemsz - pstate[0] + psmem; + for (i = 1; i <= nstate+1; ++i) + pstate[i] = pstate[i] - pstate[0] + psmem; + pstate[0] = psmem; +} diff --git a/yacc/y2.c b/yacc/y2.c @@ -0,0 +1,1758 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 2002 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "y2.c 6.35 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)y2.c 1.11 (gritter) 11/26/05 + */ + +#include "dextern" +#include "sgs.h" +#include <wchar.h> +#include <unistd.h> +#define IDENTIFIER 257 + +#define MARK 258 +#define TERM 259 +#define LEFT 260 +#define RIGHT 261 +#define BINARY 262 +#define PREC 263 +#define LCURLY 264 +#define C_IDENTIFIER 265 /* name followed by colon */ +#define NUMBER 266 +#define START 267 +#define TYPEDEF 268 +#define TYPENAME 269 +#define UNION 270 +#define ENDFILE 0 +#define LHS_TEXT_LEN 80 /* length of lhstext */ +#define RHS_TEXT_LEN 640 /* length of rhstext */ + /* communication variables between various I/O routines */ + +#define v_FLAG 0x01 +#define d_FLAG 0x02 +#define DEFAULT_PREFIX "y" + +char *infile; /* input file name */ +static int numbval; /* value of an input number */ +static int toksize = NAMESIZE; +static wchar_t *tokname; /* input token name */ +char *parser = NULL; /* location of common parser */ + +static void finact(void); +static wchar_t *cstash(wchar_t *); +static void defout(void); +static void cpyunion(void); +static void cpycode(void); +static void cpyact(int); +static void lhsfill(wchar_t *); +static void rhsfill(wchar_t *); +static void lrprnt(void); +static void beg_debug(void); +static void end_toks(void); +static void end_debug(void); +static void exp_tokname(void); +static void exp_prod(void); +static void exp_ntok(void); +static void exp_nonterm(void); +static int defin(int, wchar_t *); +static int 
gettok(void); +static int chfind(int, wchar_t *); +static int skipcom(void); +static int findchtok(int); +static void put_prefix_define(char *); + + +/* storage of names */ + +/* + * initial block to place token and + * nonterminal names are stored + * points to initial block - more space + * is allocated as needed. + */ +static wchar_t cnamesblk0[CNAMSZ]; +static wchar_t *cnames = cnamesblk0; + +/* place where next name is to be put in */ +static wchar_t *cnamp = cnamesblk0; + +/* number of defined symbols output */ +static int ndefout = 3; + + /* storage of types */ +static int defunion = 0; /* union of types defined? */ +static int ntypes = 0; /* number of types defined */ +static wchar_t *typeset[NTYPES]; /* pointers to type tags */ + + /* symbol tables for tokens and nonterminals */ + +int ntokens = 0; +int ntoksz = NTERMS; +TOKSYMB *tokset; +int *toklev; + +int nnonter = -1; +NTSYMB *nontrst; +int nnontersz = NNONTERM; + +static int start; /* start symbol */ + + /* assigned token type values */ +static int extval = 0; + + /* input and output file descriptors */ + +FILE *finput; /* yacc input file */ +FILE *faction; /* file for saving actions */ +FILE *fdefine; /* file for # defines */ +FILE *ftable; /* y.tab.c file */ +FILE *ftemp; /* tempfile to pass 2 */ +FILE *fdebug; /* where the strings for debugging are stored */ +FILE *foutput; /* y.output file */ + + /* output string */ + +static wchar_t *lhstext; +static wchar_t *rhstext; + + /* storage for grammar rules */ + +int *mem0; /* production storage */ +int *mem; +int *tracemem; +extern int *optimmem; +int new_memsize = MEMSIZE; +int nprod = 1; /* number of productions */ +int nprodsz = NPROD; + +int **prdptr; +int *levprd; +wchar_t *had_act; + +/* flag for generating the # line's default is yes */ +int gen_lines = 1; +int act_lines = 0; + +/* flag for whether to include runtime debugging */ +static int gen_testing = 0; + +/* flag for version stamping--default turned off */ +static char *v_stmp = "n"; + 
+int nmbchars = 0; /* number of mb literals in mbchars */ +MBCLIT *mbchars = (MBCLIT *) 0; /* array of mb literals */ +int nmbcharsz = 0; /* allocated space for mbchars */ + +void +setup(int argc, char *argv[]) +{ int ii, i, j, lev, t, ty; + /* ty is the sequencial number of token name in tokset */ + int c; + int *p; + char *cp; + wchar_t actname[8]; + unsigned int options = 0; + char *file_prefix = DEFAULT_PREFIX; + char *sym_prefix = ""; +#define F_NAME_LENGTH 4096 + char fname[F_NAME_LENGTH+1]; + + foutput = NULL; + fdefine = NULL; + i = 1; + + tokname = malloc(sizeof (wchar_t) * toksize); + tokset = malloc(sizeof (TOKSYMB) * ntoksz); + toklev = malloc(sizeof (int) * ntoksz); + nontrst = malloc(sizeof (NTSYMB) * nnontersz); + mem0 = malloc(sizeof (int) * new_memsize); + prdptr = malloc(sizeof (int *) * (nprodsz+2)); + levprd = malloc(sizeof (int) * (nprodsz+2)); + had_act = calloc(nprodsz + 2, sizeof (wchar_t)); + lhstext = malloc(sizeof (wchar_t) * LHS_TEXT_LEN); + rhstext = malloc(sizeof (wchar_t) * RHS_TEXT_LEN); + aryfil(toklev, ntoksz, 0); + aryfil(levprd, nprodsz, 0); + for (ii = 0; ii < ntoksz; ++ii) + tokset[ii].value = 0; + for (ii = 0; ii < nnontersz; ++ii) + nontrst[ii].tvalue = 0; + aryfil(mem0, new_memsize, 0); + mem = mem0; + tracemem = mem0; + + while ((c = getopt(argc, argv, "vVdltp:Q:Y:P:b:")) != EOF) + switch (c) { + case 'v': + options |= v_FLAG; + break; + case 'V': + fprintf(stderr, "yacc: %s , %s\n", pkg, rel); + break; + case 'Q': + v_stmp = optarg; + if (*v_stmp != 'y' && *v_stmp != 'n') + error("yacc: -Q should be followed by [y/n]"); + break; + case 'd': + options |= d_FLAG; + break; + case 'l': + gen_lines = 0; /* don't gen #lines */ + break; + case 't': + gen_testing = 1; /* set YYDEBUG on */ + break; + case 'Y': + cp = malloc(strlen(optarg) + sizeof ("/yaccpar") + 1); + cp = strcpy(cp, optarg); + parser = strcat(cp, "/yaccpar"); + break; + case 'P': + parser = optarg; + break; + case 'p': + if (strcmp(optarg, "yy") != 0) + sym_prefix 
= optarg; + else + sym_prefix = ""; + break; + case 'b': + file_prefix = optarg; + break; + case '?': + default: + fprintf(stderr, +"Usage: yacc [-vVdlt] [-Q(y/n)] [-P driver_file] file\n"); + exit(1); + } + /* + * Open y.output if -v is specified + */ + if (options & v_FLAG) { + strncpy(fname, + file_prefix, + F_NAME_LENGTH-strlen(".output")); + strcat(fname, ".output"); + foutput = fopen(fname, "w"); + if (foutput == NULL) + error("cannot open y.output"); + } + + /* + * Open y.tab.h if -d is specified + */ + if (options & d_FLAG) { + strncpy(fname, + file_prefix, + F_NAME_LENGTH-strlen(".tab.h")); + strcat(fname, ".tab.h"); + fdefine = fopen(fname, "w"); + if (fdefine == NULL) + error("cannot open y.tab.h"); + } + + fdebug = fopen(DEBUGNAME, "w"); + if (fdebug == NULL) + error("cannot open yacc.debug"); + /* + * Open y.tab.c + */ + strncpy(fname, + file_prefix, + F_NAME_LENGTH-strlen(".tab.c")); + strcat(fname, ".tab.c"); + ftable = fopen(fname, "w"); + if (ftable == NULL) + error("cannot open %s", fname); + + ftemp = fopen(TEMPNAME, "w"); + faction = fopen(ACTNAME, "w"); + if (ftemp == NULL || faction == NULL) + error("cannot open temp file"); + + if ((finput = fopen(infile = argv[optind], "r")) == NULL) + error("cannot open input file"); + + lineno = 1; + cnamp = cnames; + defin(0, L"$end"); + extval = 0400; + defin(0, L"error"); + defin(1, L"$accept"); + mem = mem0; + lev = 0; + ty = 0; + i = 0; + beg_debug(); /* initialize fdebug file */ + + /* + * sorry -- no yacc parser here..... + * we must bootstrap somehow... 
+ */ + + t = gettok(); + if (*v_stmp == 'y') + fprintf(ftable, "\ +#if __GNUC__ >= 3 && __GNUC_MINOR__ >= 4 || __GNUC__ >= 4\n\ +#define YYUSED __attribute__ ((used))\n\ +#elif defined __GNUC__\n\ +#define YYUSED __attribute__ ((unused))\n\ +#else\n\ +#define YYUSED\n\ +#endif\n\ +static const char yyident[] USED = \"yacc: %s\"\n", rel); + for (; t != MARK && t != ENDFILE; ) { + int tok_in_line; + switch (t) { + + case L';': + t = gettok(); + break; + + case START: + if ((t = gettok()) != IDENTIFIER) { + error("bad %%start construction"); + } + start = chfind(1, tokname); + t = gettok(); + continue; + + case TYPEDEF: + tok_in_line = 0; + if ((t = gettok()) != TYPENAME) + error("bad syntax in %%type"); + ty = numbval; + for (;;) { + t = gettok(); + switch (t) { + + case IDENTIFIER: + /* + * The following lines are idented to left. + */ + tok_in_line = 1; + if ((t = chfind(1, tokname)) < NTBASE) { + j = TYPE(toklev[t]); + if (j != 0 && j != ty) { + error("type redeclaration of token %ls", + tokset[t].name); + } + else + SETTYPE(toklev[t], ty); + } else { + j = nontrst[t-NTBASE].tvalue; + if (j != 0 && j != ty) { + error( + "type redeclaration of nonterminal %ls", + nontrst[t-NTBASE].name); + } + else + nontrst[t-NTBASE].tvalue = ty; + } + /* FALLTHRU */ + /* + * End Indentation + */ + case L',': + continue; + + case L';': + t = gettok(); + break; + default: + break; + } + if (!tok_in_line) + error( + "missing tokens or illegal tokens"); + break; + } + continue; + + case UNION: + /* copy the union declaration to the output */ + cpyunion(); + defunion = 1; + t = gettok(); + continue; + + case LEFT: + case BINARY: + case RIGHT: + i++; + /* FALLTHRU */ + case TERM: + tok_in_line = 0; + + /* nonzero means new prec. and assoc. 
*/ + lev = (t-TERM) | 04; + ty = 0; + + /* get identifiers so defined */ + + t = gettok(); + if (t == TYPENAME) { /* there is a type defined */ + ty = numbval; + t = gettok(); + } + + for (;;) { + switch (t) { + + case L',': + t = gettok(); + continue; + + case L';': + break; + + case IDENTIFIER: + tok_in_line = 1; + j = chfind(0, tokname); + if (j > NTBASE) { + error("%ls is not a token.", + tokname); + } + if (lev & ~04) { + if (ASSOC(toklev[j]) & ~04) + error( + "redeclaration of precedence of %ls", + tokname); + SETASC(toklev[j], lev); + SETPLEV(toklev[j], i); + } else { + if (ASSOC(toklev[j])) + warning(1, + "redeclaration of precedence of %ls.", + tokname); + SETASC(toklev[j], lev); + } + if (ty) { + if (TYPE(toklev[j])) + error( + "redeclaration of type of %ls", + tokname); + SETTYPE(toklev[j], ty); + } + if ((t = gettok()) == NUMBER) { + tokset[j].value = numbval; + if (j < ndefout && j > 2) { + error( + "type number of %ls should be defined earlier", + tokset[j].name); + } + if (numbval >= -YYFLAG1) { + error( + "token numbers must be less than %d", + -YYFLAG1); + } + t = gettok(); + } + continue; + + } + if (!tok_in_line) + error( + "missing tokens or illegal tokens"); + break; + } + continue; + + case LCURLY: + defout(); + cpycode(); + t = gettok(); + continue; + + default: + error("syntax error"); + + } + + } + + if (t == ENDFILE) { + error("unexpected EOF before %%%%"); + } + + /* t is MARK */ + + defout(); + end_toks(); /* all tokens dumped - get ready for reductions */ + + fprintf(ftable, "\n#ifdef __STDC__\n"); + fprintf(ftable, "#include <stdlib.h>\n"); + fprintf(ftable, "#include <string.h>\n"); + fprintf(ftable, "#define YYCONST const\n"); + fprintf(ftable, "#else\n"); + fprintf(ftable, "#include <malloc.h>\n"); + fprintf(ftable, "#include <memory.h>\n"); + fprintf(ftable, "#define YYCONST\n"); + fprintf(ftable, "#endif\n"); + + if (sym_prefix[0] != '\0') + put_prefix_define(sym_prefix); + + fprintf(ftable, "\n#if defined(__cplusplus) || 
defined(__STDC__)\n"); + fprintf(ftable, + "\n#if defined(__cplusplus) && defined(__EXTERN_C__)\n"); + fprintf(ftable, "extern \"C\" {\n"); + fprintf(ftable, "#endif\n"); + fprintf(ftable, "#ifndef yyerror\n"); + fprintf(ftable, "#if defined(__cplusplus)\n"); + fprintf(ftable, " void yyerror(YYCONST char *);\n"); + fprintf(ftable, "#endif\n"); + fprintf(ftable, "#endif\n"); + fprintf(ftable, "#ifndef yylex\n"); + fprintf(ftable, " int yylex(void);\n"); + fprintf(ftable, "#endif\n"); + fprintf(ftable, " int yyparse(void);\n"); + fprintf(ftable, "#if defined(__cplusplus) && defined(__EXTERN_C__)\n"); + fprintf(ftable, "}\n"); + fprintf(ftable, "#endif\n"); + fprintf(ftable, "\n#endif\n\n"); + + fprintf(ftable, "#define yyclearin yychar = -1\n"); + fprintf(ftable, "#define yyerrok yyerrflag = 0\n"); + fprintf(ftable, "extern int yychar;\nextern int yyerrflag;\n"); + if (!(defunion || ntypes)) + fprintf(ftable, + "#ifndef YYSTYPE\n#define YYSTYPE int\n#endif\n"); + fprintf(ftable, "YYSTYPE yylval;\n"); + fprintf(ftable, "YYSTYPE yyval;\n"); + fprintf(ftable, "typedef int yytabelem;\n"); + fprintf(ftable, + "#ifndef YYMAXDEPTH\n#define YYMAXDEPTH 150\n#endif\n"); + fprintf(ftable, "#if YYMAXDEPTH > 0\n"); + fprintf(ftable, "int yy_yys[YYMAXDEPTH], *yys = yy_yys;\n"); + fprintf(ftable, "YYSTYPE yy_yyv[YYMAXDEPTH], *yyv = yy_yyv;\n"); + fprintf(ftable, "#else /* user does initial allocation */\n"); + fprintf(ftable, "int *yys;\nYYSTYPE *yyv;\n#endif\n"); + fprintf(ftable, "static int yymaxdepth = YYMAXDEPTH;\n"); + + prdptr[0] = mem; + /* added production */ + *mem++ = NTBASE; + + /* if start is 0, we will overwrite with the lhs of the first rule */ + *mem++ = start; + *mem++ = 1; + *mem++ = 0; + prdptr[1] = mem; + + while ((t = gettok()) == LCURLY) + cpycode(); + + if (t != C_IDENTIFIER) + error("bad syntax on first rule"); + + if (!start) + prdptr[0][1] = chfind(1, tokname); + + /* read rules */ + + while (t != MARK && t != ENDFILE) { + + /* process a rule */ + + if (t 
== L'|') { + rhsfill((wchar_t *)0); /* restart fill of rhs */ + *mem = *prdptr[nprod-1]; + if (++mem >= &tracemem[new_memsize]) + exp_mem(1); + } else if (t == C_IDENTIFIER) { + *mem = chfind(1, tokname); + if (*mem < NTBASE) + error("illegal nonterminal in grammar rule"); + if (++mem >= &tracemem[new_memsize]) + exp_mem(1); + lhsfill(tokname); /* new rule: restart strings */ + } else + error("illegal rule: missing semicolon or | ?"); + + /* read rule body */ + + + t = gettok(); + more_rule: + while (t == IDENTIFIER) { + *mem = chfind(1, tokname); + if (*mem < NTBASE) + levprd[nprod] = toklev[*mem]& ~04; + if (++mem >= &tracemem[new_memsize]) + exp_mem(1); + rhsfill(tokname); /* add to rhs string */ + t = gettok(); + } + + if (t == PREC) { + if (gettok() != IDENTIFIER) + error("illegal %%prec syntax"); + j = chfind(2, tokname); + if (j >= NTBASE) + error("nonterminal %ls illegal after %%prec", + nontrst[j-NTBASE].name); + levprd[nprod] = toklev[j] & ~04; + t = gettok(); + } + + if (t == L'=') { + had_act[nprod] = 1; + levprd[nprod] |= ACTFLAG; + fprintf(faction, "\ncase %d:", nprod); + cpyact(mem-prdptr[nprod] - 1); + fprintf(faction, " break;"); + if ((t = gettok()) == IDENTIFIER) { + /* action within rule... 
*/ + + lrprnt(); /* dump lhs, rhs */ + swprintf(actname, sizeof actname, + L"$$%d", nprod); + /* + * make it nonterminal + */ + j = chfind(1, actname); + + /* + * the current rule will become rule + * number nprod+1 move the contents down, + * and make room for the null + */ + + if (mem + 2 >= &tracemem[new_memsize]) + exp_mem(1); + for (p = mem; p >= prdptr[nprod]; --p) + p[2] = *p; + mem += 2; + + /* enter null production for action */ + + p = prdptr[nprod]; + + *p++ = j; + *p++ = -nprod; + + /* update the production information */ + + levprd[nprod+1] = levprd[nprod] & ~ACTFLAG; + levprd[nprod] = ACTFLAG; + + if (++nprod >= nprodsz) + exp_prod(); + prdptr[nprod] = p; + + /* + * make the action appear in + * the original rule + */ + *mem++ = j; + if (mem >= &tracemem[new_memsize]) + exp_mem(1); + /* get some more of the rule */ + goto more_rule; + } + } + while (t == L';') + t = gettok(); + *mem++ = -nprod; + if (mem >= &tracemem[new_memsize]) + exp_mem(1); + + /* check that default action is reasonable */ + + if (ntypes && !(levprd[nprod] & ACTFLAG) && + nontrst[*prdptr[nprod]-NTBASE].tvalue) { + /* no explicit action, LHS has value */ + register int tempty; + tempty = prdptr[nprod][1]; + if (tempty < 0) + error("must return a value, since LHS has a type"); + else if (tempty >= NTBASE) + tempty = nontrst[tempty-NTBASE].tvalue; + else + tempty = TYPE(toklev[tempty]); + if (tempty != nontrst[*prdptr[nprod]-NTBASE].tvalue) { + error( + "default action causes potential type clash"); + } + } + + if (++nprod >= nprodsz) + exp_prod(); + prdptr[nprod] = mem; + levprd[nprod] = 0; + } + /* end of all rules */ + + end_debug(); /* finish fdebug file's input */ + finact(); + if (t == MARK) { + if (gen_lines) + fprintf(ftable, "\n# line %d \"%s\"\n", + lineno, infile); + while ((c = getwc(finput)) != EOF) + putwc(c, ftable); + } + fclose(finput); +} + +static void +finact(void) +{ + /* finish action routine */ + fclose(faction); + fprintf(ftable, "# define YYERRCODE %d\n", 
tokset[2].value); +} + +static wchar_t * +cstash(s) +register wchar_t *s; +{ + wchar_t *temp; + static int used = 0; + static int used_save = 0; + static int exp_cname = CNAMSZ; + int len = wcslen(s); + + /* + * 2/29/88 - + * Don't need to expand the table, just allocate new space. + */ + used_save = used; + while (len >= (exp_cname - used_save)) { + exp_cname += CNAMSZ; + if (!used) + free(cnames); + if ((cnames = malloc(sizeof (wchar_t)*exp_cname)) == NULL) + error("cannot expand string dump"); + cnamp = cnames; + used = 0; + } + + temp = cnamp; + do { + *cnamp++ = *s; + } while (*s++); + used += cnamp - temp; + return (temp); +} + +static int +defin(int t, register wchar_t *s) +{ + /* define s to be a terminal if t=0 or a nonterminal if t=1 */ + + register int val = 0; + + if (t) { + if (++nnonter >= nnontersz) + exp_nonterm(); + nontrst[nnonter].name = cstash(s); + return (NTBASE + nnonter); + } + /* must be a token */ + if (++ntokens >= ntoksz) + exp_ntok(); + tokset[ntokens].name = cstash(s); + + /* establish value for token */ + + if (s[0] == L' ' && s[2] == 0) { /* single character literal */ + val = findchtok(s[1]); + } else if (s[0] == L' ' && s[1] == L'\\') { /* escape sequence */ + if (s[3] == 0) { /* single character escape sequence */ + switch (s[2]) { + /* character which is escaped */ + case L'a': + warning(1, + "\\a is ANSI C \"alert\" character"); +#if __STDC__ - 1 == 0 + val = L'\a'; + break; +#else + val = L'\007'; + break; +#endif + case L'v': val = L'\v'; break; + case L'n': val = L'\n'; break; + case L'r': val = L'\r'; break; + case L'b': val = L'\b'; break; + case L't': val = L'\t'; break; + case L'f': val = L'\f'; break; + case L'\'': val = L'\''; break; + case L'"': val = L'"'; break; + case L'?': val = L'?'; break; + case L'\\': val = L'\\'; break; + default: error("invalid escape"); + } + } else if (s[2] <= L'7' && s[2] >= L'0') { /* \nnn sequence */ + int i = 3; + val = s[2] - L'0'; + while (iswdigit(s[i]) && i <= 4) { + if (s[i] >= 
L'0' && s[i] <= L'7') + val = val * 8 + s[i] - L'0'; + else + error("illegal octal number"); + i++; + } + if (s[i] != 0) + error("illegal \\nnn construction"); + if (val > 255) + error( +"\\nnn exceed \\377; use \\xnnnnnnnn for wchar_t value of multibyte char"); + if (val == 0 && i >= 4) + error("'\\000' is illegal"); + } else if (s[2] == L'x') { /* hexadecimal \xnnn sequence */ + int i = 3; + val = 0; + warning(1, "\\x is ANSI C hex escape"); + if (iswxdigit(s[i])) + while (iswxdigit(s[i])) { + int tmpval; + if (iswdigit(s[i])) + tmpval = s[i] - L'0'; + else if (s[i] >= L'a') + tmpval = s[i] - L'a' + 10; + else + tmpval = s[i] - L'A' + 10; + val = 16 * val + tmpval; + i++; + } + else + error("illegal hexadecimal number"); + if (s[i] != 0) + error("illegal \\xnn construction"); +#define LWCHAR_MAX 0x7fffffff + if ((unsigned)val > LWCHAR_MAX) + error(" \\xnnnnnnnn exceed %#x", LWCHAR_MAX); + if (val == 0) + error("'\\x00' is illegal"); + val = findchtok(val); + } else + error("invalid escape"); + } else { + val = extval++; + } + tokset[ntokens].value = val; + toklev[ntokens] = 0; + return (ntokens); +} + +static void +defout(void) +{ + /* write out the defines (at the end of the declaration section) */ + + register int i, c; + register wchar_t *cp; + + for (i = ndefout; i <= ntokens; ++i) { + + cp = tokset[i].name; + if (*cp == L' ') /* literals */ + { + fprintf(fdebug, "\t\"%ls\",\t%d,\n", + tokset[i].name + 1, tokset[i].value); + continue; /* was cp++ */ + } + + for (; (c = *cp) != 0; ++cp) { + if (iswlower(c) || iswupper(c) || + iswdigit(c) || c == L'_') /* EMPTY */; + else + goto nodef; + } + + fprintf(fdebug, "\t\"%ls\",\t%d,\n", tokset[i].name, + tokset[i].value); + fprintf(ftable, "# define %ls %d\n", tokset[i].name, + tokset[i].value); + if (fdefine != NULL) + fprintf(fdefine, "# define %ls %d\n", + tokset[i].name, + tokset[i].value); + + nodef:; + } + ndefout = ntokens+1; +} + +static int +gettok(void) +{ + register int i, base; + static int peekline; /* 
number of '\n' seen in lookahead */ + register int c, match, reserve; +begin: + reserve = 0; + lineno += peekline; + peekline = 0; + c = getwc(finput); + /* + * while (c == ' ' || c == '\n' || c == '\t' || c == '\f') { + */ + while (iswspace(c)) { + if (c == L'\n') + ++lineno; + c = getwc(finput); + } + if (c == L'/') { /* skip comment */ + lineno += skipcom(); + goto begin; + } + + switch (c) { + + case EOF: + return (ENDFILE); + case L'{': + ungetwc(c, finput); + return (L'='); /* action ... */ + case L'<': /* get, and look up, a type name (union member name) */ + i = 0; + while ((c = getwc(finput)) != L'>' && + c != EOF && c != L'\n') { + tokname[i] = c; + if (++i >= toksize) + exp_tokname(); + } + if (c != L'>') + error("unterminated < ... > clause"); + tokname[i] = 0; + if (i == 0) + error("missing type name in < ... > clause"); + for (i = 1; i <= ntypes; ++i) { + if (!wcscmp(typeset[i], tokname)) { + numbval = i; + return (TYPENAME); + } + } + typeset[numbval = ++ntypes] = cstash(tokname); + return (TYPENAME); + + case L'"': + case L'\'': + match = c; + tokname[0] = L' '; + i = 1; + for (;;) { + c = getwc(finput); + if (c == L'\n' || c == EOF) + error("illegal or missing ' or \""); + if (c == L'\\') { + c = getwc(finput); + tokname[i] = L'\\'; + if (++i >= toksize) + exp_tokname(); + } else if (c == match) break; + tokname[i] = c; + if (++i >= toksize) + exp_tokname(); + } + break; + + case L'%': + case L'\\': + + switch (c = getwc(finput)) { + + case L'0': return (TERM); + case L'<': return (LEFT); + case L'2': return (BINARY); + case L'>': return (RIGHT); + case L'%': + case L'\\': return (MARK); + case L'=': return (PREC); + case L'{': return (LCURLY); + default: reserve = 1; + } + + default: + + if (iswdigit(c)) { /* number */ + numbval = c - L'0'; + base = (c == L'0') ? 
8 : 10; + for (c = getwc(finput); + iswdigit(c); + c = getwc(finput)) { + numbval = numbval*base + c - L'0'; + } + ungetwc(c, finput); + return (NUMBER); + } else if (iswlower(c) || iswupper(c) || + c == L'_' || c == L'.' || + c == L'$') { + i = 0; + while (iswlower(c) || iswupper(c) || + iswdigit(c) || c == L'_' || + c == L'.' || c == L'$') { + tokname[i] = c; + if (reserve && iswupper(c)) + tokname[i] = towlower(c); + if (++i >= toksize) + exp_tokname(); + c = getwc(finput); + } + } + else + return (c); + + ungetwc(c, finput); + } + + tokname[i] = 0; + + if (reserve) { /* find a reserved word */ + if (!wcscmp(tokname, L"term")) + return (TERM); + if (!wcscmp(tokname, L"token")) + return (TERM); + if (!wcscmp(tokname, L"left")) + return (LEFT); + if (!wcscmp(tokname, L"nonassoc")) + return (BINARY); + if (!wcscmp(tokname, L"binary")) + return (BINARY); + if (!wcscmp(tokname, L"right")) + return (RIGHT); + if (!wcscmp(tokname, L"prec")) + return (PREC); + if (!wcscmp(tokname, L"start")) + return (START); + if (!wcscmp(tokname, L"type")) + return (TYPEDEF); + if (!wcscmp(tokname, L"union")) + return (UNION); + error("invalid escape, or illegal reserved word: %ls", tokname); + } + + /* look ahead to distinguish IDENTIFIER from C_IDENTIFIER */ + + c = getwc(finput); + /* + * while (c == ' ' || c == '\t' || c == '\n' || c == '\f' || c == '/') + * { + */ + while (iswspace(c) || c == L'/') { + if (c == L'\n') { + ++peekline; + } else if (c == L'/') { /* look for comments */ + peekline += skipcom(); + } + c = getwc(finput); + } + if (c == L':') + return (C_IDENTIFIER); + ungetwc(c, finput); + return (IDENTIFIER); +} + +static int +fdtype(int t) +{ + /* determine the type of a symbol */ + register int v; + if (t >= NTBASE) + v = nontrst[t-NTBASE].tvalue; + else + v = TYPE(toklev[t]); + if (v <= 0) + error("must specify type for %ls", + (t >= NTBASE) ? 
nontrst[t-NTBASE].name: + tokset[t].name); + return (v); +} + +static int +chfind(int t, register wchar_t *s) +{ + int i; + + if (s[0] == ' ') + t = 0; + TLOOP(i) { + if (!wcscmp(s, tokset[i].name)) { + return (i); + } + } + NTLOOP(i) { + if (!wcscmp(s, nontrst[i].name)) { + return (i + NTBASE); + } + } + /* cannot find name */ + if (t > 1) + error("%ls should have been defined earlier", s); + return (defin(t, s)); +} + +static void +cpyunion(void) +{ + /* + * copy the union declaration to the output, + * and the define file if present + */ + int level, c; + if (gen_lines) + fprintf(ftable, "\n# line %d \"%s\"\n", lineno, infile); + fprintf(ftable, "typedef union\n"); + if (fdefine) + fprintf(fdefine, "\ntypedef union\n"); + fprintf(ftable, "#ifdef __cplusplus\n\tYYSTYPE\n#endif\n"); + if (fdefine) + fprintf(fdefine, "#ifdef __cplusplus\n\tYYSTYPE\n#endif\n"); + + level = 0; + for (;;) { + if ((c = getwc(finput)) == EOF) + error("EOF encountered while processing %%union"); + putwc(c, ftable); + if (fdefine) + putwc(c, fdefine); + + switch (c) { + + case L'\n': + ++lineno; + break; + + case L'{': + ++level; + break; + + case L'}': + --level; + if (level == 0) { /* we are finished copying */ + fprintf(ftable, " YYSTYPE;\n"); + if (fdefine) + fprintf(fdefine, + " YYSTYPE;\nextern YYSTYPE yylval;\n"); + return; + } + } + } +} + +static void +cpycode(void) +{ + /* copies code between \{ and \} */ + + int c; + c = getwc(finput); + if (c == L'\n') { + c = getwc(finput); + lineno++; + } + if (gen_lines) + fprintf(ftable, "\n# line %d \"%s\"\n", lineno, infile); + while (c != EOF) { + if (c == L'\\') { + if ((c = getwc(finput)) == L'}') + return; + else + putwc(L'\\', ftable); + } else if (c == L'%') { + if ((c = getwc(finput)) == L'}') + return; + else + putwc(L'%', ftable); + } + putwc(c, ftable); + if (c == L'\n') + ++lineno; + c = getwc(finput); + } + error("eof before %%}"); +} + +static int +skipcom(void) +{ + /* skip over comments */ + register int c, i = 0; /* i is 
the number of lines skipped */ + + /* skipcom is called after reading a / */ + + if (getwc(finput) != L'*') + error("illegal comment"); + c = getwc(finput); + while (c != EOF) { + while (c == L'*') { + if ((c = getwc(finput)) == L'/') + return (i); + } + if (c == L'\n') + ++i; + c = getwc(finput); + } + error("EOF inside comment"); + /* NOTREACHED */ + return 0; +} + +static void +cpyact(int offset) +{ + /* copy C action to the next ; or closing } */ + int brac, c, match, i, t, j, s, tok, argument, m; + wchar_t id_name[NAMESIZE+1]; + int id_idx = 0; + + if (gen_lines) { + fprintf(faction, "\n# line %d \"%s\"\n", lineno, infile); + act_lines++; + } + brac = 0; + id_name[0] = 0; +loop: + c = getwc(finput); +swt: + switch (c) { + case L';': + if (brac == 0) { + putwc(c, faction); + return; + } + goto lcopy; + case L'{': + brac++; + goto lcopy; + case L'$': + s = 1; + tok = -1; + argument = 1; + while ((c = getwc(finput)) == L' ' || c == L'\t') /* EMPTY */; + if (c == L'<') { /* type description */ + ungetwc(c, finput); + if (gettok() != TYPENAME) + error("bad syntax on $<ident> clause"); + tok = numbval; + c = getwc(finput); + } + if (c == L'$') { + fprintf(faction, "yyval"); + if (ntypes) { /* put out the proper tag... 
*/ + if (tok < 0) + tok = fdtype(*prdptr[nprod]); + fprintf(faction, ".%ls", typeset[tok]); + } + goto loop; + } + if (iswalpha(c)) { + int same = 0; + int id_sw = 0; + ungetwc(c, finput); + if (gettok() != IDENTIFIER) + error("bad action format"); + /* + * Save the number of non-terminal + */ + id_sw = nnonter; + t = chfind(1, tokname); + /* + * Check if the identifier is added as a non-terminal + */ + if (id_sw != nnonter) + id_sw = 1; + else + id_sw = 0; + while ((c = getwc(finput)) == L' ' || + c == L'\t') /* EMPTY */; + if (c == L'#') { + while ((c = getwc(finput)) == L' ' || + c == L'\t') /* EMPTY */; + if (iswdigit(c)) { + m = 0; + while (iswdigit(c)) { + m = m*10+c-L'0'; + c = getwc(finput); + } + argument = m; + } else + error("illegal character \"#\""); + } + if (argument < 1) + error("illegal action argument no."); + for (i = 1; i <= offset; ++i) + if (prdptr[nprod][i] == t) + if (++same == argument) { + fprintf(faction, + "yypvt[-%d]", offset-i); + if (ntypes) { + if (tok < 0) + tok = + /* CSTYLED */ + fdtype(prdptr[nprod][i]); + fprintf(faction, + ".%ls", typeset[tok]); + } + goto swt; + } + /* + * This used to be handled as error. + * Treat this as a valid C statement. + * (Likely id with $ in.) + * If non-terminal is added, remove it from the list. 
+ */ + fprintf(faction, "$%ls", tokname); + warning(1, + "Illegal character '$' in Ansi C symbol: %ls$%ls.", + id_name, tokname); + + if (id_sw == 1) + --nnonter; + goto swt; + } + if (c == '-') { + s = -s; + c = getwc(finput); + } + if (iswdigit(c)) { + j = 0; + while (iswdigit(c)) { + j = j*10 + c - L'0'; + c = getwc(finput); + } + j = j*s - offset; + if (j > 0) { + error("Illegal use of $%d", j + offset); + } + fprintf(faction, "yypvt[-%d]", -j); + if (ntypes) { /* put out the proper tag */ + if (j + offset <= 0 && tok < 0) + error("must specify type of $%d", + j + offset); + if (tok < 0) + tok = fdtype(prdptr[nprod][j+offset]); + fprintf(faction, + ".%ls", typeset[tok]); + } + goto swt; + } + putwc(L'$', faction); + if (s < 0) + putwc(L'-', faction); + goto swt; + case L'}': + if (--brac) + goto lcopy; + putwc(c, faction); + return; + case L'/': /* look for comments */ + putwc(c, faction); + c = getwc(finput); + if (c != L'*') + goto swt; + /* it really is a comment */ + putwc(c, faction); + c = getwc(finput); + while (c != EOF) { + while (c == L'*') { + putwc(c, faction); + if ((c = getwc(finput)) == L'/') + goto lcopy; + } + putwc(c, faction); + if (c == L'\n') + ++lineno; + c = getwc(finput); + } + error("EOF inside comment"); + /* FALLTHRU */ + case L'\'': /* character constant */ + case L'"': /* character string */ + match = c; + putwc(c, faction); + while ((c = getwc(finput)) != EOF) { + if (c == L'\\') { + putwc(c, faction); + c = getwc(finput); + if (c == L'\n') + ++lineno; + } else if (c == match) + goto lcopy; + else if (c == L'\n') + error("newline in string or char. const."); + putwc(c, faction); + } + error("EOF in string or character constant"); + /* FALLTHRU */ + case EOF: + error("action does not terminate"); + /* FALLTHRU */ + case L'\n': + ++lineno; + goto lcopy; + } +lcopy: + putwc(c, faction); + /* + * Save the possible identifier name. + * Used to print out a warning message. + */ + if (id_idx >= NAMESIZE) { + /* + * Error. Silently ignore. 
+ */ + ; + } + /* + * If c has a possibility to be a + * part of identifier, save it. + */ + else if (iswalnum(c) || c == L'_') { + id_name[id_idx++] = c; + id_name[id_idx] = 0; + } else { + id_idx = 0; + id_name[id_idx] = 0; + } + goto loop; +} + +static void +lhsfill(s) /* new rule, dump old (if exists), restart strings */ +wchar_t *s; +{ + static int lhs_len = LHS_TEXT_LEN; + int s_lhs = wcslen(s); + if (s_lhs >= lhs_len) { + lhs_len = s_lhs + 2; + lhstext = realloc(lhstext, sizeof (wchar_t)*lhs_len); + if (lhstext == NULL) + error("couldn't expanded LHS length"); + } + rhsfill(NULL); + wcscpy(lhstext, s); /* don't worry about too long of a name */ +} + +static void +rhsfill(wchar_t *s) /* either name or 0 */ +{ + static wchar_t *loc; /* next free location in rhstext */ + static int rhs_len = RHS_TEXT_LEN; + static int used = 0; + int s_rhs = (s == NULL ? 0 : wcslen(s)); + register wchar_t *p; + + if (!s) /* print out and erase old text */ + { + if (*lhstext) /* there was an old rule - dump it */ + lrprnt(); + (loc = rhstext)[0] = 0; + return; + } + /* add to stuff in rhstext */ + p = s; + + used = loc - rhstext; + if ((s_rhs + 3) >= (rhs_len - used)) { + static wchar_t *textbase; + textbase = rhstext; + rhs_len += s_rhs + RHS_TEXT_LEN; + rhstext = realloc(rhstext, sizeof (wchar_t)*rhs_len); + if (rhstext == NULL) + error("couldn't expanded RHS length"); + loc = loc - textbase + rhstext; + } + + *loc++ = L' '; + if (*s == L' ') /* special quoted symbol */ + { + *loc++ = L'\''; /* add first quote */ + p++; + } + while (*loc = *p++) + if (loc++ > &rhstext[ RHS_TEXT_LEN ] - 3) + break; + + if (*s == L' ') + *loc++ = L'\''; + *loc = 0; /* terminate the string */ +} + +static void +lrprnt (void) /* print out the left and right hand sides */ +{ + wchar_t *rhs; + wchar_t *m_rhs = NULL; + + if (!*rhstext) /* empty rhs - print usual comment */ + rhs = L" /* empty */"; + else { + int idx1; /* tmp idx used to find if there are d_quotes */ + int idx2; /* tmp idx used to 
generate escaped string */ + wchar_t *p; + /* + * Check if there are any double quote in RHS. + */ + for (idx1 = 0; rhstext[idx1] != 0; idx1++) { + if (rhstext[idx1] == L'"') { + /* + * A double quote is found. + */ + idx2 = wcslen(rhstext)*2; + p = m_rhs = malloc((idx2 + 1)*sizeof (wchar_t)); + if (m_rhs == NULL) + error( + "Couldn't allocate memory for RHS."); + /* + * Copy string + */ + for (idx2 = 0; rhstext[idx2] != 0; idx2++) { + /* + * Check if this quote is escaped or not + */ + if (rhstext[idx2] == L'"') { + int tmp_l = idx2-1; + int cnt = 0; + while (tmp_l >= 0 && + rhstext[tmp_l] == '\\') { + cnt++; + tmp_l--; + } + /* + * If quote is not escaped, + * then escape it. + */ + if (cnt%2 == 0) + *p++ = L'\\'; + } + *p++ = rhstext[idx2]; + } + *p = 0; + /* + * Break from the loop + */ + break; + } + } + if (m_rhs == NULL) + rhs = rhstext; + else + rhs = m_rhs; + } + fprintf(fdebug, "\t\"%ls :%ls\",\n", lhstext, rhs); + if (m_rhs) + free(m_rhs); +} + + +static void +beg_debug (void) /* dump initial sequence for fdebug file */ +{ + fprintf(fdebug, "typedef struct\n"); + fprintf(fdebug, "#ifdef __cplusplus\n\tyytoktype\n"); + fprintf(fdebug, "#endif\n{\n"); + fprintf(fdebug, "#ifdef __cplusplus\nconst\n#endif\n"); + fprintf(fdebug, "char *t_name; int t_val; } yytoktype;\n"); + fprintf(fdebug, "#ifndef YYDEBUG\n#\tdefine YYDEBUG\t%d", gen_testing); + fprintf(fdebug, "\t/*%sallow debugging */\n#endif\n\n", + gen_testing ? 
" " : " don't "); + fprintf(fdebug, "#if YYDEBUG\n\nyytoktype yytoks[] =\n{\n"); +} + + +static void +end_toks (void) /* finish yytoks array, get ready for yyred's strings */ +{ + fprintf(fdebug, "\t\"-unknown-\",\t-1\t/* ends search */\n"); + fprintf(fdebug, "};\n\n"); + fprintf(fdebug, + "#ifdef __cplusplus\nconst\n#endif\n"); + fprintf(fdebug, "char * yyreds[] =\n{\n"); + fprintf(fdebug, "\t\"-no such reduction-\",\n"); +} + + +static void +end_debug (void) /* finish yyred array, close file */ +{ + lrprnt(); /* dump last lhs, rhs */ + fprintf(fdebug, "};\n#endif /* YYDEBUG */\n"); + fclose(fdebug); +} + + +/* + * 2/29/88 - + * The normal length for token sizes is NAMESIZE - If a token is + * seen that has a longer length, expand "tokname" by NAMESIZE. + */ +static void +exp_tokname(void) +{ + toksize += NAMESIZE; + tokname = realloc(tokname, sizeof (wchar_t) * toksize); +} + + +/* + * 2/29/88 - + * + */ +static void +exp_prod(void) +{ + int i; + nprodsz += NPROD; + + prdptr = realloc(prdptr, sizeof (int *) * (nprodsz+2)); + levprd = realloc(levprd, sizeof (int) * (nprodsz+2)); + had_act = realloc(had_act, sizeof (wchar_t) * (nprodsz+2)); + for (i = nprodsz-NPROD; i < nprodsz+2; ++i) + had_act[i] = 0; + + if ((*prdptr == NULL) || (levprd == NULL) || (had_act == NULL)) + error("couldn't expand productions"); +} + +/* + * 2/29/88 - + * Expand the number of terminals. Initially there are NTERMS; + * each time space runs out, the size is increased by NTERMS. + * The total size, however, cannot exceed MAXTERMS because of + * the way LOOKSETS(struct looksets) is set up. 
+ * Tables affected: + * tokset, toklev : increased to ntoksz + * + * tables with initial dimensions of TEMPSIZE must be changed if + * (ntoksz + NNONTERM) >= TEMPSIZE : temp1[] + */ +static void +exp_ntok(void) +{ + ntoksz += NTERMS; + + tokset = realloc(tokset, sizeof (TOKSYMB) * ntoksz); + toklev = realloc(toklev, sizeof (int) * ntoksz); + + if ((tokset == NULL) || (toklev == NULL)) + error("couldn't expand NTERMS"); +} + + +static void +exp_nonterm(void) +{ + nnontersz += NNONTERM; + + nontrst = realloc(nontrst, sizeof (TOKSYMB) * nnontersz); + if (nontrst == NULL) + error("couldn't expand NNONTERM"); +} + +void +exp_mem(int flag) +{ + int i; + static int *membase; + new_memsize += MEMSIZE; + + membase = tracemem; + tracemem = realloc(tracemem, sizeof (int) * new_memsize); + if (tracemem == NULL) + error("couldn't expand mem table"); + if (flag) { + for (i = 0; i <= nprod; ++i) + prdptr[i] = prdptr[i] - membase + tracemem; + mem = mem - membase + tracemem; + } else { + size += MEMSIZE; + temp1 = realloc(temp1, sizeof (int)*size); + optimmem = optimmem - membase + tracemem; + } +} + +static int +findchtok(int chlit) +/* + * findchtok(chlit) returns the token number for a character literal + * chlit that is "bigger" than 255 -- the max char value that the + * original yacc was build for. This yacc treate them as though + * an ordinary token. + */ +{ + int i; + + if (chlit < 0xff) + return (chlit); /* single-byte char */ + for (i = 0; i < nmbchars; ++i) { + if (mbchars->character == chlit) + return (mbchars->tvalue); + } + + /* Not found. Register it! */ + if (++nmbchars > nmbcharsz) { /* Make sure there's enough space */ + nmbcharsz += NMBCHARSZ; + mbchars = realloc(mbchars, sizeof (MBCLIT)*nmbcharsz); + if (mbchars == NULL) + error("too many character literals"); + } + mbchars[nmbchars-1].character = chlit; + return (mbchars[nmbchars-1].tvalue = extval++); + /* Return the newly assigned token. 
*/ +} + +/* + * When -p is specified, symbol prefix for + * yy{parse, lex, error}(), + * yy{lval, val, char, debug, errflag, nerrs} + * are defined to the specified name. + */ +static void +put_prefix_define(char *pre) +{ + char *syms[] = { + /* Functions */ + "parse", + "lex", + "error", + /* Variables */ + "lval", + "val", + "char", + "debug", + "errflag", + "nerrs", + NULL}; + int i; + + for (i = 0; syms[i]; i++) + fprintf(ftable, "#define\tyy%s\t%s%s\n", + syms[i], pre, syms[i]); +} diff --git a/yacc/y3.c b/yacc/y3.c @@ -0,0 +1,568 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1990 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "y3.c 6.17 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany + * + * Sccsid @(#)y3.c 1.5 (gritter) 11/26/05 + */ + +#include "dextern" + +static void go2gen(int); +static void precftn(int, int, int); +static void wract(int); +static void wrstate(int); +static void wdef(wchar_t *, int); +#ifndef NOLIBW +static void wrmbchars(void); +#endif /* !NOLIBW */ + /* important local variables */ +static int lastred; /* number of the last reduction of a state */ +int *defact; +extern int *toklev; +extern int cwp; + +/* print the output for the states */ +void +output(void) +{ + int i, k, c; + register WSET *u, *v; + + fprintf(ftable, "static YYCONST yytabelem yyexca[] ={\n"); + + SLOOP(i) { /* output the stuff for state i */ + nolook = !(tystate[i] == MUSTLOOKAHEAD); + closure(i); + /* output actions */ + nolook = 1; + aryfil(temp1, ntoksz+nnontersz+1, 0); + WSLOOP(wsets, u) { + c = *(u->pitem); + if (c > 1 && c < NTBASE && temp1[c] == 0) { + WSLOOP(u, v) { + if (c == *(v->pitem)) + putitem(v->pitem + 1, + (LOOKSETS *)0); + } + temp1[c] = state(c); + } else if (c > NTBASE && + temp1[(c -= NTBASE) + ntokens] == 0) { + temp1[c + ntokens] = amem[indgo[i] + c]; + } + } + if (i == 1) + temp1[1] = ACCEPTCODE; + /* now, we have the shifts; look at the reductions */ + lastred = 0; + WSLOOP(wsets, u) { + c = *(u->pitem); + if (c <= 0) { /* reduction */ + lastred = -c; + TLOOP(k) { + if (BIT(u->ws.lset, k)) { + if (temp1[k] == 0) + temp1[k] = c; + else if (temp1[k] < 0) { + /* + * reduce/reduce + * conflict + */ + if (foutput != NULL) + fprintf(foutput, + "\n%d: reduce/reduce conflict" + " (red'ns %d and %d ) on %ls", + i, -temp1[k], + lastred, symnam(k)); + if (-temp1[k] > lastred) + temp1[k] = -lastred; + ++zzrrconf; + } else + /* + * potentia + * shift/reduce + * conflict. 
+ */ + precftn(lastred, k, i); + } + } + } + } + wract(i); + } + + fprintf(ftable, "\t};\n"); + wdef(L"YYNPROD", nprod); +#ifndef NOLIBW + if (nmbchars > 0) { + wrmbchars(); + } +#endif /* !NOLIBW */ +} + +static int pkdebug = 0; +int +apack(int *p, int n) +{ + /* pack state i from temp1 into amem */ + int off; + register int *pp, *qq; + int *q, /**r,*/ *rr; + int diff; + + /* + * we don't need to worry about checking because we + * we will only look up entries known to be there... + */ + + /* eliminate leading and trailing 0's */ + + q = p + n; + for (pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off) + /* EMPTY */; + if (pp > q) + return (0); /* no actions */ + p = pp; + + /* now, find a place for the elements from p to q, inclusive */ + /* for( rr=amem; rr<=r; ++rr,++off ){ */ /* try rr */ + rr = amem; + for (; ; ++rr, ++off) { + while (rr >= &amem[new_actsize-1]) + exp_act(&rr); + qq = rr; + for (pp = p; pp <= q; ++pp, ++qq) { + if (*pp) { + diff = qq - rr; + while (qq >= &amem[new_actsize-1]) { + exp_act(&rr); + qq = diff + rr; + } + if (*pp != *qq && *qq != 0) + goto nextk; + } + } + + /* we have found an acceptable k */ + + if (pkdebug && foutput != NULL) + fprintf(foutput, "off = %d, k = %d\n", off, rr-amem); + + qq = rr; + for (pp = p; pp <= q; ++pp, ++qq) { + if (*pp) { + diff = qq - rr; + while (qq >= &amem[new_actsize-1]) { + exp_act(&rr); + qq = diff + rr; + } + if (qq > memp) + memp = qq; + *qq = *pp; + } + } + if (pkdebug && foutput != NULL) { + for (pp = amem; pp <= memp; pp += 10) { + fprintf(foutput, "\t"); + for (qq = pp; qq <= pp + 9; ++qq) + fprintf(foutput, "%d ", *qq); + fprintf(foutput, "\n"); + } + } + return (off); + nextk:; + } + /* error("no space in action table" ); */ + /* NOTREACHED */ +} + +void +go2out(void) +{ + /* output the gotos for the nontermninals */ + int i, j, k, best, count, cbest, times; + + fprintf(ftemp, "$\n"); /* mark begining of gotos */ + + for (i = 1; i <= nnonter; ++i) { + go2gen(i); + /* find the best one to make 
default */ + best = -1; + times = 0; + for (j = 0; j < nstate; ++j) { /* is j the most frequent */ + if (tystate[j] == 0) + continue; + if (tystate[j] == best) + continue; + /* is tystate[j] the most frequent */ + count = 0; + cbest = tystate[j]; + for (k = j; k < nstate; ++k) + if (tystate[k] == cbest) + ++count; + if (count > times) { + best = cbest; + times = count; + } + } + + /* best is now the default entry */ + zzgobest += (times-1); + for (j = 0; j < nstate; ++j) { + if (tystate[j] != 0 && tystate[j] != best) { + fprintf(ftemp, "%d,%d,", j, tystate[j]); + zzgoent += 1; + } + } + + /* now, the default */ + + zzgoent += 1; + fprintf(ftemp, "%d\n", best); + + } +} + +static int g2debug = 0; +static void +go2gen(int c) +{ + /* output the gotos for nonterminal c */ + int i, work, cc; + ITEM *p, *q; + + /* first, find nonterminals with gotos on c */ + aryfil(temp1, nnonter + 1, 0); + temp1[c] = 1; + + work = 1; + while (work) { + work = 0; + PLOOP(0, i) { + if ((cc = prdptr[i][1] - NTBASE) >= 0) { + /* cc is a nonterminal */ + if (temp1[cc] != 0) { + /* + * cc has a goto on c + * thus, the left side of + * production i does too. + */ + cc = *prdptr[i] - NTBASE; + if (temp1[cc] == 0) { + work = 1; + temp1[cc] = 1; + } + } + } + } + } + + /* now, we have temp1[c] = 1 if a goto on c in closure of cc */ + + if (g2debug && foutput != NULL) { + fprintf(foutput, "%ls: gotos on ", nontrst[c].name); + NTLOOP(i) if (temp1[i]) + fprintf(foutput, "%ls ", nontrst[i].name); + fprintf(foutput, "\n"); + } + + /* now, go through and put gotos into tystate */ + aryfil(tystate, nstate, 0); + SLOOP(i) { + ITMLOOP(i, p, q) { + if ((cc = *p->pitem) >= NTBASE) { + if (temp1[cc -= NTBASE]) { + /* goto on c is possible */ + tystate[i] = amem[indgo[i] + c]; + break; + } + } + } + } +} + +/* decide a shift/reduce conflict by precedence. 
*/ +static void +precftn(int r, int t, int s) +{ + + /* + * r is a rule number, t a token number + * the conflict is in state s + * temp1[t] is changed to reflect the action + */ + + int lp, lt, action; + + lp = levprd[r]; + lt = toklev[t]; + if (PLEVEL(lt) == 0 || PLEVEL(lp) == 0) { + /* conflict */ + if (foutput != NULL) + fprintf(foutput, + "\n%d: shift/reduce conflict" + " (shift %d, red'n %d) on %ls", + s, temp1[t], r, symnam(t)); + ++zzsrconf; + return; + } + if (PLEVEL(lt) == PLEVEL(lp)) + action = ASSOC(lt) & ~04; + else if (PLEVEL(lt) > PLEVEL(lp)) + action = RASC; /* shift */ + else + action = LASC; /* reduce */ + + switch (action) { + case BASC: /* error action */ + temp1[t] = ERRCODE; + return; + case LASC: /* reduce */ + temp1[t] = -r; + return; + } +} + +static void +wract(int i) +{ + /* output state i */ + /* temp1 has the actions, lastred the default */ + int p, p0, p1; + int ntimes, tred, count, j; + int flag; + + /* find the best choice for lastred */ + + lastred = 0; + ntimes = 0; + TLOOP(j) { + if (temp1[j] >= 0) + continue; + if (temp1[j] + lastred == 0) + continue; + /* count the number of appearances of temp1[j] */ + count = 0; + tred = -temp1[j]; + levprd[tred] |= REDFLAG; + TLOOP(p) { + if (temp1[p] + tred == 0) + ++count; + } + if (count > ntimes) { + lastred = tred; + ntimes = count; + } + } + + /* + * for error recovery, arrange that, if there is a shift on the + * error recovery token, `error', that the default be the error action + */ + if (temp1[2] > 0) + lastred = 0; + + /* clear out entries in temp1 which equal lastred */ + TLOOP(p) { + if (temp1[p] + lastred == 0) + temp1[p] = 0; + } + + wrstate(i); + defact[i] = lastred; + + flag = 0; + TLOOP(p0) { + if ((p1 = temp1[p0]) != 0) { + if (p1 < 0) { + p1 = -p1; + goto exc; + } else if (p1 == ACCEPTCODE) { + p1 = -1; + goto exc; + } else if (p1 == ERRCODE) { + p1 = 0; + goto exc; + exc: + if (flag++ == 0) + fprintf(ftable, "-1, %d,\n", i); + fprintf(ftable, + "\t%d, %d,\n", 
tokset[p0].value, p1); + ++zzexcp; + } else { + fprintf(ftemp, + "%d,%d,", tokset[p0].value, p1); + ++zzacent; + } + } + } + if (flag) { + defact[i] = -2; + fprintf(ftable, "\t-2, %d,\n", lastred); + } + fprintf(ftemp, "\n"); +} + +static void +wrstate(int i) +{ + /* writes state i */ + register int j0, j1; + register ITEM *pp, *qq; + register WSET *u; + + if (foutput == NULL) + return; + fprintf(foutput, "\nstate %d\n", i); + ITMLOOP(i, pp, qq) { + fprintf(foutput, "\t%ls\n", writem(pp->pitem)); + } + if (tystate[i] == MUSTLOOKAHEAD) { + /* print out empty productions in closure */ + WSLOOP(wsets + (pstate[i + 1] - pstate[i]), u) { + if (*(u->pitem) < 0) + fprintf(foutput, "\t%ls\n", writem(u->pitem)); + } + } + + /* check for state equal to another */ + TLOOP(j0) if ((j1 = temp1[j0]) != 0) { + fprintf(foutput, "\n\t%ls ", symnam(j0)); + if (j1 > 0) { /* shift, error, or accept */ + if (j1 == ACCEPTCODE) + fprintf(foutput, "accept"); + else if (j1 == ERRCODE) + fprintf(foutput, "error"); + else + fprintf(foutput, "shift %d", j1); + } + else + fprintf(foutput, "reduce %d", -j1); + } + + /* output the final production */ + if (lastred) + fprintf(foutput, "\n\t. reduce %d\n\n", lastred); + else + fprintf(foutput, "\n\t. 
error\n\n"); + + /* now, output nonterminal actions */ + j1 = ntokens; + for (j0 = 1; j0 <= nnonter; ++j0) { + if (temp1[++j1]) + fprintf(foutput, "\t%ls goto %d\n", + symnam(j0 + NTBASE), temp1[j1]); + } +} + +static void +wdef(wchar_t *s, int n) +{ + /* output a definition of s to the value n */ + fprintf(ftable, "# define %ls %d\n", s, n); +} + +void +warray(wchar_t *s, int *v, int n) +{ + register int i; + fprintf(ftable, "static YYCONST yytabelem %ls[]={\n", s); + for (i = 0; i < n; ) { + if (i % 10 == 0) + fprintf(ftable, "\n"); + fprintf(ftable, "%6d", v[i]); + if (++i == n) + fprintf(ftable, " };\n"); + else + fprintf(ftable, ","); + } +} + +void +hideprod(void) +{ + /* + * in order to free up the mem and amem arrays for the optimizer, + * and still be able to output yyr1, etc., after the sizes of + * the action array is known, we hide the nonterminals + * derived by productions in levprd. + */ + + register int i, j; + + j = 0; + levprd[0] = 0; + PLOOP(1, i) { + if (!(levprd[i] & REDFLAG)) { + ++j; + if (foutput != NULL) { + fprintf(foutput, + "Rule not reduced: %ls\n", + writem(prdptr[i])); + } + } + levprd[i] = *prdptr[i] - NTBASE; + } + if (j) + fprintf(stderr, "%d rules never reduced\n", j); +} + + +#ifndef NOLIBW +static int +cmpmbchars(MBCLIT *p, MBCLIT *q) +{ + /* Compare two MBLITs. */ + return ((p->character) - (q->character)); +} + +static void +wrmbchars(void) +{ + int i; + wdef(L"YYNMBCHARS", nmbchars); + qsort(mbchars, nmbchars, sizeof (*mbchars), + (int (*)(const void *, const void *))cmpmbchars); + fprintf(ftable, + "static struct{\n\twchar_t character;" + "\n\tint tvalue;\n}yymbchars[YYNMBCHARS]={\n"); + for (i = 0; i < nmbchars; ++i) { + fprintf(ftable, "\t{%#x,%d}", + (int)mbchars[i].character, mbchars[i].tvalue); + if (i < nmbchars - 1) { + /* Not the last. 
*/ + fprintf(ftable, ",\n"); + } + } + fprintf(ftable, "\n};\n"); +} +#endif /* !NOLIBW */ diff --git a/yacc/y4.c b/yacc/y4.c @@ -0,0 +1,485 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1990 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "y4.c 6.15 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany + * + * Sccsid @(#)y4.c 1.5 (gritter) 11/26/05 + */ + +#include "dextern" +#include <wchar.h> +#include <unistd.h> +#define NOMORE -1000 + +static void gin(int); +static void stin(int); +static void osummary(void); +static void aoutput(void); +static void arout(wchar_t *, int *, int); +static int nxti(void); +static int gtnm(void); + +static int *ggreed; +static int *pgo; +static int *yypgo; + +static int maxspr = 0; /* maximum spread of any entry */ +static int maxoff = 0; /* maximum offset into an array */ +int *optimmem; +static int *maxa; + +static int nxdb = 0; +static int adb = 0; + +void +callopt(void) +{ + register int i, *p, j, k, *q; + + ggreed = malloc(sizeof (int) * size); + pgo = malloc(sizeof (int) * size); + yypgo = &nontrst[0].tvalue; + + /* read the arrays from tempfile and set parameters */ + + if ((finput = fopen(TEMPNAME, "r")) == NULL) + error("optimizer cannot open tempfile"); + + optimmem = tracemem; + pgo[0] = 0; + temp1[0] = 0; + nstate = 0; + nnonter = 0; + for (;;) { + switch (gtnm()) { + + case L'\n': + temp1[++nstate] = (--optimmem) - tracemem; + /* FALLTHRU */ + + case L',': + continue; + + case L'$': + break; + + default: + error("bad tempfile"); + } + break; + } + + temp1[nstate] = yypgo[0] = (--optimmem) - tracemem; + + for (;;) { + switch (gtnm()) { + + case L'\n': + yypgo[++nnonter] = optimmem-tracemem; + /* FALLTHRU */ + case L',': + continue; + + case EOF: + break; + + default: + error("bad tempfile"); + } + break; + } + + yypgo[nnonter--] = (--optimmem) - tracemem; + + for (i = 0; i < nstate; ++i) { + k = 32000000; + j = 0; + q = tracemem + temp1[i+1]; + for (p = tracemem + temp1[i]; p < q; p += 2) { + if (*p > j) + j = *p; + if (*p < k) + k = *p; + } + if (k <= j) { + /* + * nontrivial situation + * temporarily, kill this for compatibility + */ + /* j -= k; j is now the range */ + if (k > maxoff) + maxoff = k; + } + tystate[i] = (temp1[i+1] - temp1[i]) + 2*j; + if (j > maxspr) + maxspr = j; + } + + /* initialize 
ggreed table */ + for (i = 1; i <= nnonter; ++i) { + ggreed[i] = 1; + j = 0; + /* minimum entry index is always 0 */ + q = tracemem + yypgo[i+1] -1; + for (p = tracemem + yypgo[i]; p < q; p += 2) { + ggreed[i] += 2; + if (*p > j) + j = *p; + } + ggreed[i] = ggreed[i] + 2*j; + if (j > maxoff) + maxoff = j; + } + + /* now, prepare to put the shift actions into the amem array */ + for (i = 0; i < new_actsize; ++i) + amem[i] = 0; + maxa = amem; + + for (i = 0; i < nstate; ++i) { + if (tystate[i] == 0 && adb > 1) + fprintf(ftable, "State %d: null\n", i); + indgo[i] = YYFLAG1; + } + + while ((i = nxti()) != NOMORE) { + if (i >= 0) + stin(i); + else + gin(-i); + } + + if (adb > 2) { /* print a array */ + for (p = amem; p <= maxa; p += 10) { + fprintf(ftable, "%4d ", p-amem); + for (i = 0; i < 10; ++i) + fprintf(ftable, "%4d ", p[i]); + fprintf(ftable, "\n"); + } + } + /* write out the output appropriate to the language */ + aoutput(); + osummary(); + ZAPFILE(TEMPNAME); +} + +static void +gin(int i) +{ + register int *r, *s, *q1, *q2; + int *p; + + /* enter gotos on nonterminal i into array amem */ + ggreed[i] = 0; + + q2 = tracemem + yypgo[i+1] - 1; + q1 = tracemem + yypgo[i]; + + /* now, find a place for it */ + + /* for( p=amem; p < &amem[new_actsize]; ++p ){ */ + p = amem; + for (;;) { + while (p >= &amem[new_actsize]) + exp_act(&p); + if (*p) + goto nextgp; + for (r = q1; r < q2; r += 2) { + s = p + *r + 1; + /* + * Check if action table needs to + * be expanded or not. If so, + * expand it. 
+ */ + while (s >= &amem[new_actsize]) { + exp_act(&p); + s = p + *r + 1; + } + if (*s) + goto nextgp; + if (s > maxa) { + while ((maxa = s) >= &amem[new_actsize]) + /* error( "amem array overflow" ); */ + exp_act(&p); + } + } + /* we have found a spot */ + *p = *q2; + if (p > maxa) { + while ((maxa = p) >= &amem[new_actsize]) + /* error("amem array overflow"); */ + exp_act(&p); + } + for (r = q1; r < q2; r += 2) { + s = p + *r + 1; + /* + * Check if action table needs to + * be expanded or not. If so, + * expand it. + */ + while (s >= &amem[new_actsize]) { + exp_act(&p); + s = p + *r + 1; + } + *s = r[1]; + } + + pgo[i] = p - amem; + if (adb > 1) + fprintf(ftable, + "Nonterminal %d, entry at %d\n", i, pgo[i]); + goto nextgi; + + nextgp: + ++p; + } + /* error( "cannot place goto %d\n", i ); */ + nextgi:; +} + +static void +stin(int i) +{ + register int *r, n, nn, flag, j, *q1, *q2; + int *s; + + tystate[i] = 0; + + /* Enter state i into the amem array */ + + q2 = tracemem + temp1[i + 1]; + q1 = tracemem + temp1[i]; + /* Find an acceptable place */ + + nn = -maxoff; + more: + for (n = nn; n < new_actsize; ++n) { + flag = 0; + for (r = q1; r < q2; r += 2) { + s = *r + n + amem; + if (s < amem) + goto nextn; + /* + * Check if action table needs to + * be expanded or not. If so, + * expand it. + */ + while (s >= &amem[new_actsize]) { + exp_act(NULL); + s = *r + n + amem; + } + if (*s == 0) + ++flag; + else if (*s != r[1]) + goto nextn; + } + + /* + * check that the position equals another + * only if the states are identical + */ + for (j = 0; j < nstate; ++j) { + if (indgo[j] == n) { + if (flag) + /* + * we have some disagreement. 
+ */ + goto nextn; + if (temp1[j+1] + temp1[i] == + temp1[j] + temp1[i+1]) { + /* states are equal */ + indgo[i] = n; + if (adb > 1) + fprintf(ftable, + "State %d: entry at" + " %d equals state %d\n", + i, n, j); + return; + } + goto nextn; /* we have some disagreement */ + } + } + + for (r = q1; r < q2; r += 2) { + while ((s = *r + n + amem) >= &amem[new_actsize]) { + /* + * error( "out of space"); + */ + exp_act(NULL); + } + if (s > maxa) + maxa = s; + if (*s != 0 && *s != r[1]) + error( + "clobber of amem array, pos'n %d, by %d", + s-amem, r[1]); + *s = r[1]; + } + indgo[i] = n; + if (adb > 1) + fprintf(ftable, + "State %d: entry at %d\n", i, indgo[i]); + return; + nextn:; + } + + /* error( "Error; failure to place state %d\n", i ); */ + exp_act(NULL); + nn = new_actsize - ACTSIZE; + goto more; + /* NOTREACHED */ +} + +static int +nxti(void) +{ + /* finds the next i */ + register int i, max, maxi = 0; + max = 0; + + for (i = 1; i <= nnonter; ++i) + if (ggreed[i] >= max) { + max = ggreed[i]; + maxi = -i; + } + + for (i = 0; i < nstate; ++i) + if (tystate[i] >= max) { + max = tystate[i]; + maxi = i; + } + if (nxdb) + fprintf(ftable, "nxti = %d, max = %d\n", maxi, max); + if (max == 0) + return (NOMORE); + else + return (maxi); +} + +static void +osummary(void) +{ + /* write summary */ + register int i, *p; + + if (foutput == NULL) + return; + i = 0; + for (p = maxa; p >= amem; --p) { + if (*p == 0) + ++i; + } + + fprintf(foutput, "Optimizer space used: input %d/%d, output %d/%d\n", + optimmem-tracemem + 1, new_memsize, maxa-amem + 1, new_actsize); + fprintf(foutput, "%d table entries, %d zero\n", (maxa-amem) + 1, i); + fprintf(foutput, + "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff); + +} + +static void +aoutput(void) +{ + /* this version is for C */ + /* write out the optimized parser */ + + fprintf(ftable, "# define YYLAST %d\n", maxa-amem + 1); + arout(L"yyact", amem, (maxa - amem) + 1); + arout(L"yypact", indgo, nstate); + arout(L"yypgo", pgo, 
nnonter + 1); +} + +static void +arout(wchar_t *s, int *v, int n) +{ + register int i; + + fprintf(ftable, "static YYCONST yytabelem %ls[]={\n", s); + for (i = 0; i < n; ) { + if (i % 10 == 0) + fprintf(ftable, "\n"); + fprintf(ftable, "%6d", v[i]); + if (++i == n) + fprintf(ftable, " };\n"); + else + fprintf(ftable, ","); + } +} + +static int +gtnm(void) +{ + register int s, val, c; + + /* read and convert an integer from the standard input */ + /* return the terminating character */ + /* blanks, tabs, and newlines are ignored */ + + s = 1; + val = 0; + + while ((c = getwc(finput)) != EOF) { + if (iswdigit(c)) + val = val * 10 + c - L'0'; + else if (c == L'-') + s = -1; + else + break; + } + *optimmem++ = s*val; + if (optimmem >= &tracemem[new_memsize]) + exp_mem(0); + return (c); +} + +void +exp_act(int **ptr) +{ + static int *actbase; + int i; + new_actsize += ACTSIZE; + + actbase = amem; + amem = realloc(amem, sizeof (int) * new_actsize); + if (amem == NULL) + error("couldn't expand action table"); + + for (i = new_actsize-ACTSIZE; i < new_actsize; ++i) + amem[i] = 0; + if (ptr != NULL) + *ptr = *ptr - actbase + amem; + if (memp >= amem) + memp = memp - actbase + amem; + if (maxa >= amem) + maxa = maxa - actbase + amem; +} diff --git a/yacc/y5.c b/yacc/y5.c @@ -0,0 +1,70 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2005 Gunnar Ritter, Freiburg i. Br., Germany + * + * Sccsid @(#)y5.c 1.1 (gritter) 6/25/05 + */ +#include <stdlib.h> +#include <wchar.h> +#include <stdio.h> +#include <limits.h> +#include <errno.h> + +extern int error(char *, ...); + +/* + * This is like getwc() but issues an error message when an illegal + * byte sequence is encountered. + */ +wint_t +yacc_getwc(FILE *fp) +{ + wint_t wc; + + if ((wc = getwc(fp)) != WEOF) + return wc; + if (ferror(fp) && errno == EILSEQ) + error("illegal byte sequence"); + return wc; +} + +/* + * A substitute for putwc(), to ensure that stdio output FILE objects + * are always byte-oriented. + */ +wint_t +yacc_putwc(wchar_t wc, FILE *fp) +{ + char mb[MB_LEN_MAX]; + int i, n; + + if ((n = wctomb(mb, wc)) < 0) { + wctomb(mb, 0); + errno = EILSEQ; + return WEOF; + } + for (i = 0; i < n; i++) + if (putc(mb[i]&0377, fp) == EOF) + return WEOF; + return wc; +} diff --git a/yacc/yacc.1 b/yacc/yacc.1 @@ -0,0 +1,169 @@ +.\" +.\" Sccsid @(#)yacc.1 1.5 (gritter) 11/27/05 +.\" Derived from yacc(1), Unix 7th edition: +.\" Copyright(C) Caldera International Inc. 2001-2002. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" Redistributions of source code and documentation must retain the +.\" above copyright notice, this list of conditions and the following +.\" disclaimer. +.\" Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. 
+.\" All advertising materials mentioning features or use of this software +.\" must display the following acknowledgement: +.\" This product includes software developed or owned by Caldera +.\" International, Inc. +.\" Neither the name of Caldera International, Inc. nor the names of +.\" other contributors may be used to endorse or promote products +.\" derived from this software without specific prior written permission. +.\" +.\" USE OF THE SOFTWARE PROVIDED FOR UNDER THIS LICENSE BY CALDERA +.\" INTERNATIONAL, INC. AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR +.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL CALDERA INTERNATIONAL, INC. BE +.\" LIABLE FOR ANY DIRECT, INDIRECT INCIDENTAL, SPECIAL, EXEMPLARY, OR +.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +.\" BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +.\" WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +.\" OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +.\" EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +.\" +.TH YACC 1 "11/27/05" "Heirloom Development Tools" "User Commands" +.SH NAME +yacc \- yet another compiler-compiler +.SH SYNOPSIS +.HP +.ad l +.nh +\fByacc\fR [\fB\-vVdlt\fR] [\fB\-Q\fR(\fBy\fR|\fBn\fR)] +[\fB\-b\ \fIfile_prefix\fR] [\fB\-p\ \fIsym_prefix\fR] [\fB\-P\ \fIparser\fR] +[\fB\-Y\ \fIdirectory\fR] +\fIgrammar\fR +.br +.ad b +.SH DESCRIPTION +.I Yacc +converts a context-free grammar into a set of +tables for a simple automaton which executes an LR(1) parsing +algorithm. +The grammar may be ambiguous; +specified precedence rules are used to break ambiguities. +.PP +The output file, +.IR y.tab.c , +must be compiled by the C compiler +to produce a program +.IR yyparse . 
+This program must be loaded with the lexical analyzer program, +.IR yylex , +as well as +.I main +and +.IR yyerror , +an error handling routine. +These routines must be supplied by the user; +.IR Lex (1) +is useful for creating lexical analyzers usable by +.IR yacc . +.PP +The following options are accepted: +.TP +\fB\-b\ \fIfile_prefix\fR +Use +.I file_prefix +instead of `y' +when generating output files, +i.\|e. generate `\fIfile_prefix\fR.tab.c' and so forth. +This option was introduced by POSIX.2. +.TP +.B \-d +If this option is used, the file +.I y.tab.h +is generated with the +.I define +statements that associate the +\fIyacc\fR-assigned `token codes' with the user-declared `token names'. +This allows source files other than +.I y.tab.c +to access the token codes. +.TP +.B \-l +Do not emit `#line' preprocessor directives when writing +.IR y.tab.c . +These are normally inserted to facilitate debugging. +.TP +\fB\-p\ \fIsym_prefix\fR +Use +.I sym_prefix +instead of `yy' as the prefix for names with global scope, +i.\|e. generate `\fIsym_prefix\fRparse' and so forth. +This option was introduced by POSIX.2. +.TP +\fB\-P\ \fIdriver_file\fR +Use the file +.I driver_file +as driver file instead of `yaccpar'. +.TP +\fB\-Q\fR(\fBy\fR|\fBn\fR) +With +.BR \-Qy , +a version identification variable is put into y.tab.c. +With +.B \-Qn +(the default), no such variable is generated. +.TP +.B \-t +Enable run-time debugging code by default, +i.\|e. define the `YYDEBUG' preprocessor symbol. +.TP +.B \-v +If this option is given, the file +.I y.output +is prepared, which contains a description of the parsing tables +and a report on +conflicts generated by ambiguities in the grammar. +.TP +.B \-V +Causes version information for +.I yacc +to be printed. +.TP +\fB\-Y \fIdirectory\fR +Use the file `\fIdirectory\fR/yaccpar' as driver file. +This option is an extension. 
+.SH FILES +.ta \w'/usr/ccs/lib/yaccpar 'u +y.output +.br +y.tab.c +.br +y.tab.h defines for token names +.br +yacc.tmp, yacc.acts temporary files +.br +/usr/ccs/lib/yaccpar parser prototype for C programs +.br +/usr/ccs/lib/liby.a library with default `main' and `yyerror' +.SH "SEE ALSO" +.IR lex (1) +.br +.I "LR Parsing" +by A. V. Aho and S. C. Johnson, +Computing Surveys, June, 1974. +.br +.I "YACC \- Yet Another Compiler Compiler" +by S. C. Johnson. +.SH DIAGNOSTICS +The number of reduce-reduce and shift-reduce conflicts +is reported on the standard output; +a more detailed report is +found in the +.I y.output +file. +Similarly, if some rules are not reachable from the +start symbol, this is also reported. diff --git a/yacc/yaccpar b/yacc/yaccpar @@ -0,0 +1,565 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License, Version 1.0 only + * (the "License"). You may not use this file except in compliance + * with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright 1993 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ + +/* Copyright (c) 1988 AT&T */ +/* All Rights Reserved */ + +/* from OpenSolaris "yaccpar 6.18 05/06/08 SMI" */ + +/* + * Portions Copyright (c) 2005 Gunnar Ritter, Freiburg i. 
Br., Germany + * + * Sccsid @(#)yaccpar 1.5 (gritter) 11/26/05 + */ + +/* +** Skeleton parser driver for yacc output +*/ + +/* +** yacc user known macros and defines +*/ +#define YYERROR goto yyerrlab +#define YYACCEPT return(0) +#define YYABORT return(1) +/* +** YYBACKUP - push a token back as the lookahead. Refused with +** "syntax error - cannot backup" (and a jump to yyerrlab) when a +** lookahead token is already pending (yychar >= 0) or when the +** halved yyr2[] entry for the production in yytmp is not 1. +*/ +#define YYBACKUP( newtoken, newvalue )\ +{\ + if ( yychar >= 0 || ( yyr2[ yytmp ] >> 1 ) != 1 )\ + {\ + yyerror( "syntax error - cannot backup" );\ + goto yyerrlab;\ + }\ + yychar = newtoken;\ + yystate = *yyps;\ + yylval = newvalue;\ + goto yynewstate;\ +} +#define YYRECOVERING() (!!yyerrflag) +/* +** stack allocation helpers; their expansions refer to yynewmax +** (new element count) and yymaxdepth (current element count) +*/ +#define YYNEW(type) malloc(sizeof(type) * yynewmax) +#define YYCOPY(to, from, type) \ + (type *) memcpy(to, (char *) from, yymaxdepth * sizeof (type)) +#define YYENLARGE( from, type) \ + (type *) realloc((char *) from, yynewmax * sizeof(type)) +#ifndef YYDEBUG +# define YYDEBUG 1 /* make debugging available */ +#endif + +/* +** user known globals +*/ +int yydebug; /* set to 1 to get debugging */ + +/* +** driver internal defines +*/ +#define YYFLAG (-10000000) + +/* +** global variables used by the parser +*/ +YYSTYPE *yypv; /* top of value stack */ +int *yyps; /* top of state stack */ + +int yystate; /* current state */ +int yytmp; /* extra var (lasts between blocks) */ + +int yynerrs; /* number of errors */ +int yyerrflag; /* error recovery flag */ +int yychar; /* current input token number */ + + + +#ifdef YYNMBCHARS +#define YYLEX() yycvtok(yylex()) +/* +** yycvtok - return a token if i is a wchar_t value that exceeds 255. +** If i<255, i itself is the token. If i>255 but neither +** the 30th nor the 31st bit is on, i is already a token. +** Otherwise i is looked up in yymbchars[] (YYNMBCHARS entries) by +** binary search on .character and the matching .tvalue is returned; +** if i is larger than the last entry or is not found, i is returned +** unchanged. +*/ +#if defined(__STDC__) || defined(__cplusplus) +int yycvtok(int i) +#else +int yycvtok(i) int i; +#endif +{ + int first = 0; + int last = YYNMBCHARS - 1; + int mid; + wchar_t j; + + if(i&0x60000000){/*Must convert to a token. 
*/ + if( yymbchars[last].character < i ){ + return i;/*Giving up*/ + } + while ((last>=first)&&(first>=0)) {/*Binary search loop*/ + mid = (first+last)/2; + j = yymbchars[mid].character; + if( j==i ){/*Found*/ + return yymbchars[mid].tvalue; + }else if( j<i ){ + first = mid + 1; + }else{ + last = mid -1; + } + } + /*No entry in the table.*/ + return i;/* Giving up.*/ + }else{/* i is already a token. */ + return i; + } +} +#else/*!YYNMBCHARS*/ +#define YYLEX() yylex() +#endif/*!YYNMBCHARS*/ + +/* +** yyparse - return 0 if worked, 1 if syntax error not recovered from +** (1 is also returned when the stacks cannot be set up or grown) +*/ +#if defined(__STDC__) || defined(__cplusplus) +int yyparse(void) +#else +int yyparse() +#endif +{ + register YYSTYPE *yypvt = 0; /* top of value stack for $vars */ + +#if defined(__cplusplus) || defined(lint) || defined(__GNUC__) +/* + hacks to please C++, lint, and gcc - goto's inside + switch should never be executed + (the selector is initialised to -1, which matches neither case) +*/ + static int _yaccpar_lint_hack = -1; + switch (_yaccpar_lint_hack) + { + case 1: goto yyerrlab; + case 2: goto yynewstate; + } +#endif + + /* + ** Initialize externals - yyparse may be called more than once + ** Both stack pointers start one element before their arrays; + ** each is pre-incremented before a state/value is stored. + */ + yypv = &yyv[-1]; + yyps = &yys[-1]; + yystate = 0; + yytmp = 0; + yynerrs = 0; + yyerrflag = 0; + yychar = -1; + +#if YYMAXDEPTH <= 0 + /* + ** no positive compile-time depth: obtain the initial depth + ** from YYEXPAND(0) while yymaxdepth is not yet positive + */ + if (yymaxdepth <= 0) + { + if ((yymaxdepth = YYEXPAND(0)) <= 0) + { + yyerror("yacc initialization error"); + YYABORT; + } + } +#endif + + { + register YYSTYPE *yy_pv; /* top of value stack */ + register int *yy_ps; /* top of state stack */ + register int yy_state; /* current state */ + register int yy_n; /* internal state number info */ + goto yystack; /* moved from 6 lines above to here to please C++ */ + + /* + ** get globals into registers. + ** branch to here only if YYBACKUP was called. + */ + yynewstate: + yy_pv = yypv; + yy_ps = yyps; + yy_state = yystate; + goto yy_newstate; + + /* + ** get globals into registers. 
+ ** either we just started, or we just finished a reduction + */ + yystack: + yy_pv = yypv; + yy_ps = yyps; + yy_state = yystate; + + /* + ** top of for (;;) loop while no reductions done + */ + yy_stack: + /* + ** put a state and value onto the stacks + */ +#if YYDEBUG + /* + ** if debugging, look up token value in list of value vs. + ** name pairs. 0 and negative (-1) are special values. + ** Note: linear search is used since time is not a real + ** consideration while debugging. + ** If no entry matches, the search stops at the first entry + ** with a negative t_val and that entry's t_name is printed. + */ + if ( yydebug ) + { + register int yy_i; + + printf( "State %d, token ", yy_state ); + if ( yychar == 0 ) + printf( "end-of-file\n" ); + else if ( yychar < 0 ) + printf( "-none-\n" ); + else + { + for ( yy_i = 0; yytoks[yy_i].t_val >= 0; + yy_i++ ) + { + if ( yytoks[yy_i].t_val == yychar ) + break; + } + printf( "%s\n", yytoks[yy_i].t_name ); + } + } +#endif /* YYDEBUG */ + if ( ++yy_ps >= &yys[ yymaxdepth ] ) /* room on stack? */ + { + /* + ** reallocate and recover. Note that pointers + ** have to be reset, or bad things will happen + ** (positions are saved as indices and re-applied to + ** the new yys/yyv below) + */ + long yyps_index = (yy_ps - yys); + long yypv_index = (yy_pv - yyv); + long yypvt_index = (yypvt - yyv); + int yynewmax; +#ifdef YYEXPAND + yynewmax = YYEXPAND(yymaxdepth); +#else + yynewmax = 2 * yymaxdepth; /* double table size */ + /* + ** first growth copies into freshly malloc'ed tables + ** instead of calling realloc - presumably because the + ** initial yys/yyv are not heap memory; confirm against + ** the generated tables. + ** NOTE(review): if only one of the two YYNEW calls + ** succeeds, the successful allocation is not freed. + */ + if (yymaxdepth == YYMAXDEPTH) /* first time growth */ + { + char *newyys = (char *)YYNEW(int); + char *newyyv = (char *)YYNEW(YYSTYPE); + if (newyys != 0 && newyyv != 0) + { + yys = YYCOPY(newyys, yys, int); + yyv = YYCOPY(newyyv, yyv, YYSTYPE); + } + else + yynewmax = 0; /* failed */ + } + else /* not first time */ + { + /* + ** NOTE(review): yys/yyv are overwritten with the + ** realloc result, so on failure the previous + ** pointer is lost and the global is left null. + */ + yys = YYENLARGE(yys, int); + yyv = YYENLARGE(yyv, YYSTYPE); + if (yys == 0 || yyv == 0) + yynewmax = 0; /* failed */ + } +#endif + if (yynewmax <= yymaxdepth) /* tables not expanded */ + { + yyerror( "yacc stack overflow" ); + YYABORT; + } + yymaxdepth = yynewmax; + + yy_ps = yys + yyps_index; + yy_pv = yyv + yypv_index; + yypvt = yyv + yypvt_index; + } + *yy_ps = yy_state; + *++yy_pv = yyval; + 
+ /* + ** we have a new state - find out what to do + */ + yy_newstate: + if ( ( yy_n = yypact[ yy_state ] ) <= YYFLAG ) + goto yydefault; /* simple state */ +#if YYDEBUG + /* + ** if debugging, need to mark whether new token grabbed + */ + yytmp = yychar < 0; +#endif + if ( ( yychar < 0 ) && ( ( yychar = YYLEX() ) < 0 ) ) + yychar = 0; /* reached EOF */ +#if YYDEBUG + if ( yydebug && yytmp ) + { + register int yy_i; + + printf( "Received token " ); + if ( yychar == 0 ) + printf( "end-of-file\n" ); + else if ( yychar < 0 ) + printf( "-none-\n" ); + else + { + for ( yy_i = 0; yytoks[yy_i].t_val >= 0; + yy_i++ ) + { + if ( yytoks[yy_i].t_val == yychar ) + break; + } + printf( "%s\n", yytoks[yy_i].t_name ); + } + } +#endif /* YYDEBUG */ + if ( ( ( yy_n += yychar ) < 0 ) || ( yy_n >= YYLAST ) ) + goto yydefault; + if ( yychk[ yy_n = yyact[ yy_n ] ] == yychar ) /*valid shift*/ + { + /* consume the token; each valid shift also counts yyerrflag down by one */ + yychar = -1; + yyval = yylval; + yy_state = yy_n; + if ( yyerrflag > 0 ) + yyerrflag--; + goto yy_stack; + } + + /* + ** no shift possible: take the default action from yydef[]; + ** the value -2 means the exception table must be consulted + */ + yydefault: + if ( ( yy_n = yydef[ yy_state ] ) == -2 ) + { +#if YYDEBUG + yytmp = yychar < 0; +#endif + if ( ( yychar < 0 ) && ( ( yychar = YYLEX() ) < 0 ) ) + yychar = 0; /* reached EOF */ +#if YYDEBUG + if ( yydebug && yytmp ) + { + register int yy_i; + + printf( "Received token " ); + if ( yychar == 0 ) + printf( "end-of-file\n" ); + else if ( yychar < 0 ) + printf( "-none-\n" ); + else + { + for ( yy_i = 0; + yytoks[yy_i].t_val >= 0; + yy_i++ ) + { + if ( yytoks[yy_i].t_val + == yychar ) + { + break; + } + } + printf( "%s\n", yytoks[yy_i].t_name ); + } + } +#endif /* YYDEBUG */ + /* + ** look through exception table + ** yyexca is read as pairs: first find the pair + ** (-1, yy_state), then step through the following + ** pairs until one starts with a negative value or + ** with yychar; its second element is the action, + ** and a negative action means accept. + */ + { + register YYCONST int *yyxi = yyexca; + + while ( ( *yyxi != -1 ) || + ( yyxi[1] != yy_state ) ) + { + yyxi += 2; + } + while ( ( *(yyxi += 2) >= 0 ) && + ( *yyxi != yychar ) ) + ; + if ( ( yy_n = yyxi[1] ) < 0 ) + YYACCEPT; + } + } + + /* + ** check for syntax error + */ + if ( yy_n == 0 ) /* have an error */ + { + /* no worry about speed here! 
*/ + switch ( yyerrflag ) + { + case 0: /* new error */ + yyerror( "syntax error" ); + goto skip_init; + yyerrlab: + /* + ** get globals into registers. + ** we have a user generated syntax type error + */ + yy_pv = yypv; + yy_ps = yyps; + yy_state = yystate; + skip_init: + yynerrs++; + /* FALLTHRU */ + case 1: + case 2: /* incompletely recovered error */ + /* try again... */ + yyerrflag = 3; + /* + ** find state where "error" is a legal + ** shift action + */ + while ( yy_ps >= yys ) + { + yy_n = yypact[ *yy_ps ] + YYERRCODE; + if ( yy_n >= 0 && yy_n < YYLAST && + yychk[yyact[yy_n]] == YYERRCODE) { + /* + ** simulate shift of "error" + */ + yy_state = yyact[ yy_n ]; + goto yy_stack; + } + /* + ** current state has no shift on + ** "error", pop stack + ** NOTE(review): when yy_ps == yys the + ** debug printf below reads yy_ps[-1], + ** one element before yys - confirm + ** this is harmless. + */ +#if YYDEBUG +# define _POP_ "Error recovery pops state %d, uncovers state %d\n" + if ( yydebug ) + printf( _POP_, *yy_ps, + yy_ps[-1] ); +# undef _POP_ +#endif + yy_ps--; + yy_pv--; + } + /* + ** there is no state on stack with "error" as + ** a valid shift. give up. + */ + YYABORT; + case 3: /* no shift yet; eat a token */ +#if YYDEBUG + /* + ** if debugging, look up token in list of + ** pairs. 0 and negative shouldn't occur, + ** but since timing doesn't matter when + ** debugging, it doesn't hurt to leave the + ** tests here. + */ + if ( yydebug ) + { + register int yy_i; + + printf( "Error recovery discards " ); + if ( yychar == 0 ) + printf( "token end-of-file\n" ); + else if ( yychar < 0 ) + printf( "token -none-\n" ); + else + { + for ( yy_i = 0; + yytoks[yy_i].t_val >= 0; + yy_i++ ) + { + if ( yytoks[yy_i].t_val + == yychar ) + { + break; + } + } + printf( "token %s\n", + yytoks[yy_i].t_name ); + } + } +#endif /* YYDEBUG */ + if ( yychar == 0 ) /* reached EOF. quit */ + YYABORT; + yychar = -1; + goto yy_newstate; + } + }/* end if ( yy_n == 0 ) */ + /* + ** reduction by production yy_n + ** put stack tops, etc. 
so things are right after the switch + */ +#if YYDEBUG + /* + ** if debugging, print the string that is the user's + ** specification of the reduction which is just about + ** to be done. + */ + if ( yydebug ) + printf( "Reduce by (%d) \"%s\"\n", + yy_n, yyreds[ yy_n ] ); +#endif + yytmp = yy_n; /* value to switch over */ + yypvt = yy_pv; /* $vars top of value stack */ + /* + ** Look in goto table for next state + ** Sorry about using yy_state here as temporary + ** register variable, but why not, if it works... + ** If yyr2[ yy_n ] doesn't have the low order bit + ** set, then there is no action to be done for + ** this reduction. So, no saving & unsaving of + ** registers done. The only difference between the + ** code just after the if and the body of the if is + ** the goto yy_stack in the body. This way the test + ** can be made before the choice of what to do is needed. + */ + { + /* length of production doubled with extra bit */ + register int yy_len = yyr2[ yy_n ]; + + if ( !( yy_len & 01 ) ) + { + yy_len >>= 1; + yyval = ( yy_pv -= yy_len )[1]; /* $$ = $1 */ + yy_state = yypgo[ yy_n = yyr1[ yy_n ] ] + + *( yy_ps -= yy_len ) + 1; + if ( yy_state >= YYLAST || + yychk[ yy_state = + yyact[ yy_state ] ] != -yy_n ) + { + yy_state = yyact[ yypgo[ yy_n ] ]; + } + goto yy_stack; + } + yy_len >>= 1; + yyval = ( yy_pv -= yy_len )[1]; /* $$ = $1 */ + yy_state = yypgo[ yy_n = yyr1[ yy_n ] ] + + *( yy_ps -= yy_len ) + 1; + if ( yy_state >= YYLAST || + yychk[ yy_state = yyact[ yy_state ] ] != -yy_n ) + { + yy_state = yyact[ yypgo[ yy_n ] ]; + } + } + /* save until reenter driver code */ + yystate = yy_state; + yyps = yy_ps; + yypv = yy_pv; + } + /* + ** code supplied by user is placed in this switch + ** (yytmp holds the production number saved above; $A is + ** presumably the marker that yacc replaces with the actions) + */ + switch( yytmp ) + { + $A + } + goto yystack; /* reset registers in driver code */ +} +